def test_with_multiple_output_formats(self):
    """Simple muscle call with multiple output formats.

    Runs MUSCLE on ``Fasta/f002`` requesting Clustal output on stdout plus
    an HTML file and a strict-Clustal file, then checks every output against
    the records parsed from the input.
    """
    input_file = "Fasta/f002"
    output_html = "temp_f002.html"
    output_clwstrict = "temp_f002.clw"
    self.assertTrue(os.path.isfile(input_file))
    records = list(SeqIO.parse(input_file, "fasta"))
    records.sort(key=lambda rec: rec.id)
    # Prepare the command... use Clustal output (with a MUSCLE header)
    cmdline = MuscleCommandline(muscle_exe, input=input_file, clw=True,
                                htmlout=output_html,
                                clwstrictout=output_clwstrict)
    self.assertEqual(
        str(cmdline).rstrip(),
        muscle_exe + " -in Fasta/f002 -clw -htmlout temp_f002.html"
        + " -clwstrictout temp_f002.clw")
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(str(cmdline),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    # Clustalw on stdout:
    align = AlignIO.read(child.stdout, "clustal")
    align.sort()
    # Didn't use -quiet so there should be progress reports on stderr,
    self.assertTrue(child.stderr.read().strip().startswith("MUSCLE"))
    return_code = child.wait()
    self.assertEqual(return_code, 0)
    self.assertEqual(len(records), len(align))
    for old, new in zip(records, align):
        self.assertEqual(old.id, new.id)
    child.stdout.close()
    child.stderr.close()
    del child
    # Default text mode instead of "rU": the "U" flag is rejected by
    # Python 3.11+, and the checks below strip whitespace anyway.
    with open(output_html) as handle:
        html = handle.read().strip().upper()
    self.assertTrue(html.startswith("<HTML"))
    self.assertTrue(html.endswith("</HTML>"))
    # ClustalW strict:
    align = AlignIO.read(output_clwstrict, "clustal")
    align.sort()
    self.assertEqual(len(records), len(align))
    for old, new in zip(records, align):
        self.assertEqual(old.id, new.id)
    os.remove(output_html)
    os.remove(output_clwstrict)
def muscle_msa(config, input_sequence_file, output_alignment_file):
    """Run MUSCLE through Biopython's wrapper and return the parsed alignment.

    The binary path is read from ``config['ALIGN']['MUSCLE_BIN']``. MUSCLE
    writes to ``output_alignment_file``, which is then read back as FASTA.

    Raises:
        SystemExit: if the configured binary path does not exist (a critical
            message is logged first).
    """
    binary_path = config['ALIGN']['MUSCLE_BIN']
    aligner = MuscleCommandline(binary_path,
                                input=input_sequence_file,
                                out=output_alignment_file)
    if os.path.exists(binary_path):
        aligner()
        return AlignIO.read(output_alignment_file, "fasta")
    logger.critical(
        "The path defined for the MUSCLE binary is not correct. Check the configuration file!"
    )
    raise SystemExit
def multialign_genomic_templates(fastafile):
    """Align the sequences in ``fastafile`` with MUSCLE and return the result.

    MUSCLE's standard output is captured and parsed as a FASTA alignment.
    """
    from Bio import AlignIO
    from Bio.Align.Applications import MuscleCommandline
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    aligned_text, _ = MuscleCommandline(input=fastafile)()
    return AlignIO.read(StringIO(aligned_text), "fasta")
def align(self):
    """Align ``self.pair_pep_file`` with the tool named by ``self.align_software``.

    With 'mafft' the captured stdout is parsed and rewritten as FASTA to
    ``self.prot_align_file``. With 'muscle' the tool writes that file
    itself (strict Clustal output is requested). Any other value does nothing.
    """
    tool = self.align_software
    if tool == 'mafft':
        run_mafft = MafftCommandline(cmd=self.mafft_path,
                                     input=self.pair_pep_file,
                                     auto=True)
        fasta_text, _ = run_mafft()
        parsed = AlignIO.read(StringIO(fasta_text), "fasta")
        AlignIO.write(parsed, self.prot_align_file, "fasta")
    elif tool == 'muscle':
        run_muscle = MuscleCommandline(cmd=self.muscle_path,
                                       input=self.pair_pep_file,
                                       out=self.prot_align_file,
                                       seqtype="protein",
                                       clwstrict=True)
        run_muscle()
def GenerateTree(matrix_name, multifasta_filename, show_pdf):
    """Align with MUSCLE, build a tree with megacc and append it to the results file.

    Args:
        matrix_name: passed to MUSCLE as ``matrix`` and used as the stem of
            the alignment file (``<matrix_name>.fasta``) and of the megacc
            output under ``output/``.
        multifasta_filename: FASTA file given to MUSCLE as input.
        show_pdf: forwarded to ``Phylo.draw`` as ``do_show``.

    Returns:
        True once the tree has been written.
    """
    # The context manager guarantees the results file is closed even when
    # MUSCLE, megacc or the tree parser fails part-way through.
    with open("results/tress.txt", "a") as results:
        cline = MuscleCommandline(input=multifasta_filename,
                                  out=matrix_name + ".fasta",
                                  matrix=matrix_name)
        cline()
        # NOTE(review): the command is built by string concatenation and run
        # through the shell; inputs are assumed to be trusted.
        os.system(MEGA_location + "megacc True -d " + matrix_name +
                  ".fasta -o output/" + matrix_name + " -a " + method)
        tree = Phylo.read("output/" + matrix_name + "_consensus.nwk", "newick")
        results.write("KMAT Tree:\n")
        Phylo.draw_ascii(tree, file=results)
    if GeneratePDF:
        Phylo.draw(tree, do_show=show_pdf)
    return True
def test_Muscle_profile_simple(self):
    """Simple round-trip through app doing a profile alignment"""
    cmdline = MuscleCommandline(muscle_exe)
    settings = (
        ("out", self.outfile3),
        ("profile", True),
        ("in1", self.infile2),
        ("in2", self.infile3),
    )
    for option, value in settings:
        cmdline.set_parameter(option, value)
    expected = (muscle_exe + " -out Fasta/temp_align_out3.fa"
                + " -profile -in1 Fasta/fa01 -in2 Fasta/f001")
    self.assertEqual(str(cmdline), expected)
    # The repr must round-trip to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    output, error = cmdline()
    # Output goes to the file, so stdout is empty; the banner is on stderr.
    self.assertEqual(output, "")
    self.assertTrue("ERROR" not in error)
    self.assertTrue(error.strip().startswith("MUSCLE"), output)
def run_align(seqs: dict, in_path: str, out_path: str, muscle_path=muscle_path):
    """Write ``seqs`` to ``in_path``, align with MUSCLE and load the result.

    Args:
        seqs: records to align. A dict is written via its values
            (presumably id -> SeqRecord; verify against callers); any other
            iterable is passed to ``SeqIO.write`` unchanged.
        in_path: FASTA file created for MUSCLE's input.
        out_path: FASTA file MUSCLE writes the alignment to.
        muscle_path: MUSCLE executable.

    Returns:
        The aligned records from ``out_path`` as built by ``SeqIO.to_dict``.
    """
    # SeqIO.write iterates over what it is given; iterating a dict yields
    # its keys rather than the records, so hand it the values instead.
    records = seqs.values() if isinstance(seqs, dict) else seqs
    with open(in_path, 'w') as f:
        SeqIO.write(records, f, 'fasta')
    muscle_cline = MuscleCommandline(muscle_path, input=in_path, out=out_path)
    muscle_cline()
    return SeqIO.to_dict(SeqIO.parse(out_path, 'fasta'))
def test_long(self):
    """Simple muscle call using long file."""
    # Build a larger input file from the first 40 records of another example
    temp_large_fasta_file = "temp_cw_prot.fasta"
    records = list(SeqIO.parse("NBRF/Cw_prot.pir", "pir"))[:40]
    SeqIO.write(records, temp_large_fasta_file, "fasta")
    # Prepare the command...
    cmdline = MuscleCommandline(muscle_exe)
    options = (
        ("in", temp_large_fasta_file),
        # Fast options
        ("maxiters", 1),
        ("diags", True),  # Default None treated as False!
        # Clustal output
        ("clwstrict", True),  # Default None treated as False!
        # Shouldn't be needed, but make sure it is accepted
        ("maxhours", 0.1),
        # No progress reports to stderr
        ("quiet", True),  # Default None treated as False!
    )
    for option, value in options:
        cmdline.set_parameter(option, value)
    expected = (
        _escape_filename(muscle_exe)
        + " -in temp_cw_prot.fasta -diags -maxhours 0.1"
        + " -maxiters 1 -clwstrict -quiet"
    )
    self.assertEqual(str(cmdline).rstrip(), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(
        str(cmdline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    )
    align = AlignIO.read(child.stdout, "clustal")
    align.sort()
    records.sort(key=lambda rec: rec.id)  # noqa: E731
    self.assertEqual(len(records), len(align))
    for original, aligned in zip(records, align):
        self.assertEqual(original.id, aligned.id)
        # Removing the gaps must give back the input sequence.
        self.assertEqual(str(aligned.seq).replace("-", ""), str(original.seq))
    # See if quiet worked:
    self.assertEqual("", child.stderr.read().strip())
    return_code = child.wait()
    self.assertEqual(return_code, 0)
    child.stdout.close()
    child.stderr.close()
    del child
    os.remove(temp_large_fasta_file)
def align_sequences(muscle_exe, seq1, seq2, seq1_id=None, seq2_id=None,
                    temp_input_file=None, temp_output_file=None):
    """
    Align two protein sequences using Muscle.

    Args:
        muscle_exe (str): path to muscle tool
        seq1 (str): protein sequence 1
        seq2 (str): protein sequence 2
        seq1_id (str): if set, the id for sequence 1; otherwise a random id
            ending in "_1" is generated
        seq2_id (str): if set, the id for sequence 2; otherwise it is
            seq1_id without its last two characters, followed by "_2"
        temp_input_file (str): if set, the path to the alignment input file
        temp_output_file (str): if set, the path to the alignment output file

    Returns:
        tuple: (temp_input_file, temp_output_file), the paths actually used
    """
    # working directory for the alignment files
    temp_dir = "temp_alignment"
    gen.create_output_directories(temp_dir)

    # one random tag shared by the default input and output file names
    random_alignment = random.random()
    temp_input_file = temp_input_file or \
        "{0}/protein_alignment_input_{1}.fasta".format(temp_dir, random_alignment)
    temp_output_file = temp_output_file or \
        "{0}/protein_alignment_output_{1}.fasta".format(temp_dir, random_alignment)

    # fall back to generated ids when none were supplied
    seq1_id = seq1_id or "seq_id_{0}_1".format(random.random())
    seq2_id = seq2_id or "{0}_2".format(seq1_id[:-2])

    # write both sequences as a two-record fasta file
    fasta_text = ">{0}\n{1}\n>{2}\n{3}\n".format(seq1_id, seq1, seq2_id, seq2)
    with open(temp_input_file, "w") as temp_file:
        temp_file.write(fasta_text)

    # build the muscle command, then run it
    run_muscle = MuscleCommandline(muscle_exe,
                                   input=temp_input_file,
                                   out=temp_output_file)
    run_muscle()
    return temp_input_file, temp_output_file
def muscle_alignment(seqs):
    """Align sequences with muscle.

    The records are written to ``temp.fa`` in the current directory, MUSCLE
    writes its result to ``temp.txt``, and that file is parsed as FASTA.

    Returns:
        The parsed alignment, or None when running MUSCLE fails (a hint is
        printed in that case).
    """
    from Bio import SeqIO, AlignIO
    from Bio.Align.Applications import MuscleCommandline
    filename = 'temp.fa'
    SeqIO.write(seqs, filename, "fasta")
    name = os.path.splitext(filename)[0]
    cline = MuscleCommandline(input=filename, out=name + '.txt')
    try:
        cline()
    except Exception:
        # Best effort by design, but a bare ``except`` would also swallow
        # KeyboardInterrupt and SystemExit.
        print('muscle not installed?')
        return
    return AlignIO.read(name + '.txt', 'fasta')
def raw_sequence(self):
    """Align ``self.data['sequences']`` with MUSCLE and record the consensus.

    Adds the gap consensus (threshold 0.55, ambiguous character 'N') and its
    length to the results via ``add_result``.

    Returns:
        ``self.results``.
    """
    filename = create_seq_file(self.data['sequences'])
    try:
        muscle = MuscleCommandline(input=filename)
        stdout, stderr = muscle()
    finally:
        # The alignment is captured from stdout, so the input file can go
        # as soon as MUSCLE returns, including when it fails.
        remove_temp_file(filename)
    align = AlignIO.read(StringIO(stdout), "fasta")
    summary_align = AlignInfo.SummaryInfo(align)
    consensus = summary_align.gap_consensus(threshold=0.55, ambiguous='N')
    add_result(self, "Consenus sequence", str(consensus))
    add_result(self, "Sequence length", len(consensus))
    return self.results
def alignEm(refSeq, record):
    """Align two records with MUSCLE and return their Levenshtein ratio.

    The records are piped to ``../muscle`` as FASTA on stdin and the
    alignment is read back from stdout.

    Returns:
        ``Levenshtein.ratio`` of the two aligned sequences, or 0 when
        anything goes wrong (best effort, as before).
    """
    try:
        muscle_cline = MuscleCommandline("../muscle", maxiters=1, diags=True)
        # Text-mode pipes: SeqIO/AlignIO exchange str, which binary pipes
        # reject on Python 3 (that failure used to be silently turned into 0).
        child = subprocess.Popen(str(muscle_cline),
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True,
                                 shell=True)
        try:
            SeqIO.write([refSeq, record], child.stdin, "fasta")
            # Closing stdin signals end of input to MUSCLE.
            child.stdin.close()
            align = AlignIO.read(child.stdout, "fasta")
        finally:
            # Release the pipes and reap the child on every path.
            for pipe in (child.stdin, child.stdout, child.stderr):
                pipe.close()
            child.wait()
        return Levenshtein.ratio(str(align[0].seq), str(align[1].seq))
    except Exception:
        # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit.
        return 0
def file_seq(self):
    """Return the MUSCLE gap consensus of ``self.data['sequences']`` as FASTA text.

    The consensus threshold is read from ``self.data['threshold']`` and the
    ambiguous character is 'N'.

    Returns:
        A string with a header line giving the consensus length, followed
        by the consensus sequence.
    """
    # Parse the threshold first so a bad value fails before any temporary
    # file has been created.
    threshold = float(self.data.get('threshold'))
    filename = create_seq_file(self.data['sequences'])
    try:
        muscle = MuscleCommandline(input=filename)
        stdout, stderr = muscle()
    finally:
        # stdout already holds the alignment; remove the input file even
        # when MUSCLE fails.
        remove_temp_file(filename)
    align = AlignIO.read(StringIO(stdout), "fasta")
    summary_align = AlignInfo.SummaryInfo(align)
    consensus = summary_align.gap_consensus(threshold=threshold, ambiguous='N')
    return f'>consensus sequence {len(consensus)} bp\n' + str(consensus)
def muscle_profile_align(fa1, fa2):
    '''
    Profile-align two fasta files with muscle.

    fa1, fa2: filenames of the fastas to profile-align. Both must exist on
    disk when the command is called.

    Returns a MultipleSeqAlignment object parsed from muscle's stdout.
    '''
    run_muscle = MuscleCommandline(in1=fa1, in2=fa2, profile=True)
    aligned_fasta = run_muscle()[0]
    return AlignIO.read(StringIO(aligned_fasta), format="fasta")
def muscle_alignment(path):
    """Performs MUSCLE alignment using the command line tool and writes to output file.

    Every entry in ``path`` whose name contains "fasta" is aligned. The
    output is written to the current working directory under the same file
    name, then parsed once as FASTA; nothing is returned.

    CAVE: filenames can not include special characters such as (*, /, &)
    """
    for files in os.listdir(path):
        if "fasta" not in files:
            continue
        file_path = path + "/{0}".format(files)
        output_file = str(files)
        muscle_cline = MuscleCommandline(input=file_path, out=output_file)
        child = subprocess.Popen(str(muscle_cline),
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True,
                                 shell=True)
        # communicate() drains both pipes while waiting; a plain wait() can
        # deadlock once the child fills a pipe buffer with progress output.
        child.communicate()
        with open(output_file) as align_handle:
            AlignIO.read(align_handle, "fasta")
def test_Muscle_profile_with_options(self): """Profile alignment, and switch and valued options""" #Using some keyword arguments, note -stable isn't supported in v3.8 cmdline = MuscleCommandline(muscle_exe, out=self.outfile4, in1=self.infile2, in2=self.infile3, profile=True, stable=True, cluster1="neighborjoining") self.assertEqual( str(cmdline), muscle_exe + " -out Fasta/temp_align_out4.fa" + " -profile -in1 Fasta/fa01 -in2 Fasta/f001" + " -cluster1 neighborjoining -stable") self.assertEqual(str(eval(repr(cmdline))), str(cmdline)) """
def codon_align(self, alignment_tool="mafft", prune=True,
                discard_premature_stops=False):
    '''
    takes a nucleotide alignment, translates it, aligns the amino acids, pads the gaps
    note that this suppresses any compensated frameshift mutations

    Parameters:
    - alignment_tool: ['mafft', 'muscle'] the commandline tool to use
    - prune: when False, sequences are kept even if they contain
      premature stops
    - discard_premature_stops: when True (and prune is not False),
      sequences with a '*' before the last residue are skipped

    The result is stored in self.aln. For an unsupported alignment_tool a
    message is printed and self.aln is left untouched.
    '''
    cwd = os.getcwd()
    make_dir(self.run_dir)
    os.chdir(self.run_dir)
    try:
        # translate
        aa_seqs = {}
        for seq in self.seqs.values():
            tempseq = seq.seq.translate(table="Bacterial")
            # use only sequences that translate without trouble
            if (not discard_premature_stops
                    or '*' not in str(tempseq)[:-1]
                    or prune == False):  # noqa: E712 - original comparison kept
                aa_seqs[seq.id] = SeqRecord(tempseq, id=seq.id)
            else:
                print("%s has premature stops, discarding" % seq.id)

        tmpfname = 'temp_in.fasta'
        SeqIO.write(aa_seqs.values(), tmpfname, 'fasta')

        if alignment_tool == 'mafft':
            os.system(
                'mafft --reorder --amino temp_in.fasta 1> temp_out.fasta')
            aln_aa = AlignIO.read('temp_out.fasta', "fasta")
        elif alignment_tool == 'muscle':
            from Bio.Align.Applications import MuscleCommandline
            aligned_name = tmpfname[:-5] + 'aligned.fasta'
            cline = MuscleCommandline(input=tmpfname, out=aligned_name)
            cline()
            aln_aa = AlignIO.read(aligned_name, "fasta")
        else:
            # Previously execution fell through and failed with a NameError
            # on the undefined alignment; stop here instead.
            print('Alignment tool not supported:' + alignment_tool)
            return

        # generate nucleotide alignment
        self.aln = pad_nucleotide_sequences(aln_aa, self.seqs)
    finally:
        # Always restore the caller's working directory and drop the
        # scratch directory, even when a step above raises.
        os.chdir(cwd)
        remove_dir(self.run_dir)
def get_multiple_alignment(seq_file, alignment_file=None, verbose=9):
    """Run MUSCLE on ``seq_file`` and return the path of the alignment file.

    Args:
        seq_file: FASTA file to align.
        alignment_file: where MUSCLE writes its output. When None, a
            temporary file is created; it is not deleted here, so the
            caller owns it.
        verbose: values above 6 print the command line before running it.

    Returns:
        The output file path (the alignment itself is not parsed here).
    """
    muscle_exe = 'muscle'
    if alignment_file is None:
        # Only the name is needed. Closing the handle avoids leaking a
        # descriptor and lets MUSCLE open the file on platforms that
        # forbid opening a file twice.
        with tempfile.NamedTemporaryFile(delete=False) as temp_handle:
            out_file = temp_handle.name
    else:
        out_file = alignment_file
    muscle_cline = MuscleCommandline(muscle_exe, input=seq_file, out=out_file)
    if verbose > 6:
        print(" muscle command line:")
        print(muscle_cline)
    muscle_cline()
    return out_file
def test_Muscle_with_options(self):
    """Round-trip through app with a switch and valued option"""
    cmdline = MuscleCommandline(muscle_exe)
    # "input" is an alias for "in"
    for option, value in (("input", self.infile1), ("out", self.outfile2)):
        cmdline.set_parameter(option, value)
    # Options can also be set through properties:
    cmdline.objscore = "sp"
    cmdline.noanchors = True
    expected = (muscle_exe + " -in Fasta/f002"
                + " -out Fasta/temp_align_out2.fa"
                + " -objscore sp -noanchors")
    self.assertEqual(str(cmdline), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    output, error = cmdline()
    # The alignment goes to the output file, so stdout stays empty.
    self.assertEqual(output, "")
    self.assertTrue("ERROR" not in error)
    self.assertTrue(error.strip().startswith("MUSCLE"), output)
def algo_msa(msa_type: str, seq_id: List[int], consensus: bool = None):
    """Run a multiple sequence alignment over stored virus sequences.

    Args:
        msa_type: "muscle" or "clustalo"; any other value selects the
            Clustal Omega + mview HTML rendering.
        seq_id: ids of the ``Virus`` rows to align, at most 10. Sequences
            are written in this order.
        consensus: only used by the mview branch; truthy turns the
            consensus line on.

    Returns:
        An error message string when more than 10 ids are given. Otherwise
        whatever the selected command returns: the command-line wrapper's
        result for "muscle"/"clustalo", or ``runCommand``'s result for mview.
    """
    if len(seq_id) > 10:
        return "Cannot process more than 10 sequences for MSA. Operation aborted."

    result = Virus.query.with_entities("id", "fasta").filter(Virus.id.in_(seq_id))
    result_dict = {r[0]: r[1] for r in result}

    fasta_file = "tmp/%s" % str(uuid.uuid4())
    clustal_file = None
    try:
        with open(fasta_file, "w") as fasta:
            # Ensure ordering of sequences based on input
            for i in seq_id:
                fasta.write(result_dict[i] + "\n\n")

        if msa_type == "muscle":
            msa_command = MuscleCommandline("muscle", input=fasta_file,
                                            html=True, quiet=True)
            ret = msa_command()
        elif msa_type == "clustalo":
            msa_command = ClustalOmegaCommandline(infile=fasta_file)
            ret = msa_command()
        else:
            # if msa_type == "mview":
            clustal_file = "tmp/%s" % str(uuid.uuid4())
            msa_command = ClustalOmegaCommandline(infile=fasta_file,
                                                  outfile=clustal_file)
            msa_command()
            con = "on" if consensus else "off"
            ret = runCommand([
                "mview", "--css", "on", "--pcid", "aligned", "--ruler", "on",
                "--width", "80", "-coloring", "mismatch", "-colormap", "pink",
                "-consensus", con, "-con_threshold", "100", "-html", "head",
                "-in", "fasta", clustal_file
            ])
        return ret
    finally:
        # Remove the temporary files on every path, including failures of
        # the external tools; skip files that were never created.
        for temp_path in (clustal_file, fasta_file):
            if temp_path is not None and os.path.exists(temp_path):
                os.remove(temp_path)
def align_fasta(infname, outfname, debug=False):
    """
    Generate an alignment for the given fasta file, then trim it with R.

    Args:
        infname (str): Path to fasta to be aligned.
        outfname (str): Path to the output fasta; the trim script is given
            it as both of its arguments.
        debug (bool): Run both subprocesses with
            subprocess.CREATE_NEW_CONSOLE.

    Raises:
        ChildProcessError: If the output file is empty after both steps.
        RScriptFailedError: If the R trim script returns non-zero.
    """
    binaries = {
        "Windows": "niclassify/bin/muscle3.8.31_i86win32.exe",
        "Linux": "niclassify/bin/muscle3.8.31_i86linux64",
        "Darwin": "niclassify/bin/muscle3.8.31_i86darwin64"
    }
    muscle_exec = binaries[PLATFORM]

    alignment_call = MuscleCommandline(
        os.path.realpath(os.path.join(MAIN_PATH, muscle_exec)),
        input=os.path.realpath(infname),
        out=os.path.realpath(outfname))
    command_text = alignment_call.__str__()
    print(command_text)

    # the console flag is only looked up (and passed) in debug mode
    console_opts = (
        {"creationflags": subprocess.CREATE_NEW_CONSOLE} if debug else {})

    subprocess.run(command_text, shell=True, **console_opts)

    r_script = os.path.realpath(
        os.path.join(MAIN_PATH, "niclassify/core/scripts/trim_alignment.R"))
    trim_call = [R_LOC, r_script, outfname, outfname]
    proc = subprocess.run(trim_call, env=os.environ.copy(), **console_opts)

    if os.stat(outfname).st_size == 0:
        raise ChildProcessError("Sequence Alignment Failed")
    if proc.returncode != 0:
        raise RScriptFailedError("R TrimAlignment failed")
def _align_muscle(input_file,output_file,**kwargs):
    """
    Run muscle.

    input_file: fasta file passed to muscle as input
    output_file: file muscle writes the alignment to
    kwargs: extra options forwarded to MuscleCommandline

    Raises RuntimeError when the muscle executable cannot be found or when
    it exits with a non-zero return code.
    """
    import os
    import shlex

    cmd = MuscleCommandline(input=input_file, out=output_file,**kwargs)
    cmd_text = str(cmd)

    # A plain str.split() tears apart quoted arguments such as paths that
    # contain spaces. shlex honours the quoting on POSIX; on Windows the
    # command string is handed over unchanged, which subprocess accepts.
    cmd_args = cmd_text if os.name == "nt" else shlex.split(cmd_text)

    # Run muscle
    try:
        output = subprocess.run(args=cmd_args)
    except FileNotFoundError:
        err = "muscle does not appear to be in your path\n"
        raise RuntimeError(err)

    # Make sure it returned successfully
    if output.returncode != 0:
        err = "muscle failed\n"
        raise RuntimeError(err)
def muscle_aln(): mergedaln = {} # Reuse alignment if len(glob.glob("coregenes/*.fasta")) > 0: print "Running muscle on extracted sequences..." coregenes = glob.glob("coregenes/*.fasta") for gene in coregenes: print '\r{0:.1%} completed'.format( float(coregenes.index(gene)) / len(coregenes)), muscle_cline = MuscleCommandline(muscle_exe, input=gene, maxiters=1) stdout, stderr = muscle_cline() alignment = AlignIO.read(StringIO(stdout), 'fasta') AlignIO.write(alignment, open(gene + ".aln", "w"), "fasta") # ref_gen_len = len([x for x in list(alignment) if x.id == ref_gen][0].seq) for seq in alignment: if seq.id not in mergedaln: mergedaln[seq.id] = seq else: mergedaln[seq.id] += seq missing_genome = list( set([ os.path.split(genome)[1].split('.')[0] for genome in genomes ]) - set([seq.id for seq in alignment])) for genome in missing_genome: if genome not in mergedaln: mergedaln[genome] = SeqRecord(Seq( "-" * alignment.get_alignment_length(), Bio.Alphabet.SingleLetterAlphabet()), id=genome, name=genome, description=genome) else: mergedaln[genome] += "-" * alignment.get_alignment_length() pickle.dump(mergedaln, open("mergedaln", "wb")) SeqIO.write(mergedaln.values(), "muscleout.aln", "fasta") trimal_cline = "%s -in muscleout.aln -out trimmed_muscleout.aln -gappyout" % ( trimal_exe) os.system(trimal_cline) else: print "No sequences to align" exit(0)
def _ungapped_sequence(seq):
    """Return seq (a string or SeqRecord-like object) as a gap-free string, or exit."""
    try:
        return re.sub("-", "", seq)
    except TypeError:
        # not a string, probably a SeqRecord
        try:
            return re.sub("-", "", str(seq.seq))
        except AttributeError:
            # give up
            sys.exit(
                "quickAlign() requires inputs to be either strings or SeqRecord objects"
            )


def quickAlign(refseq, testseq, maxiters=None, diags=None, gapopen=None):
    """Align two sequences with MUSCLE via stdin and return them by id.

    Args:
        refseq, testseq: strings or objects with a ``.seq`` attribute;
            existing gap characters are removed before aligning.
        maxiters, diags, gapopen: optional MUSCLE settings, applied only
            when not None.

    Returns:
        dict with the aligned sequences as strings, keyed by record id
        (the input is submitted under the ids "ref" and "test").

    Exits the interpreter (``sys.exit``) when an input is neither a string
    nor has a ``.seq`` attribute.
    """
    # sanity check
    refseq = _ungapped_sequence(refseq)
    testseq = _ungapped_sequence(testseq)

    data = ">ref\n%s\n>test\n%s\n" % (refseq, testseq)

    muscle_cline = MuscleCommandline(cmd=muscle, quiet=True)
    if maxiters is not None:
        muscle_cline.maxiters = maxiters
    if diags is not None:
        # was ``diag``, an undefined name that raised NameError
        muscle_cline.diags = diags
    if gapopen is not None:
        muscle_cline.gapopen = gapopen

    stdout, stderr = muscle_cline(stdin=data)

    aligned = dict()
    for p in SeqIO.parse(StringIO(stdout), "fasta"):
        aligned[p.id] = str(p.seq)
    return aligned
def align_prot_objs(prots, grpname):
    """Align protein records with MUSCLE and return the aligned sequences.

    The records are written to "<workdir>/<grpname>.prot.fasta", aligned
    into "<workdir>/<grpname>.prot.align.fasta" and read back, where
    workdir is filehash['ALL']['workdir']['NA']. Both paths and the command
    line are echoed to stderr.

    Returns:
        list of the ``.seq`` of every aligned record, in file order.
    """
    workdir = filehash['ALL']['workdir']['NA']
    alignmentInputFile = workdir + '/' + grpname + '.prot.fasta'
    alignmentOutputFile = workdir + '/' + grpname + '.prot.align.fasta'
    print >> sys.stderr, alignmentInputFile, "\t", alignmentOutputFile
    # Context managers close the handles even if writing or parsing raises.
    with open(alignmentInputFile, "w") as output_handle:
        SeqIO.write(prots, output_handle, "fasta")
    cline = MuscleCommandline(input=alignmentInputFile,
                              out=alignmentOutputFile)
    print >> sys.stderr, cline
    cline()
    with open(alignmentOutputFile, "rU") as handleM:
        alignedProts = [record.seq for record in SeqIO.parse(handleM, "fasta")]
    return alignedProts
def align(self, seq_iter):
    """Align the sequences from ``seq_iter`` with MUSCLE.

    The sequences are written as FASTA to a temporary input file, MUSCLE is
    run with ``self.kwargs`` (its 'input' and 'out' entries are overwritten
    here), and the output is loaded with ``dataio.get_buffered_seq_iter``.
    The executed command string is stored in ``self.cmd``. When
    ``self.out_path`` is set, it is replaced by the result of
    ``functions.get_new_path`` and the output file is moved there.

    Returns whatever ``dataio.get_buffered_seq_iter`` produced.
    """
    with TemporaryFilePath() as in_path:
        with OpenFile(in_path, 'w') as tmp:
            for seq in seq_iter:
                tmp.write(seq.format('fasta'))
        self.kwargs['input'] = in_path
        with TemporaryFilePath() as tmp_out_path:
            self.kwargs['out'] = tmp_out_path
            muscle_command = MuscleCommandline(self.exe, **self.kwargs)
            self.cmd = str(muscle_command)
            _LOG.debug('{0}: Executing command {1!r}'.format( self.name, self.cmd))
            stdout, stderr = muscle_command()
            # NOTE(review): the results are loaded before the file is moved
            # below -- presumably get_buffered_seq_iter reads everything up
            # front; confirm before reordering.
            results = dataio.get_buffered_seq_iter([tmp_out_path], format='fasta')
            if self.out_path:
                self.out_path = functions.get_new_path(self.out_path)
                shutil.move(tmp_out_path, self.out_path)
    return results
def alignSEQ(SEQs, s, n):
    """Align each entry of ``SEQs`` together with the FASTA text ``s``.

    For every key of ``SEQs`` an input file "TMP_<n>.fa" is written (the
    entry as a FASTA record followed by ``s``) and MUSCLE writes
    "aligned_<n>.fa" into ``work_dir``.

    Returns:
        list with one output file name per entry of ``SEQs``.
    """
    # NOTE(review): n never changes inside the loop, so every iteration
    # reuses the same input and output file names -- confirm that
    # overwriting is intended.
    inputName = "TMP_{0:03d}.fa".format(n)
    outfile = inputName.replace("TMP_", "aligned_")
    outName = []
    for seq_name in SEQs:
        with open(inputName, "w") as inFaHandle:
            inFaHandle.write('>' + seq_name + '\n' + SEQs[seq_name] + "\n")
            inFaHandle.write(s)
        run_muscle = MuscleCommandline(muscle,
                                       input=inputName,
                                       out=work_dir + '/' + outfile,
                                       clw=False)
        run_muscle()
        outName.append(outfile)
    return outName
def _align_fasta(self):
    """Run MUSCLE on the instance's FASTA file and return the alignment.

    Returns
    -------
    The alignment parsed (as FASTA) from MUSCLE's standard output.

    Raises
    ------
    OSError
        When the MUSCLE commandline program returns an error; the message
        is the program's stderr.

    Notes
    -----
    The input is ``self.fasta``; ``self.kwargs`` are forwarded to the
    command line wrapper.
    """
    try:
        run_muscle = MuscleCommandline(input=self.fasta, **self.kwargs)
        alignment_text = run_muscle()[0]
        alignment = AlignIO.read(StringIO(alignment_text), "fasta")
    except ApplicationError as failure:
        raise OSError(failure.stderr)
    return alignment
def muscle_aln(seqreclist, **kwargs):
    """Align with muscle.

    The records are written to a uniquely named FASTA file in ``TEMP_DIR``,
    MUSCLE (``MUSCLE_BIN``) is run on it with ``kwargs`` forwarded as
    options, and the alignment is parsed from its standard output.

    Returns:
        The alignment read from MUSCLE's stdout as FASTA.
    """
    fasta_path = TEMP_DIR + "/%s.fasta" % str(uuid.uuid4())
    try:
        with open(fasta_path, "w") as output_handle:
            SeqIO.write(seqreclist, output_handle, "fasta")
        muscle_cline = MuscleCommandline(MUSCLE_BIN, input=fasta_path, **kwargs)
        stdout, stderr = muscle_cline()
    finally:
        # os.remove instead of shelling out to ``rm``: portable, no shell
        # involved, and the temp file is cleaned up when MUSCLE fails too.
        if os.path.exists(fasta_path):
            os.remove(fasta_path)
    return AlignIO.read(StringIO(stdout), "fasta")
def align_muscle(*seqs, **kwargs):
    '''Global alignment of sequences via MUSCLE.

    Parameters:
    - seqs: the sequences to align, either all plain strings (converted to
      records named seq1, seq2, ...) or SeqRecord-like objects
    - sort: keyword flag; when truthy the rows of the result follow the
      order of the input sequences

    Returns the alignment parsed from MUSCLE's stdout, or None when no
    sequences are given.
    '''
    # Nothing to align: return before importing or spawning anything.
    if not seqs:
        return None

    import subprocess as sp
    from Bio import AlignIO, SeqIO
    from Bio.Align.Applications import MuscleCommandline

    # ``basestring`` only exists on Python 2.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Convert to SeqRecord if required
    if isinstance(seqs[0], string_types):
        from Bio.Seq import Seq
        from Bio.SeqRecord import SeqRecord
        from Bio.Alphabet import single_letter_alphabet
        seqs = [SeqRecord(Seq(s, single_letter_alphabet),
                          id='seq'+str(i+1),
                          name='seq'+str(i+1),
                          description='seq'+str(i+1))
                for i, s in enumerate(seqs)]

    muscle_cline = MuscleCommandline(diags=True, quiet=True)
    # Text-mode pipes so SeqIO/AlignIO can exchange str on Python 3 as well.
    child = sp.Popen(str(muscle_cline),
                     stdin=sp.PIPE,
                     stdout=sp.PIPE,
                     stderr=sp.PIPE,
                     universal_newlines=True,
                     shell=True)
    try:
        SeqIO.write(seqs, child.stdin, "fasta")
        # Closing stdin signals end of input to MUSCLE.
        child.stdin.close()
        align = AlignIO.read(child.stdout, "fasta")
    finally:
        # Release the pipes and reap the child on every path.
        child.stdin.close()
        child.stderr.close()
        child.stdout.close()
        child.wait()

    if kwargs.get('sort'):
        from Bio.Align import MultipleSeqAlignment as MSA
        # First row per id, looked up once instead of rescanning the
        # alignment for every input sequence.
        rows_by_id = {}
        for row in align:
            rows_by_id.setdefault(row.id, row)
        align = MSA([rows_by_id[seq.id] for seq in seqs
                     if seq.id in rows_by_id])

    return align