def align_cluster(self, cluster_file):
    """
    Worker function for align_clusters.

    Inputs a FASTA file containing an unaligned sequence cluster.
    Uses MAFFT (with --auto and --adjustdirection) to align the cluster
    and writes MAFFT's stdout to a file under the "alignments" directory.

    cluster_file -- path of the unaligned FASTA file. If it contains a "/",
                    everything before the first "/" is replaced by
                    "alignments"; otherwise "alignments/" is prepended.

    Returns the path of the alignment file. The path is returned even when
    MAFFT failed and the file was therefore not written; in that case an
    error message is printed instead of raising.
    """
    mafft_cline = MafftCommandline(input=cluster_file)
    mafft_cline.set_parameter("--auto", True)
    mafft_cline.set_parameter("--adjustdirection", True)
    color = Color()
    # Echo the exact command line so the user can reproduce the run.
    print(color.red + str(mafft_cline) + color.done)
    sys.stdout.flush()
    if cluster_file.find("/") != -1:
        alignment_file = "alignments" + cluster_file[cluster_file.index("/"):]
    else:
        alignment_file = "alignments/" + cluster_file
    try:
        stdout, stderr = mafft_cline()
        with open(alignment_file, "w") as handle:
            handle.write(stdout)
    except Exception:
        # Deliberately best-effort: report and carry on. Catching Exception
        # (rather than a bare except) lets Ctrl-C and SystemExit propagate.
        print(
            color.red
            + "Error: alignment file not generated. Please check your MAFFT installation."
            + color.done)
    return alignment_file
def executeMafft(mafft_exe, directory='', gap_penalty=10.0):
    """Align every ``.fasta`` file found in ``<directory>sequences/`` with MAFFT.

    For each input ``<name>.fasta`` the MAFFT output (stdout) is written to
    ``<directory>aligned_contigs/<name>_aligned.fasta``, where ``<name>`` is
    the part of the file name before its first ``.``. The output directory
    is created when it does not exist yet.

    mafft_exe   -- path/name of the MAFFT executable; an empty string makes
                   the function write a message to stderr and exit with -1.
    directory   -- base directory holding ``sequences/``; a trailing ``/``
                   is added when missing. Empty means the current directory.
    gap_penalty -- value passed to MAFFT's ``--op`` option.
    """
    import os
    import sys

    # Fail fast, before importing Biopython or touching the file system.
    if len(mafft_exe) == 0:
        sys.stderr.write('Install mafft before execution.')
        sys.exit(-1)

    from Bio.Align.Applications import MafftCommandline

    if len(directory) > 0 and directory[-1] != '/':
        directory += '/'
    after = directory + 'aligned_contigs/'
    if not os.path.exists(after):
        os.mkdir(after)
    seq_dir = directory + 'sequences/'
    for seqfile in os.listdir(seq_dir):
        if not seqfile.endswith('.fasta'):
            continue
        transcript = seqfile[:seqfile.find('.')]
        mafft_cline = MafftCommandline(mafft_exe, input=seq_dir + seqfile)
        mafft_cline.set_parameter('--op', gap_penalty)
        # Run MAFFT first so a failed run does not leave an empty output
        # file behind, then write through a context manager so the handle
        # is always closed.
        stdout, stderr = mafft_cline()
        with open(after + transcript + '_aligned.fasta', 'w') as writefile:
            writefile.write(stdout)
def align_cluster(self, cluster_file):
    """
    Worker function for align_clusters.

    Takes the path of a FASTA file holding one unaligned sequence cluster,
    aligns it with MAFFT (--auto, --adjustdirection) and stores MAFFT's
    stdout under the "alignments" directory.

    Returns the path of the alignment file that was written.
    """
    aligner = MafftCommandline(input=cluster_file)
    for flag in ("--auto", "--adjustdirection"):
        aligner.set_parameter(flag, True)

    palette = Color()
    print(palette.red + str(aligner) + palette.done)
    sys.stdout.flush()

    # Swap whatever precedes the first "/" for "alignments"; a bare file
    # name is simply placed inside "alignments/".
    if "/" in cluster_file:
        first_slash = cluster_file.index("/")
        alignment_file = "alignments" + cluster_file[first_slash:]
    else:
        alignment_file = "alignments/" + cluster_file

    stdout, stderr = aligner()
    with open(alignment_file, "w") as out:
        out.write(stdout)
    return alignment_file
def test_Mafft_with_options(self):
    """Simple round-trip through app with infile and options, result passed to stdout."""
    mafft = MafftCommandline(mafft_exe)
    settings = (
        ("input", self.infile1),
        ("maxiterate", 100),
        ("--localpair", True),
    )
    for option, value in settings:
        mafft.set_parameter(option, value)

    # repr() must round-trip to an equivalent command line object.
    rebuilt = eval(repr(mafft))
    self.assertEqual(str(rebuilt), str(mafft))

    # Run the tool and check the first aligned record and the stderr text.
    out_text, err_text = mafft()
    self.assertTrue(out_text.startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertNotIn("$#=0", err_text)
def test_Mafft_with_options(self):
    """Simple round-trip through app with infile and options.

    Result passed to stdout. Checks that repr() round-trips, that the run
    returns exit code 0, that stdout starts with the first expected record,
    and that the executed command line matches the expected string.
    """
    cmdline = MafftCommandline(mafft_exe)
    cmdline.set_parameter("input", self.infile1)
    cmdline.set_parameter("maxiterate", 100)
    cmdline.set_parameter("--localpair", True)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    # NOTE(review): Application.generic_run looks like a legacy Biopython
    # helper - confirm it exists in the Biopython version being targeted.
    result, stdout, stderr = Application.generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    # assertTrue replaces the deprecated assert_ alias (removed in 3.12).
    self.assertTrue(stdout.read().startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertTrue("$#=0" not in stderr.read())
    self.assertEqual(str(result._cl), mafft_exe
                     + " --localpair --maxiterate 100 Fasta/f002")
def test_Mafft_with_options(self):
    """Simple round-trip through app with infile and options.

    Result passed to stdout. The command line is run through
    subprocess.Popen and its exit code, stdout and stderr are checked.
    """
    cmdline = MafftCommandline(mafft_exe)
    cmdline.set_parameter("input", self.infile1)
    cmdline.set_parameter("maxiterate", 100)
    cmdline.set_parameter("--localpair", True)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(str(cmdline),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             # Text mode so the str comparisons below also
                             # work where pipes would otherwise yield bytes.
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    # communicate() drains both pipes while waiting; calling wait() first
    # can deadlock once the child fills a pipe buffer.
    stdoutdata, stderrdata = child.communicate()
    return_code = child.returncode
    self.assertEqual(return_code, 0,
                     "Got error code %i back from:\n%s"
                     % (return_code, cmdline))
    # assertTrue replaces the deprecated assert_ alias (removed in 3.12).
    self.assertTrue(stdoutdata.startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertTrue("$#=0" not in stderrdata)
    del child
def test_Mafft_with_options(self):
    """Simple round-trip through app with infile and options.

    Result passed to stdout.
    """
    cmdline = MafftCommandline(mafft_exe)
    for option, value in (("input", self.infile1),
                          ("maxiterate", 100),
                          ("--localpair", True)):
        cmdline.set_parameter(option, value)

    # repr() has to reproduce an equivalent command line.
    clone = eval(repr(cmdline))
    self.assertEqual(str(clone), str(cmdline))

    # The shell is used everywhere except on Windows.
    use_shell = sys.platform != "win32"
    child = subprocess.Popen(
        str(cmdline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=use_shell,
    )
    stdoutdata, stderrdata = child.communicate()
    return_code = child.returncode

    failure_note = "Got error code %i back from:\n%s" % (return_code, cmdline)
    self.assertEqual(return_code, 0, failure_note)
    self.assertTrue(stdoutdata.startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertTrue("$#=0" not in stderrdata)
    del child
handle.close() sleep(0.02) SeqIO.write(atpA_records, "atpA_unaligned.fasta", "fasta") for accession in rbcL_accessions: if accession.strip() != '': handle = Entrez.efetch(db='nucleotide', rettype='fasta', retmode='text', id=accession) rbcL_records.append(SeqIO.read(handle, 'fasta')) handle.close() sleep(0.02) SeqIO.write(rbcL_records, "rbcL_unaligned.fasta", "fasta") print("Aligning atpA with MAFFT...") mafft_cline = MafftCommandline(input="atpA_unaligned.fasta") mafft_cline.set_parameter("--auto", True) mafft_cline.set_parameter("--adjustdirection", True) print(str(mafft_cline)) stdout, stderr = mafft_cline() print("Writing atpA alignment to FASTA file...") with open("atpA_aligned.fasta", "w") as handle: handle.write(stdout) print("Aligning rbcL with MAFFT...") mafft_cline = MafftCommandline(input="rbcL_unaligned.fasta") mafft_cline.set_parameter("--auto", True) mafft_cline.set_parameter("--adjustdirection", True) print(str(mafft_cline)) stdout, stderr = mafft_cline()
def test_Mafft_with_complex_command_line(self):
    """Round-trip with complex command line.

    Sets a mix of switches and valued options, checks that repr()
    round-trips, runs the command and verifies exit code, stdout, stderr
    and the exact command line that was executed.
    """
    cmdline = MafftCommandline(mafft_exe)
    cmdline.set_parameter("input", self.infile1)
    cmdline.set_parameter("--localpair", True)
    cmdline.set_parameter("--weighti", 4.2)
    cmdline.set_parameter("retree", 5)
    cmdline.set_parameter("maxiterate", 200)
    cmdline.set_parameter("--nofft", True)
    cmdline.set_parameter("op", 2.04)
    cmdline.set_parameter("--ep", 0.51)
    cmdline.set_parameter("--lop", 0.233)
    cmdline.set_parameter("lep", 0.2)
    cmdline.set_parameter("--reorder", True)
    cmdline.set_parameter("--treeout", True)
    cmdline.set_parameter("nuc", True)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    # NOTE(review): Application.generic_run looks like a legacy Biopython
    # helper - confirm it exists in the Biopython version being targeted.
    result, stdout, stderr = Application.generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    # assertTrue replaces the deprecated assert_ alias (removed in 3.12).
    self.assertTrue(stdout.read().startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertTrue("$#=0" not in stderr.read())
    self.assertEqual(str(result._cl), mafft_exe
                     + " --localpair --weighti 4.2 --retree 5 "
                     + "--maxiterate 200 --nofft --op 2.04 --ep 0.51"
                     + " --lop 0.233 --lep 0.2 --reorder --treeout"
                     + " --nuc Fasta/f002")
def main():
    """Command-line entry point of matrix_maker.py.

    Steps, in order:
      1. Parse the arguments; print a message and exit with status 0 when
         --email, a readable --species file or a readable --genes file is
         missing.
      2. Read the gene definitions (include / exclude rows) from --genes.
      3. Read known taxids from --taxids when given, then walk the species
         list, looking up missing taxids and writing all of them to
         taxids.csv.
      4. For every gene: download sequences for each taxon, write the
         longest sequence per taxon to "<gene>.fasta" and align that file
         with MAFFT into "aligned_<gene>.fasta" (alignment is skipped with a
         message when MAFFT fails).
      5. Write result.csv with one row per taxon and one accession column
         per gene.
    """
    # parse the command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--email", "-e", help="Email address for NCBI database searches.")
    parser.add_argument("--genes", "-g", help="Text file that contains a list of all gene names.")
    parser.add_argument(
        "--max_seq_length",
        "-m",
        help="Optional. Sets the maximum sequence length to include. Use this to exclude genomes.",
    )
    parser.add_argument(
        "--species", "-s", help="Text file that contains a list of all species binomials and their synonyms."
    )
    parser.add_argument(
        "--taxids",
        "-t",
        help="Optional. Text file that contains a list of all taxids. Use this to avoid repeating the NCBI taxid lookups.",
    )
    args = parser.parse_args()

    print("\n\nmatrix_maker.py\n\n")

    if not args.email:
        print(
            "NCBI requires an email address for database searches. Please use the --email flag to specify an email address.\n"
        )
        sys.exit(0)
    email = args.email

    if not args.species or not os.path.isfile(args.species):
        print("Please specify a valid list of taxa to search for.\n")
        sys.exit(0)

    # -1 is handed to get_longest_seq when no maximum length was requested.
    if args.max_seq_length:
        max_seq_length = int(args.max_seq_length)
    else:
        max_seq_length = -1

    genes = []
    if not args.genes or not os.path.isfile(args.genes):
        print("Please specify a valid list of genes to search for.\n")
        sys.exit(0)

    # read in gene names....
    # format of file:
    # gene_name,include,rbcL,RBCL
    # gene_name,exclude,RRRBCL
    # NOTE(review): "rb" is the Python 2 convention for csv input; Python 3's
    # csv module expects a text-mode file - confirm the target version.
    with open(args.genes, "rb") as csvfile:
        genereader = csv.reader(csvfile, delimiter=",")
        for row in genereader:
            if len(row) < 2:
                # blank or malformed line: nothing to include or exclude
                continue
            if row[1] == "include":
                gene = Gene(row[0])
                for term in row[2:]:
                    if term != "":
                        gene.gene_names.append(term)
                genes.append(gene)
            if row[1] == "exclude":
                # exclusions apply to every already-defined gene of that name
                for gene in genes:
                    if gene.name == row[0]:
                        for term in row[2:]:
                            if term != "":
                                gene.exclusions.append(term)

    # list of all taxon objects
    taxa = []

    # check for taxid
    print("Checking for taxids csv file...")
    if args.taxids and os.path.isfile(args.taxids):
        with open(args.taxids, "rb") as csvfile:
            print("Found taxids csv file, reading taxids...\n")
            taxidsreader = csv.reader(csvfile, delimiter=",")
            for row in taxidsreader:
                if len(row) < 2:
                    continue
                taxa.append(Taxon(row[0], row[1]))
    else:
        print("No taxids csv file found.\n")

    # open species list file, get synonyms and any missing taxids
    with open(args.species) as counting_handle:
        num_lines = sum(1 for line in counting_handle)
    with open(args.species, "rb") as csvfile:
        print("Checking list of species, getting missing taxids from NCBI...")
        with open("taxids.csv", "w") as taxids_file:
            namesreader = csv.reader(csvfile, delimiter=",")
            i = 1
            for row in namesreader:
                # update status
                percent = str(round(100 * i / float(num_lines), 2))
                sys.stdout.write("\r" + "Completed: " + str(i) + "/" + str(num_lines) + " (" + percent + "%)")
                sys.stdout.flush()
                i += 1
                if not row:
                    # blank line in the species list
                    continue
                # check to see if we already have a taxid for this species
                found = False
                for taxon in taxa:
                    if taxon.binomial == row[0]:
                        found = True
                        taxids_file.write(taxon.binomial + "," + taxon.taxid + "\n")
                        # add synonyms
                        for synonym in row[1:]:
                            taxon.synonyms.append(synonym)
                        break
                if not found:
                    # get the taxid from NCBI
                    taxon = Taxon(row[0])
                    taxon.get_taxid(email)
                    # dont overload genbank
                    time.sleep(0.1)
                    taxids_file.write(taxon.binomial + "," + taxon.taxid + "\n")
                    # add synonyms
                    for synonym in row[1:]:
                        taxon.synonyms.append(synonym)
                    taxa.append(taxon)
    print("\nWriting all taxids to file taxids.csv...")

    print("\nDownloading sequences from NCBI...")
    for gene in genes:
        print("\nSearching for gene: " + gene.name)
        num_taxa = len(taxa)
        i = 1
        for taxon in taxa:
            # update status; the total is the number of taxa being searched,
            # so the counter and the percentage agree
            percent = str(round(100 * i / float(num_taxa), 2))
            sys.stdout.write("\r" + "Completed: " + str(i) + "/" + str(num_taxa) + " (" + percent + "%)")
            sys.stdout.flush()
            i += 1
            if taxon.taxid != "not found":
                taxon.get_sequences(email, gene)
                # dont overload genbank
                time.sleep(0.2)

        print("\nGenerating unaligned FASTA file...")
        with open(gene.name + ".fasta", "w") as unaligned_file:
            for taxon in taxa:
                record = taxon.get_longest_seq(gene.name, max_seq_length)
                # identity test: comparing a SeqRecord with != is not
                # supported by Biopython
                if record is not None:
                    # output format: >binomial_accession_description
                    description = taxon.binomial + "_" + record.id + "_" + record.description
                    description = description.replace(" ", "_")
                    unaligned_file.write(">" + description + "\n")
                    unaligned_file.write(str(record.seq) + "\n\n")

        print("Making alignment with MAFFT...")
        try:
            from Bio.Align.Applications import MafftCommandline

            mafft_cline = MafftCommandline(input=gene.name + ".fasta")
            mafft_cline.set_parameter("--auto", True)
            mafft_cline.set_parameter("--adjustdirection", True)
            print(str(mafft_cline))
            stdout, stderr = mafft_cline()
            print("Writing alignment to FASTA file...")
            with open("aligned_" + gene.name + ".fasta", "w") as handle:
                handle.write(stdout)
        except Exception:
            # best-effort step; Ctrl-C and SystemExit still propagate
            print("Problem finding MAFFT, alignment skipped.")

    print("\nGenerating summary results spreadsheet...\n")
    with open("result.csv", "w") as summary:
        header = "taxon," + "".join(gene.name + "," for gene in genes)
        summary.write(header + "\n")
        for taxon in taxa:
            accessions = taxon.binomial + ","
            for gene in genes:
                # each column will be the longest sequences accession
                record = taxon.get_longest_seq(gene.name, max_seq_length)
                if record is not None:
                    accessions += record.id + ","
                else:
                    accessions += ","
            summary.write(accessions + "\n")
    print("Done!\n")
def main():
    """Look up taxids, download their sequences and align them with MAFFT.

    Reads one name per line from the module-level ``taxa_file`` (only the
    first whitespace-separated token of each line is used), resolves each
    name with ``get_taxon_id`` and writes "name<TAB>taxid" lines to
    taxids.txt. All records returned by ``get_sequences`` for every resolved
    taxid are written to two unaligned FASTA files (GenBank-style and
    custom descriptions). Finally the custom-format file is aligned with
    MAFFT into output_aligned.fasta; if that step fails a message is printed
    and the alignment is skipped.
    """
    import time

    print("\n\nmatrix_maker.py\n\n")
    print("Getting all taxid...\n")
    print("Writing taxids to file taxids.txt...\n")
    taxids = []
    with open("taxids.txt", "w") as taxids_file:
        with open(taxa_file) as name_file:
            for line in name_file:
                tokens = line.split()
                if not tokens:
                    # blank line: nothing to look up
                    continue
                name = tokens[0]
                taxid = get_taxon_id(name)
                name_taxid_text = name + "\t" + taxid
                print(name_taxid_text)
                taxids_file.write(name_taxid_text + "\n")
                taxids.append(taxid)
                # dont overload genbank
                time.sleep(0.1)

    print("\nDownloading sequences for each taxid...\n")
    from Bio import SeqIO

    final_records = []
    for taxid in taxids:
        if taxid != "not found":
            # keep all records
            final_records.extend(get_sequences(taxid))
            # dont overload genbank
            time.sleep(0.2)

    print("\nGenerating unaligned FASTA file with GenBank formatted description...\n")
    SeqIO.write(final_records, "output_unaligned_gb_format.fasta", "fasta")

    print("Generating unaligned FASTA file with custom formatted description...\n")
    with open("output_unaligned_custom_format.fasta", "w") as unaligned_file:
        for record in final_records:
            organism = record.annotations["organism"]
            # remove the organism name from the description
            description = record.description.replace(organism + " ", "")
            # custom format for Andrew: >Organism name_accession_description
            description = organism + "_" + record.id + "_" + description
            description = description.replace(" ", "_")
            unaligned_file.write(">" + description + "\n")
            unaligned_file.write(str(record.seq) + "\n")

    print("Making alignment with MAFFT...")
    try:
        from Bio.Align.Applications import MafftCommandline

        mafft_cline = MafftCommandline(input="output_unaligned_custom_format.fasta")
        mafft_cline.set_parameter("--auto", True)
        mafft_cline.set_parameter("--adjustdirection", True)
        print(str(mafft_cline))
        stdout, stderr = mafft_cline()
        print("Writing alignment to FASTA file...\n")
        with open("output_aligned.fasta", "w") as handle:
            handle.write(stdout)
    except Exception:
        # best-effort step; Ctrl-C and SystemExit still propagate
        print("Problem finding MAFFT, alignment skipped.")
    print("Done!\n")
def test_Mafft_with_complex_command_line(self):
    """Round-trip with complex command line."""
    cmdline = MafftCommandline(mafft_exe)
    settings = (
        ("input", self.infile1),
        ("--localpair", True),
        ("--weighti", 4.2),
        ("retree", 5),
        ("maxiterate", 200),
        ("--nofft", True),
        ("op", 2.04),
        ("--ep", 0.51),
        ("--lop", 0.233),
        ("lep", 0.2),
        ("--reorder", True),
        ("--treeout", True),
        ("nuc", True),
    )
    for option, value in settings:
        cmdline.set_parameter(option, value)

    # repr() has to reproduce an equivalent command line.
    clone = eval(repr(cmdline))
    self.assertEqual(str(clone), str(cmdline))

    # The rendered command line is compared against the full expected text.
    expected = mafft_exe + (
        " --localpair --weighti 4.2 --retree 5 "
        "--maxiterate 200 --nofft --op 2.04 --ep 0.51"
        " --lop 0.233 --lep 0.2 --reorder --treeout"
        " --nuc Fasta/f002"
    )
    self.assertEqual(str(cmdline), expected)

    # The shell is used everywhere except on Windows.
    use_shell = sys.platform != "win32"
    child = subprocess.Popen(
        str(cmdline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=use_shell,
    )
    stdoutdata, stderrdata = child.communicate()
    return_code = child.returncode

    failure_note = "Got error code %i back from:\n%s" % (return_code, cmdline)
    self.assertEqual(return_code, 0, failure_note)
    self.assertTrue(stdoutdata.startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertTrue("$#=0" not in stderrdata)
    del child
def main():
    """Command-line entry point of matrix_maker.py.

    Steps, in order:
      1. Parse the arguments; print a message and exit with status 0 when
         --email, a readable --species file or a readable --genes file is
         missing.
      2. Read the gene definitions (include / exclude rows) from --genes.
      3. Read known taxids from --taxids when given, then walk the species
         list, looking up missing taxids and writing all of them to
         taxids.csv.
      4. For every gene: download sequences for each taxon, write the
         longest sequence per taxon to "<gene>.fasta" and align that file
         with MAFFT into "aligned_<gene>.fasta" (alignment is skipped with a
         message when MAFFT fails).
      5. Write result.csv with one row per taxon and one accession column
         per gene.
    """
    # parse the command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--email", "-e", help="Email address for NCBI database searches.")
    parser.add_argument(
        "--genes",
        "-g",
        help="Text file that defines the gene regions of interest using both include and exclude terms."
    )
    parser.add_argument(
        "--max_seq_length",
        "-m",
        help="Optional. Sets the maximum sequence length to include. Use this to exclude genomes."
    )
    parser.add_argument(
        "--species",
        "-s",
        help="Text file that contains a list of all species binomials and their synonyms."
    )
    parser.add_argument(
        "--taxids",
        "-t",
        help="Optional. Text file that contains a list of all taxids. Use this to avoid repeating the NCBI taxid lookups."
    )
    args = parser.parse_args()

    print("\n\nmatrix_maker.py\n\n")

    if not args.email:
        print(
            "NCBI requires an email address for database searches. Please use the --email flag to specify an email address.\n"
        )
        sys.exit(0)
    email = args.email

    if not args.species or not os.path.isfile(args.species):
        print("Please specify a valid list of taxa to search for.\n")
        sys.exit(0)

    # -1 is handed to get_longest_seq when no maximum length was requested.
    if args.max_seq_length:
        max_seq_length = int(args.max_seq_length)
    else:
        max_seq_length = -1

    genes = []
    if not args.genes or not os.path.isfile(args.genes):
        print("Please specify a valid list of genes to search for.\n")
        sys.exit(0)

    # read in gene names....
    # format of file:
    # gene_name,include,rbcL,RBCL
    # gene_name,exclude,RRRBCL
    # NOTE(review): 'rb' is the Python 2 convention for csv input; Python 3's
    # csv module expects a text-mode file - confirm the target version.
    with open(args.genes, 'rb') as csvfile:
        genereader = csv.reader(csvfile, delimiter=",")
        for row in genereader:
            if len(row) < 2:
                # blank or malformed line: nothing to include or exclude
                continue
            if row[1] == "include":
                gene = Gene(row[0])
                for term in row[2:]:
                    if term != "":
                        gene.gene_names.append(term)
                genes.append(gene)
            if row[1] == "exclude":
                # exclusions apply to every already-defined gene of that name
                for gene in genes:
                    if gene.name == row[0]:
                        for term in row[2:]:
                            if term != "":
                                gene.exclusions.append(term)

    # list of all taxon objects
    taxa = []

    # check for taxid
    print("Checking for taxids csv file...")
    if args.taxids and os.path.isfile(args.taxids):
        with open(args.taxids, 'rb') as csvfile:
            print("Found taxids csv file, reading taxids...\n")
            taxidsreader = csv.reader(csvfile, delimiter=",")
            for row in taxidsreader:
                if len(row) < 2:
                    continue
                taxa.append(Taxon(row[0], row[1]))
    else:
        print("No taxids csv file found.\n")

    # open species list file, get synonyms and any missing taxids
    with open(args.species) as counting_handle:
        num_lines = sum(1 for line in counting_handle)
    with open(args.species, 'rb') as csvfile:
        print("Checking list of species, getting missing taxids from NCBI...")
        with open("taxids.csv", "w") as taxids_file:
            namesreader = csv.reader(csvfile, delimiter=",")
            i = 1
            for row in namesreader:
                # update status
                percent = str(round(100 * i / float(num_lines), 2))
                sys.stdout.write('\r' + 'Completed: ' + str(i) + '/' + str(num_lines) + ' (' + percent + '%)')
                sys.stdout.flush()
                i += 1
                if not row:
                    # blank line in the species list
                    continue
                # check to see if we already have a taxid for this species
                found = False
                for taxon in taxa:
                    if taxon.binomial == row[0]:
                        found = True
                        taxids_file.write(taxon.binomial + "," + taxon.taxid + "\n")
                        # add synonyms
                        for synonym in row[1:]:
                            taxon.synonyms.append(synonym)
                        break
                if not found:
                    taxon = Taxon(row[0])
                    # add synonyms
                    for synonym in row[1:]:
                        taxon.synonyms.append(synonym)
                    taxa.append(taxon)
                    # get the taxid from NCBI
                    taxon.get_taxid(email)
                    # dont overload genbank
                    time.sleep(0.1)
                    taxids_file.write(taxon.binomial + "," + taxon.taxid + "\n")
    print("\nWriting all taxids to file taxids.csv...")

    print("\nDownloading sequences from NCBI...")
    for gene in genes:
        print("\nSearching for gene: " + gene.name)
        num_taxa = len(taxa)
        i = 1
        for taxon in taxa:
            # update status; the total is the number of taxa being searched,
            # so the counter and the percentage agree
            percent = str(round(100 * i / float(num_taxa), 2))
            sys.stdout.write('\r' + 'Completed: ' + str(i) + '/' + str(num_taxa) + ' (' + percent + '%)')
            sys.stdout.flush()
            i += 1
            if taxon.taxid != "not found":
                taxon.get_sequences(email, gene)
                # dont overload genbank
                time.sleep(0.2)

        print("\nGenerating unaligned FASTA file...")
        with open(gene.name + ".fasta", "w") as unaligned_file:
            for taxon in taxa:
                record = taxon.get_longest_seq(gene.name, max_seq_length)
                # only real sequence records are written out
                if isinstance(record, Bio.SeqRecord.SeqRecord):
                    # output format: >binomial_accession_description
                    description = taxon.binomial + "_" + record.id + "_" + record.description
                    description = description.replace(" ", "_")
                    unaligned_file.write(">" + description + "\n")
                    unaligned_file.write(str(record.seq) + "\n\n")

        print("Making alignment with MAFFT...")
        try:
            from Bio.Align.Applications import MafftCommandline

            mafft_cline = MafftCommandline(input=gene.name + ".fasta")
            mafft_cline.set_parameter("--auto", True)
            mafft_cline.set_parameter("--adjustdirection", True)
            print(str(mafft_cline))
            stdout, stderr = mafft_cline()
            print("Writing alignment to FASTA file...")
            with open("aligned_" + gene.name + ".fasta", "w") as handle:
                handle.write(stdout)
        except Exception:
            # best-effort step; Ctrl-C and SystemExit still propagate
            print("Problem finding MAFFT, alignment skipped.")

    print("\nGenerating summary results spreadsheet...\n")
    with open("result.csv", "w") as summary:
        header = "taxon," + "".join(gene.name + "," for gene in genes)
        summary.write(header + "\n")
        for taxon in taxa:
            accessions = taxon.binomial + ","
            for gene in genes:
                # each column will be the longest sequences accession
                record = taxon.get_longest_seq(gene.name, max_seq_length)
                if isinstance(record, Bio.SeqRecord.SeqRecord):
                    accessions += record.id + ","
                else:
                    accessions += ","
            summary.write(accessions + "\n")
    print("Done!\n")
def test_Mafft_with_complex_command_line(self):
    """Round-trip with complex command line.

    Sets a mix of switches and valued options, checks that repr()
    round-trips and that the rendered command line matches the expected
    text, then runs it through subprocess.Popen and verifies exit code,
    stdout and stderr.
    """
    cmdline = MafftCommandline(mafft_exe)
    cmdline.set_parameter("input", self.infile1)
    cmdline.set_parameter("--localpair", True)
    cmdline.set_parameter("--weighti", 4.2)
    cmdline.set_parameter("retree", 5)
    cmdline.set_parameter("maxiterate", 200)
    cmdline.set_parameter("--nofft", True)
    cmdline.set_parameter("op", 2.04)
    cmdline.set_parameter("--ep", 0.51)
    cmdline.set_parameter("--lop", 0.233)
    cmdline.set_parameter("lep", 0.2)
    cmdline.set_parameter("--reorder", True)
    cmdline.set_parameter("--treeout", True)
    cmdline.set_parameter("nuc", True)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    self.assertEqual(str(cmdline), mafft_exe
                     + " --localpair --weighti 4.2 --retree 5 "
                     + "--maxiterate 200 --nofft --op 2.04 --ep 0.51"
                     + " --lop 0.233 --lep 0.2 --reorder --treeout"
                     + " --nuc Fasta/f002")
    child = subprocess.Popen(str(cmdline),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             # Text mode so the str comparisons below also
                             # work where pipes would otherwise yield bytes.
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    # communicate() drains both pipes while waiting; calling wait() first
    # can deadlock once the child fills a pipe buffer.
    stdoutdata, stderrdata = child.communicate()
    return_code = child.returncode
    self.assertEqual(return_code, 0,
                     "Got error code %i back from:\n%s"
                     % (return_code, cmdline))
    # assertTrue replaces the deprecated assert_ alias (removed in 3.12).
    self.assertTrue(stdoutdata.startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertTrue("$#=0" not in stderrdata)
    del child
def test_Mafft_with_complex_command_line(self):
    """Round-trip with complex command line."""
    mafft = MafftCommandline(mafft_exe)
    settings = (
        ("input", self.infile1),
        ("--localpair", True),
        ("--weighti", 4.2),
        ("retree", 5),
        ("maxiterate", 200),
        ("--nofft", True),
        ("op", 2.04),
        ("--ep", 0.51),
        ("--lop", 0.233),
        ("lep", 0.2),
        ("--reorder", True),
        ("--treeout", True),
        ("nuc", True),
    )
    for option, value in settings:
        mafft.set_parameter(option, value)

    # repr() must round-trip to an equivalent command line object.
    rebuilt = eval(repr(mafft))
    self.assertEqual(str(rebuilt), str(mafft))

    # The rendered command line is compared against the full expected text.
    expected = mafft_exe + (
        " --localpair --weighti 4.2 --retree 5 "
        "--maxiterate 200 --nofft --op 2.04 --ep 0.51"
        " --lop 0.233 --lep 0.2 --reorder --treeout"
        " --nuc Fasta/f002"
    )
    self.assertEqual(str(mafft), expected)

    # Run the tool and check the first aligned record and the stderr text.
    out_text, err_text = mafft()
    self.assertTrue(out_text.startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertTrue("$#=0" not in err_text)
for j, accession in enumerate(accessions[i]): if accession.strip() != '': handle = Entrez.efetch(db='nucleotide', rettype='fasta', retmode='text', id=accession) record = SeqIO.read(handle, 'fasta') records[i].append( SeqRecord(Seq(str(record.seq), IUPAC.ambiguous_dna), id=taxa[j], description="")) handle.close() sleep(0.02) SeqIO.write(records[i], "sequences_unaligned/" + genes[i] + ".fasta", "fasta") for i, gene in enumerate(genes): print("Aligning " + gene + " with MAFFT...") mafft_cline = MafftCommandline(input="sequences_unaligned/" + genes[i] + ".fasta") mafft_cline.set_parameter("--auto", True) mafft_cline.set_parameter("--adjustdirection", True) print(str(mafft_cline)) stdout, stderr = mafft_cline() print("Writing " + gene + " alignment to FASTA file...") with open("sequences_aligned/" + genes[i] + ".fasta", "w") as handle: handle.write(stdout) print("Done.")