def ML_tree(infile, outfile, file_type):
    """Build a maximum-likelihood tree with PhyML and save it as Newick and PNG.

    The alignment is converted to PHYLIP, PhyML is run with the binary
    bundled for the current platform, and the resulting tree is written to
    ``tree.txt`` and drawn to ``tree.png`` in the session directory.

    Relies on the module-level names ``dirName``, ``user_OS`` and
    ``current_path``.

    Args:
        infile: Alignment file name inside the session directory.
        outfile: Base name (without extension) for the generated PHYLIP file.
        file_type: Format of ``infile`` passed to ``SeqIO.parse``
            ("clustal" or "fasta").

    Raises:
        RuntimeError: If ``user_OS`` is not a platform with a bundled binary.
    """
    # Bundled PhyML executables, keyed by the platform identifier.
    phyml_binaries = {
        'darwin': 'static/tools/MacOS/PhyML-3.1/PhyML-3.1_macOS-MountainLion',
        'linux': 'static/tools/Linux/PhyML-3.1/PhyML-3.1_linux64',
        'win32': current_path + '/static/tools/Windows/PhyML-3.1/PhyML-3.1_win32.exe',
    }
    # Fail early and clearly instead of hitting an unbound `cmd` later on.
    if user_OS not in phyml_binaries:
        raise RuntimeError("No PhyML binary available for platform %r" % (user_OS,))

    save_dir = "static/data/sauvegardes/" + dirName
    phylip_path = save_dir + outfile + ".phylip"

    # Convert the alignment to PHYLIP, the input format handed to PhyML.
    records = SeqIO.parse(save_dir + infile, file_type)
    count = SeqIO.write(records, phylip_path, "phylip")
    print("Converted %i records" % count)

    cmd = PhymlCommandline(cmd=phyml_binaries[user_OS], input=phylip_path)
    cmd()

    # PhyML writes its tree next to the input as <input>_phyml_tree.txt.
    tree = Phylo.read(phylip_path + '_phyml_tree.txt', 'newick')
    Phylo.draw(tree, do_show=False)
    Phylo.write(tree, save_dir + 'tree.txt', "newick")
    # Save the figure that Phylo.draw just rendered.
    plt.savefig(current_path + '/static/data/sauvegardes/' + dirName + 'tree.png')
def ml(self, data_type, input_file):
    """Run PhyML on ``input_file`` and keep only the resulting tree file.

    ``self.converter`` is called first (presumably it writes
    ``<base>.phylip`` - confirm against its definition). After PhyML runs,
    the tree is renamed to ``<base>_ml.nw`` and the PHYLIP input and the
    stats file are deleted.

    Args:
        data_type: ``'DNA'`` runs PhyML with its default datatype; any other
            value runs it with ``datatype='aa'``.
        input_file: Path of the alignment file to process.
    """
    self.converter(input_file, data_type)

    stem = os.path.splitext(input_file)[0]
    phylip_path = stem + '.phylip'

    # Only non-DNA input needs an explicit datatype.
    extra_options = {} if data_type == 'DNA' else {'datatype': 'aa'}
    run_phyml = PhymlCommandline(input=phylip_path, **extra_options)
    run_phyml()

    # Keep the tree under a friendlier name, then drop the by-products.
    os.rename(stem + '.phylip_phyml_tree', stem + '_ml.nw')
    for leftover in (phylip_path, stem + '.phylip_phyml_stats'):
        os.remove(leftover)
def test_phyml(self):
    """Smoke-test the PhyML wrapper and check the tree it produces."""
    phyml = PhymlCommandline(phyml_exe, input=EX_PHYLIP, datatype="aa")
    try:
        stdout_text, stderr_text = phyml()
        self.assertTrue(len(stdout_text) > 0)
        self.assertEqual(len(stderr_text), 0)

        # NB: Briefly, PhyML dropped the .txt suffix (#919), so fall back
        # to the suffix-less name when the usual one is absent.
        tree_path = EX_PHYLIP + "_phyml_tree.txt"
        if not os.path.isfile(tree_path):
            tree_path = EX_PHYLIP + "_phyml_tree"

        parsed = Phylo.read(tree_path, "newick")
        self.assertEqual(parsed.count_terminals(), 4)
    except Exception as exc:
        self.fail("PhyML wrapper error: %s" % exc)
    finally:
        # Remove whichever output files this run generated.
        generated = (
            EX_PHYLIP + suffix
            for suffix in (
                "_phyml_tree.txt",
                "_phyml_tree",
                "_phyml_stats.txt",
                "_phyml_stats",
            )
        )
        for leftover in generated:
            if os.path.isfile(leftover):
                os.remove(leftover)
def test_phyml(self):
    """Run PhyML using the wrapper."""
    # Stabilize phyml tests by running in single threaded mode by default.
    # Note: PHYMLCPUS environment is specific to Debian and derivatives.
    # Assign through os.environ rather than os.putenv: the assignment still
    # reaches child processes, and it keeps os.environ in sync so later
    # os.getenv() calls see the value.
    if not os.environ.get("PHYMLCPUS"):
        os.environ["PHYMLCPUS"] = "1"

    cmd = PhymlCommandline(phyml_exe, input=EX_PHYLIP, datatype="aa")
    # Smoke test
    try:
        out, err = cmd()
        self.assertGreater(len(out), 0)
        self.assertEqual(len(err), 0)
        # Check the output tree
        outfname = EX_PHYLIP + "_phyml_tree.txt"
        if not os.path.isfile(outfname):
            # NB: Briefly, PhyML dropped the .txt suffix (#919)
            outfname = outfname[:-4]
        tree = Phylo.read(outfname, "newick")
        self.assertEqual(tree.count_terminals(), 4)
    except Exception as exc:
        self.fail(f"PhyML wrapper error: {exc}")
    finally:
        # Clean up generated files
        for suffix in [
            "_phyml_tree.txt",
            "_phyml_tree",
            "_phyml_stats.txt",
            "_phyml_stats",
        ]:
            fname = EX_PHYLIP + suffix
            if os.path.isfile(fname):
                os.remove(fname)
def compute_tree(self, sequencing_data, app='FastTree'):
    """Build a tree from ``sequencing_data`` and root it.

    The data is exported to PHYLIP in a fresh temporary directory. PhyML is
    used only when requested and there are fewer than 2000 sequences; a
    PhyML request on a larger data set falls back to FastTree.

    Args:
        sequencing_data: Object supporting ``len()`` and
            ``to_phylip(path)``, which returns the PHYLIP file path.
        app: ``'FastTree'`` (default) or ``'phyml'``.

    Side effects:
        Sets ``self.tree_path`` and calls ``self.root_tree()``. Exits the
        process via ``sys.exit`` when ``app`` is not recognised.
    """
    tmp = mkdtemp()
    phylip_path = sequencing_data.to_phylip(f'{tmp}/sequences.phylip')

    if app == 'phyml' and len(sequencing_data) < 2000:
        phyml_cmd = PhymlCommandline(input=phylip_path)
        phyml_cmd()
        # NOTE(review): PhyML normally writes <input>_phyml_tree.txt next to
        # its input file; confirm that this outdir path really exists.
        self.tree_path = f'{self.outdir}/sequences_phyml_tree.txt'
    elif app in ('phyml', 'FastTree'):
        # Reaching here with 'phyml' means the data set was too large. The
        # original tested only for 'FastTree', so the fallback never ran.
        if app == 'phyml':
            print('Fallback to FastTree (too many OTUs)')
        self.tree_path = f'{self.outdir}/tree.nwk'
        with open(self.tree_path, 'w') as file_handle:
            # FastTree prints the tree on stdout; capture it in the file.
            proc = subprocess.Popen(['FastTree', '-nt', phylip_path],
                                    stdout=file_handle)
            proc.wait()
    else:
        sys.exit('Unknown application {}'.format(app))

    self.root_tree()
def construc_tree(file_name_with_path, file_name, dict):
    """Run PhyML on ``<file_name_with_path>.phy`` and relabel the tree leaves.

    The Newick tree produced by PhyML is read back, every terminal name is
    replaced through ``dict``, and the tree is written over the same file.

    Args:
        file_name_with_path: Alignment path without the ``.phy`` extension.
        file_name: Unused by this function.
        dict: Mapping from the leaf names in the PhyML tree to their
            replacement names; a missing name raises ``KeyError``.

    Returns:
        Always ``0``.
    """
    alignment_path = file_name_with_path + '.phy'
    newick_path = file_name_with_path + ".phy_phyml_tree.txt"

    run_phyml = PhymlCommandline(input=alignment_path)
    run_phyml()

    phylogeny = Phylo.read(newick_path, "newick")
    for terminal in phylogeny.get_terminals():
        terminal.name = dict[terminal.name]

    # Overwrite PhyML's output with the relabelled tree.
    Phylo.write(phylogeny, newick_path, "newick")
    return 0
def create_gene_tree(msa_phy, bootstrap):
    """Run PhyML on an amino-acid alignment to make a gene tree.

    Args:
        msa_phy: Path of the alignment passed to PhyML as ``input``.
        bootstrap: Value forwarded to PhyML's ``bootstrap`` option.
    """
    print("Creating ML gene tree...")
    phyml_options = {
        'input': msa_phy,
        'datatype': 'aa',
        'bootstrap': bootstrap,
    }
    run_phyml = PhymlCommandline("phyml", **phyml_options)
    # PhyML leaves its results on disk; the captured output is not used.
    run_phyml()
def ml_tree(aln, name):
    """Build a maximum-likelihood tree for a nucleotide alignment with PhyML.

    The alignment is written to ``<name>.phy`` (relaxed PHYLIP), PhyML is run
    with an estimated alpha and 100 bootstrap replicates, and the resulting
    Newick tree is read back.

    Args:
        aln: Alignment object accepted by ``AlignIO.write``.
        name: Base name (without extension) for the files on disk.

    Returns:
        The tree read from ``<name>.phy_phyml_tree.txt``.
    """
    from Bio.Phylo.Applications import PhymlCommandline

    phylip_path = '%s.phy' % name
    AlignIO.write(aln, phylip_path, 'phylip-relaxed')

    run_phyml = PhymlCommandline(
        input=phylip_path,
        datatype='nt',
        alpha='e',
        bootstrap=100,
    )
    # Show the exact command line before running it.
    print(run_phyml)
    run_phyml()

    return Phylo.read('%s.phy_phyml_tree.txt' % name, 'newick')
def _runphyml(self):
    """Run PhyML and log what it printed.

    The executable, the PHYLIP alignment and the datatype ('nt' or 'aa')
    come from ``self.phyml_exe``, ``self.phyml_input`` and
    ``self.datatype``. Captured stdout and then stderr are each passed to
    ``self.phyml_log``.
    """
    phyml_cmd = PhymlCommandline(
        self.phyml_exe,
        input=self.phyml_input,
        datatype=self.datatype,
    )
    captured_out, captured_err = phyml_cmd()
    for stream_text in (captured_out, captured_err):
        self.phyml_log(stream_text)
def make_core_tree(path_to_seq, jmodel_gamma, jmodel_model, jmodel_pinv):
    """Convert a FASTA alignment to PHYLIP and run PhyML with the given model.

    The PHYLIP copy is written to ``<path_to_seq>_align_phylip``. A gamma
    shape or invariant-site proportion of ``0.0`` means "not part of the
    model", so the matching PhyML option is left out.

    Args:
        path_to_seq: Path of the FASTA alignment.
        jmodel_gamma: Gamma shape (PhyML ``alpha``); omitted when both it
            and ``jmodel_pinv`` are ``0.0``.
        jmodel_model: Substitution model name (PhyML ``model``).
        jmodel_pinv: Proportion of invariant sites (PhyML ``prop_invar``);
            omitted when ``0.0``.
    """
    phylip_path = path_to_seq + "_align_phylip"

    # Context managers close both handles even when reading or writing fails.
    with open(path_to_seq) as fasta_handle:
        alignment = AlignIO.read(fasta_handle, "fasta")
    with open(phylip_path, "w") as phylip_handle:
        phylip_handle.write(alignment.format("phylip"))

    phyml_exe = "/afs/andrew.cmu.edu/usr23/lleung/phyml/phyml"

    # Build the option set once instead of constructing the command up to
    # three times and discarding the earlier ones.
    options = {"input": phylip_path, "model": jmodel_model}
    if jmodel_pinv != 0.0:
        options["alpha"] = jmodel_gamma
        options["prop_invar"] = jmodel_pinv
    elif jmodel_gamma != 0.0:
        options["alpha"] = jmodel_gamma

    cmdline = PhymlCommandline(phyml_exe, **options)
    cmdline()
def runphyml(gene):
    """Run PhyML on ``<gene>_aligned.phy`` as nucleotide data.

    On Windows the executable name is ``PhyML-3.1_win32.exe``; on every
    other platform it is ``phyml``. The command line is printed before it
    is run.

    Args:
        gene: Prefix of the PHYLIP alignment file name.
    """
    if sys.platform == "win32":
        phyml_exe = "PhyML-3.1_win32.exe"
    else:
        phyml_exe = "phyml"

    # Input is a PHYLIP alignment; datatype is 'nt' (nucleotides).
    phyml_cmd = PhymlCommandline(
        phyml_exe,
        input=gene + '_aligned.phy',
        datatype='nt',
    )
    print(phyml_cmd)
    phyml_cmd()
def test_phyml(self):
    """Run PhyML using the wrapper."""
    cmd = PhymlCommandline(phyml_exe, input=EX_PHYLIP, datatype='aa')
    # Smoke test
    try:
        out, err = cmd()
        self.assertTrue(len(out) > 0)
        self.assertEqual(len(err), 0)
        # Check the output tree
        outfname = EX_PHYLIP + '_phyml_tree.txt'
        if not os.path.isfile(outfname):
            # Some PhyML releases write the tree without the .txt suffix.
            outfname = outfname[:-4]
        tree = Phylo.read(outfname, 'newick')
        self.assertEqual(tree.count_terminals(), 4)
    finally:
        # Clean up generated files, whichever naming scheme PhyML used.
        for suffix in [
            '_phyml_tree.txt',
            '_phyml_tree',
            '_phyml_stats.txt',
            '_phyml_stats',
        ]:
            fname = EX_PHYLIP + suffix
            if os.path.isfile(fname):
                os.remove(fname)
def build_ml_phyml(alignment, outfile, work_dir=".", **kwargs):
    """Build a maximum-likelihood tree of DNA sequences with PhyML.

    The alignment is written to ``<work_dir>/work/aln.phy`` in relaxed
    PHYLIP format, PhyML is run on it, and the resulting tree is copied to
    ``outfile``.

    Args:
        alignment: Alignment object accepted by ``AlignIO.write``.
        outfile: Destination path for the tree file.
        work_dir: Directory that contains the ``work`` sub-directory.
        **kwargs: Extra options forwarded to ``PhymlCommandline``.

    Returns:
        ``(outfile, phy_file)`` on success, or ``None`` when PhyML did not
        produce a tree file.
    """
    phy_file = op.join(work_dir, "work", "aln.phy")
    # Pass the path rather than an open handle: AlignIO then opens and
    # closes the file itself, and the Python 2-only file() builtin is gone.
    AlignIO.write(alignment, phy_file, "phylip-relaxed")

    phyml_cl = PhymlCommandline(cmd=PHYML_BIN("phyml"), input=phy_file, **kwargs)
    logging.debug("Building ML tree using PhyML: %s", phyml_cl)
    phyml_cl()

    tree_file = phy_file + "_phyml_tree.txt"
    if not op.exists(tree_file):
        print("***PhyML failed.", file=sys.stderr)
        return None

    sh("cp {0} {1}".format(tree_file, outfile), log=False)
    logging.debug("ML tree printed to %s", outfile)
    return outfile, phy_file
def phylo(run):
    """Draw an ASCII phylogenetic tree from a saved BLAST result.

    Reads ``blast<run>.xml``, takes the first HSP subject sequence of every
    alignment, aligns the sequences with MUSCLE, builds a tree with PhyML
    and prints it. Intermediate files are written under ``Phylo/``.

    Args:
        run: Identifier interpolated into the BLAST XML file name.
    """
    # Close the XML file as soon as the record has been parsed.
    with open("blast%s.xml" % run) as result_handle:
        blast_record = NCBIXML.read(result_handle)

    print("\tCollecting Sequences")

    def first_hsp_records(alignments):
        """Yield one SeqRecord per alignment, built from its first HSP."""
        for aln in alignments:
            for hsp in aln.hsps:
                yield SeqRecord(Seq(hsp.sbjct), id=aln.accession)
                break  # only the first HSP of each alignment is used

    SeqIO.write(first_hsp_records(blast_record.alignments),
                'Phylo/family.fasta', 'fasta')

    print("\tAligning Sequences")
    muscle_cmd = MuscleCommandline(input="Phylo/family.fasta",
                                   out="Phylo/family.aln", clw=True)
    muscle_cmd()
    # The MUSCLE output is ClustalW-formatted; convert it to relaxed PHYLIP.
    AlignIO.convert("Phylo/family.aln", "clustal",
                    "Phylo/family.phy", "phylip-relaxed")

    print("\tGenerating Tree...")
    phyml_cmd = PhymlCommandline(input="Phylo/family.phy")
    phyml_cmd()

    print("\tDrawing Tree")
    tree = Phylo.read("Phylo/family.phy_phyml_tree.txt", "newick")
    Phylo.draw_ascii(tree)
    return
print ("clustalw output:\n %s"%(clustaltextout)) # read in the alignment file and create a MultipleSeqAlignment object clustalalignment = AlignIO.read("p53_homologous.aln", "clustal") #write the alignment to a format that can be read by PhyML alignout_filename = "p53_homologous.out" AlignIO.write(clustalalignment,alignout_filename,"phylip-relaxed") print("Making tree (takes around 75 seconds): ") # specify the location of the phyml executable (this depends on your machine) phyml_exe_path = r"D:\SCHOOL\fall 2020\Biological Models in Python\Week 7\PhyML-3.1_win32.exe" #optional check to see if that path exists assert os.path.isfile(phyml_exe_path), "PhyML executable missing" # create an instance of a Bio.AlignApplication that can be called like a function and runs phyml phymlcmd = PhymlCommandline(cmd=phyml_exe_path,input=alignout_filename) phymltextout,phymltexterr = phymlcmd() print ("PhyML output:\n%s"%(phymltextout)) tree_filename = alignout_filename + "_phyml_tree.txt" tree = Phylo.read(tree_filename, "newick") print("Ascii tree:\n") Phylo.draw_ascii(tree)
print("The nucleotide file has been converted to relaxed-phylip format.") # Convert the amino acid file print("Convert the amino acid fasta aligned file to relaxed-phylip format.") AlignIO.convert(Gene[0] + "_cds_aa_aligned.fasta", "fasta", Gene[0] + "_cds_aa_aligned.phy", "phylip-relaxed") os.system("cp " + Gene[0] + "_cds_aa_aligned.phy " + ) print("The amino acid file has been converted to relaxed-phylip format.") # Directory change to output directory os.chdir(h) # ----------------------------------------------------------------------------- # Create the command & run phyml # Input a phylip formatted alignment file and describe the datatype ('nt' or 'aa') # Use the nucleotide aligned file with PhyML run_phyml_nucl = PhymlCommandline(input=f + '/' + Gene[0] + '_cds_nucl_aligned.phy', datatype='nt') print("\n" + "The following PhyML command is being run: "), print(run_phyml_nucl) out_log_nucl, err_log_nucl = run_phyml_nucl() print(out_log_nucl, err_log_nucl) # ----------------------------------------------------------------------------- # Use the amino acid aligned file with PhyML run_phyml_aa = PhymlCommandline(input=f + '/' + Gene[0] + '_cds_aa_aligned.phy', datatype='aa') print("\n" + "The following PhyML command is being run: "), print(run_phyml_aa) out_log_aa, err_log_aa = run_phyml_aa() print(out_log_aa, err_log_aa) print("PhyML tree files and stats have been created." + "\n") # ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
os.chdir(test_out)

# Convert the aligned FASTA file to relaxed-phylip format.
print("Convert the file to relaxed-phylip format.")
AlignIO.convert(Gene[0] + "_cds_aligned.fasta", "fasta", Gene[0] + "_aligned.phy", "phylip-relaxed")
y = r'C:\Users\shutchins2\Desktop\"In Progress"\Code\GBK2TREE\"Alignments & Trees"\"Alignment Scripts and Files"' + "\\"
os.system("move " + Gene[0] + "_aligned.phy " + y)
print("The file has been converted to relaxed-phylip format.")
os.chdir(home)

# Create the command & run phyml.
# Input a phylip formatted alignment file and describe the datatype ('nt' or 'aa').
run_phyml = PhymlCommandline(phyml_exe, input=Gene[0] + '_aligned.phy', datatype='nt')
print("\n" + "The following PhyML command is being run: "),
print(run_phyml)
out_log, err_log = run_phyml()

# Move phyml output files to the appropriate directory.
# PhyML names its outputs <input>_phyml_tree.txt and <input>_phyml_stats.txt,
# so the alignment's ".phy" extension is part of both file names.
os.system("move " + Gene[0] + "_aligned.phy_phyml_tree.txt " + x)
os.system("move " + Gene[0] + "_aligned.phy_phyml_stats.txt " + x)
os.system("move " + Gene[0] + "_aligned.phy " + x)
print("PhyML tree files and stats have been created." + "\n")
#------------------------------------------------------------------------------
# Load the Clustal alignment and its guide tree.
aln = AlignIO.read('Hemoglobina_speciesnames.aln', 'clustal')
tree = Phylo.read('Hemoglobina_speciesnames.dnd', 'newick')

# ### Build tree using Maximum Likelihood (ML)

# In[6]:

from Bio.Phylo.TreeConstruction import *
from Bio import AlignIO
from Bio.Phylo.Applications import PhymlCommandline

# Convert the alignment to relaxed PHYLIP, then run PhyML on it as
# amino-acid data with an estimated alpha.
AlignIO.convert("Hemoglobina_speciesnames.aln", "clustal", "Hemoglobina_speciesnames.phy", "phylip-relaxed")
phyml_cl = PhymlCommandline(input='Hemoglobina_speciesnames.phy', datatype='aa', alpha='e', bootstrap='120')
out_log, err_log = phyml_cl()

# In[7]:

# Read back the tree PhyML wrote next to its input and print it.
mltree = Phylo.read('Hemoglobina_speciesnames.phy_phyml_tree.txt', 'newick')
Phylo.draw_ascii(mltree)

# ### Build tree using Neighbour Joining (NJ)

# In[8]:

aln = AlignIO.read('Hemoglobina_speciesnames.aln', 'clustal')
constructor = DistanceTreeConstructor()
tot = 0 for ide, count in motifs: tot += count print('Found ' + str(tot) + ' occurences of "' + motif + '" in ' + upper) print(len(motifs)) break #print (motifs) quit() call( 'clustalo -i ../scripts/output/unaligned.fa -o phyAlign.clu --force --outfmt=clu', shell=True) AlignIO.convert('phyAlign.clu', 'clustal', 'phyAlign.phy', 'phylip-relaxed') cmdline = PhymlCommandline(input='phyAlign.phy', alpha='e', bootstrap=1, sequential=False) call(str(cmdline), shell=True) my_tree = Phylo.read("phyAlign.phy_phyml_tree.txt", "newick") Phylo.draw(my_tree, show_confidence=False) quit() seqs = [ 'MANNNSDRQGLEPRVIRTLGSQALSGPSISNRTSSSE', 'ANPHFSKNVKEAMIKTASPTPLSTPIYRIAQACDRCRSKKTRCDGKRPQCSQCAAVGFECRISDKLLRKAYPKGYTESLEERVRELEAENKRLLALCDIKEQQISLVSQSRPQTSTDNTINGNFKHDLKDAPLNLSSTNIYLLNQTVNKQLQNGKMDGDNSGSAMSPLGAPPPPPHKDHLCDGVSCTNHLHVKPTSTSLNDPTAISFEQDEAPGLPAVKALKSMTTHQRSTQLATLVSLSIPRSTEEILFIPQLLTRIRQIFGFNSKQCLYTVSLLSSLKNRLPAPRLLAPSTSTKLKEKDEDK', 'KLDDDSAFVKRFQSTNLSEFVDLKKFLISLKFNINSFSKQSEKPANDQDDELLSLTEIKELLHLFFKFWSNQVPILNNDHFLIYFNNFVEVVKHLSTENLETNNTTKSTVTTNHEIFALKLLMMLQMGLLVKIKMEKIKYTVPKNPKAKYARLMAYYHQLSLIIPKNPYFLNMSTTSLPSLQLLSLASFYYLNVGDISAIYGVRGRIVSMAQQLRLHRCPSAVLSVHSNPVLQKFEQSERRLLFWAIYYVDVFASLQLGVPRLLKDFDIECALPISDVEYKDQLSMENEKADKKAKKIQLQGQVSSFSLQIIRFAKILGNILDSIFKRGMMDERITSEVALVHENALDNWRNQLPEMYYFQITVNGTVNLDEIRATNQRNTETKFDKKDIILFEKKILLLFYFLAKSMIHLPVIATKPLPKNVDNATKKKQSMFNNDSKGATNQDHMILDVDMTSPAIRTSSSYIILQQATNATLTIFQAINSMYLPLPLNVSRTLIRFSLLCARGSLEYTKGGALFLDNKNLLLDTIKDIENDRLLDLPGIASWHTLKLFDMSINLLLKAPNVKVERLDKFLEKKLNYYNRLMGLPPATTTSLKPLFGSQSKNSLENRQRTPNVKRENPEHEYLYGNDSNNNNNSEAGHSPMTNTTNGNK' ] #r = requests.post('https://www.ncbi.nlm.nih.gov/Structure/bwrpsb/bwrpsb.cgi?', data=(('db','cdd'), ('queries',seq), ('tdata','hits')))
# Make sure the Clustal W binary exists, then run the prepared command.
# NOTE: assert statements are skipped when Python runs with -O.
assert os.path.isfile(clustalw_exe), "Clustal W executable missing"
stdout, stderr = cline()

# %% convert alignment file from fasta from muscle in Poseidon to phylip-relaxed
# Both file names start with "/" because they are appended to the working
# directory by plain string concatenation below.
msaFile = "/muscleAlignmentCoralSeq.msa"
outFile = "/muscleAlignmentCoralSeq.phy"
import os
path = os.getcwd()
#path = "/Users/kgrabb/Documents/2018.05CoralLarvae/Genomes/Poseidon/blastResults/v2"
print(path)
print(path + msaFile)
inputFile = path + msaFile
outputFile = path + outFile
# In this excerpt the pandas load is only used to preview the first rows.
viewFile = pd.read_csv(inputFile)
print(viewFile.head(5))
AlignIO.convert(inputFile, "fasta", outputFile, "phylip-relaxed")

# %% use PhyML. feed in phy alignment with the command line wrapper
from Bio.Phylo.Applications import PhymlCommandline
cmdline = PhymlCommandline(input=outputFile, datatype="aa", model="WAG", alpha="e", bootstrap=100)
out_log, err_log = cmdline()

# %%
# this command writes a file in a fromat suitable for Phyml (software for molecular phylogeny) AlignIO.write(alignment[:, 60:300], "phyml.phy", "phylip-relaxed") # Some general alignment analysis tools are available in module Bio.Align as AlignInfo summary_align = AlignInfo.SummaryInfo(alignment) consensus = summary_align.gap_consensus() print(consensus) # # if we have phyml locally, we can run it with this code # otherwise, we can run it online http://atgc.lirmm.fr/phyml from Bio.Phylo.Applications import PhymlCommandline cmdline = PhymlCommandline(input='phyml.phy', datatype='aa', model='LG', alpha='e', bootstrap=100) # module for delaing phylogenetic trees from Bio import Phylo # loading file into an object “tree” tree = Phylo.read("./data/phyml_phy_phyml_tree.txt", "newick") # text visualization Phylo.draw_ascii(tree) # # matplotlib visualization # get_ipython().magic('matplotlib notebook') Phylo.draw(tree) plt.show(block=True)
def build_tree(msa_file, original, file_name="tree", bootstrap=10):
    """Build a phylogenetic tree from a multiple sequence alignment.

    The Clustal alignment is converted to relaxed PHYLIP, PhyML builds the
    tree (amino-acid data, LG model, estimated alpha), and the tree is
    coloured and rendered to two PNG files: ``<file_name>.png`` and
    ``<file_name>_transparent.png``.

    PhyML site: http://www.atgc-montpellier.fr/
    "New Algorithms and Methods to Estimate Maximum-Likelihood Phylogenies:
    Assessing the Performance of PhyML 3.0." Guindon S., Dufayard J.F.,
    Lefort V., Anisimova M., Hordijk W., Gascuel O.
    Systematic Biology, 59(3):307-21, 2010.

    Args:
        msa_file: Path of the Clustal-format alignment.
        original: Record whose ``id`` marks the originally searched gene.
        file_name: Base path for the PHYLIP file and every output file.
        bootstrap: Number of non-parametric bootstrap replicates.
    """
    # See https://biopython.org/wiki/Phylo for general code and settings.
    tree_file = file_name  # the PHYLIP file name is the base of all outputs
    AlignIO.convert(msa_file, "clustal", tree_file, "phylip-relaxed",
                    alphabet=IUPAC.protein)

    # The PhyML executable is expected at ./PhyML.
    phyml_cmd = PhymlCommandline(
        "./PhyML",
        input=tree_file,
        datatype='aa',   # amino acids are being input
        model='LG',      # amino-acid substitution matrix
        alpha='e',
        bootstrap=bootstrap,
    )

    # Run tree generation and show whatever PhyML printed.
    print("Building distance tree from multiple sequence alignment...\n")
    stdout, stderr = phyml_cmd()
    print(stdout + stderr)
    print(f"Newick tree saved as {tree_file + '_phyml_tree.txt'}")

    # Read in the tree file and convert it to PhyloXML so clades can carry
    # colour and width.
    tree = Phylo.read(tree_file + "_phyml_tree.txt", "newick")
    tree = tree.as_phyloxml()

    # Stylize the tree.
    # Colorblind-safe colors can be checked with ColorOracle: http://colororacle.org/
    for clade in tree.find_clades():
        clade.width = 3  # bold lines
        if str(clade.name).startswith("gi|"):
            clade.color = "#e4002b"  # red: known gene or false positive
        elif clade.name == original.id:
            clade.color = "#006db6"  # blue: originally searched gene
        elif clade.name is not None and not clade.color:
            clade.color = "#000000"  # black: comparator nodes
        elif not clade.name:
            clade.color = "#63666a"  # gray: non-terminal nodes
        # Checked separately so that it overrides any colour chosen above.
        if str(clade.name).endswith("***"):
            clade.color = "#00bf71"  # green: novel genes

    # Configure the plot; the image size scales with the number of leaves.
    tree_len = len(tree.get_terminals())
    plt.rc("font", size=18)  # bigger font for easier reading
    fig = plt.figure(figsize=(1.6 * tree_len, tree_len), dpi=300)
    try:
        axes = fig.add_subplot(1, 1, 1)
        Phylo.draw(tree, axes=axes, do_show=False)
        # Save the white-background image.
        fig.savefig(f"{tree_file}.png", format='png', bbox_inches='tight',
                    dpi=300)
        # Save the transparent image.
        fig.savefig(f"{tree_file}_transparent.png", format='png',
                    bbox_inches='tight', dpi=300, transparent=True)
    finally:
        # pyplot keeps every figure alive until it is closed; release this
        # one so repeated calls do not accumulate open figures.
        plt.close(fig)

    print(f"Tree images saved as {tree_file + '.png'} "
          f"and {tree_file + '_transparent.png'} to {os.getcwd()}\n")