def from_newick(cls, newick_file, config=None, **kwargs):
    """Build an instance of ``cls`` from a tree stored in a Newick file.

    The file is read with ``phylopandas.read_newick`` and the resulting
    DataFrame is converted to a list of per-row dictionaries, which is
    handed to ``cls`` as ``data``.

    Parameters
    ----------
    cls : callable
        Class to instantiate; called as
        ``cls(data=..., config=..., **kwargs)``.
    newick_file : str
        Passed unchanged to ``phylopandas.read_newick``.
    config : dict, optional
        Configuration forwarded to ``cls``. An empty dict is used when
        omitted.
    **kwargs
        Extra keyword arguments forwarded to ``cls``.

    Returns
    -------
    object
        Whatever ``cls(...)`` returns.

    Raises
    ------
    ImportError
        If ``phylopandas`` cannot be imported.
    """
    # Keep the try body limited to the import so that errors raised while
    # reading the file are not mistaken for a missing dependency.
    try:
        import phylopandas
    except ImportError as err:
        raise ImportError(
            "DendroPy and Phylopandas must be installed.") from err

    tree_df = phylopandas.read_newick(newick_file)
    records = tree_df.to_dict(orient='records')

    # A fresh dict per call avoids sharing one mutable default between calls.
    if config is None:
        config = {}
    return cls(data=records, config=config, **kwargs)
def compute_tree(
        self,
        sequence_col='sequence',
        datatype='aa',
        bootstrap='-1',
        model='LG',
        frequencies='e',
        **kwargs):
    """Compute a tree with PhyML and merge it into ``self.data``.

    The current data is written to ``compute_tree.phy`` inside
    ``self.project_dir``, PhyML is run on that file, and the Newick tree
    it produces is read back and combined with ``self.data`` on the
    ``uid`` column.

    Parameters
    ----------
    sequence_col : str
        Accepted for interface compatibility; not used by this method.
    datatype, bootstrap, model, frequencies : str
        Passed to ``PhymlCommandline`` as the options of the same name.
    **kwargs
        Additional ``PhymlCommandline`` options; these override the
        named options above.

    Raises
    ------
    subprocess.CalledProcessError
        If the PhyML process exits with a non-zero status.
    """
    fname = "compute_tree.phy"
    path = os.path.join(self.project_dir, fname)

    # Write the PHYLIP input file for PhyML.
    self.data.phylo.to_phylip(filename=path)

    # Prepare options for PhyML.
    options = {
        'input': path,
        'datatype': datatype,
        'bootstrap': bootstrap,
        'model': model,
        'frequencies': frequencies,
    }

    # Update with any kwargs manually set by users.
    options.update(**kwargs)

    # ----- Flexibility here to use different ext apps -----

    # Build command line arguments for PhyML.
    # NOTE: the command string is split on whitespace, so arguments that
    # themselves contain whitespace (e.g. a project_dir with spaces) are
    # not handled.
    cml = Bio.Phylo.Applications.PhymlCommandline(**options)
    cml_args = str(cml).split()

    # check=True: fail here if PhyML fails, rather than going on to read a
    # missing output file or a stale one left behind by a previous run.
    subprocess.run(cml_args, check=True)

    # Get path (catch variations in file extension generated by phyml).
    outpath = os.path.join(self.project_dir, fname + "_phyml_tree")

    # Old versions of phyml have .txt at end.
    if not os.path.exists(outpath):
        outpath += '.txt'

    # ------------------------------------------------------

    # Read the tree PhyML produced.
    tree_data = ph.read_newick(outpath)

    # For leaf rows, copy ``id`` into ``uid`` so they can be matched
    # against the main dataframe.
    tree_data.loc[tree_data.type == 'leaf', 'uid'] = tree_data.id

    # Add to main dataframe.
    self.data = self.data.phylo.combine(tree_data, on='uid')
def show(self):
    """Display a rough interactive rendering of the tree.

    The object stored under ``self.data['tree']`` is written as Newick to
    ``vega-tree.newick`` in the project directory and read back with
    phylopandas. A ``phylovega.VegaTree`` is built from the result, saved
    to ``vega-tree.vg.json`` in the same directory, and displayed.
    """
    # Intermediate Newick file, used only to hand the tree to phylopandas.
    newick_path = os.path.join(self.project_dir, 'vega-tree.newick')
    tree = self.data['tree']
    tree.write(path=newick_path, schema='newick')

    # Build the Vega tree from the re-read Newick data.
    viewer = phylovega.VegaTree(phylopandas.read_newick(newick_path))

    # Save the Vega tree next to the Newick file, then render it.
    viewer.to_json(os.path.join(self.project_dir, 'vega-tree.vg.json'))
    viewer.display()
def run(df, project_dir, program="phyml", keep_tmp=False, **kwargs):
    """Build a tree for ``df`` with an external program and merge it in.

    The sequences in ``df`` are written in PHYLIP format to a temporary
    file named ``_`` inside ``project_dir``. The selected tree-building
    function is run on that file, and the resulting Newick tree
    (``__phyml_tree.txt``) is read and combined with ``df`` on ``uid``.

    The process working directory is switched to ``project_dir`` while
    the program runs and is restored to its previous value afterwards,
    including when an error is raised.

    Parameters
    ----------
    df : DataFrame
        Input data; must support ``df.phylo.combine``.
    project_dir : str
        Directory in which the temporary and output files are created.
    program : str
        Name of the tree-building program. Only ``"phyml"`` is registered.
    keep_tmp : bool
        If False, the temporary PHYLIP input file is removed after a
        successful run.
    **kwargs
        Forwarded to the tree-building function.

    Returns
    -------
    DataFrame
        Result of ``df.phylo.combine(new_tree, on='uid')``.

    Raises
    ------
    ValueError
        If ``program`` is not a recognized tree-building program.
    """
    tree_functions = {"phyml": _phyml}

    # Figure out which tree-building function to use.
    try:
        tree_function = tree_functions[program]
    except KeyError:
        err = "Tree building program '{}' not recognized.\n\n".format(program)
        err += "Should be one of:\n"
        err += "".join(" {}\n".format(p) for p in sorted(tree_functions))
        raise ValueError(err) from None

    input_file = "_"

    # Remember where we started: project_dir may be nested or absolute, so
    # ``os.chdir("..")`` would not reliably bring us back.
    start_dir = os.getcwd()
    os.chdir(project_dir)
    try:
        ph.seqio.write.to_phylip(df, id_col="uid", filename=input_file)
        tree_function(input_file, **kwargs)

        # Parse phyml output.
        new_tree = ph.read_newick("__phyml_tree.txt")
        # For leaf rows, copy ``id`` into ``uid`` so they match ``df``.
        new_tree.loc[new_tree.type == 'leaf', 'uid'] = new_tree.id

        # Add to main DataFrame.
        output = df.phylo.combine(new_tree, on='uid')

        if not keep_tmp:
            os.remove(input_file)
    finally:
        # Always restore the caller's working directory, even on failure.
        os.chdir(start_dir)

    return output