def run(self, pamlsrc, output_folder, model='M1'):
    """Run a PAML codeml model on ``temptree.nw`` through ETE.

    M1 is the default model because it is the best fit for orthology
    inference in our case; `M2`, `M0` and `M3` may be used instead.
    Make sure the path to the codeml binary is correct -- it should be in
    the paml `/bin`.

    The name handed to ``run_model`` is ``<model>.<output_folder>``.

    :param pamlsrc: Path to the codeml binary.
    :param output_folder: The name of the output folder.
    :param model: The model to be used.  (Default value = 'M1')
    """
    # Load the newick tree and attach the alignment to it.
    evol_tree = EvolTree('temptree.nw')
    evol_tree.link_to_alignment(self.alignmentfile)

    # Tell ETE where to work and where the codeml binary is.
    evol_tree.workdir = self.workdir
    evol_tree.execpath = pamlsrc

    # Run the requested model (M1, M2, M3, or M0).
    run_label = '.'.join((model, output_folder))
    evol_tree.run_model(run_label)

    # This message is emitted once run_model has returned.
    message = 'Codeml is generating data in %s.' % run_label
    self.ete3paml_log.info(message)
def main(self):
    """Run the instance's default codeml model on its tree and alignment.

    Reads ``self.tree``, ``self.alignment``, ``self.workdir``,
    ``self.pamlpath`` and ``self.defaultmodel``; prints a short status line
    before starting.
    """
    status = "Running model %s paml on input." % str(self.defaultmodel)
    print(status)

    # Build the ETE tree from the newick input and attach the alignment.
    evol_tree = EvolTree(self.tree)
    evol_tree.link_to_alignment(self.alignment)

    # Working directory and codeml binary location for ETE.
    evol_tree.workdir = self.workdir
    evol_tree.execpath = self.pamlpath

    # Launch the codeml model.
    evol_tree.run_model(self.defaultmodel)
def ete3paml(gene, paml_path, workdir='data/paml-output/', model='M1'):
    """Run a codeml model for one gene using ETE3's integration with PAML.

    The species tree is pruned down to the organisms whose names occur in
    the gene's aligned CDS file, and the codeml model is then run on that
    pruned tree linked to the same alignment.

    :param gene: Gene name.  Used to locate
        ``data/clustal-output/<gene>_Aligned/<gene>_aligned_cds_nucl.fasta``
        and as the suffix of the model name (``<model>.<gene>``).
    :param paml_path: Path to the codeml binary.
    :param workdir: Directory assigned to the tree's ``workdir``.
        (Default value = 'data/paml-output/')
    :param model: The codeml model to run.  (Default value = 'M1')
    """
    # Species tree and the list of organisms it may contain.
    species_tree = Tree('data/initial-data/species_tree.nw', format=1)
    orgsfile = pd.read_csv('data/initial-data/organisms.csv', header=None)
    organismslist = formatlist(list(orgsfile[0]))

    # Build the alignment path once; it is both read here and linked below.
    alignment_path = ('data/clustal-output/' + gene + '_Aligned/'
                      + gene + '_aligned_cds_nucl.fasta')
    with open(alignment_path, 'r') as alignment_file:
        alignment_str = alignment_file.read()

    # Some species may be absent from the alignment; keep only the branches
    # whose organism name appears somewhere in the alignment text.
    branches2keep = [organism for organism in organismslist
                     if organism in alignment_str]

    # Tree.prune() modifies the tree in place and returns None, so its
    # result must not be used as the pruned tree.
    species_tree.prune(branches2keep, preserve_branch_length=True)

    # Hand the pruned topology to EvolTree as a newick string, using the
    # same newick format the species tree was read with.
    tree = EvolTree(species_tree.write(format=1), format=1)
    tree.link_to_alignment(alignment_path)
    tree.workdir = workdir
    # Set the binpath of the codeml binary.
    tree.execpath = paml_path
    # Run the codeml model.
    tree.run_model(model + '.' + gene)