def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree built by RAxML from Alignment object aln.

    aln: an Alignment object, or data that can be used to build one.
    moltype: cogent.core.moltype.MolType object. Only consulted when
        params does not already supply '-m'; it must then be DNA, RNA
        or PROTEIN.
    best_tree: best_tree support is currently not implemented; a true
        value raises NotImplementedError.
    params: dict of parameters to pass in to the RAxML app controller.
        The dict is copied, so neither the caller's dict nor a shared
        default is modified. The '-w', '-n', '-k', '-p' and '-x'
        entries are always overwritten.

    The result is the tree parsed from the 'Bootstrap' entry of the
    RAxML result, built from PhyloNode objects, with every tip renamed
    through the name map returned by aln.toPhylip().

    Raises ValueError if '-m' is not supplied and moltype is not one of
    DNA, RNA or PROTEIN.
    """
    if best_tree:
        raise NotImplementedError

    # Work on a private copy: the settings written below must not leak into
    # later calls or into the dict owned by the caller.
    params = {} if params is None else dict(params)

    if '-m' not in params:
        if moltype == DNA or moltype == RNA:
            # GTRMIX used to be the model here; the original note says it is
            # no longer supported as of RAxML 7.2.3
            # (http://www.phylo.org/tools/raxmlhpc2.html).
            params["-m"] = 'GTRGAMMA'
        elif moltype == PROTEIN:
            # NOTE(review): 'matrixName' looks like a placeholder rather than
            # a concrete substitution matrix -- confirm before relying on it.
            params["-m"] = 'PROTGAMMAmatrixName'
        else:
            raise ValueError("Moltype must be either DNA, RNA, or PROTEIN")

    if not hasattr(aln, 'toPhylip'):
        aln = Alignment(aln)
    seqs, align_map = aln.toPhylip()

    # generate temp filename for output
    params["-w"] = "/tmp/"
    params["-n"] = get_tmp_filename().split("/")[-1]
    params["-k"] = True
    # fresh random integers for the -p and -x options on every run
    params["-p"] = randint(1, 100000)
    params["-x"] = randint(1, 100000)

    ih = '_input_as_multiline_string'

    raxml_app = Raxml(params=params,
                      InputHandler=ih,
                      WorkingDir=None,
                      SuppressStderr=True,
                      SuppressStdout=True)

    raxml_result = raxml_app(seqs)

    tree = DndParser(raxml_result['Bootstrap'], constructor=PhyloNode)

    # map the names used in the phylip input back through align_map
    for node in tree.tips():
        node.Name = align_map[node.Name]

    raxml_result.cleanUp()

    return tree
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree built by RAxML from Alignment object aln.

    aln: an Alignment object, or data that can be used to build one.
    moltype: cogent.core.moltype.MolType object. It is used to choose a
        model only when params has no '-m' entry, in which case it must
        be DNA, RNA or PROTEIN.
    best_tree: best_tree support is currently not implemented; passing a
        true value raises NotImplementedError.
    params: dict of parameters to pass in to the RAxML app controller.
        A copy is taken before any entry is set, so the argument (and
        the default) stay unmodified. '-w', '-n', '-k', '-p' and '-x'
        are always set by this function.

    The result is a PhyloNode-based tree parsed from the 'Bootstrap'
    entry of the RAxML result; tip names are translated through the
    name map returned by aln.toPhylip().

    Raises ValueError when no '-m' is given and moltype is not DNA, RNA
    or PROTEIN.
    """
    if best_tree:
        raise NotImplementedError

    # Copy first so per-call settings never accumulate in a shared dict.
    if params is None:
        params = {}
    else:
        params = dict(params)

    if '-m' not in params:
        if moltype == DNA or moltype == RNA:
            # GTRMIX was the earlier choice; per the original note it is no
            # longer supported in RAxML 7.2.3
            # (http://www.phylo.org/tools/raxmlhpc2.html).
            params["-m"] = 'GTRGAMMA'
        elif moltype == PROTEIN:
            # NOTE(review): 'matrixName' reads like an unfilled placeholder
            # for a substitution matrix name -- verify.
            params["-m"] = 'PROTGAMMAmatrixName'
        else:
            raise ValueError("Moltype must be either DNA, RNA, or PROTEIN")

    if not hasattr(aln, 'toPhylip'):
        aln = Alignment(aln)
    seqs, align_map = aln.toPhylip()

    # generate temp filename for output
    params["-w"] = "/tmp/"
    params["-n"] = get_tmp_filename().split("/")[-1]
    params["-k"] = True
    # new random integers for -p and -x on each invocation
    params["-p"] = randint(1, 100000)
    params["-x"] = randint(1, 100000)

    ih = '_input_as_multiline_string'

    raxml_app = Raxml(params=params,
                      InputHandler=ih,
                      WorkingDir=None,
                      SuppressStderr=True,
                      SuppressStdout=True)

    raxml_result = raxml_app(seqs)

    tree = DndParser(raxml_result['Bootstrap'], constructor=PhyloNode)

    # translate tip names through the map produced by toPhylip()
    for node in tree.tips():
        node.Name = align_map[node.Name]

    raxml_result.cleanUp()

    return tree
def test_insert_sequences_into_tree(self):
    """Inserts sequences into Tree using params - test handles tree-insertion"""

    # generate temp filename for output
    outfname = splitext(get_tmp_filename('/tmp/'))[0]

    # create starting tree; the with-block closes the handle even on error
    outtreefname = outfname + '.tre'
    with open(outtreefname, 'w') as outtree:
        outtree.write(REF_TREE)

    # Everything after the file exists runs under try/finally so the
    # starting-tree file is removed even when an assertion fails.
    try:
        # set params for tree-insertion
        params = {}
        params["-w"] = "/tmp/"
        params["-n"] = get_tmp_filename().split("/")[-1]
        params["-f"] = 'v'
        #params["-G"] = '0.25'
        params["-t"] = outtreefname
        params["-m"] = 'GTRGAMMA'

        aln_ref_query = get_align_for_phylip(
            StringIO(PHYLIP_FILE_DNA_REF_QUERY))
        aln = Alignment(aln_ref_query)
        seqs, align_map = aln.toPhylip()

        tree = insert_sequences_into_tree(seqs, DNA, params=params,
                                          write_log=False)

        # Strip the 'QUERY___' marker and any '___<digits>' part from each
        # tip name, then map the remainder through align_map when present.
        for node in tree.tips():
            removed_query_str = re.sub('QUERY___', '', str(node.Name))
            new_node_name = re.sub(r'___\d+', '', removed_query_str)
            if new_node_name in align_map:
                node.Name = align_map[new_node_name]

        self.assertTrue(isinstance(tree, PhyloNode))
        self.assertEqual(tree.getNewick(with_distances=True), RESULT_TREE)
        self.assertEqual(len(tree.tips()), 7)
        self.assertRaises(NotImplementedError, build_tree_from_alignment,
                          self.align1, RNA, True)
    finally:
        remove(outtreefname)
def test_insert_sequences_into_tree(self):
    """Inserts sequences into Tree using params - test handles tree-insertion"""

    # generate temp filename for output
    outfname = splitext(get_tmp_filename('/tmp/'))[0]

    # create starting tree (handle is closed by the with-block)
    outtreefname = outfname + '.tre'
    with open(outtreefname, 'w') as outtree:
        outtree.write(REF_TREE)

    # try/finally guarantees the starting-tree file is deleted whether or
    # not the insertion or any assertion below fails.
    try:
        # set params for tree-insertion
        params = {}
        params["-w"] = "/tmp/"
        params["-n"] = get_tmp_filename().split("/")[-1]
        params["-f"] = 'v'
        #params["-G"] = '0.25'
        params["-t"] = outtreefname
        params["-m"] = 'GTRGAMMA'

        aln_ref_query = get_align_for_phylip(
            StringIO(PHYLIP_FILE_DNA_REF_QUERY))
        aln = Alignment(aln_ref_query)
        seqs, align_map = aln.toPhylip()

        tree = insert_sequences_into_tree(seqs, DNA, params=params,
                                          write_log=False)

        # Remove the 'QUERY___' marker and any '___<digits>' part from the
        # tip names before looking them up in align_map.
        for node in tree.tips():
            removed_query_str = re.sub('QUERY___', '', str(node.Name))
            new_node_name = re.sub(r'___\d+', '', removed_query_str)
            if new_node_name in align_map:
                node.Name = align_map[new_node_name]

        self.assertTrue(isinstance(tree, PhyloNode))
        self.assertEqual(tree.getNewick(with_distances=True), RESULT_TREE)
        self.assertEqual(len(tree.tips()), 7)
        self.assertRaises(NotImplementedError, build_tree_from_alignment,
                          self.align1, RNA, True)
    finally:
        remove(outtreefname)
def test_insert_sequences_into_tree(self):
    """Inserts the query sequences into the tree and checks the Newick output"""

    # options handed to the insertion call: reference sequences, starting
    # tree and stats file come from the fixture, output goes to /tmp
    insertion_params = {
        "-r": self.refseq_fname,
        "-t": self.tree_fname,
        "-s": self.stats_fname,
        "--out-dir": "/tmp",
    }

    # parse the query sequences and convert them to phylip form
    query_records = MinimalFastaParser(StringIO(QUERY_SEQS))
    seqs, align_map = Alignment(query_records).toPhylip()

    tree = insert_sequences_into_tree(seqs, DNA,
                                      params=insertion_params,
                                      write_log=False)

    # rename tips back to query names
    for tip in tree.tips():
        if tip.Name in align_map:
            tip.Name = align_map[tip.Name]

    self.assertEqual(tree.getNewick(with_distances=True), RESULT_TREE)
class ParsInsertTests(TestCase):
    """Tests for the ParsInsert app controller and its tree-insertion helper."""

    def setUp(self):
        """Writes the reference sequence and starting tree files to /tmp"""
        # create a list of files to cleanup
        self._paths_to_clean_up = []
        self._dirs_to_clean_up = []

        # load query seqs
        self.seqs = Alignment(MinimalFastaParser(QUERY_SEQS.split()))

        # generate temp filename
        tmp_dir = '/tmp'
        self.outfile = get_tmp_filename(tmp_dir)

        # create and write out reference sequence file
        self.outfasta = splitext(self.outfile)[0] + '.fasta'
        with open(self.outfasta, 'w') as fastaout:
            fastaout.write(REF_SEQS)
        self._paths_to_clean_up.append(self.outfasta)

        # create and write out starting tree file
        self.outtree = splitext(self.outfile)[0] + '.tree'
        with open(self.outtree, 'w') as treeout:
            treeout.write(REF_TREE)
        self._paths_to_clean_up.append(self.outtree)

    def tearDown(self):
        """cleans up all files initially created"""
        # Explicit loops instead of map(): map() is lazy on Python 3, so
        # calling it only for its side effects would remove nothing there.
        for path in self._paths_to_clean_up:
            remove(path)
        for directory in self._dirs_to_clean_up:
            rmdir(directory)

    def test_base_command(self):
        """Base command-calls"""
        app = ParsInsert()
        self.assertEqual(app.BaseCommand,
                         ''.join(['cd "', getcwd(), '/"; ', 'ParsInsert']))

    def test_change_working_dir(self):
        """Change working dir"""
        app = ParsInsert(WorkingDir='/tmp/ParsInsertTest')
        # remove the working directory even when the assertion fails
        try:
            self.assertEqual(app.BaseCommand,
                             ''.join(['cd "', '/tmp/ParsInsertTest',
                                      '/"; ', 'ParsInsert']))
        finally:
            rmtree('/tmp/ParsInsertTest')

    def test_insert_sequences_into_tree(self):
        """Inserts sequences into Tree"""
        # define log fp
        log_fp = '/tmp/parsinsert.log'
        self._paths_to_clean_up.append(log_fp)

        # define tax assignment values fp
        tax_assign_fp = '/tmp/tax_assignments.log'
        self._paths_to_clean_up.append(tax_assign_fp)

        # set the reference alignment and starting tree
        param = {
            '-t': self.outtree,
            '-s': self.outfasta,
            '-l': log_fp,
            '-o': tax_assign_fp,
        }

        seqs, align_map = self.seqs.toPhylip()

        # insert sequences into tree
        tree = insert_sequences_into_tree(seqs, DNA, params=param)

        # rename tips back to query names
        for node in tree.tips():
            if node.Name in align_map:
                node.Name = align_map[node.Name]

        self.assertEqual(tree.getNewick(with_distances=True), exp_tree)
class ParsInsertTests(TestCase):
    """Tests for the ParsInsert app controller and its tree-insertion helper."""

    def setUp(self):
        """Writes the reference sequence and starting tree files to /tmp"""
        # create a list of files to cleanup
        self._paths_to_clean_up = []
        self._dirs_to_clean_up = []

        # load query seqs
        self.seqs = Alignment(MinimalFastaParser(QUERY_SEQS.split()))

        # generate temp filename
        tmp_dir = '/tmp'
        self.outfile = get_tmp_filename(tmp_dir)

        # create and write out reference sequence file
        self.outfasta = splitext(self.outfile)[0] + '.fasta'
        with open(self.outfasta, 'w') as fastaout:
            fastaout.write(REF_SEQS)
        self._paths_to_clean_up.append(self.outfasta)

        # create and write out starting tree file
        self.outtree = splitext(self.outfile)[0] + '.tree'
        with open(self.outtree, 'w') as treeout:
            treeout.write(REF_TREE)
        self._paths_to_clean_up.append(self.outtree)

    def tearDown(self):
        """cleans up all files initially created"""
        # Plain for-loops run eagerly on every Python version; map() used
        # purely for side effects does nothing on Python 3 because it is lazy.
        for path in self._paths_to_clean_up:
            remove(path)
        for directory in self._dirs_to_clean_up:
            rmdir(directory)

    def test_base_command(self):
        """Base command-calls"""
        app = ParsInsert()
        self.assertEqual(app.BaseCommand,
                         ''.join(['cd "', getcwd(), '/"; ', 'ParsInsert']))

    def test_change_working_dir(self):
        """Change working dir"""
        app = ParsInsert(WorkingDir='/tmp/ParsInsertTest')
        # the directory is removed whether or not the assertion passes
        try:
            self.assertEqual(app.BaseCommand,
                             ''.join(['cd "', '/tmp/ParsInsertTest',
                                      '/"; ', 'ParsInsert']))
        finally:
            rmtree('/tmp/ParsInsertTest')

    def test_insert_sequences_into_tree(self):
        """Inserts sequences into Tree"""
        # define log fp
        log_fp = '/tmp/parsinsert.log'
        self._paths_to_clean_up.append(log_fp)

        # define tax assignment values fp
        tax_assign_fp = '/tmp/tax_assignments.log'
        self._paths_to_clean_up.append(tax_assign_fp)

        # set the reference alignment and starting tree
        param = {
            '-t': self.outtree,
            '-s': self.outfasta,
            '-l': log_fp,
            '-o': tax_assign_fp,
        }

        seqs, align_map = self.seqs.toPhylip()

        # insert sequences into tree
        tree = insert_sequences_into_tree(seqs, DNA, params=param)

        # rename tips back to query names
        for node in tree.tips():
            if node.Name in align_map:
                node.Name = align_map[node.Name]

        self.assertEqual(tree.getNewick(with_distances=True), exp_tree)