def test_treefile_source(self):
    """
    Round-trip a Newick file through PhyTrees.from_treefile() and
    PhyTrees.write(), then verify the tree count and the report file.
    """
    source_path = 'Newick/f002.trees.newick'
    self.assertTrue(os.path.isfile(source_path))
    tree_db = PhyTrees.from_treefile(source_path, 'newick')

    written_path = 'tmp_test.newick'
    report_path = 'tmp_test.rep'
    # Register both outputs before writing (presumably removed by the test
    # case's teardown -- confirm against the fixture).
    for generated in (written_path, report_path):
        self.files_to_clean.add(generated)
    tree_db.write(written_path)
    self.assertTrue(os.path.isfile(written_path))

    # Both files must hold the same number of trees.
    expected_count = len(list(Phylo.parse(source_path, 'newick')))
    written_count = len(list(Phylo.parse(written_path, 'newick')))
    self.assertEqual(expected_count, written_count)

    # Every report line is the tree count, the history header or a
    # history entry for the source file.
    history_entry = re.compile(
        r"""\d\d\d\d/\d\d/\d\d\ \d\d:\d\d:\d\d[ ]+ [ ]+.*Tests/Newick/f002\.trees\.newick \ +newick""",
        re.VERBOSE)
    with open(report_path, 'r') as repfile:
        for line in repfile:
            is_known_line = (
                'Num. trees: 9' in line
                or 'History:' in line
                or bool(history_entry.match(line)))
            self.assertTrue(is_known_line)
def make_paml_trees(alignment_loc, tree_loc, test=False):
    """Write one pruned species tree per alignment file.

    For every folder in FOLDERS the 'aln_*' files are read, the species
    present in each alignment are collected, and every species in
    SPECIES_NAMES that is missing from the alignment is pruned from the
    topology in 'yeast_tree_topology.txt'.  The pruned tree is written as
    'tre_<gene>' in Newick format.

    Arguments:
        alignment_loc: directory holding the alignments (directly when
            ``test`` is true, otherwise in one sub-directory per folder).
        tree_loc: directory receiving the trees (same layout rule).
        test: when true, read/write directly in the given directories and
            return the lines of the last tree file written.

    Returns:
        The lines of the last written tree file in test mode, else [].
    """
    all_species = SPECIES_NAMES
    sample_output = []
    file_loc = None
    for folder in FOLDERS:
        if test:
            alignments = glob.glob(os.path.join(alignment_loc, 'aln_*'))
            out_dir = tree_loc
        else:
            alignments = glob.glob(os.path.join(alignment_loc, folder, 'aln_*'))
            out_dir = os.path.join(tree_loc, folder)
        # Create the directory the trees are actually written to.  The old
        # guard tested a non-empty path string, which is always truthy, so
        # genuine failures were silently ignored.
        try:
            os.makedirs(out_dir)
        except OSError:
            if not os.path.isdir(out_dir):
                raise
        for item in alignments:
            with open(item) as f_in:
                lines = f_in.readlines()
            if not lines:
                # NOTE(review): an empty alignment stops processing of the
                # remaining alignments in this folder; 'continue' may have
                # been intended -- confirm before changing.
                break
            # Species rows start with a lowercase letter; names are cut to
            # 10 characters.
            species = [line.split()[0][:10] for line in lines
                       if line[0].islower()]
            cut_nodes = list(set(all_species).difference(species))
            gene_name = os.path.basename(item)[4:]
            file_loc = os.path.join(out_dir, "tre_%s" % gene_name)
            # A fresh parse per alignment: pruning mutates the tree.
            for tree in Phylo.parse('yeast_tree_topology.txt', "newick"):
                for node in cut_nodes:
                    tree.prune(node)
                Phylo.write(tree, file_loc, "newick")
    if test and file_loc is not None:
        with open(file_loc, 'r') as f_in:
            sample_output = f_in.readlines()
    return sample_output
def fool_around_with_trees():
    """Exploratory tour of Bio.Phylo features on the file 'ape.tree'.

    Picks the tree named 'STATE_0', prints a few metrics, draws it in
    several ways and finally draws consensus trees built from all trees
    in the file.  Returns an empty tuple.
    """
    for candidate in Phylo.parse('ape.tree', 'nexus'):
        if candidate.name == 'STATE_0':
            print('success')
            tree_state0 = candidate

    # Path from the root to 'human' (4 nodes, called clades here).
    print(tree_state0.get_path('human'))
    total_length = tree_state0.total_branch_length()
    print('total_branch length=>' + str(total_length))
    all_depths = tree_state0.depths()
    print('depths=>' + str(all_depths))
    human_orangutan = tree_state0.distance('human', 'orangutan')
    # OK, this matches my own calculations.
    print('distance human urangutan=>' + str(human_orangutan))

    # Plain drawing: works.
    tree_state0.ladderize()
    Phylo.draw(tree_state0)
    # (Phylo.draw_graphviz was tried for a nicer graph but did not work.)

    # ASCII rendering: works.
    Phylo.draw_ascii(tree_state0)

    # Network graph: works, but the figure is not very clear.
    graph = Phylo.to_networkx(tree_state0)
    networkx.draw(graph)
    pylab.show()
    # (Reading a '[&R] ...' tree string through StringIO as 'nexus' was
    # tried and did not work.)

    # Consensus trees: work, but are not very informative.
    all_trees = list(Phylo.parse('ape.tree', 'nexus'))
    strict_tree = strict_consensus(all_trees)  # computed, not drawn
    majority_tree = majority_consensus(all_trees, 0.5)
    adam_tree = adam_consensus(all_trees)
    Phylo.draw(majority_tree)
    Phylo.draw(adam_tree)
    return ()
def check_newick(self, newick_file):
    """Tell whether the first tree of *newick_file* matches our sequences.

    Returns True when the terminal names of the first Newick tree are
    exactly the keys of ``self.sequence_lookup``.  Returns False when they
    differ or when the file cannot be read or parsed (deliberate
    best-effort: no exception escapes).
    """
    try:
        # next() works on Python 2 and 3; the former ``.next()`` raised
        # AttributeError on Python 3, which the bare except turned into a
        # permanent False.
        tree = next(Phylo.parse(newick_file, 'newick'))
        leaf_names = set(leaf.name for leaf in tree.get_terminals())
        # Plain comparison instead of assert, which is stripped under -O.
        return leaf_names == set(self.sequence_lookup.keys())
    except Exception:
        return False
def test_convert_phyloxml_filename(self):
    """Write phyloxml to a given filename."""
    trees = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
    # mkstemp creates the file atomically; mktemp only returned a name and
    # left a window for another process to claim it.
    fd, tmp_filename = tempfile.mkstemp()
    os.close(fd)
    try:
        count = Phylo.write(trees, tmp_filename, "phyloxml")
    finally:
        # Remove the temporary file even when writing fails.
        os.remove(tmp_filename)
    self.assertEqual(13, count)
def RootTree_cnv(self, OriNwk, Root):
    """Re-root a Newick string on *Root* and move the outgroup to the front.

    The tree is written to 'test.nwk', re-rooted with Bio.Phylo, written
    to 'newtree.nwk' and read back.  The text ',<Root>:0.00000' is then
    located, the branch length preceding it is extracted, and the string
    is rebuilt so that it starts with '(<Root>:<length>,'.

    Arguments:
        OriNwk: Newick text of the original tree.
        Root: name of the outgroup terminal.

    Returns:
        The rewritten Newick string terminated by a newline.

    Raises:
        IndexError: if 'newtree.nwk' is empty or ',<Root>:0.00000' does
            not occur in the re-rooted tree.
    """
    with open('test.nwk', 'w') as OutF:
        OutF.write(OriNwk)
    trees = list(Phylo.parse('test.nwk', 'newick'))
    for tree in trees:
        # Re-roots in place; the return value is not needed.
        tree.root_with_outgroup({'name': Root})
    Phylo.write(trees, 'newtree.nwk', "newick")
    with open('newtree.nwk', 'r') as InF:
        Tree = InF.readlines()[0].strip()
    Len = len(Tree)
    Posi = Tree.find(',' + Root + ':0.00000')
    # Negative index equivalent to Posi; walk backwards to the previous ':'.
    PosRev = -1 * (Len - Posi)
    LastBraLen = ''
    while Tree[PosRev] != ':':
        LastBraLen += Tree[PosRev]
        PosRev = PosRev - 1
    # Collected back to front, and it includes the ',' found at Posi.
    BraLen = LastBraLen[::-1]
    NewTree = '(' + Root + ':' + BraLen + Tree[2:].replace(
        '):' + BraLen + Root + ':0.00000', '') + '\n'
    return NewTree
def convert_boottrees(fname_trees):
    """Split a multi-tree Newick file into one file per tree.

    Tree number i is written to '<fname_trees>.codeml-<i>'.  Returns the
    list of written file names in tree order.
    """
    written = []
    bootstrap_trees = Phylo.parse(fname_trees, "newick")
    for index, boot_tree in enumerate(bootstrap_trees):
        target = "%s.codeml-%d" % (fname_trees, index)
        Phylo.write(boot_tree, target, "newick")
        written.append(target)
    return written
def from_treefile(cls, treefile, fileformat):
    """
    Build a PhyTrees object from every tree stored in a tree file.

    The path is resolved through get_abspath() before reading (per the
    original documentation, relative paths are taken from the current
    working directory).

    Arguments :
        treefile  ( string )
            Input tree file.
        fileformat  ( string )
            Input file format.

    Raises :
        IOError
            If the path or the file provided doesn't exist.

    * The file format must be supported by Bio.Phylo.
    * If the file format provided doesn't correspond to the actual file
      format, an empty tree list will be created.
    """
    source_path = get_abspath(treefile)
    parsed_trees = list(Phylo.parse(source_path, fileformat))
    # The new report starts with a single entry describing this source.
    timestamp = datetime.now().strftime('%Y/%m/%d %H:%M:%S')
    initial_report = [(timestamp, source_path, fileformat)]
    return cls(parsed_trees, initial_report)
def __init__(self, taxon_term_table, panther_tree_nhx, slim_terms=None):
    """Load the taxon/term table and the species tree.

    Arguments:
        taxon_term_table: tab-separated file; the header row names the
            taxon columns, each later row is a term followed by one value
            per taxon.
        panther_tree_nhx: species tree file, parsed as Newick.
        slim_terms: optional file with one term per line; when given and
            non-empty, only those terms are kept.
    """
    self.term_constraint_lists = {}
    self.taxon_indexes = {}
    self.slim_terms = []
    self.tree = None

    if slim_terms:
        # 'with' closes the handle even if reading fails.
        with open(slim_terms) as slim_file:
            self.slim_terms = [t.rstrip() for t in slim_file.readlines()]
    # Set used only for fast membership tests below.
    slim_lookup = set(self.slim_terms)

    with open(taxon_term_table) as t3f:
        headers = t3f.readline().rstrip().split("\t")
        for index_count, h in enumerate(headers[1:]):
            self.taxon_indexes[h] = index_count
        for l in t3f.readlines():
            # NOTE(review): rows are not stripped, so the last column
            # keeps its trailing newline -- confirm consumers expect that.
            cols = l.split("\t")
            go_term = cols[0]
            if not slim_lookup or go_term in slim_lookup:
                self.term_constraint_lists[go_term] = cols[1:]
    logger.debug("taxon_indexes: {}".format(len(self.taxon_indexes)))
    logger.debug("term_constraint_lists: {}".format(len(self.term_constraint_lists)))

    # Parse species_tree (first tree of the file only).
    self.tree = next(Phylo.parse(panther_tree_nhx, "newick"))
    self.tree.clade.name, self.tree.clade.id = extract_clade_name(self.tree.clade.comment)
    name_children(self.tree.clade)
def parse(file):
    """Read the first Newick tree of *file* into module-level globals.

    Sets three globals:
        rootnode: Node built from the first clade whose depth is 0.
        maxDepth: largest depth found in the tree.
        nodes: one Node per clade of the tree.

    Depths are counted in edges (unit branch lengths).  Returns None.

    Raises:
        ValueError: if no clade has depth 0.
    """
    global rootnode, maxDepth, nodes
    tree = next(Phylo.parse(file, "newick"))
    # Dictionary of pairs (clade: depth).
    levels = tree.depths(unit_branch_lengths=True)
    depth_values = list(levels.values())
    # First clade at depth 0; this replaces a second, redundant search
    # loop that located the same key again.
    root = list(levels.keys())[depth_values.index(0)]
    rootnode = Node(root.name, levels[root], root.count_terminals(), root.clades)
    maxDepth = max(depth_values)
    # Clades missing from the depth table default to depth 0.
    nodes = [
        Node(clade.name, levels.get(clade, 0), clade.count_terminals(), clade.clades)
        for clade in tree.find_clades()
    ]
    return
def newTreeInProject(self, treename, treefile, projectTitle, treetype):
    """Import the trees of *treefile* into the project's Mongo collection.

    Every tree parsed from the file is stored with
    phyloimport_algorithm.recursive_clade and rooted with
    root_phylotree_algorithm.addRootToTree; the project record is then
    updated with the new tree entry and the 'PhyloTree' datatype.

    Arguments:
        treename: name under which the tree is registered.
        treefile: path of the tree file.
        projectTitle: name of the owning project.
        treetype: Bio.Phylo format name of the file.
    """
    import phyloimport_algorithm
    import root_phylotree_algorithm
    collectionName = self.returnCollectionForObjectByName(
        projectTitle, 'PhyloTree', treename)
    treeCollection = self.db[collectionName]
    # Single-argument print calls: valid on Python 2 and 3 and identical
    # in output to the former print statements.
    print("uploading tree to collection:  %s" % (collectionName,))
    print("treetype is:  %s" % (treetype,))
    # create the new collection in mongo for this tree
    for tree in Phylo.parse(treefile, treetype):
        phyloimport_algorithm.recursive_clade(tree, treeCollection)
        root_phylotree_algorithm.addRootToTree(treeCollection)
    # add a tree record entry to the 'PhyloTree' array in the project record
    projects = self.db[self.projectCollectionName]
    projects.update(
        {"name": projectTitle},
        {'$push': {u'PhyloTree': {treename: treefile}}})
    projects.update(
        {"name": projectTitle},
        {'$addToSet': {u'datatypes': u'PhyloTree'}})
def read_trees(fname, species_polyploid, tree_repeats):
    """
    Load the input gene trees.

    Args:
        fname: tree file (one newick string per line)
        species_polyploid: list of Species objects
        tree_repeats: number of trees per locus

    Returns:
        list of parsed trees
    """
    with open(fname, "r") as handle:
        newick_lines = [raw.rstrip() for raw in handle]

    # Drop the blank lines at the end of the file.
    while newick_lines and not newick_lines[-1]:
        newick_lines.pop()

    # Append marker IDs.
    newick_lines = rename_polyploids(newick_lines, species_polyploid, tree_repeats)

    # Bio.Phylo wants a file-like object.
    buffer = io.StringIO("\n".join(newick_lines))
    return list(Phylo.parse(buffer, "newick"))
def from_phytrees(cls, phytrees_file):
    """
    Rebuild a PhyTrees object from previously saved tree and report files.

    The report file is expected next to the tree file, with the same base
    name and the '.rep' extension.  The path is resolved through
    get_abspath() first.

    Arguments :
        phytrees_file  ( string )
            Tree file generated by PhyTrees.write().

    Raises :
        ValueError
            If the number of trees read doesn't match the number stored
            in the report document.
    """
    tree_path = get_abspath(phytrees_file)
    rep_path = os.path.splitext(tree_path)[0] + '.rep'
    loaded_trees = list(Phylo.parse(tree_path, 'newick'))
    history = []
    with open(rep_path, 'r') as rep_handle:
        # First line: '<label>: <number of trees>'.
        count_line = rep_handle.readline()
        expected = int(count_line.split(':')[-1])
        if len(loaded_trees) != expected:
            raise ValueError('The number of trees at report file doesn\'t match '
                             'the number of trees loaded')
        # Skip the "History:" line.
        rep_handle.readline()
        for entry in rep_handle.readlines():
            date_time, filepath, fileformat = entry.strip().split(' ')
            history.append((date_time, filepath, fileformat))
    return cls(loaded_trees, history)
def main(nexusfile, reftree, burnin=10):
    """Summarise the topologies of the trees sampled in *nexusfile*.

    Arguments:
        nexusfile: Nexus file holding the sampled trees.
        reftree: reference tree handed to represent_clades().
        burnin: percentage of leading trees to discard.

    NOTE(review): the function computes several values it never uses or
    returns (N, topo_groups, MAP_*, clades); it looks unfinished.
    """
    # Using the Nexus module
    data = Nexus.Nexus(nexusfile)
    taxlabels = data.structured[1].commandlines[1].options.split()
    nb2taxlabels = data.translate

    # Using the Phylo module
    trees = list(Phylo.parse(nexusfile, 'nexus'))
    N0 = len(trees)
    # Floor division keeps the slice index an int on Python 3.
    trees = trees[N0 * burnin // 100 + 1:]
    N = N0 * (100 - burnin) // 100

    topologies = Counter()
    topo_groups = defaultdict(list)
    for tree in trees:
        # Ensure all equivalent topologies will be represented the same way
        biophylo_leaf_sort(tree, tree.root)
        topo = biophylo_topology(tree, tree.root)
        topologies[topo] += 1
        # Group by topology; calling .append on the defaultdict itself
        # raised AttributeError.
        topo_groups[topo].append(tree)

    MAP_topology, MAP_count = topologies.most_common(1)[0]
    MAP_proba = float(MAP_count) / sum(topologies.values())
    clades = represent_clades(reftree, BioPhylo.get_children, BioPhylo.get_label)
def load_trees_from_file(self, handle):
    """
    Parse a file containing Newick tree strings.

    Each tree is rotated, normalized, optionally resolved and annotated
    according to the instance settings, then appended to ``self.trees``.
    The kernel matrix and cached values are reset afterwards.

    Arguments:
        handle: file name or handle accepted by Bio.Phylo.parse.
    """
    self.trees = []
    for t in Phylo.parse(handle, 'newick'):
        # The settings live on the instance; the bare names rotate,
        # rotate2 and subtree used before are not defined in this scope.
        if self.rotate == 'ladder':
            t.ladderize()
        elif self.rotate == 'random':
            scramble(t)

        if self.rotate2 != 'none':
            # NOTE(review): assumes the instance stores 'subtree' next to
            # 'rotate2' -- confirm against __init__.
            gravitate(t, subtree=self.subtree, mode=self.rotate2)

        if self.normalize != 'none':
            self.normalize_tree(t, mode=self.normalize)

        if self.resolve_poly:
            collapse_polytomies(t)

        self.annotate_tree(t)
        self.trees.append(t)

    # Square zero matrix; ntrees is a count, so it must go through range().
    self.kmat = [[0 for i in range(self.ntrees)] for j in range(self.ntrees)]
    self.is_kmat_computed = False
    self.delta_values = {}
def _calculate_gsi(self):
    """
    Compute Gene Support Indices from the gene trees.

    Builds a majority consensus of the trees in 'all_gene.trees', prints
    it as ASCII art and writes it in Newick format.

    :return:
    """
    LOGGER.info("Calculating Gene Support Indices (GSIs)"
                " from the gene trees..")
    # One '.bcg' file per genome.
    bcg_dir = os.path.join(self._dirpath, self.config.bcg_dir)
    genome_num = sum(1 for entry in os.listdir(bcg_dir)
                     if entry.endswith('.bcg'))

    gene_trees_path = os.path.join(self._align_output_dir, "all_gene.trees")
    gene_trees = Phylo.parse(gene_trees_path, 'newick')
    cutoff = (100 - self.config.gsi_threshold) * genome_num / 100
    consensus = Consensus.majority_consensus(gene_trees, cutoff=cutoff)
    Phylo.draw_ascii(consensus)

    gsi_name = f'UBCG_gsi({self._bcg_num}){self.config.postfixes.align_tree_const}'
    ubcg_gsi_file = os.path.join(self._align_output_dir, gsi_name)
    with open(ubcg_gsi_file, 'w') as out_handle:
        Phylo.write(consensus, out_handle, 'newick')
    LOGGER.info("The final tree marked with GSI was written"
                " to %s", ubcg_gsi_file)
def action(args):
    """Rename the leaves of the first tree in ``args.tree`` and write it out.

    Steps applied to each leaf, in order:
      1. with ``args.info`` (rows of seqname,newname): leaves not listed
         are skipped entirely, listed ones get their new name;
      2. ``args.remove_word`` (a regular expression) is removed;
      3. surrounding whitespace is stripped;
      4. ``args.add_prefix`` and ``args.add_suffix`` are attached;
      5. for newick output, spaces become underscores.

    The tree is written to ``args.out`` in format ``args.tree_type``.
    """
    # next() instead of .next(): the latter exists on Python 2 only.
    tree = next(Phylo.parse(args.tree, args.tree_type))
    is_newick = args.tree_type == 'newick'

    info = None
    if args.info:
        reader = DictReader(args.info, fieldnames=['seqname', 'newname'])
        info = {row['seqname']: row['newname'] for row in reader}
        # for newick trees :s will be replaced by |s
        if is_newick:
            info = {s.replace(':', '|'): n for s, n in info.items()}

    # An explicit loop replaces a chain of generators that was consumed
    # only for its side effects; the per-leaf order of steps is unchanged.
    for leaf in tree.get_terminals():
        if info is not None:
            if leaf.name not in info:
                continue
            leaf.name = info[leaf.name]
        if args.remove_word:
            leaf.name = re.sub(args.remove_word, '', leaf.name)
        leaf.name = leaf.name.strip()
        leaf.name = args.add_prefix + leaf.name
        leaf.name = leaf.name + args.add_suffix
        # do this last
        if is_newick:
            leaf.name = leaf.name.replace(' ', '_')

    Phylo.write(tree, args.out, args.tree_type)
def action(args):
    """Rename tree leaves according to the command-line options.

    Only the first tree of ``args.tree`` is processed.  When ``args.info``
    is given, it supplies seqname/newname pairs and leaves without an
    entry are left untouched.  The remaining leaves then have
    ``args.remove_word`` (a regular expression) removed, are stripped, get
    ``args.add_prefix``/``args.add_suffix`` and, for newick trees, have
    spaces replaced by underscores.  The result is written to ``args.out``.
    """
    # Built-in next() is portable; ``.next()`` fails on Python 3.
    tree = next(Phylo.parse(args.tree, args.tree_type))
    newick = args.tree_type == 'newick'

    use_info = bool(args.info)
    info = {}
    if use_info:
        rows = DictReader(args.info, fieldnames=['seqname', 'newname'])
        info = {row['seqname']: row['newname'] for row in rows}
        if newick:
            # for newick trees :s will be replaced by |s
            info = {s.replace(':', '|'): n for s, n in info.items()}

    # Plain loop: the renaming is a side effect, so a generator pipeline
    # drained with list() only obscured it.  Step order per leaf is kept.
    for leaf in tree.get_terminals():
        if use_info:
            if leaf.name not in info:
                continue
            leaf.name = info[leaf.name]
        if args.remove_word:
            leaf.name = re.sub(args.remove_word, '', leaf.name)
        leaf.name = leaf.name.strip()
        leaf.name = args.add_prefix + leaf.name
        leaf.name = leaf.name + args.add_suffix
        if newick:
            # do this last
            leaf.name = leaf.name.replace(' ', '_')

    Phylo.write(tree, args.out, args.tree_type)
def load_trees_from_file(self, handle):
    """
    Parse a file containing Newick tree strings.

    Every parsed tree goes through the rotation, normalization, polytomy
    resolution and annotation steps selected on the instance and is
    stored in ``self.trees``; the kernel matrix is then reset to zeros.

    Arguments:
        handle: file name or handle accepted by Bio.Phylo.parse.
    """
    self.trees = []
    tree_iter = Phylo.parse(handle, 'newick')

    for t in tree_iter:
        # Read the options from self: rotate, rotate2 and subtree are not
        # defined as local or global names here.
        if self.rotate == 'ladder':
            t.ladderize()
        elif self.rotate == 'random':
            scramble(t)

        if self.rotate2 != 'none':
            # NOTE(review): 'self.subtree' is assumed to be set alongside
            # 'self.rotate2' -- confirm against the constructor.
            gravitate(t, subtree=self.subtree, mode=self.rotate2)

        if self.normalize != 'none':
            self.normalize_tree(t, mode=self.normalize)

        if self.resolve_poly:
            collapse_polytomies(t)

        self.annotate_tree(t)
        self.trees.append(t)

    # ntrees is a count; iterate over range() to build the square matrix.
    size = self.ntrees
    self.kmat = [[0 for i in range(size)] for j in range(size)]
    self.is_kmat_computed = False
    self.delta_values = {}
def newTreeInProjectFromString(self, treename, treestring, projectTitle, description, treetype):
    """Import a tree given as a string into a project's Mongo collection.

    Any existing collection for this tree is dropped first and the project
    is created when it does not exist.  The parsed trees are stored with
    phyloimport_algorithm.recursive_clade, the project record is updated,
    the stored tree is rooted and, when the Qt GUI is enabled, the
    list-changed signals are emitted.

    Arguments:
        treename: name under which the tree is registered.
        treestring: tree text in format *treetype*.
        projectTitle: name of the owning project.
        description: stored (as a string) next to the tree name.
        treetype: Bio.Phylo format name.
    """
    import phyloimport_algorithm
    import root_phylotree_algorithm
    # Python 2 module first (accepts byte strings); io on Python 3.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

    collectionName = (self.prefixString + projectTitle + self.separatorString
                      + "PhyloTree" + self.separatorString + treename)
    treeCollection = self.db[collectionName]
    treeCollection.drop()
    # Single-argument print calls: same output as the old print statements
    # and valid on Python 2 and 3.
    print("uploading tree to collection:  %s" % (collectionName,))
    print("treetype is:  %s" % (treetype,))

    # if the project does not exist, create it
    projectCollectionName = self.prefixString + 'projects'
    if self.db[projectCollectionName].find_one({"name": projectTitle}) is None:
        self.newProject(projectTitle)

    # The tree is encoded in a string, so wrap it in a file-like object.
    handle = StringIO(treestring)
    for tree in Phylo.parse(handle, treetype):
        phyloimport_algorithm.recursive_clade(tree, treeCollection)

    # add a tree record entry to the 'PhyloTree' array in the project record
    self.db[self.projectCollectionName].update(
        {"name": projectTitle},
        {'$push': {u'PhyloTree': {treename: str(description)}}})
    self.db[self.projectCollectionName].update(
        {"name": projectTitle},
        {'$addToSet': {u'datatypes': u'PhyloTree'}})

    # make sure the tree is rooted, so viewers work
    root_phylotree_algorithm.addRootToTree(treeCollection)

    # emit a signal so the GUI knows to update
    if self.QtGuiEnabled:
        self.datatypeListChangedSignal.emit()
        self.datasetListChangedSignal.emit()
def main():
    """Demo: inspect and draw one tree, compare two trees, show an ancestor.

    Reads the first tree of PATH_EXAMPLE with Bio.Phylo, prints its basic
    information and draws it; computes Euclidean and Robinson-Foulds
    distances between PATH_EXAMPLE and PATH_BIF with dendropy; finally
    colours and draws the common ancestor of terminals "C" and "D".
    """
    # First tree of the example file.
    tree_object = next(Phylo.parse(PATH_EXAMPLE, 'newick'))

    print(tree_info(tree_object))
    Phylo.draw(tree_object)

    # Both trees must share one taxon namespace to be comparable.
    shared_namespace = dendropy.TaxonNamespace()
    first = Tree.get_from_path(PATH_EXAMPLE, 'newick', taxon_namespace=shared_namespace)
    second = Tree.get_from_path(PATH_BIF, 'newick', taxon_namespace=shared_namespace)
    euclidean_distance = treecompare.euclidean_distance(first, second)
    robinson_distance = treecompare.robinson_foulds_distance(first, second)
    print("Robinson Foulds distance: ", robinson_distance)
    print("Euclidean distance: ", euclidean_distance)

    # Common ancestor of C and D, highlighted in blue.
    ancestor = tree_object.common_ancestor({"name": "C"}, {"name": "D"})
    ancestor.color = "blue"
    print("COMMON ANCESTOR: ", ancestor)
    Phylo.draw(ancestor)
def main():
    """Command-line entry point: report close tip pairs for each tree.

    Parses the arguments, refuses to overwrite an existing output file
    unless --overwrite is given, then writes one CSV row per pair returned
    by find_short_edges() for every tree of the input file.
    """
    parser = argparse.ArgumentParser(
        description='Generate clusters of tips from a tree that have a path length within '
                    'a maximum distance of each other.'
    )
    parser.add_argument('tree', help='<input> file containing Newick tree string.')
    parser.add_argument('cutoff', type=float, help='Maximum patristic distance.')
    parser.add_argument('outfile', default=None, help='<output> file to write results in CSV format.')
    # NOTE(review): --minimize and --keep_ties are accepted but never used
    # in this function.
    parser.add_argument('--minimize', help='Report no more than one nearest neighbour per tip.',
                        action='store_true')
    parser.add_argument('--keep_ties', help='If more than one tip has the same patristic distance, '
                                            'report all as nearest neighbours.',
                        action='store_true')
    parser.add_argument('--overwrite', help='Overwrite existing output file.', action='store_true')
    args = parser.parse_args()

    # Explicit check: an assert disappears under ``python -O``.
    if not args.cutoff > 0:
        parser.error('Cutoff %f must be greater than 0.' % (args.cutoff, ))

    if os.path.exists(args.outfile) and not args.overwrite:
        # Single-string print: same text on Python 2 and 3.
        print('Output file %s already exists, use --overwrite.' % (args.outfile, ))
        sys.exit()

    # 'with' closes the file even if a tree fails to parse.
    with open(args.outfile, 'w') as outfile:
        # NOTE(review): the header names five columns (incl. is.tie) but
        # each row has four values -- confirm the intended format.
        outfile.write('tree,tip1,tip2,dist,is.tie\n')
        for treenum, tree in enumerate(Phylo.parse(args.tree, 'newick')):
            results = find_short_edges(tree, args.cutoff)
            # items() exists on Python 2 and 3; iteritems() only on 2.
            for key, dist in results.items():
                outfile.write('%d,%s,%s,%f\n' % (treenum, key[0], key[1], dist))
def nexus2nhx(infile, outfile):
    """Convert Nexus trees to Newick with NHX-style comments.

    Designed to work with Beast/treeannotator output: the comment of every
    node is parsed with beast_comment_parser and re-encoded with
    NHX_comment_formatter before the tree is written.
    """
    with open(outfile, 'w') as nhx_handle:
        for beast_tree in Phylo.parse(infile, 'nexus'):
            every_node = beast_tree.get_terminals() + beast_tree.get_nonterminals()
            for clade in every_node:
                annotations = beast_comment_parser(clade.comment)
                clade.comment = NHX_comment_formatter(annotations)
            Phylo.write(beast_tree, nhx_handle, 'newick')
def __init__(self, filename):
    """Load confidence values and metadata from a Newick file.

    The data come from getConfidence() applied to the root of the first
    tree.  The genes, method and population count are taken from the 2nd,
    5th and 6th '_'-separated fields of the file's base name.
    """
    first_tree = next(Phylo.parse(filename, 'newick'))
    self.data = np.array(getConfidence(first_tree.root))

    name_fields = os.path.basename(filename).split('_')[:6]
    _, genes, _, _, self.method, pops = name_fields
    self.pops = int(pops)
    self.genes = genes.split('-')
    # Short label: '<genes as written in the name>/<population count>'.
    self.repr = f'{genes}/{self.pops}'
def test_parse(self):
    """Extract and count phylogenetic trees using Phylo.parse."""
    for filename in nexml_files:
        path = os.path.join("NeXML", filename)
        # Files missing from tree_counts are expected to hold one tree.
        expected = tree_counts.get(filename, 1)
        parsed = list(Phylo.parse(path, "nexml"))
        self.assertEqual(len(parsed), expected,
                         msg="Failed parser test for %s" % path)
def get_species_names_in_tree(trees_filename):
    """Return the set of terminal names found in any tree of a Newick file."""
    logger.debug("Getting species names from trees %s" % trees_filename)
    return {
        leaf.name
        for tree in Phylo.parse(trees_filename, "newick")
        for leaf in tree.get_terminals()
    }
def test_majority_consensus(self):
    """Majority consensus must match the reference topologies."""
    references = Phylo.parse('./TreeConstruction/majority_ref.tre', 'newick')

    # First reference: default cutoff.
    expected_default = next(references)
    built_default = Consensus.majority_consensus(self.trees)
    self.assertTrue(Consensus._equal_topology(built_default, expected_default))

    # Second reference: cutoff of 1.
    expected_full = next(references)
    built_full = Consensus.majority_consensus(self.trees, 1)
    self.assertTrue(Consensus._equal_topology(built_full, expected_full))
def load_tree_impl(file):
    """Load a tree, save a PNG drawing of it and return summary figures.

    The format is looked up in phylo_formats by file suffix.  When
    Phylo.read raises ValueError, the first tree yielded by Phylo.parse is
    used instead.

    Returns:
        (path of the PNG next to the input, number of terminals, total
        branch length), the last two taken from the PhyloXML form of the
        tree.
    """
    source = pathlib.Path(file)
    tree_format = phylo_formats[source.suffix]
    try:
        loaded = Phylo.read(file, tree_format)
    except ValueError:
        loaded = next(Phylo.parse(file, tree_format))

    Phylo.draw(loaded, do_show=False)
    image_path = source.with_suffix(".png")
    plt.savefig(image_path)

    as_xml = loaded.as_phyloxml()
    return image_path, as_xml.count_terminals(), as_xml.total_branch_length()
def test_convert_phyloxml_binary(self):
    """Try writing phyloxml to a binary handle; fail on Py3."""
    trees = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
    with tempfile.NamedTemporaryFile(mode="wb") as out_handle:
        if sys.version_info[0] >= 3:
            # Binary handles are rejected on Python 3.
            self.assertRaises(TypeError, Phylo.write, trees, out_handle, "phyloxml")
        else:
            written = Phylo.write(trees, out_handle, "phyloxml")
            self.assertEqual(13, written)
def mbtrees(input_file, output_file, **kwargs):
    """
    Get a list of trees from output of MrBayes.

    Writes, in Newick format, every tree of the Nexus input whose index is
    a multiple of kwargs["downsample"] and greater than kwargs["burnin"].
    """
    from Bio import Phylo

    with open(output_file, "w") as newick_out:
        sampled = Phylo.parse(input_file, format="nexus")
        for index, tree in enumerate(sampled):
            if index % kwargs["downsample"] != 0:
                continue
            if not index > kwargs["burnin"]:
                continue
            Phylo.write(tree, newick_out, format="newick")
def test_include(self):
    """
    Test PhyTrees.include() method.
    """
    newick_path = 'Newick/f002.trees.newick'
    nexus_path = 'Nexus/f005.trees.nexus'
    for required in (newick_path, nexus_path):
        self.assertTrue(os.path.isfile(required))

    tree_db = PhyTrees.from_treefile(newick_path, 'newick')
    tree_db.include(nexus_path, 'nexus')

    # The database must hold the trees of both sources.
    newick_count = len(list(Phylo.parse(newick_path, 'newick')))
    nexus_count = len(list(Phylo.parse(nexus_path, 'nexus')))
    self.assertEqual(newick_count + nexus_count, len(tree_db))

    # One report entry per source: (date, path, format).
    first_entry, second_entry = tree_db._report[0], tree_db._report[1]
    self.assertIn('Tests/Newick/f002.trees.newick', first_entry[1])
    self.assertIn('newick', first_entry[2])
    self.assertIn('Tests/Nexus/f005.trees.nexus', second_entry[1])
    self.assertIn('nexus', second_entry[2])
def test_include(self):
    """
    Test PhyTrees.include() method.
    """
    sources = (
        ('Newick/f002.trees.newick', 'newick'),
        ('Nexus/f005.trees.nexus', 'nexus'),
    )
    (infile1, format1), (infile2, format2) = sources
    self.assertTrue(os.path.isfile(infile1))
    self.assertTrue(os.path.isfile(infile2))

    tree_db = PhyTrees.from_treefile(infile1, format1)
    tree_db.include(infile2, format2)

    # Check the tree data: total size is the sum of both inputs.
    total_expected = (len(list(Phylo.parse(infile1, format1)))
                      + len(list(Phylo.parse(infile2, format2))))
    self.assertEqual(total_expected, len(tree_db))

    # Check the report information, entry by entry.
    report = tree_db._report
    self.assertIn('Tests/Newick/f002.trees.newick', report[0][1])
    self.assertIn('newick', report[0][2])
    self.assertIn('Tests/Nexus/f005.trees.nexus', report[1][1])
    self.assertIn('nexus', report[1][2])
def RootTree_rootBottom(self, OriNwk, RootTaxa):
    """Re-root a Newick string on the terminal named *RootTaxa*.

    The text is written to 'test.nwk', re-rooted with Bio.Phylo and saved
    to 'test1.nwk'.

    Returns:
        The first line of 'test1.nwk', including its line ending.

    Raises:
        IndexError: if the written file is empty.
    """
    with open('test.nwk', 'w') as OutF:
        OutF.write(OriNwk)
    trees = list(Phylo.parse('test.nwk', 'newick'))
    for tree in trees:
        # Re-roots in place; the return value is not needed.
        tree.root_with_outgroup({'name': RootTaxa})
    Phylo.write(trees, 'test1.nwk', "newick")
    # 'with' closes the handle that used to be left open.
    with open('test1.nwk', 'r') as InF:
        return InF.readlines()[0]
def Tree_Filter(tree_url, value):
    """Tell whether any tree at *tree_url* has a clade name matching *value*.

    The Newick text is downloaded with requests and every tree is searched
    with find_any() for a name matching '.*<value>.*'.  Returns True at
    the first hit and False when no tree matches.
    """
    newick_text = requests.get(tree_url).text
    name_pattern = ".*{0}.*".format(value)
    candidates = Phylo.parse(StringIO(newick_text), "newick")
    return any(
        tree.find_any(name=name_pattern) is not None
        for tree in candidates
    )
def __init__(self, tree_file):
    """Load the tree stored on the first line of *tree_file*.

    Only the first line is read; it is parsed as Newick and the first
    resulting tree is kept in ``self.tree``.
    """
    with open(tree_file) as tf:
        first_line = tf.readline()
    newick_buffer = StringIO(first_line)
    # Leaves parse clean due to not having species name in 'S:'
    parsed_tree = next(Phylo.parse(newick_buffer, "newick"))
    self.tree: Newick.Tree = parsed_tree
def test_convert_phyloxml_to_newick_branch_length_only(self):
    """Write phyloxml with bootstrap values to newick format using branch_length_only=True"""
    trees = Phylo.parse(EX_APAF, "phyloxml")
    # mkstemp creates the file safely; mktemp only produced a name.
    fd, tmp_filename = tempfile.mkstemp()
    os.close(fd)
    try:
        Phylo.write(trees, tmp_filename, "newick", branch_length_only=True)
    except TypeError:
        self.fail()
    finally:
        # Clean up on every path, including a failed write.
        os.remove(tmp_filename)
def detect_type(filename):
    """
    Detect the format of a file.

    :param filename: File to read and detect the format
    :return: the MIME type when it is not "text/plain"; otherwise the
             first matching label among "fasta", "phylip" (strict or
             relaxed), "nhx" (Newick), or "txt" when nothing matches

    Formats are probed with biopython SeqIO or Phylo, in that order; a
    format matches when parsing yields at least one record without
    raising.
    """
    mimetype = magic.from_file(filename, mime=True)
    if mimetype != "text/plain":
        return mimetype

    def yields_records(parser, fmt):
        # A parser error simply means "not this format".
        try:
            return sum(1 for _ in parser(filename, fmt)) > 0
        except Exception:
            return False

    # (parser, biopython format name, label returned to the caller)
    probes = (
        (SeqIO.parse, "fasta", "fasta"),
        (SeqIO.parse, "phylip", "phylip"),
        (SeqIO.parse, "phylip-relaxed", "phylip"),
        (Phylo.parse, 'newick', "nhx"),
    )
    for parser, fmt, label in probes:
        if yields_records(parser, fmt):
            return label
    return "txt"
def load_tree(tree_filename):
    """Collect the tip names of every tree in a Newick file.

    Returns None when *tree_filename* is None; otherwise the set of names
    returned by get_tip_names() for the root of each tree.  Progress is
    printed to standard output.
    """
    if tree_filename is None:
        return None

    print('\nLoading tree:')
    whitelist_assemblies = set()
    for tree in Phylo.parse(tree_filename, 'newick'):
        whitelist_assemblies.update(get_tip_names(tree.root))
    print(' found {:,} assemblies in {}'.format(len(whitelist_assemblies), tree_filename))
    return whitelist_assemblies
def test_majority_consensus(self):
    """Majority consensus of the three trees must match the references."""
    references = list(Phylo.parse("./TreeConstruction/majority_ref.tre", "newick"))

    # Default cutoff against the first reference tree.
    default_consensus = Consensus.majority_consensus(self.trees)
    self.assertTrue(Consensus._equal_topology(default_consensus, references[0]))

    # Cutoff of 1 against the second reference tree.
    full_consensus = Consensus.majority_consensus(self.trees, 1)
    self.assertTrue(Consensus._equal_topology(full_consensus, references[1]))
def checkLength(treeFile):
    """Tell whether the mean branch length of a PhyloXML tree is at least 1.

    Only the first tree of *treeFile* is examined; its branch lengths come
    from getAllBranchLength().

    Returns:
        False when the mean is below 1, True otherwise.

    Raises:
        ZeroDivisionError: if the tree has no branch lengths.
    """
    # next() is portable; ``.next()`` exists on Python 2 only.
    tree = next(Phylo.parse(treeFile, 'phyloxml'))
    branchList = getAllBranchLength(tree)
    # Built-in sum/len replace a manual accumulation loop that shadowed
    # the built-in ``sum``.
    avg = sum(branchList) / len(branchList)
    return not avg < 1
def test_treefile_source(self):
    """
    Test PhyTrees.from_treefile() and PhyTrees.write() methods.
    """
    infile = 'Newick/f002.trees.newick'
    self.assertTrue(os.path.isfile(infile))
    tree_db = PhyTrees.from_treefile(infile, 'newick')

    outfile, outrepfile = 'tmp_test.newick', 'tmp_test.rep'
    self.files_to_clean.add(outfile)
    self.files_to_clean.add(outrepfile)
    tree_db.write(outfile)
    self.assertTrue(os.path.isfile(outfile))

    # Input and output must contain the same number of trees.
    trees_in = list(Phylo.parse(infile, 'newick'))
    trees_out = list(Phylo.parse(outfile, 'newick'))
    self.assertEqual(len(trees_in), len(trees_out))

    # Each report line is one of three known kinds.
    def is_history_entry(text):
        return bool(re.match(
            r"""\d\d\d\d/\d\d/\d\d\ \d\d:\d\d:\d\d[ ]+ [ ]+.*Tests/Newick/f002\.trees\.newick \ +newick""",
            text, re.VERBOSE))

    with open(outrepfile, 'r') as repfile:
        for report_line in repfile.readlines():
            self.assertTrue(
                ('Num. trees: 9' in report_line)
                or ('History:' in report_line)
                or is_history_entry(report_line))
def __main__():
    """Extract a zip of PhyloXML trees and build one HTML page per tree.

    The archive named by the first command-line argument is extracted into
    the current directory.  A tree that fails checkLength() is rewritten
    with changeLength() to '<name>_adj.xml' and that file is used instead.
    Pages are named 'tree_<n>.html', numbered from 0 in archive order.
    """
    # NOTE(review): the archive comes from the command line; confirm it is
    # trusted before extracting into the working directory.
    with zipfile.ZipFile(sys.argv[1], 'r') as aZip:
        aZip.extractall('.')
        names = aZip.namelist()

    # enumerate replaces a manual counter that shadowed the built-in id.
    for index, treeFile in enumerate(names):
        if not checkLength(treeFile):
            # next() is portable; ``.next()`` exists on Python 2 only.
            tree = next(Phylo.parse(treeFile, 'phyloxml'))
            newTree = changeLength(tree)
            newTName = treeFile.split('.')[0] + '_adj.xml'
            Phylo.write(newTree, newTName, 'phyloxml')
            treeFile = newTName

        outputFileName = 'tree_' + str(index) + '.html'
        writeHeaderFile(outputFileName)
        writeEndofFile(outputFileName, treeFile)
def newTreeInProject(self, treename, treefile, projectTitle, treetype):
    """Store the trees of a file in the project's Mongo collection.

    Each tree read from *treefile* (format *treetype*) is imported with
    phyloimport_algorithm.recursive_clade and rooted with
    root_phylotree_algorithm.addRootToTree.  The project record then gets
    a {treename: treefile} entry and the 'PhyloTree' datatype.
    """
    import phyloimport_algorithm
    import root_phylotree_algorithm

    collectionName = self.returnCollectionForObjectByName(projectTitle, 'PhyloTree', treename)
    treeCollection = self.db[collectionName]
    # Output matches the former print statements (note the double space)
    # while also being valid Python 3.
    print("uploading tree to collection:  %s" % (collectionName,))
    print("treetype is:  %s" % (treetype,))

    # create the new collection in mongo for this tree
    trees = Phylo.parse(treefile, treetype)
    for tree in trees:
        phyloimport_algorithm.recursive_clade(tree, treeCollection)
        root_phylotree_algorithm.addRootToTree(treeCollection)

    # add a tree record entry to the 'PhyloTree' array in the project record
    project_filter = {"name": projectTitle}
    self.db[self.projectCollectionName].update(
        project_filter, {'$push': {u'PhyloTree': {treename: treefile}}})
    self.db[self.projectCollectionName].update(
        project_filter, {'$addToSet': {u'datatypes': u'PhyloTree'}})
def test_strict_consensus(self):
    """Strict consensus of several tree subsets must match the references."""
    ref_trees = list(Phylo.parse('./TreeConstruction/strict_refs.tre', 'newick'))
    # (input trees, index of the expected reference tree)
    cases = (
        (self.trees, 0),       # three trees
        (self.trees[:2], 1),   # tree 1 and tree 2
        (self.trees[::2], 2),  # tree 1 and tree 3
    )
    for subset, ref_index in cases:
        consensus_tree = Consensus.strict_consensus(subset)
        self.assertTrue(
            Consensus._equal_topology(consensus_tree, ref_trees[ref_index]))
def newTreeInProject(self, treename, treefile, projectTitle, treetype):
    """Import the trees of a file into Mongo and register them in a project.

    The collection is named '<prefix><project>_PhyloTree_<treename>'.
    After the import the project record in ``ar_projects`` is updated and
    the GUI list-changed signals are emitted.
    """
    collectionName = self.prefixString + projectTitle + "_" + "PhyloTree" + "_" + treename
    treeCollection = self.db[collectionName]
    # Single-argument print calls: same output as the old print statements
    # and valid on Python 2 and 3.
    print("uploading tree to collection:  %s" % (collectionName,))
    print("treetype is:  %s" % (treetype,))

    # create the new collection in mongo for this tree
    for tree in Phylo.parse(treefile, treetype):
        phyloimport_algorithm.recursive_clade(tree, treeCollection)

    # add a tree record entry to the 'PhyloTree' array in the project record
    self.db.ar_projects.update(
        {"name": projectTitle},
        {'$push': {u'PhyloTree': {treename: treefile}}})
    self.db.ar_projects.update(
        {"name": projectTitle},
        {'$addToSet': {u'datatypes': u'PhyloTree'}})

    # emit a signal so the GUI knows to update
    self.datatypeListChangedSignal.emit()
    self.datasetListChangedSignal.emit()
def readconvert(filesuffix, treeformat_input, treeformat_output, namesfile):
    """Rename tree tips using a lookup table and convert the tree format.

    Arguments:
        filesuffix: extension (with or without dots) of the tree files to
            process in the current directory.
        treeformat_input: Bio.Phylo format of the input files.
        treeformat_output: Bio.Phylo format of the output files.
        namesfile: tab-separated file, old name then new name per line.

    Each tree is written to the input name with '.tree' replaced by
    '_tipsrenamed.tree'.
    NOTE(review): every tree of a multi-tree file goes to the same output
    path, and inputs whose name lacks '.tree' are overwritten -- confirm
    this is acceptable.
    """
    idtable = {}
    # 'with' closes the names file, which used to stay open.
    with open(namesfile, "r") as f:
        for line in f:
            fields = line.rstrip().split("\t")
            idtable[fields[0]] = fields[1]

    # this is the list containing the file names
    filelist = glob.glob('*.' + str(filesuffix.replace('.', '')))
    for i in filelist:
        for t in Phylo.parse(i, treeformat_input):
            for node in t.get_terminals():
                name = node.name
                if name in idtable:
                    node.name = idtable[name]
                else:
                    # Formatting also copes with unnamed (None) tips and
                    # runs on Python 2 and 3.
                    print('%s not in table' % (name,))
            Phylo.write(t, i.replace('.tree', '_tipsrenamed.tree'), treeformat_output)
def test_adam_consensus(self):
    """Compare Adam consensus trees with the stored reference topologies.

    The references parsed from ``adam_refs.tre`` are matched, in order,
    against the consensus of all of ``self.trees``, of its first two
    entries, and of every second entry.
    """
    expected = list(Phylo.parse("./TreeConstruction/adam_refs.tre", "newick"))

    def check(subset, position):
        # Build the consensus for one subset and compare its topology
        # with the reference stored at the same position.
        built = Consensus.adam_consensus(subset)
        self.assertTrue(Consensus._equal_topology(built, expected[position]))

    # all three trees
    check(self.trees, 0)
    # tree 1 and tree 2
    check(self.trees[:2], 1)
    # tree 1 and tree 3
    check(self.trees[::2], 2)
def test_bioseqs_source(self):
    """
    Test PhyTrees.from_phytrees() method and len() property.
    """
    infile = 'PhyTrees/f002.trees.newick'
    inrepfile = 'PhyTrees/f002.trees.rep'
    self.assertTrue(os.path.isfile(infile))
    self.assertTrue(os.path.isfile(inrepfile))
    tree_db = PhyTrees.from_phytrees(infile)
    # Check the content of the PhyTrees' object
    self.assertEqual(len(tree_db), len(list(Phylo.parse(infile, 'newick'))))
    # Check the content of the PhyTrees' report
    with open(inrepfile, 'r') as repfile:
        count_line = repfile.readline().strip()  # Num. trees: 9
        # Use the last whitespace-separated token so that counts with any
        # number of digits are read in full.
        self.assertEqual(len(tree_db), int(count_line.split()[-1]))
        repfile.readline()  # History:
        source_line = repfile.readline().strip()  # [First source information]
        source_info = source_line.split(' ')
        self.assertEqual(tree_db._report[0], tuple(source_info))
def newTreeInProjectFromString(self, treename, treestring, projectTitle,
                               description, treetype):
    """Import trees held in a string into their own collection and register them.

    ``treestring`` is wrapped in a file-like object and parsed with
    ``Phylo.parse``; every tree is passed to
    ``phyloimport_algorithm.recursive_clade`` together with the collection
    named ``<prefixString><projectTitle>_PhyloTree_<treename>``.  The
    ``ar_projects`` record whose ``name`` equals ``projectTitle`` then gets
    a ``{treename: str(description)}`` entry pushed onto its ``PhyloTree``
    array and ``PhyloTree`` added to its ``datatypes`` set.  Afterwards
    ``root_phylotree_algorithm.addRootToTree`` is run on the collection and
    both list-changed signals are emitted.

    Arguments:
        treename: used in the collection name and as the key of the
            project entry.
        treestring: text of the tree(s) to parse.
        projectTitle: used in the collection name and matched against the
            ``name`` field of ``ar_projects``.
        description: stored (as ``str``) as the value of the project entry.
        treetype: format name passed to ``Phylo.parse``.
    """
    # The StringIO module only exists on Python 2; fall back to io on
    # Python 3 so the import no longer fails there.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

    collectionName = (self.prefixString + projectTitle + "_" + "PhyloTree"
                      + "_" + treename)
    treeCollection = self.db[collectionName]

    # Single-argument print calls run unchanged on Python 2 and Python 3.
    # The two spaces reproduce the output of the former print statements
    # (a literal ending in a space followed by the item separator).
    print("uploading tree to collection:  %s" % (collectionName,))
    print("treetype is:  %s" % (treetype,))

    # create the new collection in mongo for this tree.  The tree is encoded
    # in a string, so it needs to be processed slightly different than from
    # a file
    handle = StringIO(treestring)
    trees = Phylo.parse(handle, treetype)
    for tree in trees:
        phyloimport_algorithm.recursive_clade(tree, treeCollection)

    # add a tree record entry to the 'PhyloTree' array in the project record
    self.db.ar_projects.update(
        {"name": projectTitle},
        {'$push': {u'PhyloTree': {treename: str(description)}}})
    self.db.ar_projects.update(
        {"name": projectTitle},
        {'$addToSet': {u'datatypes': u'PhyloTree'}})

    # make sure the tree is rooted, so viewers work
    root_phylotree_algorithm.addRootToTree(treeCollection)

    # emit a signal so the GUI knows to update
    self.datatypeListChangedSignal.emit()
    self.datasetListChangedSignal.emit()
def consensus(outdir, min_freq=0.5, is_rooted=True, trees_splits_encoded=False):
    """Generate a rooted consensus tree from ``<outdir>/distribution.tre``.

    Tips that are not present in every tree of the distribution are pruned
    first so that all trees carry the same taxa.  The pruned trees are
    written to a scratch Newick file, loaded into a dendropy ``TreeList``
    with forced rooting, and summarised into a consensus tree that is
    written to ``<outdir>/consensus.tre``.

    Arguments:
        outdir: directory containing ``distribution.tre``; receives
            ``consensus.tre``.
        min_freq: passed as ``min_freq`` to dendropy's ``tree_from_splits``.
        is_rooted: currently unused; kept so existing callers keep working.
        trees_splits_encoded: currently unused; kept so existing callers
            keep working.

    Returns:
        False (nothing written) when fewer than three tip names occur in
        all trees, True after the consensus file has been written.
    """
    scratch_path = '.for_consensus.tre'

    # read in from distribution.tre
    phylogenies = list(
        Phylo.parse(os.path.join(outdir, 'distribution.tre'), 'newick'))
    all_tip_names = [[tip.name for tip in phylogeny.get_terminals()]
                     for phylogeny in phylogenies]

    # Count tip names tree by tree; this stays linear in the total number
    # of tips instead of concatenating all the lists first.
    counted = Counter()
    for tip_names in all_tip_names:
        counted.update(tip_names)
    to_drop = {name for name in counted if counted[name] < len(phylogenies)}
    if (len(counted) - len(to_drop)) < 3:
        return False

    # Drop every tip that is missing from at least one tree.
    for tip_names, phylogeny in zip(all_tip_names, phylogenies):
        for tip_name in tip_names:
            if tip_name in to_drop:
                phylogeny.prune(tip_name)

    try:
        with open(scratch_path, "w") as handle:
            Phylo.write(phylogenies, handle, 'newick')
        # create dendropy list
        trees = dp.TreeList()
        trees.read_from_path(scratch_path, "newick", rooting='force-rooted')
    finally:
        # Remove the scratch file even when writing or reading failed.
        if os.path.exists(scratch_path):
            os.remove(scratch_path)

    # https://groups.google.com/forum/#!topic/dendropy-users/iJ32ibnS5Bc
    sd = dp.SplitDistribution(taxon_namespace=trees.taxon_namespace)
    tsum = dp.calculate.treesum.TreeSummarizer()
    tsum.count_splits_on_trees(trees, split_distribution=sd)
    consensus_tree = tsum.tree_from_splits(sd, min_freq=min_freq)
    consensus_tree.write_to_path(os.path.join(outdir, 'consensus.tre'), "newick")
    return True
def standard_test(self, informat, outformat, params):
    """
    Common procedure shared by the consensus tree tests.

    Arguments :
        informat  ( string )
            Format of the input tree file.
        outformat  ( string )
            Format requested for the output tree file.
        params  ( string )
            Arguments handed over to the consensus tree tool.
    """
    source = '{}/f002.trees.{}'.format(informat.capitalize(), informat)
    target = 'tmp_test.tree'
    self.add_file_to_clean(target)
    # The input file must exist and contain nine trees
    self.assertTrue(os.path.isfile(source))
    parsed = list(Phylo.parse(source, informat))
    self.assertEqual(len(parsed), 9)
    # Build the consensus tree
    PhyloAssemble.get_consensus_tree(
        consense_exe,
        source,
        informat,
        args=params,
        outfile=target,
        outfile_format=outformat,
    )
    # The output file must have been created
    self.assertTrue(os.path.isfile(target))
def test_newick_read_multiple(self):
    """Read a multi-tree Nexus file and check the tree and tip counts."""
    parsed = list(Phylo.parse(EX_NEXUS, 'nexus'))
    self.assertEqual(len(parsed), 3)
    # Every tree is expected to have nine terminal nodes.
    for terminals in (tree.get_terminals() for tree in parsed):
        self.assertEqual(len(terminals), 9)
def setUp(self):
    """Parse EX_PHYLO as PhyloXML and keep the trees in ``self.phylogenies``."""
    parsed = Phylo.parse(EX_PHYLO, 'phyloxml')
    self.phylogenies = list(parsed)
def test_convert_phyloxml_text(self):
    """Check the value returned when writing PhyloXML to a text-mode handle."""
    source = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
    with tempfile.NamedTemporaryFile(mode="w") as sink:
        written = Phylo.write(source, sink, "phyloxml")
        self.assertEqual(13, written)