def resolve_node(node):
    """Return Newick strings for the two-way groupings of ``node``'s children.

    Every bipartition yielded by ``list_bipartitions`` that has more than one
    member is turned into a small tree: a new root with two new internal
    nodes, one holding the members of the bipartition and the other holding
    the remaining children.  Each such tree is serialized with
    ``as_string("newick")``.

    :param node: node whose children are to be grouped.
    :return: list of Newick strings, one per bipartition with > 1 member.
    """
    children = node.child_nodes()
    newicks = []
    for part in list_bipartitions(children):
        if len(part) <= 1:
            continue
        rest = [child for child in children if child not in part]
        root = Node()
        inside = Node()
        outside = Node()
        for child in part:
            inside.add_child(child)
        for child in rest:
            outside.add_child(child)
        root.add_child(inside)
        root.add_child(outside)
        newicks.append(Tree(seed_node=root).as_string("newick"))
    # Hand every child back to ``node`` (presumably to undo the re-parenting
    # done while building the temporary trees above).
    for child in children:
        node.add_child(child)
    return newicks
def graph2tree(G, root=0, names=None):
    """Convert an adjacency-list graph into a rooted ``Tree``.

    The graph is walked depth-first from ``root`` with an explicit stack.
    The first time a vertex is reached, a new ``Node`` is created for it and
    attached beneath the node of the vertex it was reached from, carrying
    the length of the connecting edge.  Vertices that already have a node
    are ignored, so each undirected edge is used once.

    :param G: indexable by vertex number; ``G[u]`` iterates over
        ``(v, length)`` pairs.  Assumed to be acyclic.
    :param root: index of the vertex that becomes the seed node.
    :param names: optional sequence of labels indexed by vertex number; when
        empty or ``None`` the vertex number itself (as a string) is the label.
    :return: the constructed ``Tree``; each leaf gets a new taxon in the
        tree's taxon namespace carrying the leaf's label.
    """
    def label_for(vertex):
        return names[vertex] if names else str(vertex)

    seed_node = Node()
    seed_node.label = label_for(root)
    T = Tree(seed_node=seed_node)

    # node_refs[v] is the Node built for vertex v, or None if not visited yet.
    node_refs = [None] * len(G)
    node_refs[root] = seed_node

    stk = [root]
    while stk:
        curr_v = stk.pop()
        for v, length in G[curr_v]:
            if node_refs[v] is not None:
                # Already placed in the tree (e.g. the edge back to the parent).
                continue
            stk.append(v)
            new_node = Node()
            new_node.label = label_for(v)
            node_refs[v] = new_node
            node_refs[curr_v].add_child(new_node)
            new_node.edge_length = length

    for node in T.leaf_node_iter():
        node.taxon = T.taxon_namespace.new_taxon(label=node.label)
    return T
def simulateTreeTopology(n):
    """Simulate a random binary tree topology with ``n`` leaves.

    Starting from a single node, a uniformly chosen current tip is split
    into two new tips, ``n - 1`` times.  The tips are then labelled with a
    random permutation of ``1..n`` and given ``time = 0``.

    :param n: number of leaves.
    :return: ``(tree, split_order)`` where ``split_order`` lists the nodes in
        the order in which they were split.
    """
    root = Node()
    tips = [root]
    split_order = []
    tree = Tree(seed_node=root)

    for step in range(n - 1):
        pick = randint(0, step)
        left = Node()
        right = Node()
        parent = tips[pick]
        parent.add_child(left)
        parent.add_child(right)
        # The split tip's slot is reused for one child; the other is appended.
        tips[pick] = left
        tips.append(right)
        split_order.append(parent)

    labels = list(range(1, n + 1))
    shuffle(labels)
    for position, tip in enumerate(tips):
        tip.taxon = Taxon(label=str(labels[position]))
        tip.time = 0
    return tree, split_order
def extract_tree_with_taxa(tree, taxa, suppress_unifurcations=True):
    """Build a new tree containing only the lineages leading to ``taxa``.

    Every node of ``tree`` gets a ``keep`` attribute: ``True`` for the leaves
    whose taxon is in ``taxa`` and for all of their ancestors, ``False``
    otherwise.  The kept nodes are then copied (taxon, label and edge length)
    breadth-first into a fresh ``Tree``.

    :param tree: source tree; its nodes are annotated with ``keep``.
    :param taxa: iterable of taxa; each must be the taxon of a leaf of
        ``tree`` (a missing one raises ``KeyError``).
    :param suppress_unifurcations: when true, ``suppress_unifurcations()`` is
        called on the result before it is returned.
    :return: the newly built tree.
    """
    leaf_by_taxon = {}
    for nd in tree.preorder_node_iter():
        nd.keep = False
        if nd.is_leaf():
            leaf_by_taxon[nd.taxon] = nd

    for taxon in taxa:
        for lineage_node in leaf_by_taxon[taxon].ancestor_iter(inclusive=True):
            lineage_node.keep = True

    out = Tree()
    # Two queues advanced in lockstep: a source node and its copy in ``out``.
    pending_src = Queue()
    pending_dst = Queue()
    pending_src.put(tree.seed_node)
    pending_dst.put(out.seed_node)
    while not pending_src.empty():
        src = pending_src.get()
        dst = pending_dst.get()
        for src_child in src.child_node_iter():
            if not src_child.keep:
                continue
            twin = Node(
                taxon=src_child.taxon,
                label=src_child.label,
                edge_length=src_child.edge_length,
            )
            dst.add_child(twin)
            pending_src.put(src_child)
            pending_dst.put(twin)

    if suppress_unifurcations:
        out.suppress_unifurcations()
    return out
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Copied from more recent DendroPy than the version that we bundle...

    Arbitrarily resolve polytomies (nodes with more than two children) in
    place, using 0-length splits.

    Each polytomy keeps its first two children; the remaining children are
    detached and re-inserted one at a time next to an attachment point drawn
    with ``rng.sample()``, through a newly created node whose edge length is
    0.0.

    `tree` is the tree to modify in place.

    `rng` is an object with a sample() method that should behave like
    random.sample().  If `rng` is None, the module-level POLYTOMY_RNG is
    used, so the resolution is always driven by an rng in this version.

    If `update_splits` is true, tree.update_splits() is called once all
    polytomies have been resolved.
    """
    _LOG.debug("start resolving polytomies")
    from dendropy import Node
    polytomies = []
    if rng is None:
        rng = POLYTOMY_RNG
    # Collect the polytomies first so the tree is not modified while it is
    # being iterated over.
    for node in tree.postorder_node_iter():
        if len(node.child_nodes()) > 2:
            polytomies.append(node)
    _LOG.debug("Found %d polytomies" % len(polytomies))
    for node in polytomies:
        children = node.child_nodes()
        nc = len(children)
        if nc > 2:
            #if nc == 3 and node.parent_node is None:
            #    continue
            # Detach everything beyond the first two children.
            to_attach = children[2:]
            for child in to_attach:
                node.remove_child(child)
            # Candidate positions: the two remaining children and the
            # polytomy node itself.
            attachment_points = children[:2] + [node]
            while len(to_attach) > 0:
                next_child = to_attach.pop()
                next_sib = rng.sample(attachment_points, 1)[0]
                next_attachment = Node()
                next_attachment.edge.length = 0.0
                p = next_sib.parent_node
                if p is None:
                    # The chosen point has no parent: push its current
                    # children down under the new node, and make the new
                    # node and next_child its children.
                    c_list = list(next_sib.child_nodes())
                    next_sib.add_child(next_attachment)
                    next_sib.add_child(next_child)
                    for child in c_list:
                        next_sib.remove_child(child)
                        next_attachment.add_child(child)
                else:
                    # Put the new node in the chosen point's place under its
                    # parent; the chosen point and next_child become the
                    # new node's children.
                    p.add_child(next_attachment)
                    p.remove_child(next_sib)
                    next_attachment.add_child(next_sib)
                    next_attachment.add_child(next_child)
                # The new node is itself a candidate for later insertions.
                attachment_points.append(next_attachment)
    # NOTE: this message is logged whether or not update_splits is set.
    _LOG.debug("polytomies resolution - updating splits")
    if update_splits:
        tree.update_splits()
    _LOG.debug("polytomies resolved.")
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Arbitrarily resolve polytomies using 0-length splits.

    Every node with more than two children is reduced, in place, to two
    children by repeatedly joining a pair of its children under a new node
    whose edge length is 0.

    If `rng` is an object with a sample() method, each pair is drawn with
    ``rng.sample(children, 2)``; rng.sample() should behave like
    random.sample().  Passing a seeded generator therefore gives a
    reproducible resolution.  A truthy `rng` without a sample() method falls
    back to ``random.sample`` so that callers using it as a plain flag keep
    working.

    If `rng` is not passed in (or is falsy), then the polytomy is broken
    deterministically by repeatedly joining the first two children.

    If `update_splits` is true, tree.update_splits() is called at the end.
    """
    # Collect first: the tree must not change while it is being traversed.
    polytomies = [
        node for node in tree.postorder_node_iter()
        if len(node.child_nodes()) > 2
    ]

    # Use the caller's generator rather than the global random module, which
    # would silently ignore any seed the caller had set on `rng`.
    pick_pair = getattr(rng, "sample", random.sample) if rng else None

    for node in polytomies:
        children = node.child_nodes()
        while len(children) > 2:
            joint = Node()
            joint.edge.length = 0
            if pick_pair is not None:
                sample = pick_pair(children, 2)
            else:
                sample = [children[0], children[1]]
            c1 = sample[0]
            c2 = sample[1]
            node.remove_child(c1)
            node.remove_child(c2)
            joint.add_child(c1)
            joint.add_child(c2)
            node.add_child(joint)
            children = node.child_nodes()

    if update_splits:
        tree.update_splits()
taxonomyTree = sys.argv[3] species = {} lines = open(speciesList, 'r') for line in lines: species[line.strip()] = line.strip() lines = open(taxonomyFile, 'r') header = lines.readline() nodes_dict = {} #Read first line, root node line = lines.readline() results = line.strip().split(',') tree = Tree() root = Node() root.__dict__['label'] = results[0].replace("\"", "") nodes_dict[results[0].replace("\"", "")] = root prune = ['1'] #Add root node to tree tree.__dict__['_seed_node'].add_child(root) for line in lines: results = line.strip().split(',') node = Node() node.__dict__['label'] = results[0].replace("\"", "") node.taxon = Taxon(results[0].replace("\"", "")) nodes_dict[results[0].replace("\"", "")] = node nodes_dict[results[1].replace("\"", "")].add_child(node) if results[0].replace("\"", "") not in species:
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses RDF content into a rooted dendropy.Tree.

    The content comes from `file_obj` (a file object) when it is truthy,
    otherwise from `data`, which is parsed with format='xml'.

    Every triple whose predicate is OBO[HAS_PARENT_PREDICATE] (in the
    http://purl.obolibrary.org/obo/ namespace) is read as "subject has parent
    object".  A dendropy Node is created for each subject and object the
    first time it is seen; when the RDF resource has a
    OBO[REPRESENTS_TU_PREDICATE] value, the node gets a Taxon labelled with
    that value's rdfs:label.

    Returns the Tree, rooted at the single node that never appeared as a
    child.  Returns None if no such node exists.

    Raises ValueError if more than one parentless node is found; the message
    lists the label (or "<unlabeled>" plus the object id) of each of them.

    When _DEBUGGING is true, a trace is written to 'parse_rdf.txt' in the
    current directory.

    Relies on rdflib and dendropy.
    '''
    from dendropy import Node, Tree, Edge, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')
    # Maps id(<rdflib term>) -> the Node created for it.
    # NOTE(review): keying on id() only finds an existing Node if the graph
    # hands back the very same Python object each time a term recurs --
    # confirm this holds for the rdflib store in use.
    nd_dict = {}
    # NOTE(review): has_parent_predicate is not used below.
    has_parent_predicate = OBO_PREFIX + HAS_PARENT_PREDICATE
    if _DEBUGGING:
        out = open('parse_rdf.txt', 'w')
    taxon_set = TaxonSet()
    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    # Nodes created as a parent that have not (yet) been seen as a child.
    parentless = set()
    for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
        parent = nd_dict.get(id(o))
        if parent is None:
            #print 'Parent o.value = ', o.value(rdflib.RDF.nodeID)
            # Keep the raw term for the id() key; `o` is rebound to a
            # Resource wrapper so that .value() can be used.
            raw_o = o
            o = rdflib.resource.Resource(graph, o)
            o_tu = o.value(OBO[REPRESENTS_TU_PREDICATE])
            if o_tu:
                o_label = o_tu.value(rdflib.RDFS.label)
                t = Taxon(label=o_label)
                taxon_set.append(t)
                parent = Node(taxon=t)
            else:
                parent = Node()
            nd_dict[id(raw_o)] = parent
            parentless.add(parent)
        child = nd_dict.get(id(s))
        if child is None:
            raw_s = s
            s = rdflib.resource.Resource(graph, s)
            s_tu = s.value(OBO[REPRESENTS_TU_PREDICATE])
            if s_tu:
                s_label = s_tu.value(rdflib.RDFS.label)
                t = Taxon(label=s_label)
                taxon_set.append(t)
                child = Node(taxon=t)
            else:
                child = Node()
            nd_dict[id(raw_s)] = child
        else:
            # A node first seen as a parent now turns out to have a parent.
            if child in parentless:
                parentless.remove(child)
        parent.add_child(child)
        if _DEBUGGING:
            out.write('%s %s %s\n' % (str(s), p, o))
            out.write('%s\n' % (str(parentless)))
    if _DEBUGGING:
        out.close()
    if len(parentless) != 1:
        message = "Expecting to find exactly Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(
            parentless)
        CUTOFF_FOR_LISTING_PARENTLESS_NODES = 1 + len(
            parentless
        )  # we might want to put in a magic number here to suppress really long output
        # With the cutoff set to len + 1, this test is true whenever at least
        # one parentless node exists.
        if len(parentless) > 0 and len(
                parentless) < CUTOFF_FOR_LISTING_PARENTLESS_NODES:
            message += ":\n "
            for i in parentless:
                if i.label:
                    message += "\n " + i.label
                else:
                    message += "\n <unlabeled>" + str(id(i))
            raise ValueError(message)
        else:
            # No parentless node at all (e.g. no matching triples).
            return None
    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
# Expand selected leaves of a Newick tree into small clades.
#
# Command-line arguments:
#   argv[1]  treefile - input tree, Newick format
#   argv[2]  infofile - whitespace-separated text; on each line the first
#                       token is an existing leaf label and the remaining
#                       tokens are labels of leaves to add next to it
#   argv[3]  outfile  - where the modified tree is written, Newick format
treefile = argv[1]
infofile = argv[2]
outfile = argv[3]

with open(infofile, 'r') as f:
    mapping = {}
    for line in f:
        taxa = line.split()
        if not taxa:
            # Blank (or whitespace-only) line: nothing to map.  Without this
            # guard taxa[0] raises IndexError, e.g. on a trailing empty line.
            continue
        mapping[taxa[0]] = taxa[1:]

myTree = Tree.get_from_path(treefile, "newick")

# Snapshot the leaves up front because the loop below adds new leaves.
leaves = list(myTree.leaf_node_iter())
for node in leaves:
    if node.taxon.label in mapping:
        # Insert a new internal node between the leaf and its parent; the new
        # node is given the leaf's edge length.
        # NOTE(review): the leaf keeps its own edge_length as well, so its
        # distance to the old parent is counted twice while the added leaves
        # (edge length 0) sit at the original distance -- confirm intended.
        new_node = Node(edge_length=node.edge_length)
        pNode = node.parent_node
        pNode.remove_child(node)
        pNode.add_child(new_node)
        new_node.add_child(node)
        pNode = new_node
        # Hang one zero-length leaf per extra label off the new internal node.
        for taxon_name in mapping[node.taxon.label]:
            new_taxon = Taxon(label=taxon_name)
            myTree.taxon_namespace.add_taxon(new_taxon)
            new_node = Node(edge_length=0, taxon=new_taxon)
            pNode.add_child(new_node)

myTree.write_to_path(outfile, "newick")
def get_super_tree(self, superTree_method, **args):
    """Build a summary tree from ``self.data['trees']`` and wrap it in a MetaTree.

    Branches are identified by ``node.barcode``, an integer used as a bit
    mask over the taxa (``load_tree`` assigns ``2 ** taxon_id`` to leaves and
    the sum of the children's barcodes to internal nodes).

    :param superTree_method: selects how the branch list is produced:

        * ``'MCC'`` - branches of the full-taxon tree with the highest
          summed branch frequency;
        * ``'consensus'`` - greedy consensus of mutually compatible branches;
        * ``'ASTRID'`` - passes the name check, but the helper is commented
          out, so the ``locals()`` lookup raises ``KeyError``;
        * a path to an existing file - the first tree in that file (Nexus if
          its first line starts with ``#NEXUS``, otherwise Newick);
        * anything else - label of one of the trees in ``self.data['trees']``.
    :param args: forwarded unchanged to the selected helper.
    :return: ``MetaTree(supertree)``; every node carries ``barcode``,
        ``posterior``, ``age``, ``contain`` and ``id``.
    """

    def parse_trees(**args):
        # Returns (number of trees as float, {barcode: [[w, tree_index, node], ...]}).
        # w is the squared fraction of all taxa present in the tree.
        n_tree, n_branch = float(len(self.data['trees'])), {}
        for mt_id, mt in enumerate(self.data['trees']):
            w = (float(len(mt.tre.leaf_nodes())) / len(self.data['taxa']))**2
            for node in mt.tre.preorder_node_iter():
                if node.barcode not in n_branch:
                    n_branch[node.barcode] = [[w, mt_id, node]]
                else:
                    n_branch[node.barcode].append([w, mt_id, node])
        return n_tree, n_branch

    def consensus(self, **args):
        # Greedy consensus: visit branches by decreasing frequency and keep
        # each one that is compatible with everything kept so far.
        n_tree, n_branch = parse_trees(**args)
        # .items() rather than .iteritems(): the latter does not exist on
        # Python 3, while .items() works on both Python 2 and 3.
        n_branch = sorted([[len(v) / n_tree, k, v]
                           for k, v in n_branch.items()],
                          reverse=True)
        consensus_tree = []
        for posterior, branch, nodes in n_branch:
            for cbr, _, _ in consensus_tree:
                b1, b2 = sorted([branch, cbr])
                # Compatible when b1 is contained in b2 or disjoint from it.
                if not (((b1 & b2) == b1) or ((b1 & (~b2)) == b1)):
                    branch = 0
                    break
            if branch:
                consensus_tree.append([branch, posterior, nodes])
        return sorted(consensus_tree, reverse=True)

    def MCC(self, **args):
        # Score each tree that contains every taxon by the summed frequency
        # of its branches, then report the branches of the best one.
        n_tree, n_branch = parse_trees(**args)
        for mt_id, mt in enumerate(self.data['trees']):
            if len(mt.tre.leaf_nodes()) == len(self.data['taxa']):
                mt.score = np.sum([
                    len(n_branch[node.barcode])
                    for node in mt.tre.preorder_node_iter()
                ])
        # NOTE(review): trees lacking some taxa get no score assigned here;
        # max() relies on every tree already having a ``score`` -- confirm.
        tre = max(self.data['trees'], key=lambda x: x.score).tre
        return [[
            n.barcode,
            len(n_branch[n.barcode]) / n_tree, n_branch[n.barcode]
        ] for n in tre.preorder_node_iter()]

    def load_subtree(self, treeLabel, **args):
        # Report the branches of the first tree whose label equals treeLabel.
        # NOTE(review): if no tree matches, ``tre`` is never bound and the
        # return statement fails.
        n_tree, n_branch = parse_trees(**args)
        for mt_id, mt in enumerate(self.data['trees']):
            if mt.tre.label == treeLabel:
                tre = mt.tre
                break
        return [[
            n.barcode,
            len(n_branch[n.barcode]) / n_tree, n_branch[n.barcode], n.age,
            n.edge_length
        ] for n in tre.preorder_node_iter()]

    #def ASTRID(self, **args) :
        #from dendropy import PhylogeneticDistanceMatrix

    def load_tree(self, consFile=None, **args):
        # Read the first tree from consFile and annotate it with id, barcode
        # and age so it can be reported like the in-memory trees.
        n_tree, n_branch = parse_trees(**args)
        with open(consFile) as fin:
            schema = 'nexus' if fin.readline().upper().startswith(
                '#NEXUS') else 'newick'
        for tre in Tree.yield_from_files([consFile], schema=schema):
            break
        internal_id = n_taxa = len(self.data['taxa'])
        # dtype='object' keeps the powers of two as Python ints.
        digit_code = np.power(2, np.arange(n_taxa, dtype='object'))
        for node in tre.postorder_node_iter():
            if node.is_leaf():
                node.id = self.data['taxa'][node.taxon.label]
                node.barcode = digit_code[node.id]
            else:
                node.id, internal_id = internal_id, internal_id + 1
                node.barcode = sum([c.barcode for c in node.child_nodes()])
        # Ages run from the root down: child age = parent age - edge length.
        tre.seed_node.age = tre.seed_node.distance_from_tip()
        for node in tre.preorder_node_iter():
            if node.parent_node:
                node.age = node.parent_node.age - node.edge_length
        return [[
            n.barcode,
            len(n_branch.get(n.barcode, [])) / n_tree,
            n_branch.get(n.barcode, []), n.age, n.edge_length
        ] for n in tre.preorder_node_iter()]

    # Each entry of ``branches`` is [barcode, posterior, nodes] optionally
    # followed by [age, edge_length]; the first entry becomes the root.
    if superTree_method in ('MCC', 'ASTRID', 'consensus'):
        branches = locals()[superTree_method](self, **args)
    elif os.path.isfile(superTree_method):
        branches = load_tree(self, consFile=superTree_method, **args)
    else:
        branches = load_subtree(self, treeLabel=superTree_method, **args)

    supertree = Tree()
    sn = supertree.seed_node
    sn.barcode, sn.posterior = branches[0][0], branches[0][1]
    # Use the supplied age when present, otherwise the w-weighted mean age of
    # the supporting nodes.
    sn.age = branches[0][3] if len(branches[0]) > 3 else np.sum(
        [n[2].age * n[0]
         for n in branches[0][2]]) / np.sum([n[0] for n in branches[0][2]])
    sn.contain = [[b[0], b[1], b[2].id] for b in branches[0][2]]
    for br in branches[1:]:
        cbr, posterior, nodes = br[:3]
        # Climb until the current node's barcode contains this branch.
        while (sn.barcode & cbr) != cbr:
            sn = sn.parent_node
        new_node = Node() if len(nodes) == 0 or (
            not nodes[0][2].taxon) else Node(taxon=Taxon(
                label=nodes[0][2].taxon.label))
        sn.add_child(new_node)
        sn = new_node
        sn.barcode, sn.posterior = cbr, posterior
        sn.contain = [[b[0], b[1], b[2].id] for b in nodes]
        if len(br) <= 3:
            # No explicit age/length given: use w-weighted means over the
            # supporting nodes, or defaults when there are none.
            sn.edge_length = 0.0 if len(nodes) == 0 else np.sum(
                [n[2].edge_length * n[0]
                 for n in nodes]) / np.sum([n[0] for n in nodes])
            sn.age = sn.parent_node.age if len(nodes) == 0 else np.sum(
                [n[2].age * n[0]
                 for n in nodes]) / np.sum([n[0] for n in nodes])
        else:
            sn.age, sn.edge_length = br[3:]

    # Leaves take their taxon's id; internal nodes are numbered after them.
    internal_id = len(self.data['taxa'])
    for node in supertree.postorder_node_iter():
        if node.is_leaf():
            node.id = self.data['taxa'][node.taxon.label]
        else:
            node.id = internal_id
            internal_id += 1
    return MetaTree(supertree)