def simulateTreeTopology(n):
    """Simulate a random rooted binary tree topology with ``n`` leaves.

    Starting from a single node, one of the current leaves is picked with
    ``randint`` and split into two new children; this is repeated ``n - 1``
    times.  Afterwards the leaves receive the labels ``"1"`` .. ``"n"`` in
    shuffled order, and each leaf gets a ``time`` attribute of 0.

    Returns:
        A ``(tree, split_order)`` tuple, where ``split_order`` lists the
        internal nodes in the order in which they were split.
    """
    root = Node()
    tips = [root]
    split_order = []
    topology = Tree(seed_node=root)

    for step in range(n - 1):
        # At this point there are exactly step + 1 leaves to choose from.
        pick = randint(0, step)
        left = Node()
        right = Node()
        parent = tips[pick]
        parent.add_child(left)
        parent.add_child(right)
        # The split node stops being a leaf: its slot is reused for `left`.
        tips[pick] = left
        tips.append(right)
        split_order.append(parent)

    labels = list(range(1, n + 1))
    shuffle(labels)
    for idx, tip in enumerate(tips):
        tip.taxon = Taxon(label=str(labels[idx]))
        tip.time = 0

    return topology, split_order
def extract_tree_with_taxa(tree, taxa, suppress_unifurcations=True):
    """Return a new tree containing only the leaves whose taxon is in ``taxa``.

    Every node of ``tree`` gets a boolean ``keep`` attribute: True for the
    selected leaves and all of their ancestors, False otherwise.  The kept
    part of the tree is then copied breadth-first into a fresh ``Tree``
    (taxon, label and edge length are copied for every kept non-root node).

    Args:
        tree: source tree; its nodes are annotated with ``keep``.
        taxa: iterable of taxon objects to retain; each must be the taxon of
            a leaf of ``tree`` (otherwise the lookup raises ``KeyError``).
        suppress_unifurcations: when true, call ``suppress_unifurcations()``
            on the result before returning it.

    Returns:
        The newly built tree.
    """
    leaf_by_taxon = {}
    for nd in tree.preorder_node_iter():
        nd.keep = False
        if nd.is_leaf():
            leaf_by_taxon[nd.taxon] = nd

    # Mark each requested leaf together with its whole path to the root.
    for taxon in taxa:
        for nd in leaf_by_taxon[taxon].ancestor_iter(inclusive=True):
            nd.keep = True

    out = Tree()
    # Walk source and copy in lockstep: each entry pairs a source node with
    # its already-created counterpart in the output tree.
    pending = Queue()
    pending.put((tree.seed_node, out.seed_node))
    while not pending.empty():
        src, dst = pending.get()
        for src_child in src.child_node_iter():
            if not src_child.keep:
                continue
            dst_child = Node(
                taxon=src_child.taxon,
                label=src_child.label,
                edge_length=src_child.edge_length,
            )
            dst.add_child(dst_child)
            pending.put((src_child, dst_child))

    if suppress_unifurcations:
        out.suppress_unifurcations()
    return out
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Arbitrarily resolve the polytomies of `tree`, in place, using 0-length
    edges.  (Copied from a more recent DendroPy than the bundled version.)

    Every node with more than two children keeps its first two children; the
    remaining children are re-attached one at a time next to an attachment
    point picked with `rng.sample(points, 1)`, where the points are the two
    kept children, the polytomy node itself and every node inserted so far.

    `rng` must provide a `sample()` method that behaves like
    `random.sample()`.  When `rng` is None the module-level `POLYTOMY_RNG` is
    used instead.

    If `update_splits` is true, `tree.update_splits()` is called at the end.
    """
    _LOG.debug("start resolving polytomies")
    from dendropy import Node
    if rng is None:
        rng = POLYTOMY_RNG

    polytomies = [nd for nd in tree.postorder_node_iter()
                  if len(nd.child_nodes()) > 2]
    _LOG.debug("Found %d polytomies" % len(polytomies))

    for polytomy in polytomies:
        children = polytomy.child_nodes()
        if len(children) <= 2:
            continue
        # Detach everything beyond the first two children ...
        detached = children[2:]
        for extra in detached:
            polytomy.remove_child(extra)
        # ... and re-insert them one by one at a sampled attachment point.
        attachment_points = children[:2] + [polytomy]
        while detached:
            next_child = detached.pop()
            sibling = rng.sample(attachment_points, 1)[0]
            joint = Node()
            joint.edge.length = 0.0
            parent = sibling.parent_node
            if parent is None:
                # The sampled node has no parent, so the new node cannot be
                # placed above it: push its current children down one level
                # and hang `next_child` beside the new node.
                grandchildren = list(sibling.child_nodes())
                sibling.add_child(joint)
                sibling.add_child(next_child)
                for moved in grandchildren:
                    sibling.remove_child(moved)
                    joint.add_child(moved)
            else:
                # Splice the new node between the sampled node and its parent.
                parent.add_child(joint)
                parent.remove_child(sibling)
                joint.add_child(sibling)
                joint.add_child(next_child)
            attachment_points.append(joint)

    _LOG.debug("polytomies resolution - updating splits")
    if update_splits:
        tree.update_splits()
    _LOG.debug("polytomies resolved.")
def reroot_at_midpoint(self, update_splits=False, delete_outdegree_one=True):
    '''
    Reroot ``self._tree`` at the midpoint of the path between the two nodes
    reported as most distant by the patristic distance matrix.
    Modified from the source code of Dendropy v3.12.0.

    The walk starts at the first of the two nodes and climbs towards their
    MRCA, consuming half of the maximum distance.  If the midpoint coincides
    (within 1e-6) with an existing node, the tree is reseeded there;
    otherwise a new node is inserted on the edge that contains the midpoint
    and the tree is reseeded at that new node.

    `delete_outdegree_one` is forwarded to ``reseed_at``.  If `update_splits`
    is true, ``update_splits(delete_outdegree_one=False)`` is called last.

    Returns the new seed node of the tree.
    '''
    tree = self._tree
    pdm = treecalc.PatristicDistanceMatrix(tree)
    far_a, far_b = pdm.max_dist_nodes
    remaining = float(pdm.max_dist) / 2
    mrca = pdm.mrca(far_a.taxon, far_b.taxon)

    pivot_node = None   # set when the midpoint falls on an existing node
    split_edge = None   # set when the midpoint falls inside an edge
    below_len = None    # length kept below the new node on `split_edge`

    walker = far_a
    while walker is not mrca:
        edge_len = walker.edge.length
        if edge_len > remaining:
            split_edge = walker.edge
            below_len = remaining
            break
        if abs(edge_len - remaining) < 1e-6:
            pivot_node = walker.parent_node
            break
        remaining -= edge_len
        walker = walker.parent_node

    assert pivot_node is not None or split_edge is not None

    if pivot_node:
        tree.reseed_at(pivot_node,
                       update_splits=False,
                       delete_outdegree_one=delete_outdegree_one)
    else:
        above_len = split_edge.length - below_len
        lower = split_edge.head_node
        upper = split_edge.tail_node
        # Replace the edge upper->lower by upper->new_root->lower.
        upper.remove_child(lower)
        new_root = Node()
        new_root.add_child(lower, edge_length=below_len)
        upper.add_child(new_root, edge_length=above_len)
        tree.reseed_at(new_root,
                       update_splits=False,
                       delete_outdegree_one=delete_outdegree_one)

    tree.is_rooted = True
    if update_splits:
        tree.update_splits(delete_outdegree_one=False)
    return tree.seed_node
def graph2tree(G, root=0, names=None):
    """Convert an acyclic adjacency structure into a tree rooted at ``root``.

    Args:
        G: indexable by vertex number; ``G[v]`` yields ``(neighbour, length)``
            pairs.  The graph is assumed to be acyclic (it is not checked).
        root: index of the vertex that becomes the seed node.
        names: optional sequence mapping a vertex index to its label.  When
            omitted or empty, ``str(index)`` is used as the label.

    Returns:
        A ``Tree`` in which every node carries its label, every non-root node
        carries the length of the edge it was reached through, and every leaf
        has a new taxon (created in the tree's taxon namespace) named after
        the leaf's label.
    """
    def label_of(v):
        return names[v] if names else str(v)

    seed_node = Node()
    seed_node.label = label_of(root)
    T = Tree(seed_node=seed_node)

    # node_refs[v] is the tree node built for vertex v, or None while v is
    # still unvisited; it doubles as the "visited" marker of the traversal.
    node_refs = [None] * len(G)
    node_refs[root] = seed_node

    stack = [root]
    while stack:
        curr_v = stack.pop()
        for v, length in G[curr_v]:
            if node_refs[v] is not None:
                continue  # already placed in the tree
            new_node = Node()
            new_node.label = label_of(v)
            node_refs[v] = new_node
            node_refs[curr_v].add_child(new_node)
            new_node.edge_length = length
            stack.append(v)

    for leaf in T.leaf_node_iter():
        leaf.taxon = T.taxon_namespace.new_taxon(label=leaf.label)
    return T
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Resolve every polytomy of `tree` in place by inserting 0-length edges.
    (Copied from a more recent DendroPy than the bundled version.)

    A node with more than two children keeps its first two; each remaining
    child is re-attached next to a point drawn with
    `rng.sample(points, 1)` from the two kept children, the polytomy node
    itself and all nodes inserted so far.

    `rng` must expose a `sample()` method behaving like `random.sample()`;
    if it is None, the module-level `POLYTOMY_RNG` is used.

    `tree.update_splits()` is called at the end when `update_splits` is true.
    """
    _LOG.debug("start resolving polytomies")
    from dendropy import Node
    sampler = POLYTOMY_RNG if rng is None else rng

    to_resolve = []
    for candidate in tree.postorder_node_iter():
        if len(candidate.child_nodes()) > 2:
            to_resolve.append(candidate)
    _LOG.debug("Found %d polytomies" % len(to_resolve))

    for hub in to_resolve:
        kids = hub.child_nodes()
        if not len(kids) > 2:
            continue
        waiting = kids[2:]
        for kid in waiting:
            hub.remove_child(kid)
        anchors = kids[:2] + [hub]
        while len(waiting) != 0:
            incoming = waiting.pop()
            anchor = sampler.sample(anchors, 1)[0]
            inserted = Node()
            inserted.edge.length = 0.0
            above = anchor.parent_node
            if above is not None:
                # Put the new node between the anchor and its parent, then
                # hang the incoming child next to the anchor.
                above.add_child(inserted)
                above.remove_child(anchor)
                inserted.add_child(anchor)
                inserted.add_child(incoming)
            else:
                # Parentless anchor: move its present children under the new
                # node and attach the incoming child beside that new node.
                previous = list(anchor.child_nodes())
                anchor.add_child(inserted)
                anchor.add_child(incoming)
                for kid in previous:
                    anchor.remove_child(kid)
                    inserted.add_child(kid)
            anchors.append(inserted)

    _LOG.debug("polytomies resolution - updating splits")
    if update_splits:
        tree.update_splits()
    _LOG.debug("polytomies resolved.")
def get_dendropy_tree(self, cluster_tree):
    """Build a DendroPy ``Tree`` from the clusters of ``cluster_tree``.

    Clusters returned by ``cluster_tree.get_cluster_list()`` that carry a
    taxon are treated as leaves; the others are internal nodes.  Internal
    nodes are processed from the smallest (fewest member clusters) to the
    largest, so the tree is assembled bottom-up.

    Returns:
        A ``Tree`` seeded at the first entry of the list of created nodes.

    Raises:
        IndexError: if the cluster list contains no internal node.
    """
    # Separate leaves from internal nodes and keep them in two lists.
    leafs = []
    nodes = []
    for cluster in cluster_tree.get_cluster_list():
        if cluster.taxon is not None:
            leafs.append(cluster)
        else:
            nodes.append(cluster)

    # Sort the internal nodes so that those with the fewest leaves come
    # first; the tree is then built from the bottom up.
    nodes.sort(key=lambda x: len(x.clusters))

    # Temporary tree nodes created so far, out of which the tree is built.
    created_nodes = []
    for node in nodes:
        # Create a tree node for this cluster, then attach its members.
        created_node = Node()
        for leaf in node.clusters:
            if not self.is_leaf_in_leafs(leaf, leafs):
                # The leaf is no longer in the leaf list, i.e. it was already
                # consumed and sits inside a temporary node.  Find that node,
                # attach it as a child of the new node, and continue with the
                # list returned by the helper.
                # NOTE(review): the helper presumably drops the consumed node
                # from the returned list -- confirm against its definition.
                sub_created_node, created_nodes = \
                    self.find_created_node_with_leaf(created_nodes, leaf)
                if sub_created_node is not None:
                    created_node.add_child(sub_created_node)
            else:
                # The leaf has not been consumed yet: attach it directly and
                # remove it from the list of available leaves.
                created_node.add_child(leaf)
                leafs = self.remove_leaf_from_list(leaf, leafs)
        created_nodes.append(created_node)

    # In the end the node holding all leaves and sub-nodes is expected to be
    # the first (and only) entry of the temporary list.
    tree = Tree(seed_node=created_nodes[0])
    return tree
def get_dendropy_tree(self, cluster_tree):
    """Assemble a DendroPy ``Tree`` bottom-up from ``cluster_tree``.

    Each cluster from ``cluster_tree.get_cluster_list()`` is classified by
    its ``taxon`` attribute: clusters with a taxon are leaves, clusters
    without one are internal nodes.  Internal nodes are handled in order of
    increasing ``len(cluster.clusters)``.

    Returns:
        A ``Tree`` whose seed node is the first element of the list of
        created nodes.

    Raises:
        IndexError: if there is no internal node at all.
    """
    # Distinguish leaves from internal nodes; store them in separate lists.
    leafs = []
    nodes = []
    for cluster in cluster_tree.get_cluster_list():
        if cluster.taxon is None:
            nodes.append(cluster)
        else:
            leafs.append(cluster)

    # Start with the nodes that have the fewest leaves so that the tree is
    # built from the bottom.
    nodes.sort(key=lambda x: len(x.clusters))

    # Temporary, already-created nodes from which the tree is built.
    created_nodes = []
    for node in nodes:
        # One tree node per internal cluster; its members are added below.
        created_node = Node()
        for leaf in node.clusters:
            if self.is_leaf_in_leafs(leaf, leafs):
                # Leaf not used yet: add it as a child of the node and take
                # it out of the list of leaves.
                created_node.add_child(leaf)
                leafs = self.remove_leaf_from_list(leaf, leafs)
            else:
                # Leaf already used, so it lives inside a temporary node:
                # look that node up and add it as a child instead.
                # NOTE(review): the returned list is presumably the input
                # list without the consumed node -- verify in the helper.
                sub_created_node, created_nodes = \
                    self.find_created_node_with_leaf(created_nodes, leaf)
                if sub_created_node is not None:
                    created_node.add_child(sub_created_node)
        created_nodes.append(created_node)

    # Finally, the node containing every leaf and sub-node is expected at the
    # first (and only) position of the temporary list.
    tree = Tree(seed_node=created_nodes[0])
    return tree
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Arbitrarily resolve polytomies of `tree`, in place, using 0-length edges.

    While a node has more than two children, two of them are detached and
    re-attached under a new node with edge length 0, which is then added back
    as a child of the original node.

    If `rng` is given (truthy), the pair is drawn with `rng.sample(children,
    2)`; `rng.sample()` should behave like `random.sample()`.  Using the
    caller's generator -- rather than the global `random` module -- is what
    makes the result reproducible for a seeded `rng`.

    If `rng` is not passed in, the polytomy is broken deterministically by
    repeatedly joining the first two children.

    If `update_splits` is true, `tree.update_splits()` is called at the end.
    """
    polytomies = [nd for nd in tree.postorder_node_iter()
                  if len(nd.child_nodes()) > 2]

    for node in polytomies:
        children = node.child_nodes()
        while len(children) > 2:
            nn1 = Node()
            nn1.edge.length = 0
            if rng:
                c1, c2 = rng.sample(children, 2)
            else:
                c1, c2 = children[0], children[1]
            node.remove_child(c1)
            node.remove_child(c2)
            nn1.add_child(c1)
            nn1.add_child(c2)
            node.add_child(nn1)
            # Re-read the child list: it changed by one net removal.
            children = node.child_nodes()

    if update_splits:
        tree.update_splits()
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Arbitrarily resolve polytomies using 0-length splits (modifies `tree`).

    Each node with more than two children is reduced to two children by
    repeatedly grouping a pair of its children under a freshly created node
    whose edge length is 0.

    `rng`, when supplied, must be an object with a `sample()` method that
    behaves like `random.sample()`; the pair to group is then taken from
    `rng.sample(children, 2)`, so a seeded generator gives repeatable
    resolutions.  When `rng` is not passed in, the first two children are
    always grouped, which is deterministic.

    `tree.update_splits()` is called afterwards when `update_splits` is true.
    """
    polytomies = []
    for candidate in tree.postorder_node_iter():
        if len(candidate.child_nodes()) > 2:
            polytomies.append(candidate)

    for node in polytomies:
        children = node.child_nodes()
        while len(children) > 2:
            grouping = Node()
            grouping.edge.length = 0
            # Honour the caller's generator instead of the global `random`.
            pair = rng.sample(children, 2) if rng else children[:2]
            for member in pair:
                node.remove_child(member)
            for member in pair:
                grouping.add_child(member)
            node.add_child(grouping)
            children = node.child_nodes()

    if update_splits:
        tree.update_splits()
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses the content (a `file_obj` file object, or `data` parsed with
    format='xml' when `file_obj` is not given) into a dendropy Tree.

    Uses the 'has_Parent' term in
    http://www.evolutionaryontology.org/cdao/1.0/cdao.owl#
    to construct and return a rooted dendropy.Tree object: for every
    has_Parent triple the subject becomes a child of the object.  A node
    whose resource has a REPRESENTS_TU_PREDICATE value receives a Taxon
    labelled with the RDFS label of that value.

    Relies on rdflib and dendropy.

    Returns the Tree, or None when no parentless node was found at all.
    Raises ValueError if more than one parentless node remains.

    When `_DEBUGGING` is set, the triples and the evolving set of parentless
    nodes are written to 'parse_rdf.txt'.
    '''
    from dendropy import Node, Tree, Edge, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')

    # NOTE(review): nodes are cached under id() of the rdflib term, which only
    # links a term seen as subject with the same term seen as object if the
    # store hands back the very same Python object -- confirm for the rdflib
    # version in use.
    nd_dict = {}
    taxon_set = TaxonSet()
    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    parentless = set()

    out = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(id(o))
            if parent is None:
                raw_o = o
                o = rdflib.resource.Resource(graph, o)
                o_tu = o.value(OBO[REPRESENTS_TU_PREDICATE])
                if o_tu:
                    o_label = o_tu.value(rdflib.RDFS.label)
                    t = Taxon(label=o_label)
                    taxon_set.append(t)
                    parent = Node(taxon=t)
                else:
                    parent = Node()
                nd_dict[id(raw_o)] = parent
                # First seen as a parent: a root candidate until it shows up
                # as somebody's child.
                parentless.add(parent)
            child = nd_dict.get(id(s))
            if child is None:
                raw_s = s
                s = rdflib.resource.Resource(graph, s)
                s_tu = s.value(OBO[REPRESENTS_TU_PREDICATE])
                if s_tu:
                    s_label = s_tu.value(rdflib.RDFS.label)
                    t = Taxon(label=s_label)
                    taxon_set.append(t)
                    child = Node(taxon=t)
                else:
                    child = Node()
                nd_dict[id(raw_s)] = child
            else:
                if child in parentless:
                    parentless.remove(child)
            parent.add_child(child)

            if out is not None:
                out.write('%s %s %s\n' % (str(s), p, o))
                out.write('%s\n' % (str(parentless)))
    finally:
        # Close the debug file even when parsing a triple raises.
        if out is not None:
            out.close()

    if len(parentless) != 1:
        if not parentless:
            # Kept from the original behaviour: no candidate root at all
            # yields None rather than an exception.
            return None
        message = "Expecting to find exactly one Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(parentless)
        # Every parentless node is listed; a cutoff could be added here to
        # suppress really long output.
        message += ":\n "
        for i in parentless:
            if i.label:
                message += "\n " + i.label
            else:
                message += "\n <unlabeled>" + str(id(i))
        raise ValueError(message)

    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses RDF content into a rooted dendropy.Tree.

    The input is read from `file_obj` (a file object) when it is truthy;
    otherwise `data` is parsed with format='xml'.

    Uses the 'has_Parent' term in
    http://www.evolutionaryontology.org/cdao/1.0/cdao.owl#
    to build the topology: each has_Parent triple makes its subject a child
    of its object.  Nodes whose resource has a REPRESENTS_TU_PREDICATE value
    get a Taxon labelled with that value's RDFS label.

    Relies on rdflib and dendropy.

    Returns the Tree; returns None if the graph yields no parentless node.
    Raises ValueError if the graph yields more than one parentless node.

    With `_DEBUGGING` enabled a trace is written to 'parse_rdf.txt'.
    '''
    from dendropy import Node, Tree, Edge, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')

    # NOTE(review): the cache is keyed by id() of the rdflib term, so a term
    # is only recognised again when the store returns the identical object --
    # verify this holds for the rdflib version in use.
    nd_dict = {}
    debug_file = None
    if _DEBUGGING:
        debug_file = open('parse_rdf.txt', 'w')
    try:
        taxon_set = TaxonSet()
        OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
        parentless = set()
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(id(o))
            if parent is None:
                raw_o = o
                o = rdflib.resource.Resource(graph, o)
                o_tu = o.value(OBO[REPRESENTS_TU_PREDICATE])
                if o_tu:
                    o_label = o_tu.value(rdflib.RDFS.label)
                    t = Taxon(label=o_label)
                    taxon_set.append(t)
                    parent = Node(taxon=t)
                else:
                    parent = Node()
                nd_dict[id(raw_o)] = parent
                # A node first met as a parent is a root candidate for now.
                parentless.add(parent)
            child = nd_dict.get(id(s))
            if child is None:
                raw_s = s
                s = rdflib.resource.Resource(graph, s)
                s_tu = s.value(OBO[REPRESENTS_TU_PREDICATE])
                if s_tu:
                    s_label = s_tu.value(rdflib.RDFS.label)
                    t = Taxon(label=s_label)
                    taxon_set.append(t)
                    child = Node(taxon=t)
                else:
                    child = Node()
                nd_dict[id(raw_s)] = child
            elif child in parentless:
                # It has a parent after all.
                parentless.remove(child)
            parent.add_child(child)
            if debug_file is not None:
                debug_file.write('%s %s %s\n' % (str(s), p, o))
                debug_file.write('%s\n' % (str(parentless)))
    finally:
        # Do not leak the debug handle if anything above raises.
        if debug_file is not None:
            debug_file.close()

    if len(parentless) == 1:
        tree = Tree(taxon_set=taxon_set)
        tree.seed_node = list(parentless)[0]
        tree.is_rooted = True
        return tree

    if len(parentless) == 0:
        # Original behaviour preserved: nothing to root the tree at -> None.
        return None

    message = "Expecting to find exactly one Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(parentless)
    # All candidates are listed; a magic-number cutoff could be introduced
    # here to suppress really long output.
    message += ":\n "
    for i in parentless:
        if i.label:
            message += "\n " + i.label
        else:
            message += "\n <unlabeled>" + str(id(i))
    raise ValueError(message)
def resolve_node(node):
    """Return Newick strings for two-way groupings of ``node``'s children.

    For every entry of ``list_bipartitions(children)`` that has more than one
    member, a small tree is built: a new root with two new children, one
    holding the members of the entry and the other holding the remaining
    children of ``node``.  Its Newick representation is collected.

    Building those trees re-parents the children, so they are added back to
    ``node`` before returning.

    Returns:
        list of Newick strings, one per qualifying entry.
    """
    children = node.child_nodes()
    newicks = []
    for side in list_bipartitions(children):
        if len(side) <= 1:
            continue
        other_side = [kid for kid in children if kid not in side]
        top = Node()
        first_half = Node()
        second_half = Node()
        for kid in side:
            first_half.add_child(kid)
        for kid in other_side:
            second_half.add_child(kid)
        top.add_child(first_half)
        top.add_child(second_half)
        newicks.append(Tree(seed_node=top).as_string("newick"))

    # Hand the children back to their original parent.
    for kid in children:
        node.add_child(kid)
    return newicks
# Re-insert taxa that were collapsed onto a representative leaf.
#
# Usage: <script> TREEFILE INFOFILE OUTFILE
#   TREEFILE  Newick tree containing the representative taxa
#   INFOFILE  one group per line: the representative label followed by the
#             whitespace-separated labels to add next to it
#   OUTFILE   where the expanded Newick tree is written
treefile = argv[1]
infofile = argv[2]
outfile = argv[3]

# representative label -> labels of the taxa to add beside it
mapping = {}
with open(infofile, 'r') as f:
    for line in f:
        taxa = line.split()
        if not taxa:
            continue  # tolerate blank lines instead of failing on taxa[0]
        mapping[taxa[0]] = taxa[1:]

myTree = Tree.get_from_path(treefile, "newick")

# Snapshot the leaves first: the loop below adds new leaves to the tree.
leaves = list(myTree.leaf_node_iter())
for node in leaves:
    if node.taxon.label in mapping:
        # Put a new internal node where the leaf was; it takes over the
        # leaf's branch length.
        new_node = Node(edge_length=node.edge_length)
        pNode = node.parent_node
        pNode.remove_child(node)
        pNode.add_child(new_node)
        new_node.add_child(node)
        # The branch length now sits on the new internal node, so the
        # representative hangs below it at distance 0, exactly like the taxa
        # added next to it (otherwise its branch would be counted twice).
        node.edge_length = 0
        pNode = new_node
        for taxon_name in mapping[node.taxon.label]:
            new_taxon = Taxon(label=taxon_name)
            myTree.taxon_namespace.add_taxon(new_taxon)
            new_node = Node(edge_length=0, taxon=new_taxon)
            pNode.add_child(new_node)

myTree.write_to_path(outfile, "newick")
parser.add_argument("-od",type=float,default=0,required=False,help="Outgroup branch length")
parser.add_argument("-i",type=str,default="infile.tree",required=True,help="Input Newick tree file")
parser.add_argument("-o",type=str,default="outtree.tree",required=False,help="Output Newick tree file")
args = parser.parse_args()

trees = TreeList.get_from_path(args.i, schema="newick", rooting="force-rooted")

if args.gt != 0:
    # %s rather than %d: a fractional value must not be shown truncated.
    print("Scaling branch lengths to time with generation time %s\n" % args.gt)
    for tree in trees:
        for edge in tree.preorder_edge_iter():
            # Edges without a length are left untouched.
            if edge.length is not None:
                edge.length = edge.length / args.gt

if args.od != 0:
    print("Adding outgroup with branch length %s\n" % args.od)
    namespace = trees.taxon_namespace
    outgroup = Taxon("outgroup")
    namespace.add_taxon(outgroup)
    for tree in trees:
        outgroup_node = Node(taxon=outgroup, edge_length=args.od)
        new_root_node = Node()
        # Length of the branch above the old root: the outgroup branch length
        # minus the old root's distance_from_tip().
        tree.seed_node.edge_length = args.od - tree.seed_node.distance_from_tip()
        new_root_node.add_child(tree.seed_node)
        new_root_node.add_child(outgroup_node)
        tree.seed_node = new_root_node

trees.write(path=args.o, schema="newick", suppress_rooting=True)
def get_super_tree(self, superTree_method, **args):
    """Build a summary tree from ``self.data['trees']`` and wrap it in a MetaTree.

    ``superTree_method`` selects how the list of branches is obtained:

    * ``'MCC'`` or ``'consensus'`` -- the local function of that name;
    * a path to an existing file -- ``load_tree`` reads the tree from it;
    * anything else -- ``load_subtree`` looks for a tree in
      ``self.data['trees']`` whose ``tre.label`` equals the value.

    Each branch is a list ``[barcode, posterior, nodes]``, optionally followed
    by ``age`` and ``edge_length``; ``nodes`` holds ``[w, tree_index, node]``
    entries as produced by ``parse_trees``.  The branches are then turned into
    a new ``Tree`` whose nodes carry ``barcode``, ``posterior``, ``contain``,
    ``age``, ``edge_length`` and ``id`` attributes.

    NOTE(review): ``'ASTRID'`` is accepted by the dispatch below, but no local
    function with that name is defined here (it is commented out), so the
    ``locals()`` lookup would raise KeyError for it.
    NOTE(review): ``consensus`` uses ``dict.iteritems()``, which is the
    Python 2 dict API.
    """
    def parse_trees(**args):
        # Index every node of every tree by its barcode.
        # Returns (number of trees as a float,
        #          {barcode: [[w, tree_index, node], ...]}),
        # where w = (leaf count of the tree / number of taxa) ** 2.
        n_tree, n_branch = float(len(self.data['trees'])), {}
        for mt_id, mt in enumerate(self.data['trees']):
            w = (float(len(mt.tre.leaf_nodes())) / len(self.data['taxa']))**2
            for node in mt.tre.preorder_node_iter():
                if node.barcode not in n_branch:
                    n_branch[node.barcode] = [[w, mt_id, node]]
                else:
                    n_branch[node.barcode].append([w, mt_id, node])
        return n_tree, n_branch

    def consensus(self, **args):
        # Greedy selection: visit barcodes from the most to the least
        # frequent (len(v) / n_tree) and keep one only if it passes the
        # bitwise test below against every barcode kept so far.
        n_tree, n_branch = parse_trees(**args)
        n_branch = sorted([[len(v) / n_tree, k, v]
                           for k, v in n_branch.iteritems()],
                          reverse=True)
        consensus_tree = []
        for posterior, branch, nodes in n_branch:
            for cbr, _, _ in consensus_tree:
                b1, b2 = sorted([branch, cbr])
                # With b1 <= b2: accept when every bit of b1 is set in b2, or
                # when no bit of b1 is set in b2; otherwise reject the branch.
                if not (((b1 & b2) == b1) or ((b1 & (~b2)) == b1)):
                    branch = 0
                    break
            if branch:
                consensus_tree.append([branch, posterior, nodes])
        return sorted(consensus_tree, reverse=True)

    def MCC(self, **args):
        # Score each tree that contains all taxa by the summed occurrence
        # counts of its node barcodes, then report the branches of the tree
        # with the highest score.
        # NOTE(review): max() reads .score on every tree, but it is assigned
        # here only for trees with the full leaf count -- presumably the
        # others already carry a score; verify.
        n_tree, n_branch = parse_trees(**args)
        for mt_id, mt in enumerate(self.data['trees']):
            if len(mt.tre.leaf_nodes()) == len(self.data['taxa']):
                mt.score = np.sum([
                    len(n_branch[node.barcode])
                    for node in mt.tre.preorder_node_iter()
                ])
        tre = max(self.data['trees'], key=lambda x: x.score).tre
        return [[
            n.barcode,
            len(n_branch[n.barcode]) / n_tree, n_branch[n.barcode]
        ] for n in tre.preorder_node_iter()]

    def load_subtree(self, treeLabel, **args):
        # Report the branches (with age and edge length) of the first tree
        # whose label equals treeLabel.
        # NOTE(review): if no label matches, `tre` is never bound and the
        # return statement fails.
        n_tree, n_branch = parse_trees(**args)
        for mt_id, mt in enumerate(self.data['trees']):
            if mt.tre.label == treeLabel:
                tre = mt.tre
                break
        return [[
            n.barcode,
            len(n_branch[n.barcode]) / n_tree, n_branch[n.barcode], n.age,
            n.edge_length
        ] for n in tre.preorder_node_iter()]

    #def ASTRID(self, **args) :
    #from dendropy import PhylogeneticDistanceMatrix

    def load_tree(self, consFile=None, **args):
        # Read the first tree from consFile and annotate it with ids,
        # barcodes and ages so that it can be reported like the other trees.
        n_tree, n_branch = parse_trees(**args)
        # The schema is chosen from the first line of the file.
        with open(consFile) as fin:
            schema = 'nexus' if fin.readline().upper().startswith(
                '#NEXUS') else 'newick'
        # Only the first tree yielded from the file is used.
        for tre in Tree.yield_from_files([consFile], schema=schema):
            break
        internal_id = n_taxa = len(self.data['taxa'])
        # One distinct power of two per taxon index (object dtype).
        digit_code = np.power(2, np.arange(n_taxa, dtype='object'))
        for node in tre.postorder_node_iter():
            if node.is_leaf():
                node.id = self.data['taxa'][node.taxon.label]
                node.barcode = digit_code[node.id]
            else:
                # Internal nodes are numbered after the taxa; their barcode
                # is the sum of their children's barcodes.
                node.id, internal_id = internal_id, internal_id + 1
                node.barcode = sum([c.barcode for c in node.child_nodes()])
        # Ages decrease from the root: a node's age is its parent's age minus
        # its own edge length.
        tre.seed_node.age = tre.seed_node.distance_from_tip()
        for node in tre.preorder_node_iter():
            if node.parent_node:
                node.age = node.parent_node.age - node.edge_length
        return [[
            n.barcode,
            len(n_branch.get(n.barcode, [])) / n_tree,
            n_branch.get(n.barcode, []), n.age, n.edge_length
        ] for n in tre.preorder_node_iter()]

    # Dispatch on superTree_method (see the docstring).
    if superTree_method in ('MCC', 'ASTRID', 'consensus'):
        branches = locals()[superTree_method](self, **args)
    elif os.path.isfile(superTree_method):
        branches = load_tree(self, consFile=superTree_method, **args)
    else:
        branches = load_subtree(self, treeLabel=superTree_method, **args)

    # The first branch describes the root of the new tree.
    supertree = Tree()
    sn = supertree.seed_node
    sn.barcode, sn.posterior = branches[0][0], branches[0][1]
    # Use the supplied age when the branch has one; otherwise take the
    # w-weighted mean age of the contributing nodes.
    sn.age = branches[0][3] if len(branches[0]) > 3 else np.sum(
        [n[2].age * n[0] for n in branches[0][2]]) / np.sum([n[0] for n in branches[0][2]])
    sn.contain = [[b[0], b[1], b[2].id] for b in branches[0][2]]
    for br in branches[1:]:
        cbr, posterior, nodes = br[:3]
        # Climb from the most recently added node until reaching a node whose
        # barcode has every bit of cbr set; the new node is attached there.
        while (sn.barcode & cbr) != cbr:
            sn = sn.parent_node
        # The new node gets a taxon only when the first contributing node has
        # one.
        new_node = Node() if len(nodes) == 0 or (
            not nodes[0][2].taxon) else Node(taxon=Taxon(
                label=nodes[0][2].taxon.label))
        sn.add_child(new_node)
        sn = new_node
        sn.barcode, sn.posterior = cbr, posterior
        sn.contain = [[b[0], b[1], b[2].id] for b in nodes]
        if len(br) <= 3:
            # No explicit age / edge length: use w-weighted means over the
            # contributing nodes, or 0.0 / the parent's age when there are
            # no contributing nodes.
            sn.edge_length = 0.0 if len(nodes) == 0 else np.sum(
                [n[2].edge_length * n[0] for n in nodes]) / np.sum([n[0] for n in nodes])
            sn.age = sn.parent_node.age if len(nodes) == 0 else np.sum(
                [n[2].age * n[0] for n in nodes]) / np.sum([n[0] for n in nodes])
        else:
            sn.age, sn.edge_length = br[3:]
    # Leaves take their id from self.data['taxa']; internal nodes are numbered
    # consecutively after the taxa, in postorder.
    internal_id = len(self.data['taxa'])
    for node in supertree.postorder_node_iter():
        if node.is_leaf():
            node.id = self.data['taxa'][node.taxon.label]
        else:
            node.id = internal_id
            internal_id += 1
    return MetaTree(supertree)
taxonomyTree = sys.argv[3] species = {} lines = open(speciesList, 'r') for line in lines: species[line.strip()] = line.strip() lines = open(taxonomyFile, 'r') header = lines.readline() nodes_dict = {} #Read first line, root node line = lines.readline() results = line.strip().split(',') tree = Tree() root = Node() root.__dict__['label'] = results[0].replace("\"", "") nodes_dict[results[0].replace("\"", "")] = root prune = ['1'] #Add root node to tree tree.__dict__['_seed_node'].add_child(root) for line in lines: results = line.strip().split(',') node = Node() node.__dict__['label'] = results[0].replace("\"", "") node.taxon = Taxon(results[0].replace("\"", "")) nodes_dict[results[0].replace("\"", "")] = node nodes_dict[results[1].replace("\"", "")].add_child(node) if results[0].replace("\"", "") not in species:
def rdf2dendropyTree(filepath):
    """
    Parse the RDF file at `filepath` into a rooted dendropy Tree.

    Every triple with the OBO[HAS_PARENT_PREDICATE] predicate makes its
    subject a child of its object.  A node whose resource has a
    REPRESENTS_TU_PREDICATE value receives a Taxon labelled with the RDFS
    label of that value.

    Relies on rdflib and dendropy.

    Returns the Tree when exactly one parentless node is found.
    Raises ValueError if more than one parentless node remains.
    Terminates the process with sys.exit("no parentless") if there is none.

    When `_DEBUGGING` is set, a trace is written to "parse_rdf.txt".
    """
    from rdflib.graph import Graph
    from dendropy import Node, Tree, Edge, TaxonSet, Taxon

    graph = Graph()
    graph.parse(filepath)

    # NOTE(review): nodes are cached under id() of the rdflib term, which only
    # matches a term seen earlier if the store returns the identical Python
    # object -- confirm for the rdflib version in use.
    nd_dict = {}
    taxon_set = TaxonSet()
    OBO = Namespace(u"http://purl.obolibrary.org/obo/")
    parentless = set()

    out = open("parse_rdf.txt", "w") if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(id(o))
            if parent is None:
                raw_o = o
                o = rdflib.resource.Resource(graph, o)
                o_tu = o.value(OBO[REPRESENTS_TU_PREDICATE])
                if o_tu:
                    o_label = o_tu.value(rdflib.RDFS.label)
                    t = Taxon(label=o_label)
                    taxon_set.append(t)
                    parent = Node(taxon=t)
                else:
                    parent = Node()
                nd_dict[id(raw_o)] = parent
                # First seen as a parent: a root candidate until it appears
                # as the child of another node.
                parentless.add(parent)
            child = nd_dict.get(id(s))
            if child is None:
                raw_s = s
                s = rdflib.resource.Resource(graph, s)
                s_tu = s.value(OBO[REPRESENTS_TU_PREDICATE])
                if s_tu:
                    s_label = s_tu.value(rdflib.RDFS.label)
                    t = Taxon(label=s_label)
                    taxon_set.append(t)
                    child = Node(taxon=t)
                else:
                    child = Node()
                nd_dict[id(raw_s)] = child
            else:
                if child in parentless:
                    parentless.remove(child)
            parent.add_child(child)

            if out is not None:
                out.write("%s %s %s\n" % (str(s), p, o))
                out.write("%s\n" % (str(parentless)))
    finally:
        # Close the debug file even if processing a triple raises.
        if out is not None:
            out.close()

    if len(parentless) != 1:
        if not parentless:
            # Original behaviour preserved: abort the process when the graph
            # provides no root candidate.
            sys.exit("no parentless")
        message = (
            "Expecting to find exactly one Node (an object of a has_Parent triple) in the graph without a parent. Found %d"
            % len(parentless)
        )
        # All candidates are listed; a cutoff could be added here to suppress
        # really long output.
        message += ":\n "
        for i in parentless:
            if i.label:
                message += "\n " + i.label
            else:
                message += "\n <unlabeled>" + str(id(i))
        raise ValueError(message)

    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree