def reroot_at_midpoint(self, update_splits=False, delete_outdegree_one=True):
    '''
    Reroot ``self._tree`` at the midpoint of its longest tip-to-tip path.

    Modified from the source code of Dendropy v3.12.0.

    The two end nodes and their distance come from a
    ``treecalc.PatristicDistanceMatrix`` built on ``self._tree``
    (``max_dist_nodes`` / ``max_dist``).  The midpoint sits half of that
    distance above one of the two end nodes; which one is not known in
    advance, so the walk towards their MRCA is tried from each end in turn.

    Parameters:
        update_splits: when true, ``self._tree.update_splits`` is called
            (with ``delete_outdegree_one=False``) after rerooting.
        delete_outdegree_one: forwarded unchanged to ``reseed_at``.

    Returns:
        ``self._tree.seed_node`` after rerooting.

    Raises:
        AssertionError: if neither walk locates the midpoint.
    '''
    pdm = treecalc.PatristicDistanceMatrix(self._tree)
    n1, n2 = pdm.max_dist_nodes
    half_dist = float(pdm.max_dist) / 2
    mrca_node = pdm.mrca(n1.taxon, n2.taxon)

    break_on_node = None        # set iff the midpoint coincides with an existing node
    target_edge = None          # set iff the midpoint falls strictly inside an edge
    head_node_edge_len = None   # length kept below the new root on target_edge

    # The midpoint lies on only one of the two end-node-to-MRCA paths.
    # Climbing from the nearer end reaches the MRCA without finding it,
    # so fall through to the other end instead of failing.
    for start_node in (n1, n2):
        plen = half_dist
        cur_node = start_node
        while cur_node is not mrca_node:
            edge_len = cur_node.edge.length
            if edge_len > plen:
                target_edge = cur_node.edge
                head_node_edge_len = plen
                break
            elif abs(edge_len - plen) < 1e-6:
                # Midpoint is (within tolerance) the node at the top of this edge.
                break_on_node = cur_node.parent_node
                break
            plen -= edge_len
            cur_node = cur_node.parent_node
        if break_on_node is not None or target_edge is not None:
            break

    assert break_on_node is not None or target_edge is not None

    if break_on_node is not None:
        self._tree.reseed_at(break_on_node,
                             update_splits=False,
                             delete_outdegree_one=delete_outdegree_one)
    else:
        # Split target_edge with a new node and make that node the seed.
        tail_node_edge_len = target_edge.length - head_node_edge_len
        old_head_node = target_edge.head_node
        old_tail_node = target_edge.tail_node
        old_tail_node.remove_child(old_head_node)
        new_seed_node = Node()
        new_seed_node.add_child(old_head_node, edge_length=head_node_edge_len)
        old_tail_node.add_child(new_seed_node, edge_length=tail_node_edge_len)
        self._tree.reseed_at(new_seed_node,
                             update_splits=False,
                             delete_outdegree_one=delete_outdegree_one)

    self._tree.is_rooted = True
    if update_splits:
        self._tree.update_splits(delete_outdegree_one=False)
    return self._tree.seed_node
def get_dendropy_tree(self, cluster_tree):
    """
    Build a DendroPy ``Tree`` from the clusters of ``cluster_tree``.

    Clusters returned by ``cluster_tree.get_cluster_list()`` that carry a
    taxon are treated as leaves; the others are treated as internal nodes
    and are processed from the smallest ``clusters`` list to the largest,
    so the tree is assembled bottom-up.

    Parameters:
        cluster_tree: object providing ``get_cluster_list()``; every cluster
            must expose ``taxon`` and, for internal nodes, ``clusters``.

    Returns:
        A ``Tree`` seeded at the first entry of the list of assembled nodes.

    Raises:
        IndexError: if ``cluster_tree`` contains no internal (taxon-less)
            cluster, because no node is ever assembled.
    """
    # Separate the leaves from the internal nodes into two lists.
    leafs = []
    nodes = []
    for cluster in cluster_tree.get_cluster_list():
        if cluster.taxon is not None:
            leafs.append(cluster)
        else:
            nodes.append(cluster)

    # Visit the internal nodes with the fewest leaves first so that the
    # tree is built from the bottom up.
    nodes.sort(key=lambda x: len(x.clusters))

    # Temporary sub-trees that have been created so far.
    created_nodes = []
    for node in nodes:
        # Create a tree node for this cluster, then hang its leaves on it.
        created_node = Node()
        for leaf in node.clusters:
            if not self.is_leaf_in_leafs(leaf, leafs):
                # The leaf is no longer in the leaf list, i.e. it was already
                # consumed by an earlier temporary node.  Find that node,
                # attach it as a child, and take it out of the temporary list
                # (the helper hands back the updated list).
                sub_created_node, created_nodes = self.find_created_node_with_leaf(created_nodes, leaf)
                if sub_created_node is not None:
                    created_node.add_child(sub_created_node)
            else:
                # The leaf has not been used yet: attach it directly and
                # remove it from the list of available leaves.
                created_node.add_child(leaf)
                leafs = self.remove_leaf_from_list(leaf, leafs)
        created_nodes.append(created_node)

    # At the end the node holding every leaf and sub-node is expected to be
    # the first (and only) entry of the temporary list.
    tree = Tree(seed_node=created_nodes[0])
    return tree
def get_dendropy_tree(self, cluster_tree):
    """
    Build a DendroPy ``Tree`` from the clusters of ``cluster_tree``.

    Clusters returned by ``cluster_tree.get_cluster_list()`` that carry a
    taxon are treated as leaves; the others are treated as internal nodes
    and are processed from the smallest ``clusters`` list to the largest,
    so the tree is assembled bottom-up.

    Parameters:
        cluster_tree: object providing ``get_cluster_list()``; every cluster
            must expose ``taxon`` and, for internal nodes, ``clusters``.

    Returns:
        A ``Tree`` seeded at the first entry of the list of assembled nodes.

    Raises:
        IndexError: if ``cluster_tree`` contains no internal (taxon-less)
            cluster, because no node is ever assembled.
    """
    # Separate the leaves from the internal nodes into two lists.
    leafs = []
    nodes = []
    for cluster in cluster_tree.get_cluster_list():
        if cluster.taxon is not None:
            leafs.append(cluster)
        else:
            nodes.append(cluster)

    # Visit the internal nodes with the fewest leaves first so that the
    # tree is built from the bottom up.
    nodes.sort(key=lambda x: len(x.clusters))

    # Temporary sub-trees that have been created so far.
    created_nodes = []
    for node in nodes:
        # Create a tree node for this cluster, then hang its leaves on it.
        created_node = Node()
        for leaf in node.clusters:
            if not self.is_leaf_in_leafs(leaf, leafs):
                # The leaf is no longer in the leaf list, i.e. it was already
                # consumed by an earlier temporary node.  Find that node,
                # attach it as a child, and take it out of the temporary list
                # (the helper hands back the updated list).
                sub_created_node, created_nodes = self.find_created_node_with_leaf(created_nodes, leaf)
                if sub_created_node is not None:
                    created_node.add_child(sub_created_node)
            else:
                # The leaf has not been used yet: attach it directly and
                # remove it from the list of available leaves.
                created_node.add_child(leaf)
                leafs = self.remove_leaf_from_list(leaf, leafs)
        created_nodes.append(created_node)

    # At the end the node holding every leaf and sub-node is expected to be
    # the first (and only) entry of the temporary list.
    tree = Tree(seed_node=created_nodes[0])
    return tree
def resolve_node(node):
    """
    List, as Newick strings, the two-way groupings of ``node``'s children.

    For every group produced by ``list_bipartitions`` that has more than one
    member, a small three-node tree is built: a root with two children, one
    holding the group and the other holding the remaining children.  Its
    Newick serialisation is collected.

    The children are re-added to ``node`` before returning, since building
    the temporary trees attaches them to other parents.

    Returns:
        A list of Newick strings, one per qualifying group.
    """
    kids = node.child_nodes()
    groups = list_bipartitions(kids)
    newick_strings = []

    for group in groups:
        if len(group) <= 1:
            continue
        others = [kid for kid in kids if kid not in group]

        top = Node()
        side_a = Node()
        side_b = Node()
        for kid in group:
            side_a.add_child(kid)
        for kid in others:
            side_b.add_child(kid)
        top.add_child(side_a)
        top.add_child(side_b)

        resolved = Tree(seed_node=top)
        newick_strings.append(resolved.as_string("newick"))

    # Hand the children back to their original parent.
    for kid in kids:
        node.add_child(kid)
    return newick_strings
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Copied from more recent DendroPy than the version that we bundle...

    Resolve every polytomy (node with more than two children) of `tree`
    in place, using new internal nodes whose edge length is 0.0.

    All but the first two children of a polytomy are detached and then
    re-inserted one at a time, each next to an attachment point drawn with
    ``rng.sample(points, 1)``; `rng` must therefore offer a ``sample()``
    method that behaves like ``random.sample()``.  When `rng` is None the
    module-level ``POLYTOMY_RNG`` is used.

    If `update_splits` is true, ``tree.update_splits()`` is called once all
    polytomies have been processed.
    """
    _LOG.debug("start resolving polytomies")
    from dendropy import Node
    if rng is None:
        rng = POLYTOMY_RNG

    # Collect first, modify afterwards: the tree is not altered while it
    # is being traversed.
    polytomies = [nd for nd in tree.postorder_node_iter()
                  if len(nd.child_nodes()) > 2]
    _LOG.debug("Found %d polytomies" % len(polytomies))

    for node in polytomies:
        kids = node.child_nodes()
        if len(kids) <= 2:
            continue
        pending = kids[2:]
        for extra in pending:
            node.remove_child(extra)
        anchors = kids[:2] + [node]
        while pending:
            newcomer = pending.pop()
            target = rng.sample(anchors, 1)[0]
            joint = Node()
            joint.edge.length = 0.0
            parent = target.parent_node
            if parent is None:
                # The target has no parent: push its current children down
                # under the new node and add the newcomer beside that node.
                former_kids = list(target.child_nodes())
                target.add_child(joint)
                target.add_child(newcomer)
                for kid in former_kids:
                    target.remove_child(kid)
                    joint.add_child(kid)
            else:
                # Insert the new node between the target and its parent and
                # make the newcomer the target's sibling.
                # NOTE(review): when the target is `node` itself, this places
                # the newcomer beside `node` rather than below it - confirm
                # this is intended.
                parent.add_child(joint)
                parent.remove_child(target)
                joint.add_child(target)
                joint.add_child(newcomer)
            anchors.append(joint)

    _LOG.debug("polytomies resolution - updating splits")
    if update_splits:
        tree.update_splits()
    _LOG.debug("polytomies resolved.")
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Copied from more recent DendroPy than the version that we bundle...

    Resolve every polytomy (node with more than two children) of `tree`
    in place, using new internal nodes whose edge length is 0.0.

    All but the first two children of a polytomy are detached and then
    re-inserted one at a time, each next to an attachment point drawn with
    ``rng.sample(points, 1)``; `rng` must therefore offer a ``sample()``
    method that behaves like ``random.sample()``.  When `rng` is None the
    module-level ``POLYTOMY_RNG`` is used.

    If `update_splits` is true, ``tree.update_splits()`` is called once all
    polytomies have been processed.
    """
    _LOG.debug("start resolving polytomies")
    from dendropy import Node
    if rng is None:
        rng = POLYTOMY_RNG

    # Collect first, modify afterwards: the tree is not altered while it
    # is being traversed.
    polytomies = [nd for nd in tree.postorder_node_iter()
                  if len(nd.child_nodes()) > 2]
    _LOG.debug("Found %d polytomies" %len(polytomies))

    for node in polytomies:
        kids = node.child_nodes()
        if len(kids) <= 2:
            continue
        pending = kids[2:]
        for extra in pending:
            node.remove_child(extra)
        anchors = kids[:2] + [node]
        while pending:
            newcomer = pending.pop()
            target = rng.sample(anchors, 1)[0]
            joint = Node()
            joint.edge.length = 0.0
            parent = target.parent_node
            if parent is None:
                # The target has no parent: push its current children down
                # under the new node and add the newcomer beside that node.
                former_kids = list(target.child_nodes())
                target.add_child(joint)
                target.add_child(newcomer)
                for kid in former_kids:
                    target.remove_child(kid)
                    joint.add_child(kid)
            else:
                # Insert the new node between the target and its parent and
                # make the newcomer the target's sibling.
                # NOTE(review): when the target is `node` itself, this places
                # the newcomer beside `node` rather than below it - confirm
                # this is intended.
                parent.add_child(joint)
                parent.remove_child(target)
                joint.add_child(target)
                joint.add_child(newcomer)
            anchors.append(joint)

    _LOG.debug("polytomies resolution - updating splits")
    if update_splits:
        tree.update_splits()
    _LOG.debug("polytomies resolved.")
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Arbitrarily resolve polytomies using 0-length splits.

    Every node of `tree` with more than two children is reduced to two
    children by repeatedly moving a pair of its children under a new
    internal node whose edge length is 0.

    If `rng` is an object with a ``sample()`` method (behaving like
    ``random.sample()``), each pair is drawn with ``rng.sample(children, 2)``
    so a seeded generator gives reproducible resolutions.  A truthy `rng`
    without a ``sample`` attribute falls back to the module-level
    ``random.sample``.

    If `rng` is not passed in (or is falsy), the polytomy is broken
    deterministically by repeatedly joining the first two children.

    If `update_splits` is true, ``tree.update_splits()`` is called at the end.
    """
    # Collect the polytomies first so the tree is not modified while it is
    # being traversed.
    polytomies = [nd for nd in tree.postorder_node_iter()
                  if len(nd.child_nodes()) > 2]

    for node in polytomies:
        children = node.child_nodes()
        while len(children) > 2:
            nn1 = Node()
            nn1.edge.length = 0
            if rng:
                # Use the caller's generator, not the global one; keep
                # random.sample as a fallback for callers that passed a
                # plain truthy flag.
                sampler = getattr(rng, "sample", random.sample)
                c1, c2 = sampler(children, 2)
            else:
                c1, c2 = children[0], children[1]
            node.remove_child(c1)
            node.remove_child(c2)
            nn1.add_child(c1)
            nn1.add_child(c2)
            node.add_child(nn1)
            children = node.child_nodes()

    if update_splits:
        tree.update_splits()
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Arbitrarily resolve polytomies using 0-length splits.

    Every node of `tree` with more than two children is reduced to two
    children by repeatedly moving a pair of its children under a new
    internal node whose edge length is 0.

    If `rng` is an object with a ``sample()`` method (behaving like
    ``random.sample()``), each pair is drawn with ``rng.sample(children, 2)``
    so a seeded generator gives reproducible resolutions.  A truthy `rng`
    without a ``sample`` attribute falls back to the module-level
    ``random.sample``.

    If `rng` is not passed in (or is falsy), the polytomy is broken
    deterministically by repeatedly joining the first two children.

    If `update_splits` is true, ``tree.update_splits()`` is called at the end.
    """
    # Collect the polytomies first so the tree is not modified while it is
    # being traversed.
    polytomies = [nd for nd in tree.postorder_node_iter()
                  if len(nd.child_nodes()) > 2]

    for node in polytomies:
        children = node.child_nodes()
        while len(children) > 2:
            nn1 = Node()
            nn1.edge.length = 0
            if rng:
                # Use the caller's generator, not the global one; keep
                # random.sample as a fallback for callers that passed a
                # plain truthy flag.
                sampler = getattr(rng, "sample", random.sample)
                c1, c2 = sampler(children, 2)
            else:
                c1, c2 = children[0], children[1]
            node.remove_child(c1)
            node.remove_child(c2)
            nn1.add_child(c1)
            nn1.add_child(c2)
            node.add_child(nn1)
            children = node.child_nodes()

    if update_splits:
        tree.update_splits()
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses the content (a `file_obj` file object or `data` as a string of
    RDF/XML) into a dendropyTree.

    Uses the 'has_Parent' term in
    http://www.evolutionaryontology.org/cdao/1.0/cdao.owl#
    to construct and return a rooted dendropy.Tree object.

    Every subject and object of a has_Parent triple becomes one Node; a
    term that has a REPRESENTS_TU_PREDICATE value gets a Taxon labelled with
    that value's rdfs:label.

    Relies on rdflib and dendropy.

    Returns the Tree, or None when no has_Parent triple yields a node
    without a parent (for example when the graph has no such triples).

    Raises ValueError if more than one node is left without a parent.
    '''
    from dendropy import Node, Tree, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')

    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    taxon_set = TaxonSet()
    # Keyed by the RDF term itself (terms hash and compare by value), so the
    # lookup does not depend on the store handing back the same Python
    # object for every occurrence of a term.
    nd_dict = {}
    parentless = set()

    def _make_node(term):
        # Create the Node for `term`, with a Taxon when it represents a TU.
        tu = rdflib.resource.Resource(graph, term).value(OBO[REPRESENTS_TU_PREDICATE])
        if not tu:
            return Node()
        taxon = Taxon(label=tu.value(rdflib.RDFS.label))
        taxon_set.append(taxon)
        return Node(taxon=taxon)

    out = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)
            if parent is None:
                parent = nd_dict[o] = _make_node(o)
                # A node first seen as a parent is a root candidate until it
                # shows up as somebody's child.
                parentless.add(parent)
            child = nd_dict.get(s)
            if child is None:
                child = nd_dict[s] = _make_node(s)
            else:
                parentless.discard(child)
            parent.add_child(child)
            if out is not None:
                out.write('%s %s %s\n' % (str(s), p, o))
                out.write('%s\n' % (str(parentless)))
    finally:
        if out is not None:
            out.close()

    if len(parentless) != 1:
        if not parentless:
            return None
        message = (
            "Expecting to find exactly 1 Node (an object of a has_Parent "
            "triple) in the graph without a parent. Found %d"
            % len(parentless)
        )
        listing = [
            "\n " + (i.label if i.label else "<unlabeled>" + str(id(i)))
            for i in parentless
        ]
        message += ":\n " + "".join(listing)
        raise ValueError(message)

    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses the content (a `file_obj` file object or `data` as a string of
    RDF/XML) into a dendropyTree.

    Uses the 'has_Parent' term in
    http://www.evolutionaryontology.org/cdao/1.0/cdao.owl#
    to construct and return a rooted dendropy.Tree object.

    Every subject and object of a has_Parent triple becomes one Node; a
    term that has a REPRESENTS_TU_PREDICATE value gets a Taxon labelled with
    that value's rdfs:label.

    Relies on rdflib and dendropy.

    Returns the Tree, or None when no has_Parent triple yields a node
    without a parent (for example when the graph has no such triples).

    Raises ValueError if more than one node is left without a parent.
    '''
    from dendropy import Node, Tree, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')

    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    taxon_set = TaxonSet()
    # Keyed by the RDF term itself (terms hash and compare by value), so the
    # lookup does not depend on the store handing back the same Python
    # object for every occurrence of a term.
    nd_dict = {}
    parentless = set()

    def _make_node(term):
        # Create the Node for `term`, with a Taxon when it represents a TU.
        tu = rdflib.resource.Resource(graph, term).value(OBO[REPRESENTS_TU_PREDICATE])
        if not tu:
            return Node()
        taxon = Taxon(label=tu.value(rdflib.RDFS.label))
        taxon_set.append(taxon)
        return Node(taxon=taxon)

    out = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)
            if parent is None:
                parent = nd_dict[o] = _make_node(o)
                # A node first seen as a parent is a root candidate until it
                # shows up as somebody's child.
                parentless.add(parent)
            child = nd_dict.get(s)
            if child is None:
                child = nd_dict[s] = _make_node(s)
            else:
                parentless.discard(child)
            parent.add_child(child)
            if out is not None:
                out.write('%s %s %s\n' % (str(s), p, o))
                out.write('%s\n' % (str(parentless)))
    finally:
        if out is not None:
            out.close()

    if len(parentless) != 1:
        if not parentless:
            return None
        message = (
            "Expecting to find exactly 1 Node (an object of a has_Parent "
            "triple) in the graph without a parent. Found %d"
            % len(parentless)
        )
        listing = [
            "\n " + (i.label if i.label else "<unlabeled>" + str(id(i)))
            for i in parentless
        ]
        message += ":\n " + "".join(listing)
        raise ValueError(message)

    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
def rdf2dendropyTree(filepath):
    """
    Parse the RDF document at `filepath` into a rooted dendropy Tree.

    Every subject and object of a has_Parent triple (HAS_PARENT_PREDICATE in
    the http://purl.obolibrary.org/obo/ namespace) becomes one Node; a term
    that has a REPRESENTS_TU_PREDICATE value gets a Taxon labelled with that
    value's rdfs:label.  The single node that never appears as a child
    becomes the seed node.

    Returns the Tree.

    Raises ValueError if more than one node is left without a parent.
    Terminates the process via ``sys.exit("no parentless")`` when no node is
    left without a parent.
    """
    from rdflib.graph import Graph
    from dendropy import Node, Tree, TaxonSet, Taxon

    graph = Graph()
    graph.parse(filepath)

    OBO = Namespace(u"http://purl.obolibrary.org/obo/")
    taxon_set = TaxonSet()
    # Keyed by the RDF term itself (terms hash and compare by value), so the
    # lookup does not depend on the store handing back the same Python
    # object for every occurrence of a term.
    nd_dict = {}
    parentless = set()

    def _make_node(term):
        # Create the Node for `term`, with a Taxon when it represents a TU.
        tu = rdflib.resource.Resource(graph, term).value(OBO[REPRESENTS_TU_PREDICATE])
        if not tu:
            return Node()
        taxon = Taxon(label=tu.value(rdflib.RDFS.label))
        taxon_set.append(taxon)
        return Node(taxon=taxon)

    out = open("parse_rdf.txt", "w") if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)
            if parent is None:
                parent = nd_dict[o] = _make_node(o)
                # A node first seen as a parent is a root candidate until it
                # shows up as somebody's child.
                parentless.add(parent)
            child = nd_dict.get(s)
            if child is None:
                child = nd_dict[s] = _make_node(s)
            else:
                parentless.discard(child)
            parent.add_child(child)
            if out is not None:
                out.write("%s %s %s\n" % (str(s), p, o))
                out.write("%s\n" % (str(parentless)))
    finally:
        if out is not None:
            out.close()

    if len(parentless) != 1:
        if not parentless:
            # Kept from the original: this aborts the whole process.
            sys.exit("no parentless")
        message = (
            "Expecting to find exactly 1 Node (an object of a has_Parent "
            "triple) in the graph without a parent. Found %d"
            % len(parentless)
        )
        listing = [
            "\n " + (i.label if i.label else "<unlabeled>" + str(id(i)))
            for i in parentless
        ]
        message += ":\n " + "".join(listing)
        raise ValueError(message)

    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
# Usage: <script> TREEFILE INFOFILE OUTFILE
#   TREEFILE  input tree in Newick format
#   INFOFILE  whitespace-separated lines: an existing leaf label followed by
#             the labels of the taxa to add next to that leaf
#   OUTFILE   where the expanded tree is written (Newick)
treefile = argv[1]
infofile = argv[2]
outfile = argv[3]

# Map each listed leaf label to the labels that must be added beside it.
mapping = {}
with open(infofile, 'r') as f:
    for line in f:
        taxa = line.split()
        if not taxa:
            # Blank (or whitespace-only) line: nothing to record.
            continue
        mapping[taxa[0]] = taxa[1:]

myTree = Tree.get_from_path(treefile, "newick")

# Snapshot the leaves first: the loop below adds new leaves to the tree.
leaves = list(myTree.leaf_node_iter())
for node in leaves:
    if node.taxon.label in mapping:
        # Replace the leaf by a new internal node carrying the leaf's edge
        # length, and hang the original leaf underneath it.
        # NOTE(review): the original leaf keeps its own edge length as well,
        # so it ends up further from the old parent than the zero-length
        # taxa added below - confirm this is intended.
        new_node = Node(edge_length=node.edge_length)
        pNode = node.parent_node
        pNode.remove_child(node)
        pNode.add_child(new_node)
        new_node.add_child(node)
        pNode = new_node
        # Add every mapped taxon as a zero-length sibling of the leaf.
        for taxon_name in mapping[node.taxon.label]:
            new_taxon = Taxon(label=taxon_name)
            myTree.taxon_namespace.add_taxon(new_taxon)
            new_node = Node(edge_length=0, taxon=new_taxon)
            pNode.add_child(new_node)

myTree.write_to_path(outfile, "newick")
parser.add_argument("-od", type=float, default=0, required=False, help="Outgroup branch length")
parser.add_argument("-i", type=str, default="infile.tree", required=True, help="Input Newick tree file")
parser.add_argument("-o", type=str, default="outtree.tree", required=False, help="Output Newick tree file")
args = parser.parse_args()

trees = TreeList.get_from_path(args.i, schema="newick", rooting="force-rooted")

# Optional step 1: divide every defined branch length by the generation time.
if args.gt != 0:
    # %g rather than %d so a fractional generation time is not truncated in
    # the message; the parenthesised form prints the same text on Python 2
    # and Python 3.
    print("Scaling branch lengths to time with generation time %g\n" % args.gt)
    for tree in trees:
        for edge in tree.preorder_edge_iter():
            if edge.length is not None:
                edge.length = edge.length / args.gt

# Optional step 2: add an "outgroup" leaf as sister to each whole tree.
if args.od != 0:
    print("Adding outgroup with branch length %g\n" % args.od)
    namespace = trees.taxon_namespace
    outgroup = Taxon("outgroup")
    namespace.add_taxon(outgroup)
    for tree in trees:
        outgroup_node = Node(taxon=outgroup, edge_length=args.od)
        new_root_node = Node()
        # The old root's edge gets the outgroup length minus the old root's
        # distance from its tip; this is negative whenever that distance
        # exceeds the outgroup length.
        tree.seed_node.edge_length = args.od - tree.seed_node.distance_from_tip()
        new_root_node.add_child(tree.seed_node)
        new_root_node.add_child(outgroup_node)
        tree.seed_node = new_root_node

trees.write(path=args.o, schema="newick", suppress_rooting=True)