Example #1
0
File: tree.py Project: czli/Canopy
	def reroot_at_midpoint(self, update_splits=False, delete_outdegree_one=True):
		'''
		Re-root the wrapped tree at the midpoint of its longest tip-to-tip path.

		Modified from the source code of Dendropy v3.12.0.

		The two most distant nodes are taken from a patristic distance
		matrix. The path between them is searched for the point that lies
		half of the maximum distance away from either end. If that point
		coincides (within 1e-6) with an existing node, the tree is reseeded
		there; otherwise the edge containing it is split by a new node and
		the tree is reseeded at that new node.

		Parameters:
			update_splits: if true, `update_splits(delete_outdegree_one=False)`
				is called on the tree after re-rooting.
			delete_outdegree_one: forwarded to `reseed_at`.

		Returns the seed node of the re-rooted tree.

		Raises AssertionError if no midpoint could be located on the path.
		'''
		pdm = treecalc.PatristicDistanceMatrix(self._tree)
		n1, n2 = pdm.max_dist_nodes
		half_dist = float(pdm.max_dist) / 2
		mrca_node = pdm.mrca(n1.taxon, n2.taxon)
		break_on_node = None  # set only if the midpoint sits on an existing node
		target_edge = None  # set only if the midpoint falls inside an edge
		head_node_edge_len = None

		# The midpoint is half_dist away from *both* end nodes and lies on
		# whichever side of the MRCA is longer. Walk up from n1 first (the
		# original behaviour); if the MRCA is reached without finding the
		# midpoint, it must be on the n2 side, so repeat the walk from n2.
		for start_node in (n1, n2):
			plen = half_dist
			cur_node = start_node
			while cur_node is not mrca_node:
				if cur_node.edge.length > plen:
					target_edge = cur_node.edge
					head_node_edge_len = plen
					break
				elif abs(cur_node.edge.length - plen) < 1e-6:
					# Midpoint is at the upper end of this edge.
					break_on_node = cur_node.parent_node
					break
				else:
					plen -= cur_node.edge.length
					cur_node = cur_node.parent_node
			if break_on_node is not None or target_edge is not None:
				break

		assert break_on_node is not None or target_edge is not None, \
			"could not locate the midpoint between the two most distant nodes"

		if break_on_node is not None:
			self._tree.reseed_at(break_on_node, update_splits=False, delete_outdegree_one=delete_outdegree_one)
		else:
			# Split target_edge: head_node_edge_len stays below the new
			# seed node, the remainder goes above it.
			tail_node_edge_len = target_edge.length - head_node_edge_len
			old_head_node = target_edge.head_node
			old_tail_node = target_edge.tail_node
			old_tail_node.remove_child(old_head_node)
			new_seed_node = Node()
			new_seed_node.add_child(old_head_node, edge_length=head_node_edge_len)
			old_tail_node.add_child(new_seed_node, edge_length=tail_node_edge_len)
			self._tree.reseed_at(new_seed_node, update_splits=False, delete_outdegree_one=delete_outdegree_one)

		self._tree.is_rooted = True

		if update_splits:
			self._tree.update_splits(delete_outdegree_one=False)

		return self._tree.seed_node
Example #2
0
    def get_dendropy_tree(self, cluster_tree):
        """Build a DendroPy ``Tree`` from the clusters of ``cluster_tree``.

        Clusters returned by ``cluster_tree.get_cluster_list()`` that carry a
        taxon are treated as leaves; those without a taxon are treated as
        internal nodes. Internal nodes are processed from the fewest member
        clusters to the most, so the tree is assembled bottom-up.

        Returns a ``Tree`` seeded at the first entry of the list of created
        nodes. Raises ``IndexError`` if there are no internal clusters.
        """
        # Separate leaves from internal nodes.
        leafs = []
        nodes = []
        for cluster in cluster_tree.get_cluster_list():
            if cluster.taxon is not None:
                leafs.append(cluster)
            else:
                nodes.append(cluster)
        # Sort internal nodes so that those with the fewest members come
        # first and the tree is built from the bottom up.
        nodes.sort(key=lambda x: len(x.clusters))
        # Temporary subtree roots that have been created but not yet
        # attached to a larger node.
        created_nodes = []
        for node in nodes:
            # Create a tree node for this cluster and attach its members.
            created_node = Node()
            for leaf in node.clusters:
                if not self.is_leaf_in_leafs(leaf, leafs):
                    # The leaf is no longer in the leaf list, so it was
                    # consumed earlier and already sits in a temporary node.
                    # Attach that temporary node instead (the helper
                    # presumably also removes it from created_nodes --
                    # verify against its definition).
                    sub_created_node, created_nodes = self.find_created_node_with_leaf(created_nodes, leaf)
                    if sub_created_node is not None:
                        created_node.add_child(sub_created_node)
                else:
                    # The leaf has not been used yet: attach it directly and
                    # drop it from the list of available leaves.
                    created_node.add_child(leaf)
                    leafs = self.remove_leaf_from_list(leaf, leafs)

            created_nodes.append(created_node)

        # By now the node holding all leaves and sub-nodes is expected to be
        # the first (and only) entry in the temporary list.
        tree = Tree(seed_node=created_nodes[0])
        return tree
Example #3
0
    def get_dendropy_tree(self, cluster_tree):
        """Convert ``cluster_tree`` into a DendroPy ``Tree``.

        Every cluster from ``cluster_tree.get_cluster_list()`` with a taxon
        is a leaf; every cluster without one is an internal node. Internal
        nodes are handled in order of increasing ``len(cluster.clusters)`` so
        that smaller groups are built before the groups that contain them.

        Returns a ``Tree`` whose seed node is the first created node.
        Raises ``IndexError`` when the cluster list has no internal nodes.
        """
        # Split the clusters into leaves and internal nodes.
        leafs = []
        nodes = []
        for cluster in cluster_tree.get_cluster_list():
            if cluster.taxon is not None:
                leafs.append(cluster)
            else:
                nodes.append(cluster)
        # Start from the internal nodes with the fewest members so the tree
        # grows from the bottom up.
        nodes.sort(key=lambda x: len(x.clusters))
        # Subtree roots created so far that still await a parent.
        created_nodes = []
        for node in nodes:
            # One new tree node per internal cluster; its members are added
            # as children below.
            created_node = Node()
            for leaf in node.clusters:
                if not self.is_leaf_in_leafs(leaf, leafs):
                    # Already consumed: the leaf lives inside a previously
                    # created node, so that node becomes the child. The
                    # helper returns the updated list of created nodes
                    # (presumably without the node it found -- TODO confirm).
                    sub_created_node, created_nodes = self.find_created_node_with_leaf(created_nodes, leaf)
                    if sub_created_node is not None:
                        created_node.add_child(sub_created_node)
                else:
                    # Not consumed yet: attach the leaf itself and remove it
                    # from the list of available leaves.
                    created_node.add_child(leaf)
                    leafs = self.remove_leaf_from_list(leaf, leafs)

            created_nodes.append(created_node)

        # The node containing every leaf and sub-node is expected to be the
        # single remaining entry of the temporary list.
        tree = Tree(seed_node=created_nodes[0])
        return tree
Example #4
0
def resolve_node(node):
    """Return newick strings for the binary splits of `node`'s children.

    For every bipartition yielded by `list_bipartitions` over the children
    that has more than one member, a small tree is built whose root has two
    children: one holding the members of the bipartition and one holding the
    remaining children. The tree is serialized to newick and collected.

    Building each tree re-parents the children, so they are added back to
    `node` after every serialized tree.
    """
    children = node.child_nodes()
    bipartitions = list_bipartitions(children)

    newick_strings = []
    for part in bipartitions:
        if len(part) <= 1:
            continue

        rest = [child for child in children if child not in part]
        root = Node()
        side_a = Node()
        side_b = Node()
        for child in part:
            side_a.add_child(child)
        for child in rest:
            side_b.add_child(child)
        root.add_child(side_a)
        root.add_child(side_b)

        newick_strings.append(Tree(seed_node=root).as_string("newick"))

        # Hand the children back to the node they were taken from.
        for child in children:
            node.add_child(child)

    return newick_strings
Example #5
0
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Copied from more recent DendroPy than the version that we bundle...

    Resolve every polytomy of `tree` in place using 0-length edges.

    Nodes with more than two children are collected in post-order. For each
    of them all children beyond the first two are detached and re-attached
    one at a time: an attachment point is drawn with `rng.sample()` from the
    first two children, the node itself and every node inserted so far, and
    a new zero-length node joining that point with the detached child is
    inserted.

    `rng` must provide a `sample()` method that behaves like
    `random.sample()`. When `rng` is None the module-level `POLYTOMY_RNG`
    is used.

    If `update_splits` is true, `tree.update_splits()` is called at the end.
    """
    _LOG.debug("start resolving polytomies")
    from dendropy import Node
    if rng is None:
        rng = POLYTOMY_RNG
    polytomies = [nd for nd in tree.postorder_node_iter()
                  if len(nd.child_nodes()) > 2]

    _LOG.debug("Found %d polytomies" % len(polytomies))
    for node in polytomies:
        children = node.child_nodes()
        if len(children) <= 2:
            continue
        pending = children[2:]
        for child in pending:
            node.remove_child(child)
        attachment_points = children[:2] + [node]
        while pending:
            next_child = pending.pop()
            sibling = rng.sample(attachment_points, 1)[0]
            joint = Node()
            joint.edge.length = 0.0
            parent = sibling.parent_node
            if parent is not None:
                # Insert the joint between the sibling and its parent.
                parent.add_child(joint)
                parent.remove_child(sibling)
                joint.add_child(sibling)
                joint.add_child(next_child)
            else:
                # The sibling has no parent: keep it in place and push its
                # former children down into the joint instead.
                former_children = list(sibling.child_nodes())
                sibling.add_child(joint)
                sibling.add_child(next_child)
                for child in former_children:
                    sibling.remove_child(child)
                    joint.add_child(child)
            attachment_points.append(joint)
    _LOG.debug("polytomies resolution - updating splits")
    if update_splits:
        tree.update_splits()
    _LOG.debug("polytomies resolved.")
Example #6
0
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Copied from more recent DendroPy than the version that we bundle...

    Arbitrarily resolve polytomies in `tree` (in place) with 0-length edges.

    `rng` is an object with a `sample()` method that should behave like
    `random.sample()`; it picks where each surplus child is re-attached.
    If `rng` is not passed in, the module-level `POLYTOMY_RNG` is used.

    Each node with more than two children keeps its first two. The others
    are removed and then added back one by one next to a sampled attachment
    point (one of the first two children, the node itself, or a node created
    earlier in this process) under a freshly created zero-length node.

    `tree.update_splits()` is called afterwards when `update_splits` is true.
    """
    _LOG.debug("start resolving polytomies")
    from dendropy import Node
    if rng is None:
        rng = POLYTOMY_RNG
    polytomies = []
    for candidate in tree.postorder_node_iter():
        if len(candidate.child_nodes()) > 2:
            polytomies.append(candidate)

    _LOG.debug("Found %d polytomies" %len(polytomies))
    for node in polytomies:
        children = node.child_nodes()
        if not len(children) > 2:
            continue
        kept, to_attach = children[:2], children[2:]
        for surplus in to_attach:
            node.remove_child(surplus)
        attachment_points = kept + [node]
        while to_attach:
            next_child = to_attach.pop()
            next_sib = rng.sample(attachment_points, 1)[0]
            next_attachment = Node()
            next_attachment.edge.length = 0.0
            sib_parent = next_sib.parent_node
            if sib_parent is None:
                # Parentless attachment point: the new node takes over its
                # existing children and sits beside the re-attached child.
                moved = list(next_sib.child_nodes())
                next_sib.add_child(next_attachment)
                next_sib.add_child(next_child)
                for grandchild in moved:
                    next_sib.remove_child(grandchild)
                    next_attachment.add_child(grandchild)
            else:
                # Otherwise the new node replaces the attachment point
                # under its parent and adopts both it and the child.
                sib_parent.add_child(next_attachment)
                sib_parent.remove_child(next_sib)
                next_attachment.add_child(next_sib)
                next_attachment.add_child(next_child)
            attachment_points.append(next_attachment)
    _LOG.debug("polytomies resolution - updating splits")
    if update_splits:
        tree.update_splits()
    _LOG.debug("polytomies resolved.")
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Arbitrarily resolve polytomies in `tree` (in place) using 0-length splits.

    Every node with more than two children is reduced to two children by
    repeatedly grouping a pair of its children under a new node whose edge
    length is 0.

    If `rng` is an object with a sample() method, that method chooses the
    pair to join; rng.sample() should behave like random.sample(). A truthy
    `rng` without a sample() method falls back to random.sample().
    If `rng` is not passed in, the polytomy is broken deterministically by
    repeatedly joining the first two children.

    If `update_splits` is true, tree.update_splits() is called at the end.
    """
    # Use the caller's generator when one is supplied, so that a seeded rng
    # gives reproducible resolutions.
    if rng:
        pick_pair = getattr(rng, "sample", random.sample)
    else:
        pick_pair = None

    polytomies = [nd for nd in tree.postorder_node_iter()
                  if len(nd.child_nodes()) > 2]
    for node in polytomies:
        children = node.child_nodes()
        while len(children) > 2:
            nn1 = Node()
            nn1.edge.length = 0
            if pick_pair is not None:
                c1, c2 = pick_pair(children, 2)
            else:
                c1, c2 = children[0], children[1]
            node.remove_child(c1)
            node.remove_child(c2)
            nn1.add_child(c1)
            nn1.add_child(c2)
            node.add_child(nn1)
            children = node.child_nodes()
    if update_splits:
        tree.update_splits()
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Arbitrarily resolve polytomies using 0-length splits (modifies `tree`).

    Nodes with more than two children are found in post-order; for each one,
    pairs of children are moved under a new 0-length node until only two
    children remain.

    If `rng` is an object with a sample() method, the pair is drawn with
    rng.sample(), which should behave like random.sample(). If `rng` is
    truthy but has no sample() method, random.sample() is used instead.
    If `rng` is not passed in, the polytomy is broken deterministically by
    repeatedly joining the first two children.

    tree.update_splits() is called afterwards when `update_splits` is true.
    """
    # The supplied generator, not the global `random` module, must drive the
    # sampling; otherwise a seeded rng has no effect on the result.
    sampler = getattr(rng, "sample", random.sample) if rng else None

    polytomies = []
    for node in tree.postorder_node_iter():
        if len(node.child_nodes()) > 2:
            polytomies.append(node)
    for node in polytomies:
        children = node.child_nodes()
        while len(children) > 2:
            nn1 = Node()
            nn1.edge.length = 0
            if sampler is None:
                pair = [children[0], children[1]]
            else:
                pair = sampler(children, 2)
            c1 = pair[0]
            c2 = pair[1]
            node.remove_child(c1)
            node.remove_child(c2)
            nn1.add_child(c1)
            nn1.add_child(c2)
            node.add_child(nn1)
            children = node.child_nodes()
    if update_splits:
        tree.update_splits()
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses the content (a `file_obj` file object or `data` as a string) into
    a dendropy Tree.

    Uses the 'has_Parent' term in http://www.evolutionaryontology.org/cdao/1.0/cdao.owl#
    to construct and return a rooted dendropy.Tree object.

    `file_obj`, when given, is parsed with rdflib's default format handling;
    otherwise `data` is parsed as RDF/XML.

    Relies on rdflib and dendropy.

    Returns the rooted Tree when exactly one node without a parent is found.
    Returns None when no has_Parent triple yields a parentless node.
    Raises ValueError if more than one parentless node is found.

    When the module-level `_DEBUGGING` flag is true, a trace of the processed
    triples is written to 'parse_rdf.txt' in the current directory.
    '''

    from dendropy import Node, Tree, Edge, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')
    taxon_set = TaxonSet()
    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    # RDF term -> dendropy Node. Keyed on the term itself (terms hash and
    # compare by value) rather than on id(): the same graph node may be
    # handed back as distinct Python objects in subject and object position.
    nd_dict = {}
    parentless = set()

    def _new_node(term):
        # Create the Node for `term`, with a Taxon if the term represents a TU.
        resource = rdflib.resource.Resource(graph, term)
        tu = resource.value(OBO[REPRESENTS_TU_PREDICATE])
        if tu:
            t = Taxon(label=tu.value(rdflib.RDFS.label))
            taxon_set.append(t)
            return Node(taxon=t)
        return Node()

    out = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)
            if parent is None:
                parent = _new_node(o)
                nd_dict[o] = parent
                # First seen as a parent: a root candidate until it shows up
                # as somebody's child.
                parentless.add(parent)
            child = nd_dict.get(s)
            if child is None:
                child = _new_node(s)
                nd_dict[s] = child
            else:
                parentless.discard(child)
            parent.add_child(child)

            if out is not None:
                out.write('%s %s %s\n' % (str(s), p, o))
                out.write('%s\n' % (str(parentless)))
    finally:
        # Close the trace file even if parsing a triple fails.
        if out is not None:
            out.close()

    if len(parentless) != 1:
        if not parentless:
            return None
        message = "Expecting to find exactly one Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(parentless)
        message += ":\n  "
        for nd in parentless:
            if nd.label:
                message += "\n  " + nd.label
            else:
                message += "\n  <unlabeled>" + str(id(nd))
        raise ValueError(message)
    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
Example #10
0
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses the content (a `file_obj` file object or `data` as a string) into
    a dendropy Tree.

    Uses the 'has_Parent' term in http://www.evolutionaryontology.org/cdao/1.0/cdao.owl#
    to construct and return a rooted dendropy.Tree object.

    If `file_obj` is given it is handed to rdflib's parser as a file;
    otherwise `data` is parsed with format 'xml'.

    Relies on rdflib and dendropy.

    Returns:
        The rooted Tree if exactly one parentless node exists, or None if
        the graph yields no parentless node at all.

    Raises:
        ValueError: if more than one parentless node is found. The message
            lists the label (or an id-based placeholder) of each of them.

    If the module-level `_DEBUGGING` flag is set, every processed triple and
    the current set of parentless nodes are written to 'parse_rdf.txt'.
    '''

    from dendropy import Node, Tree, Edge, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')
    taxon_set = TaxonSet()
    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    has_parent = OBO[HAS_PARENT_PREDICATE]
    represents_tu = OBO[REPRESENTS_TU_PREDICATE]
    # Map each RDF term to its Node. The term (hashable, compared by value)
    # is the key; id() is not a reliable key because one graph node can be
    # returned as different Python objects across triples.
    nd_dict = {}
    parentless = set()

    def _node_for(term):
        # Build a Node for `term`; attach a Taxon when it represents a TU.
        tu = rdflib.resource.Resource(graph, term).value(represents_tu)
        if not tu:
            return Node()
        t = Taxon(label=tu.value(rdflib.RDFS.label))
        taxon_set.append(t)
        return Node(taxon=t)

    out = None
    if _DEBUGGING:
        out = open('parse_rdf.txt', 'w')
    try:
        for s, p, o in graph.triples((None, has_parent, None)):
            parent = nd_dict.get(o)
            if parent is None:
                parent = nd_dict[o] = _node_for(o)
                # Provisionally a root until seen as the subject of a triple.
                parentless.add(parent)

            child = nd_dict.get(s)
            if child is None:
                child = nd_dict[s] = _node_for(s)
            else:
                parentless.discard(child)
            parent.add_child(child)

            if out is not None:
                out.write('%s %s %s\n' % (str(s), p, o))
                out.write('%s\n' % (str(parentless)))
    finally:
        # Do not leak the trace file handle if a triple cannot be processed.
        if out is not None:
            out.close()

    if len(parentless) == 1:
        tree = Tree(taxon_set=taxon_set)
        tree.seed_node = list(parentless)[0]
        tree.is_rooted = True
        return tree

    if not parentless:
        return None

    message = "Expecting to find exactly one Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(
        parentless)
    message += ":\n  "
    for nd in parentless:
        if nd.label:
            message += "\n  " + nd.label
        else:
            message += "\n  <unlabeled>" + str(id(nd))
    raise ValueError(message)
def rdf2dendropyTree(filepath):
    """
    Parse the RDF document at `filepath` into a rooted dendropy Tree.

    Every has_Parent triple in the graph links a child node (the subject) to
    its parent (the object). Nodes whose term represents a TU get a Taxon
    labelled with that TU's rdfs:label.

    Returns the rooted Tree when exactly one parentless node is found.
    Raises ValueError when more than one parentless node is found.
    Terminates the process via sys.exit("no parentless") when none is found.

    When the module-level `_DEBUGGING` flag is true, a trace of the processed
    triples is written to "parse_rdf.txt" in the current directory.
    """
    from rdflib.graph import Graph
    from dendropy import Node, Tree, Edge, TaxonSet, Taxon

    graph = Graph()
    graph.parse(filepath)
    taxon_set = TaxonSet()
    OBO = Namespace(u"http://purl.obolibrary.org/obo/")
    # RDF term -> Node. Terms hash and compare by value, which makes them a
    # safer key than id(): equal terms are not guaranteed to be one object.
    nd_dict = {}
    parentless = set()

    def _new_node(term):
        # Create the Node for `term`, with a Taxon if the term represents a TU.
        resource = rdflib.resource.Resource(graph, term)
        tu = resource.value(OBO[REPRESENTS_TU_PREDICATE])
        if tu:
            t = Taxon(label=tu.value(rdflib.RDFS.label))
            taxon_set.append(t)
            return Node(taxon=t)
        return Node()

    out = open("parse_rdf.txt", "w") if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)
            if parent is None:
                parent = _new_node(o)
                nd_dict[o] = parent
                # Root candidate until it appears as a child.
                parentless.add(parent)
            child = nd_dict.get(s)
            if child is None:
                child = _new_node(s)
                nd_dict[s] = child
            else:
                parentless.discard(child)
            parent.add_child(child)

            if out is not None:
                out.write("%s %s %s\n" % (str(s), p, o))
                out.write("%s\n" % (str(parentless)))
    finally:
        # Close the trace file even when a triple raises.
        if out is not None:
            out.close()

    if len(parentless) != 1:
        if not parentless:
            sys.exit("no parentless")
        message = (
            "Expecting to find exactly one Node (an object of a has_Parent triple) in the graph without a parent. Found %d"
            % len(parentless)
        )
        message += ":\n  "
        for nd in parentless:
            if nd.label:
                message += "\n  " + nd.label
            else:
                message += "\n  <unlabeled>" + str(id(nd))
        raise ValueError(message)
    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
Example #12
0
# Command-line arguments: input newick tree, taxon mapping file, output path.
treefile = argv[1]
infofile = argv[2]
outfile = argv[3]

# Each non-blank line of the mapping file is whitespace-separated: the first
# field is the label of a leaf already in the tree, the remaining fields are
# labels of taxa to add next to it.
with open(infofile, 'r') as f:
    mapping = {}
    for line in f:
        taxa = line.split()
        if not taxa:
            continue  # tolerate blank lines instead of failing on taxa[0]
        mapping[taxa[0]] = taxa[1:]

myTree = Tree.get_from_path(treefile, "newick")

# Snapshot the leaves first: the loop below adds new leaves to the tree.
leaves = list(myTree.leaf_node_iter())

for node in leaves:
    if node.taxon.label in mapping:
        # Replace the leaf by an internal node that takes over the leaf's
        # branch length, and hang the leaf underneath it.
        new_node = Node(edge_length=node.edge_length)
        pNode = node.parent_node
        pNode.remove_child(node)
        pNode.add_child(new_node)
        new_node.add_child(node)
        # The branch length now lives on the new internal node. Zero it on
        # the leaf so its distance to the old parent is unchanged and it
        # matches the zero-length taxa added below.
        node.edge_length = 0
        pNode = new_node

        for taxon_name in mapping[node.taxon.label]:
            new_taxon = Taxon(label=taxon_name)
            myTree.taxon_namespace.add_taxon(new_taxon)
            new_node = Node(edge_length=0, taxon=new_taxon)
            pNode.add_child(new_node)

myTree.write_to_path(outfile, "newick")
# NOTE(review): `parser` and the `-gt` option read below are defined above
# this excerpt.
parser.add_argument("-od",type=float,default=0,required=False,help="Outgroup branch length")
parser.add_argument("-i",type=str,default="infile.tree",required=True,help="Input Newick tree file")
parser.add_argument("-o",type=str,default="outtree.tree",required=False,help="Output Newick tree file")
args = parser.parse_args()

trees=TreeList.get_from_path(args.i,schema="newick",rooting="force-rooted")
if args.gt != 0:
	# %g keeps fractional values visible (%d would truncate them). The
	# parenthesised single-argument print is valid on Python 2 and 3.
	print("Scaling branch lengths to time with generation time %g\n" % args.gt)
	for tree in trees:
		for edge in tree.preorder_edge_iter():
			# Edges without a length (e.g. the root edge) are left untouched.
			if edge.length is not None:
				edge.length=edge.length/args.gt

if args.od != 0:
	print("Adding outgroup with branch length %g\n" % args.od)
	namespace=trees.taxon_namespace
	outgroup= Taxon("outgroup")
	namespace.add_taxon(outgroup)
	for tree in trees:
		outgroup_node=Node(taxon=outgroup,edge_length=args.od)
		new_root_node=Node()
		# The old root's edge gets the outgroup length minus the old root's
		# distance from tip (presumably so ingroup tips and the outgroup end
		# up equally far from the new root -- confirm with the author).
		tree.seed_node.edge_length=args.od-tree.seed_node.distance_from_tip()
		new_root_node.add_child(tree.seed_node)
		new_root_node.add_child(outgroup_node)
		tree.seed_node=new_root_node
trees.write(path=args.o,schema="newick",suppress_rooting=True)