Ejemplo n.º 1
0
def simulateTreeTopology(n):
    """Simulate a random binary tree topology with ``n`` leaves.

    The tree starts as a single seed node. In each of ``n - 1`` rounds one
    of the current leaves is selected with ``randint`` and receives two new
    children. Once the topology is complete, the leaves are labelled with a
    shuffled permutation of ``1..n`` and get ``time = 0``.

    Returns:
        A ``(tree, nodeOrder)`` pair; ``nodeOrder`` holds the nodes that were
        split, in the order in which they were split.
    """
    root = Node()
    leaves = [root]
    nodeOrder = []
    myTree = Tree(seed_node=root)

    for step in range(n - 1):
        pick = randint(0, step)
        first_child = Node()
        second_child = Node()
        parent = leaves[pick]
        parent.add_child(first_child)
        parent.add_child(second_child)
        # The split node is no longer a leaf: its slot is taken over by the
        # first child and the second child is appended at the end.
        leaves[pick] = first_child
        leaves.append(second_child)
        nodeOrder.append(parent)

    IDs = list(range(1, n + 1))
    shuffle(IDs)
    for position, leaf in enumerate(leaves):
        leaf.taxon = Taxon(label=str(IDs[position]))
        leaf.time = 0

    return myTree, nodeOrder
Ejemplo n.º 2
0
def extract_tree_with_taxa(tree, taxa, suppress_unifurcations=True):
    """Copy the part of ``tree`` that leads to the leaves labelled by ``taxa``.

    Every node of ``tree`` gets a boolean ``keep`` attribute: True for the
    requested leaves and all of their ancestors, False otherwise. The kept
    nodes are then copied (taxon, label and edge length) breadth-first under
    the seed node of a fresh ``Tree``.

    Args:
        tree: source tree; its nodes are annotated with ``keep`` in place.
        taxa: iterable of taxon objects; each must belong to a leaf of
            ``tree`` (a missing taxon raises ``KeyError``).
        suppress_unifurcations: when true, ``suppress_unifurcations()`` is
            called on the copy before it is returned.

    Returns:
        The newly built tree.
    """
    leaf_by_taxon = {}
    for node in tree.preorder_node_iter():
        node.keep = False
        if node.is_leaf():
            leaf_by_taxon[node.taxon] = node

    for taxon in taxa:
        for lineage_node in leaf_by_taxon[taxon].ancestor_iter(inclusive=True):
            lineage_node.keep = True

    out = Tree()
    # Each queue entry pairs a node of the source tree with its copy.
    pending = Queue()
    pending.put((tree.seed_node, out.seed_node))
    while not pending.empty():
        source, copy = pending.get()
        for source_child in source.child_node_iter():
            if not source_child.keep:
                continue
            copy_child = Node(
                taxon=source_child.taxon,
                label=source_child.label,
                edge_length=source_child.edge_length,
            )
            copy.add_child(copy_child)
            pending.put((source_child, copy_child))

    if suppress_unifurcations:
        out.suppress_unifurcations()
    return out
Ejemplo n.º 3
0
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Arbitrarily resolve polytomies in `tree` using 0-length splits.

    Copied from more recent DendroPy than the version that we bundle.

    Each node with more than two children keeps its first two children. The
    remaining children are detached and re-inserted one at a time: an
    attachment point is drawn with `rng.sample(points, 1)` from the two kept
    children, the polytomy node itself and every node inserted so far, and a
    new 0-length node is spliced in at that point to hold the re-inserted
    child.

    `rng` must offer a sample() method that behaves like random.sample().
    When `rng` is None the module-level POLYTOMY_RNG is used.
    If `update_splits` is true, tree.update_splits() is called at the end.
    """
    _LOG.debug("start resolving polytomies")
    from dendropy import Node
    if rng is None:
        rng = POLYTOMY_RNG
    polytomies = [nd for nd in tree.postorder_node_iter()
                  if len(nd.child_nodes()) > 2]

    _LOG.debug("Found %d polytomies" % len(polytomies))
    for poly in polytomies:
        kids = poly.child_nodes()
        if len(kids) <= 2:
            continue
        #if nc == 3 and node.parent_node is None:
        #    continue
        pending = kids[2:]
        for extra in pending:
            poly.remove_child(extra)
        candidates = kids[:2] + [poly]
        while pending:
            incoming = pending.pop()
            anchor = rng.sample(candidates, 1)[0]
            joint = Node()
            joint.edge.length = 0.0
            anchor_parent = anchor.parent_node
            if anchor_parent is not None:
                # Splice the new node between the anchor and its parent.
                anchor_parent.add_child(joint)
                anchor_parent.remove_child(anchor)
                joint.add_child(anchor)
                joint.add_child(incoming)
            else:
                # The anchor has no parent: push its current children down
                # under the new node and hang the incoming child beside it.
                previous_kids = list(anchor.child_nodes())
                anchor.add_child(joint)
                anchor.add_child(incoming)
                for moved in previous_kids:
                    anchor.remove_child(moved)
                    joint.add_child(moved)
            candidates.append(joint)
    _LOG.debug("polytomies resolution - updating splits")
    if update_splits:
        tree.update_splits()
    _LOG.debug("polytomies resolved.")
Ejemplo n.º 4
0
Archivo: tree.py Proyecto: czli/Canopy
	def reroot_at_midpoint(self, update_splits=False, delete_outdegree_one=True):
		'''
		Reroot the wrapped tree at the midpoint of its longest tip-to-tip path.

		Modified from the source code of Dendropy v3.12.0.

		The two most distant nodes are read from a PatristicDistanceMatrix of
		``self._tree``. The path joining them is searched for the point that
		lies half of the maximum distance away from a tip. If that point
		coincides (within 1e-6) with an existing node the tree is reseeded
		there; otherwise the edge containing it is split by a new node, which
		becomes the seed node.

		update_splits -- if true, update_splits() is called on the tree after
			rerooting.
		delete_outdegree_one -- forwarded to reseed_at().

		Returns the seed node of the tree after rerooting.
		'''
		pdm = treecalc.PatristicDistanceMatrix(self._tree)
		n1, n2 = pdm.max_dist_nodes
		half_len = float(pdm.max_dist) / 2
		mrca_node = pdm.mrca(n1.taxon, n2.taxon)
		break_on_node = None  # set iff the midpoint sits on an existing node
		target_edge = None  # set iff the midpoint sits inside an edge
		head_node_edge_len = None

		# The midpoint lies on only one of the two branches that join the
		# tips at their MRCA. Climb from n1 first; if the MRCA is reached
		# without finding it, the midpoint is on n2's side, so climb from n2.
		for start_node in (n1, n2):
			cur_node = start_node
			plen = half_len
			while cur_node is not mrca_node:
				if cur_node.edge.length > plen:
					target_edge = cur_node.edge
					head_node_edge_len = plen
					break
				elif abs(cur_node.edge.length - plen) < 1e-6:
					break_on_node = cur_node.parent_node
					break
				else:
					plen -= cur_node.edge.length
					cur_node = cur_node.parent_node
			if break_on_node is not None or target_edge is not None:
				break

		assert break_on_node is not None or target_edge is not None

		if break_on_node is not None:
			self._tree.reseed_at(break_on_node, update_splits=False, delete_outdegree_one=delete_outdegree_one)
		else:
			# Split the target edge with a new node placed exactly at the
			# midpoint and make that node the new seed.
			tail_node_edge_len = target_edge.length - head_node_edge_len
			old_head_node = target_edge.head_node
			old_tail_node = target_edge.tail_node
			old_tail_node.remove_child(old_head_node)
			new_seed_node = Node()
			new_seed_node.add_child(old_head_node, edge_length=head_node_edge_len)
			old_tail_node.add_child(new_seed_node, edge_length=tail_node_edge_len)
			self._tree.reseed_at(new_seed_node, update_splits=False, delete_outdegree_one=delete_outdegree_one)

		self._tree.is_rooted = True

		if update_splits:
			self._tree.update_splits(delete_outdegree_one=False)

		return self._tree.seed_node
Ejemplo n.º 5
0
def graph2tree(G, root=0, names=None):
    """Convert an acyclic adjacency-list graph into a rooted ``Tree``.

    Args:
        G: sequence indexed by vertex id; ``G[v]`` iterates over
            ``(neighbour, edge_length)`` pairs. The graph is assumed to be
            acyclic.
        root: id of the vertex that becomes the seed node.
        names: optional sequence of labels indexed by vertex id. When it is
            missing or empty, ``str(vertex_id)`` is used as the label.

    Returns:
        A ``Tree`` in which every vertex reachable from ``root`` is a node
        labelled as described above, and every leaf carries a taxon with the
        same label registered in the tree's taxon namespace.
    """
    def label_of(v):
        return names[v] if names else str(v)

    seed_node = Node()
    seed_node.label = label_of(root)
    T = Tree(seed_node=seed_node)

    # node_refs[v] is the tree node built for vertex v, or None if v has not
    # been reached yet; it doubles as the "visited" marker.
    node_refs = [None] * len(G)
    node_refs[root] = seed_node

    stk = [root]
    while stk:
        curr_v = stk.pop()
        for v, length in G[curr_v]:
            if node_refs[v] is not None:
                continue
            stk.append(v)
            new_node = Node()
            new_node.label = label_of(v)
            node_refs[v] = new_node
            node_refs[curr_v].add_child(new_node)
            new_node.edge_length = length

    for node in T.leaf_node_iter():
        node.taxon = T.taxon_namespace.new_taxon(label=node.label)

    return T
Ejemplo n.º 6
0
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Arbitrarily resolve polytomies in `tree` using 0-length splits.

    Copied from more recent DendroPy than the version that we bundle.

    A node with more than two children keeps its first two children. All
    other children are removed and then added back one by one: for each, an
    attachment point is chosen through `rng.sample(points, 1)` among the two
    kept children, the polytomy node and the nodes created so far, and a new
    node with edge length 0.0 is inserted there to carry the child.

    `rng` has to provide sample() with the semantics of random.sample().
    If `rng` is None, the module-level POLYTOMY_RNG is used instead.
    If `update_splits` is true, tree.update_splits() is called at the end.
    """
    _LOG.debug("start resolving polytomies")
    from dendropy import Node
    if rng is None:
        rng = POLYTOMY_RNG
    polytomies = [candidate for candidate in tree.postorder_node_iter()
                  if len(candidate.child_nodes()) > 2]

    _LOG.debug("Found %d polytomies" % len(polytomies))
    for hub in polytomies:
        offspring = hub.child_nodes()
        if len(offspring) <= 2:
            continue
        #if nc == 3 and node.parent_node is None:
        #    continue
        waiting = offspring[2:]
        for surplus in waiting:
            hub.remove_child(surplus)
        points = offspring[:2] + [hub]
        while waiting:
            newcomer = waiting.pop()
            chosen = rng.sample(points, 1)[0]
            inserted = Node()
            inserted.edge.length = 0.0
            above = chosen.parent_node
            if above is not None:
                # Put the new node on the edge leading to the chosen point.
                above.add_child(inserted)
                above.remove_child(chosen)
                inserted.add_child(chosen)
                inserted.add_child(newcomer)
            else:
                # The chosen point has no parent: its existing children move
                # below the new node, which becomes a sibling of the newcomer.
                existing = list(chosen.child_nodes())
                chosen.add_child(inserted)
                chosen.add_child(newcomer)
                for relocated in existing:
                    chosen.remove_child(relocated)
                    inserted.add_child(relocated)
            points.append(inserted)
    _LOG.debug("polytomies resolution - updating splits")
    if update_splits:
        tree.update_splits()
    _LOG.debug("polytomies resolved.")
Ejemplo n.º 7
0
    def get_dendropy_tree(self, cluster_tree):
        """Assemble a DendroPy ``Tree`` bottom-up from ``cluster_tree``.

        Clusters with a taxon are treated as leaves, clusters without one as
        internal nodes. Internal clusters are processed from the smallest to
        the largest (by ``len(cluster.clusters)``), so that sub-nodes already
        exist when a larger cluster needs them.

        Returns:
            A ``Tree`` seeded at the first entry of the list of built nodes.

        Raises:
            IndexError: if ``cluster_tree`` contains no internal cluster.
        """
        # Separate the leaves from the internal nodes.
        leafs = []
        nodes = []
        for cluster in cluster_tree.get_cluster_list():
            if cluster.taxon is not None:
                leafs.append(cluster)
            else:
                nodes.append(cluster)

        # Start with the nodes that have the fewest leaves so that the tree
        # is built from the bottom up.
        nodes.sort(key=lambda x: len(x.clusters))

        # Temporary nodes created so far; the tree is built out of these.
        created_nodes = []
        for node in nodes:
            # Each internal cluster becomes one tree node, to which its
            # members are then attached.
            created_node = Node()
            for leaf in node.clusters:
                if self.is_leaf_in_leafs(leaf, leafs):
                    # The leaf has not been used yet: attach it directly and
                    # take it off the list of available leaves.
                    created_node.add_child(leaf)
                    leafs = self.remove_leaf_from_list(leaf, leafs)
                else:
                    # The leaf was consumed earlier, so it already sits in a
                    # temporary node. Attach that node as the child instead;
                    # the helper also returns the list without the used node.
                    sub_created_node, created_nodes = self.find_created_node_with_leaf(created_nodes, leaf)
                    if sub_created_node is not None:
                        created_node.add_child(sub_created_node)

            created_nodes.append(created_node)

        # At the end, the node holding all leaves and sub-nodes is expected
        # to be the first and only entry of the temporary list.
        tree = Tree(seed_node=created_nodes[0])
        return tree
Ejemplo n.º 8
0
    def get_dendropy_tree(self, cluster_tree):
        """Assemble a DendroPy ``Tree`` bottom-up from ``cluster_tree``.

        Clusters that carry a taxon are leaves; clusters without a taxon are
        internal nodes. The internal clusters are handled in increasing order
        of ``len(cluster.clusters)`` so that smaller sub-nodes are available
        before the clusters that contain them.

        Returns:
            A ``Tree`` whose seed node is the first entry of the list of
            built nodes.

        Raises:
            IndexError: if ``cluster_tree`` contains no internal cluster.
        """
        # Distinguish leaves from internal nodes and keep them in two lists.
        leafs = []
        nodes = []
        for cluster in cluster_tree.get_cluster_list():
            if cluster.taxon is not None:
                leafs.append(cluster)
            else:
                nodes.append(cluster)

        # Sort the internal nodes so that the ones with the fewest leaves are
        # processed first, building the tree from the bottom.
        nodes.sort(key=lambda x: len(x.clusters))

        # Temporary list of the nodes created so far.
        created_nodes = []
        for node in nodes:
            # A tree node is created for each internal cluster and then the
            # members of the cluster are added to it.
            created_node = Node()
            for leaf in node.clusters:
                if self.is_leaf_in_leafs(leaf, leafs):
                    # Unused leaf: add it as a child and remove it from the
                    # list of leaves.
                    created_node.add_child(leaf)
                    leafs = self.remove_leaf_from_list(leaf, leafs)
                else:
                    # The leaf is no longer in the leaf list, so it has been
                    # used before and lives inside a temporary node. That
                    # node is added as the child; the helper hands back the
                    # updated list of temporary nodes.
                    sub_created_node, created_nodes = self.find_created_node_with_leaf(created_nodes, leaf)
                    if sub_created_node is not None:
                        created_node.add_child(sub_created_node)

            created_nodes.append(created_node)

        # Finally, the node containing every leaf and sub-node is expected to
        # be the first and only element of the temporary list.
        tree = Tree(seed_node=created_nodes[0])
        return tree
Ejemplo n.º 9
0
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Arbitrarily resolve polytomies using 0-length splits.

    Every node with more than two children is reduced to two children by
    repeatedly joining a pair of its children under a new node whose edge
    length is 0.

    If `rng` is passed in (and is truthy), the pair is drawn with
        rng.sample(children, 2); rng.sample() should behave like
        random.sample(). An `rng` without a sample() method falls back to
        random.sample().
    If `rng` is not passed in, then polytomy is broken deterministically by
        repeatedly joining the first two children.
    If `update_splits` is true, tree.update_splits() is called at the end.
    """
    polytomies = [nd for nd in tree.postorder_node_iter()
                  if len(nd.child_nodes()) > 2]

    # Use the caller's generator so that seeded runs are reproducible; the
    # module-level random.sample is only a fallback for legacy callers that
    # pass a plain truthy flag.
    pick_pair = None
    if rng:
        pick_pair = getattr(rng, "sample", random.sample)

    for node in polytomies:
        children = node.child_nodes()
        while len(children) > 2:
            nn1 = Node()
            nn1.edge.length = 0
            if pick_pair is not None:
                c1, c2 = pick_pair(children, 2)
            else:
                c1, c2 = children[0], children[1]
            node.remove_child(c1)
            node.remove_child(c2)
            nn1.add_child(c1)
            nn1.add_child(c2)
            node.add_child(nn1)
            children = node.child_nodes()
    if update_splits:
        tree.update_splits()
Ejemplo n.º 10
0
def resolve_polytomies(tree, update_splits=False, rng=None):
    """
    Arbitrarily resolve polytomies using 0-length splits.

    A node that has more than two children is resolved by joining two of its
    children under a new 0-length node, over and over, until only two
    children remain.

    If `rng` is passed in (and is truthy), each pair is chosen with
        rng.sample(children, 2); rng.sample() should behave like
        random.sample(). If `rng` has no sample() method, random.sample()
        is used instead.
    If `rng` is not passed in, then polytomy is broken deterministically by
        repeatedly joining the first two children.
    If `update_splits` is true, tree.update_splits() is called at the end.
    """
    polytomies = [nd for nd in tree.postorder_node_iter()
                  if len(nd.child_nodes()) > 2]

    # Draw from the generator supplied by the caller (keeps seeded runs
    # reproducible); fall back to random.sample only when the truthy `rng`
    # value does not offer sample() itself.
    sampler = None
    if rng:
        sampler = getattr(rng, "sample", random.sample)

    for node in polytomies:
        children = node.child_nodes()
        while len(children) > 2:
            nn1 = Node()
            nn1.edge.length = 0
            if sampler is not None:
                c1, c2 = sampler(children, 2)
            else:
                c1, c2 = children[0], children[1]
            node.remove_child(c1)
            node.remove_child(c2)
            nn1.add_child(c1)
            nn1.add_child(c2)
            node.add_child(nn1)
            children = node.child_nodes()
    if update_splits:
        tree.update_splits()
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses RDF content (a `file_obj` file object, or `data` as RDF/XML text)
    into a dendropy Tree.

    Uses the 'has_Parent' term in http://www.evolutionaryontology.org/cdao/1.0/cdao.owl#
    to construct and return a rooted dendropy.Tree object. A node whose RDF
    resource has a REPRESENTS_TU_PREDICATE value gets a Taxon labelled with
    that value's rdfs:label.

    Relies on rdflib and dendropy.
    Raises ValueError if the graph implies more than 1 root node.
    Returns None if no root node is found at all (e.g. no has_Parent triple).
    When _DEBUGGING is set, a trace is written to 'parse_rdf.txt'.
    '''

    from dendropy import Node, Tree, Edge, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')
    # Maps an RDF term to the tree node built for it. The term itself is the
    # key (terms hash and compare by value), so the same resource always maps
    # to the same node even if the store hands back distinct Python objects
    # for its occurrences as subject and as object.
    nd_dict = {}
    taxon_set = TaxonSet()
    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    # Nodes seen as a parent but, so far, never as a child.
    parentless = set()
    out = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)

            if parent is None:
                raw_o = o
                o = rdflib.resource.Resource(graph, o)
                o_tu = o.value(OBO[REPRESENTS_TU_PREDICATE])
                if o_tu:
                    o_label = o_tu.value(rdflib.RDFS.label)
                    t = Taxon(label=o_label)
                    taxon_set.append(t)
                    parent = Node(taxon=t)
                else:
                    parent = Node()

                nd_dict[raw_o] = parent
                parentless.add(parent)
            child = nd_dict.get(s)
            if child is None:
                raw_s = s
                s = rdflib.resource.Resource(graph, s)
                s_tu = s.value(OBO[REPRESENTS_TU_PREDICATE])
                if s_tu:
                    s_label = s_tu.value(rdflib.RDFS.label)
                    t = Taxon(label=s_label)
                    taxon_set.append(t)
                    child = Node(taxon=t)
                else:
                    child = Node()
                nd_dict[raw_s] = child
            elif child in parentless:
                # Previously seen only as a parent; it now has one itself.
                parentless.remove(child)
            parent.add_child(child)

            if out is not None:
                out.write('%s %s %s\n' % ( str(s), p, o))
                out.write('%s\n' % ( str(parentless)))
    finally:
        # Close the trace file even if parsing a triple fails.
        if out is not None:
            out.close()
    if len(parentless) != 1:
        message = "Expecting to find exactly Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(parentless)
        CUTOFF_FOR_LISTING_PARENTLESS_NODES = 1 + len(parentless) # we might want to put in a magic number here to suppress really long output
        if len(parentless) > 0 and len(parentless) < CUTOFF_FOR_LISTING_PARENTLESS_NODES:
            message += ":\n  "
            for i in parentless:
                if i.label:
                    message += "\n  " + i.label
                else:
                    message += "\n  <unlabeled>" + str(id(i))
            raise ValueError(message)
        else:
            return None
    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
Ejemplo n.º 12
0
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses RDF content (a `file_obj` file object, or `data` as RDF/XML text)
    into a dendropy Tree.

    Uses the 'has_Parent' term in http://www.evolutionaryontology.org/cdao/1.0/cdao.owl#
    to construct and return a rooted dendropy.Tree object. Nodes whose RDF
    resource has a REPRESENTS_TU_PREDICATE value receive a Taxon labelled
    with the rdfs:label of that value.

    Relies on rdflib and dendropy.
    Raises ValueError if the graph implies more than 1 root node.
    Returns None if the graph yields no root node at all.
    When _DEBUGGING is set, a trace is written to 'parse_rdf.txt'.
    '''

    from dendropy import Node, Tree, Edge, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')
    # RDF term -> tree node. Keyed by the term (value-based hash/equality)
    # rather than by id(), so one resource can never end up with two nodes
    # just because the store returned two distinct objects for it.
    nd_dict = {}
    taxon_set = TaxonSet()
    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    # Nodes that have appeared as a parent but not (yet) as a child.
    parentless = set()
    out = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)

            if parent is None:
                raw_o = o
                o = rdflib.resource.Resource(graph, o)
                o_tu = o.value(OBO[REPRESENTS_TU_PREDICATE])
                if o_tu:
                    o_label = o_tu.value(rdflib.RDFS.label)
                    t = Taxon(label=o_label)
                    taxon_set.append(t)
                    parent = Node(taxon=t)
                else:
                    parent = Node()

                nd_dict[raw_o] = parent
                parentless.add(parent)
            child = nd_dict.get(s)
            if child is None:
                raw_s = s
                s = rdflib.resource.Resource(graph, s)
                s_tu = s.value(OBO[REPRESENTS_TU_PREDICATE])
                if s_tu:
                    s_label = s_tu.value(rdflib.RDFS.label)
                    t = Taxon(label=s_label)
                    taxon_set.append(t)
                    child = Node(taxon=t)
                else:
                    child = Node()
                nd_dict[raw_s] = child
            elif child in parentless:
                # It was a root candidate until this triple gave it a parent.
                parentless.remove(child)
            parent.add_child(child)

            if out is not None:
                out.write('%s %s %s\n' % (str(s), p, o))
                out.write('%s\n' % (str(parentless)))
    finally:
        # Make sure the trace file is closed when a triple fails to parse.
        if out is not None:
            out.close()
    if len(parentless) != 1:
        message = "Expecting to find exactly Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(
            parentless)
        CUTOFF_FOR_LISTING_PARENTLESS_NODES = 1 + len(
            parentless
        )  # we might want to put in a magic number here to suppress really long output
        if len(parentless) > 0 and len(
                parentless) < CUTOFF_FOR_LISTING_PARENTLESS_NODES:
            message += ":\n  "
            for i in parentless:
                if i.label:
                    message += "\n  " + i.label
                else:
                    message += "\n  <unlabeled>" + str(id(i))
            raise ValueError(message)
        else:
            return None
    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
Ejemplo n.º 13
0
def resolve_node(node):
    """Enumerate two-way resolutions of ``node`` as Newick strings.

    For every bipartition returned by ``list_bipartitions`` for the children
    of ``node`` that has more than one member, a small tree is built: a root
    with two internal nodes, one holding the members of the bipartition and
    one holding the remaining children. Its Newick string is collected, and
    the children are then added back to ``node``.

    Returns:
        The list of Newick strings, in the order of the bipartitions.
    """
    children = node.child_nodes()
    bipartitions = list_bipartitions(children)

    newicks = []
    for inside in bipartitions:
        if len(inside) <= 1:
            continue

        outside = [child for child in children if child not in inside]
        top = Node()
        inside_parent = Node()
        outside_parent = Node()
        for child in inside:
            inside_parent.add_child(child)
        for child in outside:
            outside_parent.add_child(child)
        top.add_child(inside_parent)
        top.add_child(outside_parent)

        newicks.append(Tree(seed_node=top).as_string("newick"))

        # Hand the children back to the original node before the next round.
        for child in children:
            node.add_child(child)

    return newicks
Ejemplo n.º 14
0
# Usage: <script> TREEFILE INFOFILE OUTFILE
#
# Reads a Newick tree and an info file whose lines have the form
#     <existing_leaf_label> <new_label> [<new_label> ...]
# For every leaf named in the info file, the leaf is pushed below a new
# internal node (which inherits the leaf's edge length) and one zero-length
# sibling leaf is added per new label. The result is written as Newick.
treefile = argv[1]
infofile = argv[2]
outfile = argv[3]

mapping = {}
with open(infofile, 'r') as f:
    for line in f:
        taxa = line.split()
        if not taxa:
            # Blank lines (such as a trailing newline) carry no mapping.
            continue
        mapping[taxa[0]] = taxa[1:]

myTree = Tree.get_from_path(treefile, "newick")

# Snapshot the leaves first: the loop below adds new leaves to the tree.
leaves = list(myTree.leaf_node_iter())

for node in leaves:
    if node.taxon.label in mapping:
        # Insert a new internal node between the leaf and its parent.
        new_node = Node(edge_length=node.edge_length)
        pNode = node.parent_node
        pNode.remove_child(node)
        pNode.add_child(new_node)
        new_node.add_child(node)
        pNode = new_node

        for taxon_name in mapping[node.taxon.label]:
            new_taxon = Taxon(label=taxon_name)
            myTree.taxon_namespace.add_taxon(new_taxon)
            new_node = Node(edge_length=0, taxon=new_taxon)
            pNode.add_child(new_node)

myTree.write_to_path(outfile, "newick")
Ejemplo n.º 15
0
parser.add_argument("-od",type=float,default=0,required=False,help="Outgroup branch length")
parser.add_argument("-i",type=str,default="infile.tree",required=True,help="Input Newick tree file")
parser.add_argument("-o",type=str,default="outtree.tree",required=False,help="Output Newick tree file")
args = parser.parse_args()

trees = TreeList.get_from_path(args.i, schema="newick", rooting="force-rooted")

# Optional step 1: divide every defined branch length by the generation time.
if args.gt != 0:
    print("Scaling branch lengths to time with generation time %d\n" % args.gt)
    for tree in trees:
        for edge in tree.preorder_edge_iter():
            if edge.length is not None:
                edge.length = edge.length / args.gt

# Optional step 2: put each tree under a new root, next to an "outgroup" leaf
# whose branch length is args.od.
if args.od != 0:
    # %g rather than %d: the outgroup length is a float and %d would print
    # e.g. 0.5 as 0.
    print("Adding outgroup with branch length %g\n" % args.od)
    namespace = trees.taxon_namespace
    outgroup = Taxon("outgroup")
    namespace.add_taxon(outgroup)
    for tree in trees:
        outgroup_node = Node(taxon=outgroup, edge_length=args.od)
        new_root_node = Node()
        # The old root's branch is set to args.od minus the old root's
        # distance_from_tip().
        tree.seed_node.edge_length = args.od - tree.seed_node.distance_from_tip()
        new_root_node.add_child(tree.seed_node)
        new_root_node.add_child(outgroup_node)
        tree.seed_node = new_root_node
trees.write(path=args.o, schema="newick", suppress_rooting=True)
Ejemplo n.º 16
0
    def get_super_tree(self, superTree_method, **args):
        """Build a summary tree from ``self.data['trees']`` and wrap it in a MetaTree.

        ``superTree_method`` selects how the list of branches is obtained:
        the name of one of the nested builders ('MCC', 'consensus'), the path
        of an existing tree file (handled by ``load_tree``), or otherwise the
        label of one of the stored trees (handled by ``load_subtree``).

        Each builder returns a list of branches whose first three items are
        ``[barcode, support, nodes]``; ``load_subtree`` and ``load_tree``
        append ``age`` and ``edge_length``. The supertree is then assembled
        from that list and its nodes receive ``barcode``, ``posterior``,
        ``contain``, ``age``, ``edge_length`` and ``id`` attributes.

        NOTE(review): 'ASTRID' is accepted by the dispatch below but its
        definition is commented out, so selecting it raises KeyError.
        NOTE(review): ``iteritems`` in ``consensus`` exists on Python 2
        dicts only.
        """
        def parse_trees(**args):
            # Returns (number of trees as float, {barcode: [[w, tree index,
            # node], ...]}), where w is (leaves in the tree / number of
            # taxa) squared.
            n_tree, n_branch = float(len(self.data['trees'])), {}
            for mt_id, mt in enumerate(self.data['trees']):
                w = (float(len(mt.tre.leaf_nodes())) /
                     len(self.data['taxa']))**2
                for node in mt.tre.preorder_node_iter():
                    if node.barcode not in n_branch:
                        n_branch[node.barcode] = [[w, mt_id, node]]
                    else:
                        n_branch[node.barcode].append([w, mt_id, node])
            return n_tree, n_branch

        def consensus(self, **args):
            # Greedy consensus: branches are visited from the most to the
            # least frequent and kept only if compatible with every branch
            # kept so far.
            n_tree, n_branch = parse_trees(**args)
            n_branch = sorted([[len(v) / n_tree, k, v]
                               for k, v in n_branch.iteritems()],
                              reverse=True)
            consensus_tree = []
            for posterior, branch, nodes in n_branch:
                for cbr, _, _ in consensus_tree:
                    b1, b2 = sorted([branch, cbr])
                    # Compatible when b1 is entirely inside b2 or entirely
                    # outside it (bitwise test on the barcodes); otherwise
                    # the branch is dropped by zeroing it.
                    if not (((b1 & b2) == b1) or ((b1 & (~b2)) == b1)):
                        branch = 0
                        break
                if branch:
                    consensus_tree.append([branch, posterior, nodes])
            return sorted(consensus_tree, reverse=True)

        def MCC(self, **args):
            # Scores every tree that contains all taxa by the summed
            # frequency of its branches and returns the branches of the
            # highest-scoring tree.
            # NOTE(review): trees lacking some taxa get no score here, yet
            # max() reads x.score on all trees - presumably score is
            # initialised elsewhere; confirm.
            n_tree, n_branch = parse_trees(**args)
            for mt_id, mt in enumerate(self.data['trees']):
                if len(mt.tre.leaf_nodes()) == len(self.data['taxa']):
                    mt.score = np.sum([
                        len(n_branch[node.barcode])
                        for node in mt.tre.preorder_node_iter()
                    ])
            tre = max(self.data['trees'], key=lambda x: x.score).tre
            return [[
                n.barcode,
                len(n_branch[n.barcode]) / n_tree, n_branch[n.barcode]
            ] for n in tre.preorder_node_iter()]

        def load_subtree(self, treeLabel, **args):
            # Returns the branches (with age and edge length) of the stored
            # tree whose label equals treeLabel.
            # NOTE(review): if no label matches, `tre` is never bound and
            # the return statement fails.
            n_tree, n_branch = parse_trees(**args)
            for mt_id, mt in enumerate(self.data['trees']):
                if mt.tre.label == treeLabel:
                    tre = mt.tre
                    break
            return [[
                n.barcode,
                len(n_branch[n.barcode]) / n_tree, n_branch[n.barcode], n.age,
                n.edge_length
            ] for n in tre.preorder_node_iter()]

        #def ASTRID(self, **args) :
        #from dendropy import PhylogeneticDistanceMatrix

        def load_tree(self, consFile=None, **args):
            # Reads the first tree of consFile and annotates it with ids,
            # barcodes and ages so that it can be used like the stored trees.
            n_tree, n_branch = parse_trees(**args)

            # The schema is chosen from the first line of the file.
            with open(consFile) as fin:
                schema = 'nexus' if fin.readline().upper().startswith(
                    '#NEXUS') else 'newick'
            # Only the first tree yielded from the file is used.
            for tre in Tree.yield_from_files([consFile], schema=schema):
                break

            internal_id = n_taxa = len(self.data['taxa'])
            # One power of two per taxon; dtype='object' keeps them as
            # Python integers.
            digit_code = np.power(2, np.arange(n_taxa, dtype='object'))

            for node in tre.postorder_node_iter():
                if node.is_leaf():
                    node.id = self.data['taxa'][node.taxon.label]
                    node.barcode = digit_code[node.id]
                else:
                    # Internal nodes are numbered after the taxa; their
                    # barcode is the sum of their children's barcodes.
                    node.id, internal_id = internal_id, internal_id + 1
                    node.barcode = sum([c.barcode for c in node.child_nodes()])

            # Ages are derived top-down from the root's distance to tip.
            tre.seed_node.age = tre.seed_node.distance_from_tip()
            for node in tre.preorder_node_iter():
                if node.parent_node:
                    node.age = node.parent_node.age - node.edge_length
            return [[
                n.barcode,
                len(n_branch.get(n.barcode, [])) / n_tree,
                n_branch.get(n.barcode, []), n.age, n.edge_length
            ] for n in tre.preorder_node_iter()]

        # Dispatch: a builder name is looked up among the nested functions,
        # an existing file path is loaded, anything else is taken as a tree
        # label.
        if superTree_method in ('MCC', 'ASTRID', 'consensus'):
            branches = locals()[superTree_method](self, **args)
        elif os.path.isfile(superTree_method):
            branches = load_tree(self, consFile=superTree_method, **args)
        else:
            branches = load_subtree(self, treeLabel=superTree_method, **args)
        supertree = Tree()
        sn = supertree.seed_node
        # The first branch describes the root. Its age is taken from the
        # branch when provided, otherwise it is the weighted mean of the
        # ages of the contributing nodes.
        sn.barcode, sn.posterior = branches[0][0], branches[0][1]
        sn.age = branches[0][3] if len(branches[0]) > 3 else np.sum(
            [n[2].age * n[0]
             for n in branches[0][2]]) / np.sum([n[0] for n in branches[0][2]])
        sn.contain = [[b[0], b[1], b[2].id] for b in branches[0][2]]
        for br in branches[1:]:
            cbr, posterior, nodes = br[:3]
            # Climb from the last inserted node until reaching one whose
            # barcode contains the barcode of the new branch.
            while (sn.barcode & cbr) != cbr:
                sn = sn.parent_node
            # The new node carries a taxon only when its first contributing
            # node has one.
            new_node = Node() if len(nodes) == 0 or (
                not nodes[0][2].taxon) else Node(taxon=Taxon(
                    label=nodes[0][2].taxon.label))
            sn.add_child(new_node)
            sn = new_node
            sn.barcode, sn.posterior = cbr, posterior
            sn.contain = [[b[0], b[1], b[2].id] for b in nodes]
            if len(br) <= 3:
                # No explicit age / edge length: use weighted means over the
                # contributing nodes, or defaults when there are none.
                sn.edge_length = 0.0 if len(nodes) == 0 else np.sum(
                    [n[2].edge_length * n[0]
                     for n in nodes]) / np.sum([n[0] for n in nodes])
                sn.age = sn.parent_node.age if len(nodes) == 0 else np.sum(
                    [n[2].age * n[0]
                     for n in nodes]) / np.sum([n[0] for n in nodes])
            else:
                sn.age, sn.edge_length = br[3:]
        # Leaves get the id of their taxon; internal nodes are numbered in
        # postorder, starting after the taxa.
        internal_id = len(self.data['taxa'])
        for node in supertree.postorder_node_iter():
            if node.is_leaf():
                node.id = self.data['taxa'][node.taxon.label]
            else:
                node.id = internal_id
                internal_id += 1
        return MetaTree(supertree)
Ejemplo n.º 17
0
    taxonomyTree = sys.argv[3]

    species = {}
    lines = open(speciesList, 'r')
    for line in lines:
        species[line.strip()] = line.strip()

    lines = open(taxonomyFile, 'r')
    header = lines.readline()
    nodes_dict = {}

    #Read first line, root node
    line = lines.readline()
    results = line.strip().split(',')
    tree = Tree()
    root = Node()
    root.__dict__['label'] = results[0].replace("\"", "")
    nodes_dict[results[0].replace("\"", "")] = root

    prune = ['1']

    #Add root node to tree
    tree.__dict__['_seed_node'].add_child(root)
    for line in lines:
        results = line.strip().split(',')
        node = Node()
        node.__dict__['label'] = results[0].replace("\"", "")
        node.taxon = Taxon(results[0].replace("\"", ""))
        nodes_dict[results[0].replace("\"", "")] = node
        nodes_dict[results[1].replace("\"", "")].add_child(node)
        if results[0].replace("\"", "") not in species:
def rdf2dendropyTree(filepath):
    """Parse the RDF file at ``filepath`` into a rooted dendropy Tree.

    The tree is built from the has_Parent triples
    (``OBO[HAS_PARENT_PREDICATE]``) of the graph. A node whose RDF resource
    has a ``REPRESENTS_TU_PREDICATE`` value gets a Taxon labelled with that
    value's rdfs:label.

    Returns:
        The rooted ``Tree``, seeded at the single node that has no parent.

    Raises:
        ValueError: if more than one node without a parent is found.
        SystemExit: if no node without a parent is found.

    When ``_DEBUGGING`` is set, a trace is written to ``parse_rdf.txt``.
    """
    from rdflib.graph import Graph
    from dendropy import Node, Tree, Edge, TaxonSet, Taxon

    graph = Graph()
    graph.parse(filepath)
    # Maps an RDF term to the tree node built for it. The term itself is the
    # key (terms hash and compare by value), so a resource always resolves to
    # the same node even when the store returns distinct Python objects for
    # its occurrences as subject and as object.
    nd_dict = {}
    taxon_set = TaxonSet()
    OBO = Namespace(u"http://purl.obolibrary.org/obo/")
    # Nodes seen as a parent but, so far, never as a child.
    parentless = set()
    out = open("parse_rdf.txt", "w") if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)

            if parent is None:
                raw_o = o
                o = rdflib.resource.Resource(graph, o)
                o_tu = o.value(OBO[REPRESENTS_TU_PREDICATE])
                if o_tu:
                    o_label = o_tu.value(rdflib.RDFS.label)
                    t = Taxon(label=o_label)
                    taxon_set.append(t)
                    parent = Node(taxon=t)
                else:
                    parent = Node()

                nd_dict[raw_o] = parent
                parentless.add(parent)
            child = nd_dict.get(s)
            if child is None:
                raw_s = s
                s = rdflib.resource.Resource(graph, s)
                s_tu = s.value(OBO[REPRESENTS_TU_PREDICATE])
                if s_tu:
                    s_label = s_tu.value(rdflib.RDFS.label)
                    t = Taxon(label=s_label)
                    taxon_set.append(t)
                    child = Node(taxon=t)
                else:
                    child = Node()
                nd_dict[raw_s] = child
            elif child in parentless:
                # Previously seen only as a parent; it now has one itself.
                parentless.remove(child)
            parent.add_child(child)

            if out is not None:
                out.write("%s %s %s\n" % (str(s), p, o))
                out.write("%s\n" % (str(parentless)))
    finally:
        # Close the trace file even if parsing a triple fails.
        if out is not None:
            out.close()
    if len(parentless) != 1:
        message = (
            "Expecting to find exactly Node (an object of a has_Parent triple) in the graph without a parent. Found %d"
            % len(parentless)
        )
        CUTOFF_FOR_LISTING_PARENTLESS_NODES = 1 + len(
            parentless
        )  # we might want to put in a magic number here to suppress really long output
        if len(parentless) > 0 and len(parentless) < CUTOFF_FOR_LISTING_PARENTLESS_NODES:
            message += ":\n  "
            for i in parentless:
                if i.label:
                    message += "\n  " + i.label
                else:
                    message += "\n  <unlabeled>" + str(id(i))
            raise ValueError(message)
        else:
            # sys.exit raises SystemExit, so nothing after it can run.
            sys.exit("no parentless")
    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree