Example #1
0
def setBranchLengths(tree, edges, edgelens, paths, resids, 
                     topmat=None, rootedge=None):
    # recreate rooting branches
    if rootedge != None:
        # restore original rooting
        if tree.nodes[rootedge[0]].parent == tree.nodes[rootedge[1]]:
            treelib.reroot(tree, rootedge[0], newCopy=False)
        else:
            treelib.reroot(tree, rootedge[1], newCopy=False)
    
        # find root edge in edges
        for i in xrange(len(edges)):
            if sorted(edges[i]) == rootedge:
                break
                
        edges[i] = [rootedge[0], tree.root.name]
        edges.append([rootedge[1], tree.root.name])
        edgelens[i] /= 2.0
        edgelens.append(edgelens[i])
        resids[i] /= 2.0
        resids.append(resids[i])
        paths[i] /= 2.0
        paths.append(paths[i])
        
        if topmat != None:
            for row in topmat:
                row.append(row[i])
    
    # set branch lengths
    for i in xrange(len(edges)):
        gene1, gene2 = edges[i]
        if tree.nodes[gene2].parent == tree.nodes[gene1]:
            gene1, gene2 = gene2, gene1
        tree.nodes[gene1].dist = edgelens[i]
Example #2
0
    def recon_root(self, gtree, newCopy=True, returnCost=False):
        """
        Reroots the tree by minimizing the cost function.
        Rerooted tree must keep same node names as the original tree.
        Adapted from phylo.recon_root.
        """

        # try all rerootings
        mincost = util.INF
        for gtree, edge in self._reroot_helper(gtree, newCopy=newCopy, returnEdge=True):
            cost = self.compute_cost(gtree)
            if cost < mincost:
                mincost = cost
                minroot = edge

        # root tree by minroot
        if edge != minroot:
            node1, node2 = minroot
            if node1.parent != node2:
                node1, node2 = node2, node1
            assert node1.parent == node2
            treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)

        if returnCost:
            return gtree, mincost
        else:
            return gtree
Example #3
0
    def recon_root(self, gtree, newCopy=True, returnCost=False):
        """
        Returns the rerooted tree with min deep coalescence cost
        Generalizes compute_cost to multiple trees.
        """

        # write species tree and gene tree using species map
        treeout = util.open_stream(self.treefile, 'w')
        self.stree.write(treeout, oneline=True, writeData=lambda x: "")
        treeout.write('\n')
        edges = []
        for gtree, edge in self._reroot_helper(gtree, newCopy=newCopy, returnEdge=True):
            gtree.write(treeout, namefunc=lambda name: self.gene2species(name),
                        oneline=True, writeData=lambda x: "")
            treeout.write('\n')
            edges.append(edge)
        treeout.close()

        # execute command
        proc = subprocess.Popen([cmd,
                                 '-i', self.treefile],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        ret = proc.wait()
        if ret != 0:
            raise Exception("genetreereport failed with returncode %d" % ret)

        # parse output
        i = None
        n = len(edges)
        costs = [None]*n
        for line in proc.stdout:
            m = re.match("\[ gene tree #(\d+) \]", line)
            if m:
                i = int(m.groups()[0]) - 1

            if i is not None:
                m = re.match("\[ deep coalecense: (\d+) \]", line)
                if m:
                    costs[i] = int(m.groups()[0])
        assert all(map(lambda x: x is not None, costs))

        # find minimum cost tree
        ndx, mincost = min(enumerate(costs), key=lambda it:it[1])
        minroot = edges[ndx]
        if edge != minroot:
            node1, node2 = minroot
            if node1.parent != node2:
                node1, node2 = node2, node1
            assert node1.parent == node2
            treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)

        if returnCost:
            return gtree, mincost
        else:
            return gtree
Example #4
0
    def recon_root(self, gtree, newCopy=True, returnCost=False):
        """
        Returns the rerooted tree with min DTL cost
        Generalizes compute_cost to multiple trees.
        """

        # write species tree and gene tree using species map
        treeout = util.open_stream(self.treefile, 'w')
        self.stree.write(treeout, oneline=True)
        treeout.write('\n')
        edges = []
        for gtree, edge in self._reroot_helper(gtree, newCopy=newCopy, returnEdge=True):
            gtree.write(treeout, namefunc=lambda name: self.gene2species(name), oneline=True)
            treeout.write('\n')
            edges.append(edge)
        treeout.close()

        # execute command
        proc = subprocess.Popen([cmd,
                                 '-i', self.treefile,
                                 '-D', str(self.dupcost),
                                 '-T', str(self.transfercost),
                                 '-L', str(self.losscost)],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        ret = proc.wait()
        if ret != 0:
            raise Exception("DTL failed with returncode %d" % ret)

        # parse output
        i = 0
        n = len(edges)
        costs = [None]*n
        for line in proc.stdout:
            toks = line.split(':')
            if toks[0] == "The minimum reconciliation cost is":
                assert i < n
                costs[i] = int(toks[1])
                i += 1
        assert all(map(lambda x: x is not None, costs))

        # find minimum cost tree
        ndx, mincost = min(enumerate(costs), key=lambda it:it[1])
        minroot = edges[ndx]
        if edge != minroot:
            node1, node2 = minroot
            if node1.parent != node2:
                node1, node2 = node2, node1
            assert node1.parent == node2
            treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)

        if returnCost:
            return gtree, mincost
        else:
            return gtree
Example #5
0
 def reroot(self, node):
     try:
         treelib.reroot(self.tree, node.name, newCopy=False)
     except e:
         print e
     print "rerooted on node", node.name
     self.set_tree(self.tree)
     
     self.show()
     self.on_reorder_leaves()
Example #6
0
    def _reroot_helper(self, gtree, newCopy=True, returnEdge=False):
        """
        Yields rerooted trees.
        Adapted from phylo.recon_root.
        """

        # make a consistent unrooted copy of gene tree
        if newCopy:
            gtree = gtree.copy()

        if len(gtree.leaves()) == 2:
            raise StopIteration

        oldroot = gtree.root.name
        treelib.unroot(gtree, newCopy=False)
        treelib.reroot(gtree,
                       gtree.nodes[sorted(gtree.leaf_names())[0]].parent.name,
                       onBranch=False, newCopy=False)

        # make rerooting order consistent using hash ordering
        phylo.hash_order_tree(gtree, self.gene2species)

        # get list of edges to root on
        edges = []
        def walk(node):
            edges.append((node, node.parent))
            if not node.is_leaf():
                node.recurse(walk)
                edges.append((node, node.parent))
        for child in gtree.root.children:
            walk(child)

        # try initial root
        treelib.reroot(gtree, edges[0][0].name, newCopy=False)
        gtree.rename(gtree.root.name, oldroot)
        if returnEdge:
            yield gtree, edges[0]
        else:
            yield gtree
        rootedge = sorted(edges[0])

        # try rerooting on everything
        for edge in edges[1:]:
            if sorted(edge) == rootedge:
                continue
            rootedge = sorted(edge)

            node1, node2 = edge
            if node1.parent != node2:
                node1, node2 = node2, node1
            assert node1.parent == node2, "%s %s" % (node1.name, node2.name)

            # new root and cost
            treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)
            if returnEdge:
                yield gtree, edge
            else:
                yield gtree
Example #7
0
def recon_root(gtree, stree, gene2species = gene2species, 
               rootby = "duploss", newCopy=True):
    """Reroot a tree by minimizing the number of duplications/losses/both"""
    
    # make a consistent unrooted copy of gene tree
    if newCopy:
        gtree = gtree.copy()
        
    if len(gtree.leaves()) == 2:
        return
        
    treelib.unroot(gtree, newCopy=False)
    treelib.reroot(gtree, 
                   gtree.nodes[sorted(gtree.leaf_names())[0]].parent.name, 
                   onBranch=False, newCopy=False)
    
    
    # make recon root consistent for rerooting tree of the same names
    # TODO: there is the possibility of ties, they are currently broken
    # arbitrarily.  In order to make comparison of reconRooted trees with 
    # same gene names accurate, hashOrdering must be done, for now.
    hash_order_tree(gtree, gene2species)
    
    # get list of edges to root on
    edges = []
    def walk(node):
        edges.append((node, node.parent))
        if not node.is_leaf():
            node.recurse(walk)
            edges.append((node, node.parent))
    for child in gtree.root.children:
        walk(child)

    
    # try initial root and recon    
    treelib.reroot(gtree, edges[0][0].name, newCopy=False)
    recon = reconcile(gtree, stree, gene2species)
    events = label_events(gtree, recon)
    
    # find reconciliation that minimizes loss
    minroot = edges[0]
    rootedge = sorted(edges[0])
    if rootby == "dup": 
        cost = count_dup(gtree, events)
    elif rootby == "loss":
        cost = len(find_loss(gtree, stree, recon))
    elif rootby == "duploss":
        cost = count_dup_loss(gtree, stree, recon, events)
    else:
        raise "unknown rootby value '%s'"  % rootby
    mincost = cost
    
    
    # try rooting on everything
    for edge in edges[1:]:
        if sorted(edge) == rootedge:
            continue
        rootedge = sorted(edge)
        
        node1, node2 = edge
        if node1.parent != node2:
            node1, node2 = node2, node1
        assert node1.parent == node2, "%s %s" % (node1.name, node2.name)
        
        # uncount cost
        if rootby in ["dup", "duploss"]:
            if events[gtree.root] == "dup":
                cost -= 1
            if events[node2] == "dup":
                cost -= 1
        if rootby in ["loss", "duploss"]:
            cost -= len(find_loss_under_node(gtree.root, recon))
            cost -= len(find_loss_under_node(node2, recon))
        
        # new root and recon
        treelib.reroot(gtree, node1.name, newCopy=False)        
        
        recon[node2] = reconcile_node(node2, stree, recon)
        recon[gtree.root] = reconcile_node(gtree.root, stree, recon)
        events[node2] = label_events_node(node2, recon)
        events[gtree.root] = label_events_node(gtree.root, recon)
        
        if rootby in ["dup", "duploss"]:
            if events[node2] ==  "dup":
                cost += 1
            if events[gtree.root] ==  "dup":
                cost += 1
        if rootby in ["loss", "duploss"]:
            cost += len(find_loss_under_node(gtree.root, recon))
            cost += len(find_loss_under_node(node2, recon))
        
        # keep track of min cost
        if cost < mincost:
            mincost = cost
            minroot = edge

    
    # root tree by minroot
    if edge != minroot:
        node1, node2 = minroot
        if node1.parent != node2:
            node1, node2 = node2, node1
        assert node1.parent == node2
        treelib.reroot(gtree, node1.name, newCopy=False)
    
    return gtree
Example #8
0
def neighborjoin(distmat, genes, usertree=None):
    """Neighbor joining algorithm"""
    
    tree = treelib.Tree()
    leaves = {}
    dists = util.Dict(2, None)
    restdists = {}
    
    
    # initialize distances
    for i in range(len(genes)):
        r = 0
        for j in range(len(genes)):
            dists[genes[i]][genes[j]] = distmat[i][j]
            r += distmat[i][j]
        restdists[genes[i]] = r / (len(genes) - 2)
        
    # initialize leaves
    for gene in genes:
        tree.add(treelib.TreeNode(gene))
        leaves[gene] = 1
    
    # if usertree is given, determine merging order
    merges = []
    newnames = {}
    if usertree != None:
        def walk(node):
            if not node.isLeaf():
                assert len(node.children) == 2, \
                    Exception("usertree is not binary")
            
                for child in node:
                    walk(child)
                merges.append(node)
                newnames[node] = len(merges)
            else:
                newnames[node] = node.name
        walk(usertree.root)
        merges.reverse()
    
    # join loop
    while len(leaves) > 2:
        # search for closest genes
        if not usertree:
            low = util.INF
            lowpair = (None, None)
            leaveslst = leaves.keys()

            for i in range(len(leaves)):
                for j in range(i+1, len(leaves)):
                    gene1, gene2 = leaveslst[i], leaveslst[j]
                    dist = dists[gene1][gene2] - restdists[gene1] \
                                               - restdists[gene2]
                    if dist < low:
                        low = dist
                        lowpair = (gene1, gene2)
        else:
            node = merges.pop()
            lowpair = (newnames[node.children[0]],
                       newnames[node.children[1]])
        
        # join gene1 and gene2
        gene1, gene2 = lowpair
        parent = treelib.TreeNode(tree.new_name())
        tree.add_child(parent, tree.nodes[gene1])
        tree.add_child(parent, tree.nodes[gene2])
        
        # set distances
        tree.nodes[gene1].dist = (dists[gene1][gene2] + restdists[gene1] - 
                                  restdists[gene2]) / 2.0
        tree.nodes[gene2].dist = dists[gene1][gene2] - tree.nodes[gene1].dist
        
        # gene1 and gene2 are no longer leaves
        del leaves[gene1]
        del leaves[gene2]
        
        gene3 = parent.name
        r = 0
        for gene in leaves:
            dists[gene3][gene] = (dists[gene1][gene] + dists[gene2][gene] -
                                  dists[gene1][gene2]) / 2.0
            dists[gene][gene3] = dists[gene3][gene]
            r += dists[gene3][gene]
        leaves[gene3] = 1
        
        if len(leaves) > 2:
            restdists[gene3] = r / (len(leaves) - 2)
    
    # join the last two genes into a tribranch
    gene1, gene2 = leaves.keys()
    if type(gene1) != int:
        gene1, gene2 = gene2, gene1
    tree.add_child(tree.nodes[gene1], tree.nodes[gene2])
    tree.nodes[gene2].dist = dists[gene1][gene2]
    tree.root = tree.nodes[gene1]

    # root tree according to usertree    
    if usertree != None and treelib.is_rooted(usertree):
        roots = set([newnames[usertree.root.children[0]],
                     newnames[usertree.root.children[1]]])
        newroot = None
        for child in tree.root.children:
            if child.name in roots:
                newroot = child
        
        assert newroot != None
        
        treelib.reroot(tree, newroot.name, newCopy=False)
    
    return tree