def setBranchLengths(tree, edges, edgelens, paths, resids,
                     topmat=None, rootedge=None):
    """Copy per-edge lengths onto the `dist` attribute of the nodes of `tree`.

    If `rootedge` is given, the tree is first rerooted on that edge and the
    matching entry of `edges` is split in two: each half connects one
    endpoint of the root edge to the new root and carries half of the
    original edge length, residual and path value.  The per-edge lists are
    modified in place.

    Args:
        tree: tree exposing `nodes` (name -> node) and `root`; every node
            needs `parent` and `dist` attributes.
        edges: list of two-element [name1, name2] edges.
        edgelens: list of edge lengths, parallel to `edges`.
        paths: per-edge values, parallel to `edges`; only touched when the
            root edge is split.
        resids: per-edge values, parallel to `edges`; only touched when the
            root edge is split.
        topmat: optional list of row lists with one column per edge; the
            root-edge column is duplicated at the end of every row.
        rootedge: optional pair of node names naming the edge to root on.

    Raises:
        ValueError: if `rootedge` is given but does not occur in `edges`.
    """
    # recreate rooting branches
    if rootedge is not None:
        # Locate the root edge before touching anything, so that a missing
        # edge cannot silently corrupt whichever entry happens to be last.
        target = sorted(rootedge)
        for i, edge in enumerate(edges):
            if sorted(edge) == target:
                break
        else:
            raise ValueError("root edge %r not found in edges" % (rootedge,))

        # restore original rooting: reroot on the endpoint that is the child
        if tree.nodes[rootedge[0]].parent == tree.nodes[rootedge[1]]:
            treelib.reroot(tree, rootedge[0], newCopy=False)
        else:
            treelib.reroot(tree, rootedge[1], newCopy=False)

        # split the root edge into two halves hanging off the new root
        edges[i] = [rootedge[0], tree.root.name]
        edges.append([rootedge[1], tree.root.name])
        edgelens[i] /= 2.0
        edgelens.append(edgelens[i])
        resids[i] /= 2.0
        resids.append(resids[i])
        paths[i] /= 2.0
        paths.append(paths[i])
        if topmat is not None:
            for row in topmat:
                row.append(row[i])

    # set branch lengths: the length belongs to the child end of each edge
    for i, (gene1, gene2) in enumerate(edges):
        if tree.nodes[gene2].parent == tree.nodes[gene1]:
            gene1, gene2 = gene2, gene1
        tree.nodes[gene1].dist = edgelens[i]
def recon_root(self, gtree, newCopy=True, returnCost=False):
    """
    Reroots the tree by minimizing the cost function.

    Every rooting produced by self._reroot_helper is scored with
    self.compute_cost and the tree is left rooted on the cheapest edge
    (the first one encountered wins ties).

    Rerooted tree must keep same node names as the original tree.
    Adapted from phylo.recon_root.

    gtree      -- gene tree to reroot
    newCopy    -- forwarded to the helper; if true the input tree is copied
    returnCost -- if true, return (tree, cost) instead of just the tree

    If the helper yields no rerootings at all, the tree is returned with its
    current rooting (copied when newCopy is true) and its own cost.
    """
    original = gtree
    mincost = None
    minroot = None
    edge = None

    # try all rerootings
    for gtree, edge in self._reroot_helper(original, newCopy=newCopy,
                                           returnEdge=True):
        cost = self.compute_cost(gtree)
        if minroot is None or cost < mincost:
            mincost = cost
            minroot = edge

    if minroot is None:
        # nothing to choose between: keep the rooting the tree already has
        if newCopy:
            gtree = gtree.copy()
        mincost = self.compute_cost(gtree)
    elif edge != minroot:
        # the helper left the tree on its last rooting; move to the best one
        node1, node2 = minroot
        if node1.parent != node2:
            node1, node2 = node2, node1
        assert node1.parent == node2
        treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)

    if returnCost:
        return gtree, mincost
    return gtree
def recon_root(self, gtree, newCopy=True, returnCost=False):
    """
    Returns the rerooted tree with min deep coalescence cost
    Generalizes compute_cost to multiple trees.

    The species tree followed by every rerooting of the gene tree (leaves
    renamed through self.gene2species) is written to self.treefile, one tree
    per line.  The external command `cmd` is run on that file and its report
    is parsed for one deep coalescence cost per gene tree.

    gtree      -- gene tree to reroot
    newCopy    -- forwarded to self._reroot_helper
    returnCost -- if true, return (tree, cost) instead of just the tree

    Raises ValueError if no rerootings were generated, and Exception if the
    external command exits with a non-zero return code.
    """
    # write species tree and gene tree using species map
    edges = []
    treeout = util.open_stream(self.treefile, 'w')
    try:
        self.stree.write(treeout, oneline=True, writeData=lambda x: "")
        treeout.write('\n')
        for gtree, edge in self._reroot_helper(gtree, newCopy=newCopy,
                                               returnEdge=True):
            gtree.write(treeout,
                        namefunc=lambda name: self.gene2species(name),
                        oneline=True, writeData=lambda x: "")
            treeout.write('\n')
            edges.append(edge)
    finally:
        # close the file even if writing one of the trees fails
        treeout.close()

    if not edges:
        raise ValueError("no rerootings were generated for the gene tree")

    # execute command
    # communicate() drains the pipe while waiting; calling wait() first can
    # deadlock once the report exceeds the OS pipe buffer.
    proc = subprocess.Popen([cmd, '-i', self.treefile],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    output = proc.communicate()[0]
    ret = proc.returncode
    if ret != 0:
        raise Exception("genetreereport failed with returncode %d" % ret)

    # parse output
    i = None
    n = len(edges)
    costs = [None] * n
    for line in output.splitlines():
        # a "[ gene tree #k ]" header selects which (1-based) tree the
        # following cost line belongs to
        m = re.match(r"\[ gene tree #(\d+) \]", line)
        if m:
            i = int(m.group(1)) - 1

        if i is not None:
            # spelling matches the text matched in the tool's report
            m = re.match(r"\[ deep coalecense: (\d+) \]", line)
            if m:
                costs[i] = int(m.group(1))
    assert all(c is not None for c in costs), \
        "missing deep coalescence cost for at least one rerooting"

    # find minimum cost tree (first minimum wins ties)
    ndx, mincost = min(enumerate(costs), key=lambda it: it[1])
    minroot = edges[ndx]

    # gtree is currently rooted on the last edge produced by the helper
    if edge != minroot:
        node1, node2 = minroot
        if node1.parent != node2:
            node1, node2 = node2, node1
        assert node1.parent == node2
        treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)

    if returnCost:
        return gtree, mincost
    return gtree
def recon_root(self, gtree, newCopy=True, returnCost=False):
    """
    Returns the rerooted tree with min DTL cost
    Generalizes compute_cost to multiple trees.

    The species tree followed by every rerooting of the gene tree (leaves
    renamed through self.gene2species) is written to self.treefile, one tree
    per line.  The external command `cmd` is run on that file with the
    duplication, transfer and loss costs of this object, and one
    "The minimum reconciliation cost is: N" line is expected per gene tree,
    in the order the trees were written.

    gtree      -- gene tree to reroot
    newCopy    -- forwarded to self._reroot_helper
    returnCost -- if true, return (tree, cost) instead of just the tree

    Raises ValueError if no rerootings were generated, and Exception if the
    external command exits with a non-zero return code.
    """
    # write species tree and gene tree using species map
    edges = []
    treeout = util.open_stream(self.treefile, 'w')
    try:
        self.stree.write(treeout, oneline=True)
        treeout.write('\n')
        for gtree, edge in self._reroot_helper(gtree, newCopy=newCopy,
                                               returnEdge=True):
            gtree.write(treeout,
                        namefunc=lambda name: self.gene2species(name),
                        oneline=True)
            treeout.write('\n')
            edges.append(edge)
    finally:
        # close the file even if writing one of the trees fails
        treeout.close()

    if not edges:
        raise ValueError("no rerootings were generated for the gene tree")

    # execute command
    # communicate() drains the pipe while waiting; calling wait() first can
    # deadlock once the report exceeds the OS pipe buffer.
    proc = subprocess.Popen([cmd,
                             '-i', self.treefile,
                             '-D', str(self.dupcost),
                             '-T', str(self.transfercost),
                             '-L', str(self.losscost)],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    output = proc.communicate()[0]
    ret = proc.returncode
    if ret != 0:
        raise Exception("DTL failed with returncode %d" % ret)

    # parse output: the k-th cost line belongs to the k-th gene tree written
    i = 0
    n = len(edges)
    costs = [None] * n
    for line in output.splitlines():
        toks = line.split(':')
        if toks[0] == "The minimum reconciliation cost is":
            assert i < n
            costs[i] = int(toks[1])
            i += 1
    assert all(c is not None for c in costs), \
        "missing reconciliation cost for at least one rerooting"

    # find minimum cost tree (first minimum wins ties)
    ndx, mincost = min(enumerate(costs), key=lambda it: it[1])
    minroot = edges[ndx]

    # gtree is currently rooted on the last edge produced by the helper
    if edge != minroot:
        node1, node2 = minroot
        if node1.parent != node2:
            node1, node2 = node2, node1
        assert node1.parent == node2
        treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)

    if returnCost:
        return gtree, mincost
    return gtree
def reroot(self, node):
    """Reroot self.tree in place on `node` and refresh the display.

    A failure inside treelib.reroot is printed rather than propagated, so
    the calls to set_tree, show and on_reorder_leaves always run afterwards.
    """
    try:
        treelib.reroot(self.tree, node.name, newCopy=False)
    except Exception as e:
        # best effort: report the problem and still refresh the view below
        print(e)
    print("rerooted on node %s" % (node.name,))
    self.set_tree(self.tree)

    self.show()
    self.on_reorder_leaves()
def _reroot_helper(self, gtree, newCopy=True, returnEdge=False): """ Yields rerooted trees. Adapted from phylo.recon_root. """ # make a consistent unrooted copy of gene tree if newCopy: gtree = gtree.copy() if len(gtree.leaves()) == 2: raise StopIteration oldroot = gtree.root.name treelib.unroot(gtree, newCopy=False) treelib.reroot(gtree, gtree.nodes[sorted(gtree.leaf_names())[0]].parent.name, onBranch=False, newCopy=False) # make rerooting order consistent using hash ordering phylo.hash_order_tree(gtree, self.gene2species) # get list of edges to root on edges = [] def walk(node): edges.append((node, node.parent)) if not node.is_leaf(): node.recurse(walk) edges.append((node, node.parent)) for child in gtree.root.children: walk(child) # try initial root treelib.reroot(gtree, edges[0][0].name, newCopy=False) gtree.rename(gtree.root.name, oldroot) if returnEdge: yield gtree, edges[0] else: yield gtree rootedge = sorted(edges[0]) # try rerooting on everything for edge in edges[1:]: if sorted(edge) == rootedge: continue rootedge = sorted(edge) node1, node2 = edge if node1.parent != node2: node1, node2 = node2, node1 assert node1.parent == node2, "%s %s" % (node1.name, node2.name) # new root and cost treelib.reroot(gtree, node1.name, newCopy=False, keepName=True) if returnEdge: yield gtree, edge else: yield gtree
def recon_root(gtree, stree, gene2species = gene2species,
               rootby = "duploss", newCopy=True):
    """Reroot a tree by minimizing the number of duplications/losses/both

    gtree        -- gene tree to reroot
    stree        -- species tree to reconcile against
    gene2species -- function passed to reconcile and hash_order_tree
    rootby       -- what to minimize: "dup", "loss" or "duploss"
    newCopy      -- if true, work on a copy instead of the tree passed in

    Returns the rerooted tree.  A tree with exactly two leaves is returned
    with its rooting unchanged.

    Raises ValueError for an unknown rootby value.

    The cost is computed in full for the first rooting only.  For each later
    rooting only the old root and the node at the far end of the new root
    edge are re-reconciled, and their contribution to the cost is removed
    before and re-added after the reroot.
    """
    # validate before doing any work
    if rootby not in ("dup", "loss", "duploss"):
        raise ValueError("unknown rootby value '%s'" % rootby)
    count_dups = rootby in ("dup", "duploss")
    count_losses = rootby in ("loss", "duploss")

    # make a consistent unrooted copy of gene tree
    if newCopy:
        gtree = gtree.copy()

    if len(gtree.leaves()) == 2:
        # return the tree rather than None
        return gtree

    treelib.unroot(gtree, newCopy=False)
    treelib.reroot(gtree,
                   gtree.nodes[sorted(gtree.leaf_names())[0]].parent.name,
                   onBranch=False, newCopy=False)

    # make recon root consistent for rerooting tree of the same names
    # TODO: there is the possibility of ties, they are currently broken
    # arbitrarily.  In order to make comparison of reconRooted trees with
    # same gene names accurate, hashOrdering must be done, for now.
    hash_order_tree(gtree, gene2species)

    # get list of edges to root on
    edges = []

    def walk(node):
        edges.append((node, node.parent))
        if not node.is_leaf():
            node.recurse(walk)
            edges.append((node, node.parent))

    for child in gtree.root.children:
        walk(child)

    # try initial root and recon
    treelib.reroot(gtree, edges[0][0].name, newCopy=False)
    recon = reconcile(gtree, stree, gene2species)
    events = label_events(gtree, recon)

    # find reconciliation that minimizes loss
    minroot = edges[0]
    current = edges[0]          # edge the tree is rooted on right now
    rootedge = sorted(edges[0])
    if rootby == "dup":
        cost = count_dup(gtree, events)
    elif rootby == "loss":
        cost = len(find_loss(gtree, stree, recon))
    else:
        cost = count_dup_loss(gtree, stree, recon, events)
    mincost = cost

    # try rooting on everything
    for edge in edges[1:]:
        # skip an edge that is the current rooting
        if sorted(edge) == rootedge:
            continue
        rootedge = sorted(edge)

        node1, node2 = edge
        if node1.parent != node2:
            node1, node2 = node2, node1
        assert node1.parent == node2, "%s %s" % (node1.name, node2.name)

        # uncount cost of the two nodes about to be re-reconciled
        if count_dups:
            if events[gtree.root] == "dup":
                cost -= 1
            if events[node2] == "dup":
                cost -= 1
        if count_losses:
            cost -= len(find_loss_under_node(gtree.root, recon))
            cost -= len(find_loss_under_node(node2, recon))

        # new root and recon
        treelib.reroot(gtree, node1.name, newCopy=False)
        current = edge

        recon[node2] = reconcile_node(node2, stree, recon)
        recon[gtree.root] = reconcile_node(gtree.root, stree, recon)
        events[node2] = label_events_node(node2, recon)
        events[gtree.root] = label_events_node(gtree.root, recon)

        # recount cost of the same two nodes under the new rooting
        if count_dups:
            if events[node2] == "dup":
                cost += 1
            if events[gtree.root] == "dup":
                cost += 1
        if count_losses:
            cost += len(find_loss_under_node(gtree.root, recon))
            cost += len(find_loss_under_node(node2, recon))

        # keep track of min cost (first minimum wins ties)
        if cost < mincost:
            mincost = cost
            minroot = edge

    # root tree by minroot
    if current != minroot:
        node1, node2 = minroot
        if node1.parent != node2:
            node1, node2 = node2, node1
        assert node1.parent == node2
        treelib.reroot(gtree, node1.name, newCopy=False)

    return gtree
def neighborjoin(distmat, genes, usertree=None):
    """Neighbor joining algorithm

    distmat  -- square matrix of pairwise distances; distmat[i][j] is the
                distance between genes[i] and genes[j]
    genes    -- list of leaf names, one per row of distmat
    usertree -- optional tree whose topology dictates the joining order;
                without it the pair minimizing the neighbor joining
                criterion is joined at each step

    Returns a new treelib.Tree with branch lengths stored in node `dist`.
    If usertree is given and rooted, the result is rerooted to match it.

    Raises ValueError if fewer than two genes are given.
    """
    ngenes = len(genes)
    if ngenes < 2:
        raise ValueError("neighborjoin requires at least two genes")

    tree = treelib.Tree()
    leaves = {}
    dists = util.Dict(2, None)
    restdists = {}

    # initialize distances
    for i, gene_i in enumerate(genes):
        r = 0
        for j, gene_j in enumerate(genes):
            dists[gene_i][gene_j] = distmat[i][j]
            r += distmat[i][j]
        # Rest distances are only read inside the join loop, which needs more
        # than two leaves; skipping them otherwise avoids dividing by zero.
        # float() keeps integer matrices from being floor-divided.
        if ngenes > 2:
            restdists[gene_i] = r / float(ngenes - 2)

    # initialize leaves
    for gene in genes:
        tree.add(treelib.TreeNode(gene))
        leaves[gene] = 1

    # if usertree is given, determine merging order
    merges = []
    newnames = {}
    if usertree is not None:
        def walk(node):
            if not node.is_leaf():
                assert len(node.children) == 2, "usertree is not binary"
                for child in node:
                    walk(child)
                merges.append(node)
                # NOTE(review): assumes tree.new_name() below hands out
                # 1, 2, 3, ... in the same order -- confirm against treelib.
                newnames[node] = len(merges)
            else:
                newnames[node] = node.name
        walk(usertree.root)
        # reversed so that pop() returns the merges in post-order
        merges.reverse()

    # join loop
    while len(leaves) > 2:
        if usertree is None:
            # search for closest genes
            low = util.INF
            lowpair = (None, None)
            leaveslst = list(leaves)

            for i, gene1 in enumerate(leaveslst):
                for gene2 in leaveslst[i+1:]:
                    dist = dists[gene1][gene2] - restdists[gene1] \
                           - restdists[gene2]
                    if dist < low:
                        low = dist
                        lowpair = (gene1, gene2)
        else:
            node = merges.pop()
            lowpair = (newnames[node.children[0]],
                       newnames[node.children[1]])

        # join gene1 and gene2
        gene1, gene2 = lowpair
        parent = treelib.TreeNode(tree.new_name())
        tree.add_child(parent, tree.nodes[gene1])
        tree.add_child(parent, tree.nodes[gene2])

        # set distances
        tree.nodes[gene1].dist = (dists[gene1][gene2] + restdists[gene1] -
                                  restdists[gene2]) / 2.0
        tree.nodes[gene2].dist = dists[gene1][gene2] - tree.nodes[gene1].dist

        # gene1 and gene2 are no longer leaves
        del leaves[gene1]
        del leaves[gene2]

        # distances from the new internal node to every remaining leaf
        gene3 = parent.name
        r = 0
        for gene in leaves:
            dists[gene3][gene] = (dists[gene1][gene] + dists[gene2][gene] -
                                  dists[gene1][gene2]) / 2.0
            dists[gene][gene3] = dists[gene3][gene]
            r += dists[gene3][gene]
        leaves[gene3] = 1

        if len(leaves) > 2:
            restdists[gene3] = r / float(len(leaves) - 2)

    # join the last two genes into a tribranch
    gene1, gene2 = leaves.keys()
    # prefer an int-named node (as produced by new_name) as the root
    if not isinstance(gene1, int):
        gene1, gene2 = gene2, gene1
    tree.add_child(tree.nodes[gene1], tree.nodes[gene2])
    tree.nodes[gene2].dist = dists[gene1][gene2]
    tree.root = tree.nodes[gene1]

    # root tree according to usertree
    if usertree is not None and treelib.is_rooted(usertree):
        roots = set([newnames[usertree.root.children[0]],
                     newnames[usertree.root.children[1]]])
        newroot = None
        for child in tree.root.children:
            if child.name in roots:
                newroot = child

        assert newroot is not None
        treelib.reroot(tree, newroot.name, newCopy=False)

    return tree