def __init__(self, tree, stree, gene2species,
             dupcost=1, losscost=1, coalcost=1, implied=True,
             init_locus_tree=None,
             name_internal="n", log=sys.stdout):
    """Store the search inputs and create the proposer.

    `tree` is renamed in place with common.rename_nodes (using
    `name_internal`) and then kept as `self.coal_tree`.  If
    `init_locus_tree` is missing or falsy, a copy of the renamed `tree`
    is used as the initial locus tree.  The remaining arguments are
    stored on the instance unchanged.
    """
    # rename first, so the copy and the proposer below see the renamed tree
    common.rename_nodes(tree, name_internal)

    # trees and gene-to-species mapping
    self.coal_tree = tree
    self.stree = stree
    self.gene2species = gene2species

    # event costs and options
    self.dupcost, self.losscost, self.coalcost = dupcost, losscost, coalcost
    self.implied = implied
    self.name_internal = name_internal
    self.log_stream = log

    # starting locus tree: caller-supplied if truthy, else a copy of tree
    if init_locus_tree:
        self.init_locus_tree = init_locus_tree
    else:
        self.init_locus_tree = tree.copy()

    self.proposer = DLCReconProposer(tree, stree, gene2species)
def recon(self, nsearch=1000, noimprov=None):
    """Perform reconciliation

    Evaluates at most `nsearch` proposals and returns `self.maxrecon`
    after renaming the nodes of its locus tree with `self.name_internal`.

    nsearch  -- maximum number of proposals to evaluate
    noimprov -- if not None, stop once this many iterations have passed
                since the proposal cost last strictly decreased; the
                check runs before eval_search, so the proposal that
                triggers the stop is not passed to eval_search
    """
    self.init_search()
    proposal = self.proposer.init_proposal()
    self.maxrecon = proposal.copy()

    best_cost = util.INF
    i_best_cost = -1    # iteration at which best_cost was last lowered

    for i in xrange(nsearch):
        cost = self.eval_proposal(proposal)

        if cost < best_cost:
            best_cost = cost
            i_best_cost = i

        # identity test against None: only None disables early stopping
        if noimprov is not None and (i - i_best_cost) >= noimprov:
            break

        # NOTE(review): presumably eval_search is what updates
        # self.maxrecon on acceptance -- confirm against its definition
        self.eval_search(cost, proposal)
        proposal = self.proposer.next_proposal()

    # rename locus tree nodes
    common.rename_nodes(self.maxrecon.locus_tree, self.name_internal)
    return self.maxrecon
def __init__(self, gtree, stree, gene2species, gene2locus=None,
             duprange=DEFAULT_RANGE, lossrange=DEFAULT_RANGE,
             max_loci=INF, max_dups=INF, max_losses=INF,
             name_internal="n", log=sys.stdout):
    """Store the reconciliation inputs and check the numeric settings.

    `gtree` is renamed in place with common.rename_nodes (using
    `name_internal`).  `duprange` and `lossrange` must each unpack into
    a (min, max) pair with 0 < min < max, and the three `max_*` limits
    must be positive; violations raise AssertionError.  `log` is wrapped
    in a util.Timer and stored as `self.log`.
    """
    # rename gene tree nodes
    common.rename_nodes(gtree, name_internal)

    # inputs
    self.gtree = gtree
    self.stree = stree
    self.gene2species = gene2species
    self.gene2locus = gene2locus

    # each range is a strictly increasing pair of positive bounds
    dup_lo, dup_hi = duprange
    loss_lo, loss_hi = lossrange
    assert (0 < dup_lo < dup_hi) and (0 < loss_lo < loss_hi)
    self.duprange = duprange
    self.lossrange = lossrange

    # fixed options
    self.implied = True
    self.delay = False
    self.prescreen = False

    # search limits
    assert all(limit > 0 for limit in (max_loci, max_dups, max_losses))
    self.max_loci = max_loci
    self.max_dups = max_dups
    self.max_losses = max_losses

    self.name_internal = name_internal
    self.log = util.Timer(log)
def next_proposal(self):
    """Propose a new locus tree and return the reconciliation built on it.

    When the current locus tree has two or fewer leaves, the stored
    reconciliation is returned unchanged.  Otherwise a locus tree that
    was proposed but not accepted is reverted, a new one is proposed,
    and a rooted, renamed copy of it is passed to `_recon_lca`.
    """
    search = self._locus_search

    # nothing new is proposed for a tree with two or fewer leaves
    nleaves = len(search.get_tree().leaves())
    if nleaves <= 2:
        return self._recon

    # undo the previous proposal unless it was accepted
    if not self._accept_locus:
        search.revert()

    # propose new locus_tree; it stays unaccepted until flagged otherwise
    search.propose()
    self._accept_locus = False

    # work on a copy so the search keeps its own tree untouched
    proposed_tree = search.get_tree().copy()

    # TODO: make recon root optional
    phylo.recon_root(proposed_tree, self._stree, self._gene2species,
                     newCopy=False)
    common.rename_nodes(proposed_tree)

    # propose remaining parts of dlcoal recon
    self._recon = self._recon_lca(proposed_tree)
    return self._recon
def recon(self, nsearch=1000, nconverge=None):
    """Perform reconciliation

    Evaluates at most `nsearch` proposals and returns `self.maxrecon`
    after renaming the nodes of its locus tree with `self.name_internal`.

    If `nconverge` is truthy, the search stops early once `nconverge`
    consecutive proposals have failed to strictly lower the best cost
    seen so far.
    """
    # initialize
    self.init_search()
    proposal = self.proposer.init_proposal()
    self.maxrecon = proposal.copy()

    # convergence bookkeeping (only consulted when nconverge is truthy)
    nstalled = 0
    lowest_cost = util.INF

    # search
    for _ in xrange(nsearch):
        # evaluate cost of proposal, then accept / reject it
        cost = self.eval_proposal(proposal)
        self.eval_search(cost, proposal)

        # stop if converged
        # costs are compared instead of checking acceptance, because the
        # search can toggle between multiple optimal solutions with the
        # same cost
        if nconverge:
            if cost < lowest_cost:
                lowest_cost = cost
                nstalled = 0
            else:
                nstalled += 1
            if nstalled == nconverge:
                break

        # make new proposal
        proposal = self.proposer.next_proposal()

    # rename locus tree nodes
    common.rename_nodes(self.maxrecon.locus_tree, self.name_internal)
    return self.maxrecon
def recon(self):
    """Perform reconciliation

    Logs the input trees, infers the species map, restricts the species
    tree to the subtree rooted where the gene tree root maps, adds
    implied nodes to the gene tree, infers the locus map, and returns
    `self.count_vectors`.
    """
    self.log.start("Reconciling")

    # log input gene and species trees
    inputs = (("gene tree\n", self.gtree),
              ("species tree\n", self.stree))
    for title, tree in inputs:
        self.log.log(title)
        log_tree(tree, self.log, func=treelib.draw_tree_names)

    # infer species map
    self._infer_species_map()
    self.log.log("\n\n")

    # start the species tree at the node the gene tree root maps to,
    # and re-express the species map in terms of that subtree's nodes
    substree = treelib.subtree(self.stree, self.srecon[self.gtree.root])

    def to_subtree_node(snode):
        return substree.nodes[snode.name]

    subrecon = util.mapdict(self.srecon, val=to_subtree_node)

    # switch internal storage to the subtree versions
    # NOTE(review): the previous self.stree / self.srecon are not kept or
    # restored anywhere in this method -- confirm callers expect that
    self.stree = substree
    self.srecon = subrecon

    # add implied nodes (standard speciation, speciation from duplication,
    # delay nodes), then relabel events (so that factor_tree works)
    reconlib.add_implied_nodes(self.gtree, self.stree, self.srecon,
                               self.sevents, delay=self.delay)
    self.sevents = phylo.label_events(self.gtree, self.srecon)
    common.rename_nodes(self.gtree, self.name_internal)

    # log gene tree (with species map)
    self.log.log("gene tree (with species map)\n")
    log_tree(self.gtree, self.log, func=draw_tree_srecon,
             srecon=self.srecon)

    # infer locus map
    self._infer_locus_map()

    self.log.stop()
    return self.count_vectors
def labeledrecon_to_recon(gene_tree, labeled_recon, stree,
                          name_internal="n"):
    """Convert from DLCpar to DLCoal reconciliation model

    NOTE: This is non-reversible because it produces NON-dated coalescent
    and locus trees

    gene_tree     -- gene tree; copied to form the coalescent tree
    labeled_recon -- object providing `locus_map`, `species_map` and
                     `order` for the nodes of gene_tree
    stree         -- species tree
    name_internal -- passed to common.rename_nodes for the locus tree

    Returns a pair (coal_tree, recon) where recon is
    phyloDLC.Recon(coal_recon, locus_tree, locus_recon, locus_events,
    daughters).

    Raises ValueError if, within some species branch, `order` lists parent
    loci that are never created in that branch (the original loop would
    not terminate in that situation).
    """

    locus_map = labeled_recon.locus_map
    species_map = labeled_recon.species_map
    order = labeled_recon.order

    # coalescent tree equals gene tree
    coal_tree = gene_tree.copy()

    # factor gene tree
    events = phylo.label_events(gene_tree, species_map)
    subtrees = factor_tree(gene_tree, stree, species_map, events)

    # gene names, indexed by species node and then by locus
    genenames = {}
    for snode in stree:
        genenames[snode] = {}
    for leaf in gene_tree.leaves():
        genenames[species_map[leaf]][locus_map[leaf]] = leaf.name

    # 2D dict to keep track of locus tree nodes by hashing by speciation
    # node and locus
    # key1 = snode, key2 = locus,
    # value = list of nodes (sorted from oldest to most recent)
    locus_tree_map = {}
    for snode in stree:
        locus_tree_map[snode] = {}

    # initialize locus tree, coal/locus recon, and daughters
    locus_tree = treelib.Tree()
    coal_recon = {}
    locus_recon = {}
    locus_events = {}
    daughters = []

    # initialize root of locus tree
    root = treelib.TreeNode(locus_tree.new_name())
    locus_tree.add(root)
    locus_tree.root = root
    sroot = species_map[gene_tree.root]
    locus = locus_map[gene_tree.root]
    coal_recon[coal_tree.root] = root
    locus_recon[root] = sroot
    locus_tree_map[sroot][locus] = [root]

    # build locus tree along each species branch
    for snode in stree.preorder(sroot):
        subtrees_snode = subtrees[snode]

        # skip if no branches in this species branch
        if len(subtrees_snode) == 0:
            continue

        # build locus tree
        # 1) speciation
        if snode.parent:
            for (root, rootchild, leaves) in subtrees_snode:
                if rootchild:
                    locus = locus_map[root]     # use root locus!

                    # create new locus tree node in this species branch
                    if locus not in locus_tree_map[snode]:
                        old_node = locus_tree_map[snode.parent][locus][-1]

                        new_node = treelib.TreeNode(locus_tree.new_name())
                        locus_tree.add_child(old_node, new_node)
                        locus_recon[new_node] = snode

                        locus_events[old_node] = "spec"

                        locus_tree_map[snode][locus] = [new_node]

                    # update coal_recon
                    cnode = coal_tree.nodes[rootchild.name]
                    lnode = locus_tree_map[snode][locus][-1]
                    coal_recon[cnode] = lnode

        # 2) duplication
        if snode in order:
            # may have to reorder loci (in case of multiple duplications)
            queue = collections.deque(order[snode].keys())
            # consecutive punts since a parent locus was last handled;
            # locus_tree_map[snode] only changes when a locus is handled,
            # so punting every pending locus in a row means no progress
            # is possible
            npunts = 0
            while len(queue) > 0:
                plocus = queue.popleft()
                if plocus not in locus_tree_map[snode]:
                    # punt: parent locus does not exist in this branch yet
                    queue.append(plocus)
                    npunts += 1
                    if npunts >= len(queue):
                        raise ValueError(
                            "cannot order duplications in species branch "
                            "%r: parent loci %r are never created"
                            % (snode.name, list(queue)))
                    continue
                npunts = 0

                # handle this ordered list
                lst = order[snode][plocus]
                for gnode in lst:
                    locus = locus_map[gnode]
                    cnode = coal_tree.nodes[gnode.name]

                    if locus != plocus:     # duplication
                        # update locus_tree, locus_recon, and daughters
                        old_node = locus_tree_map[snode][plocus][-1]

                        # first child continues the parent locus
                        new_node1 = treelib.TreeNode(locus_tree.new_name())
                        locus_tree.add_child(old_node, new_node1)
                        locus_recon[new_node1] = snode

                        # second child starts the new (daughter) locus
                        new_node2 = treelib.TreeNode(locus_tree.new_name())
                        locus_tree.add_child(old_node, new_node2)
                        coal_recon[cnode] = new_node2
                        locus_recon[new_node2] = snode
                        daughters.append(new_node2)

                        locus_events[old_node] = "dup"

                        locus_tree_map[snode][plocus].append(new_node1)
                        locus_tree_map[snode][locus] = [new_node2]

                    else:                   # deep coalescence
                        lnode = locus_tree_map[snode][locus][-1]
                        coal_recon[cnode] = lnode

        # reconcile remaining coal tree nodes to locus tree
        # (no duplication so only a single locus tree node with the
        # desired locus)
        for (root, rootchild, leaves) in subtrees_snode:
            if rootchild:
                for gnode in gene_tree.preorder(
                        rootchild, is_leaf=lambda x: x in leaves):
                    cnode = coal_tree.nodes[gnode.name]
                    if cnode not in coal_recon:
                        locus = locus_map[gnode]
                        assert len(locus_tree_map[snode][locus]) == 1
                        lnode = locus_tree_map[snode][locus][-1]
                        coal_recon[cnode] = lnode

        # tidy up if at an extant species
        if snode.is_leaf():
            for locus, nodes in locus_tree_map[snode].iteritems():
                genename = genenames[snode][locus]
                lnode = nodes[-1]
                cnode = coal_tree.nodes[genename]

                # relabel genes in locus tree
                locus_tree.rename(lnode.name, genename)

                # relabel locus events
                locus_events[lnode] = "gene"

                # reconcile genes (genes in coal tree reconcile to genes
                # in locus tree)
                # possible mismatch due to genes having an internal
                # ordering even though all exist to present time
                # [could also do a new round of "speciation" at bottom of
                # extant species branches, but this introduces single
                # children nodes that would just be removed anyway]
                coal_recon[cnode] = lnode

    # rename internal nodes
    common.rename_nodes(locus_tree, name_internal)

    # simplify coal_tree (and reconciliations)
    removed = treelib.remove_single_children(coal_tree)
    for cnode in removed:
        del coal_recon[cnode]

    # simplify locus_tree (and reconciliations + daughters)
    removed = treelib.remove_single_children(locus_tree)
    for cnode, lnode in coal_recon.items():
        if lnode in removed:
            # reconciliation updates to first child that is not removed
            new_lnode = lnode
            while new_lnode in removed:
                new_lnode = new_lnode.children[0]
            coal_recon[cnode] = new_lnode
    for lnode in removed:
        del locus_recon[lnode]
        del locus_events[lnode]
    for ndx, lnode in enumerate(daughters):
        if lnode in removed:
            # daughter updates to first child that is not removed
            new_lnode = lnode
            while new_lnode in removed:
                new_lnode = new_lnode.children[0]
            daughters[ndx] = new_lnode

    # every remaining locus tree node must have been given an event above
    assert all([lnode in locus_events for lnode in locus_tree])

    #========================================
    # put everything together

    return coal_tree, phyloDLC.Recon(coal_recon, locus_tree, locus_recon,
                                     locus_events, daughters)
def recon_to_labeledrecon(coal_tree, recon, stree, gene2species,
                          name_internal="n", locus_mpr=True):
    """Convert from DLCoal to DLCpar reconciliation model

    If locus_mpr is set (default), use MPR from locus_tree to stree:
    the locus reconciliation is recomputed with phylo.reconcile and only
    those daughters of `recon` whose parent is labeled "dup" are kept.
    Otherwise `recon.locus_recon` and `recon.daughters` are used as given.

    coal_tree     -- coalescent tree; copied to form the gene tree
    recon         -- object providing `coal_recon`, `locus_tree`,
                     `locus_recon` and `daughters`
    stree         -- species tree
    gene2species  -- passed to phylo.reconcile when locus_mpr is set
    name_internal -- passed to common.rename_nodes for the gene tree

    Returns a pair (gene_tree, LabeledRecon(species_map, locus_map, order)).
    """

    gene_tree = coal_tree.copy()
    coal_recon = recon.coal_recon
    locus_tree = recon.locus_tree
    if not locus_mpr:
        locus_recon = recon.locus_recon
        daughters = recon.daughters
    else:
        locus_recon = phylo.reconcile(locus_tree, stree, gene2species)
        locus_events = phylo.label_events(locus_tree, locus_recon)
        daughters = [node for node in recon.daughters
                     if locus_events[node.parent] == "dup"]
    # membership is tested once per locus tree node below
    daughter_set = set(daughters)

    #========================================
    # find species map

    # find species tree subtree
    substree = treelib.subtree(stree,
                               locus_recon[coal_recon[coal_tree.root]])

    # find species map
    species_map = {}
    for node in gene_tree:
        cnode = coal_tree.nodes[node.name]
        lnode = coal_recon[cnode]
        snode = locus_recon[lnode]
        species_map[node] = substree[snode.name]

    # add implied speciation and delay nodes to gene tree
    events = phylo.label_events(gene_tree, species_map)
    added_spec, added_dup, added_delay = add_implied_nodes(
        gene_tree, substree, species_map, events)

    # rename internal nodes
    common.rename_nodes(gene_tree, name_internal)

    #========================================
    # helper functions

    def walk_up(node):
        # nearest node at or above `node` whose name is in the coal tree
        if node.name in coal_tree.nodes:
            return coal_tree.nodes[node.name]
        return walk_up(node.parent)

    def walk_down(node):
        # nearest node at or below `node` whose name is in the coal tree;
        # nodes missing from the coal tree must have a single child
        if node.name in coal_tree.nodes:
            return coal_tree.nodes[node.name]
        assert len(node.children) == 1, (node.name, node.children)
        return walk_down(node.children[0])

    #========================================
    # find locus map

    # label loci in locus tree
    loci = {}
    next_locus = 1
    # keep track of duplication ages (measured as dist from leaf since
    # root dist may differ in coal and locus trees)
    locus_times = treelib.get_tree_ages(locus_tree)
    dup_times = {}
    dup_snodes = {}
    for lnode in locus_tree.preorder():
        if not lnode.parent:                # root
            loci[lnode] = next_locus
        elif lnode in daughter_set:         # duplication
            next_locus += 1
            loci[lnode] = next_locus
            dup_times[next_locus] = locus_times[lnode.parent]
            dup_snodes[next_locus] = locus_recon[lnode.parent]
        else:                               # regular node
            loci[lnode] = loci[lnode.parent]

    # label loci in gene tree
    locus_map = {}
    for node in gene_tree:
        if node.name in coal_tree.nodes:
            # node in coal tree
            cnode = coal_tree.nodes[node.name]
            lnode = coal_recon[cnode]
            locus_map[node] = loci[lnode]
        else:
            # node not in coal tree, so use either parent or child locus
            cnode_up = walk_up(node)
            lnode_up = coal_recon[cnode_up]
            loci_up = loci[lnode_up]

            cnode_down = walk_down(node)
            lnode_down = coal_recon[cnode_down]
            loci_down = loci[lnode_down]

            if loci_up == loci_down:
                # parent and child locus match
                locus_map[node] = loci_up
            else:
                # determine whether to use parent or child locus:
                # use the child locus when this node's species is the
                # duplication's species or one of its descendants
                snode = species_map[node]
                dup_snode = dup_snodes[loci_down]
                if (snode.name == dup_snode.name) or \
                   (snode.name in dup_snode.descendant_names()):
                    locus_map[node] = loci_down
                else:
                    locus_map[node] = loci_up

    #========================================
    # find order

    # find loci that give rise to new loci in each sbranch
    parent_loci = set()
    for node in gene_tree:
        if node.parent:
            locus = locus_map[node]
            plocus = locus_map[node.parent]

            if locus != plocus:
                snode = species_map[node]
                parent_loci.add((snode, plocus))

    # find order (locus tree and coal tree must use same timescale)
    order = {}
    for node in gene_tree:
        if node.parent:
            snode = species_map[node]
            plocus = locus_map[node.parent]

            if (snode, plocus) in parent_loci:
                order.setdefault(snode, {}).setdefault(plocus, []).append(node)

    # find coalescent/duplication times (= negative age) and depths
    coal_times = treelib.get_tree_ages(coal_tree)
    depths = get_tree_depths(gene_tree, distfunc=lambda node: 1)

    def get_time(node):
        if locus_map[node.parent] != locus_map[node]:
            # duplication
            return -dup_times[locus_map[node]], depths[node]
        else:
            # walk up to the nearest node in the coal tree
            # if the node was added (due to spec or dup), it has a single
            # child so it can be placed directly after its parent without
            # affecting the extra lineage count
            if node.name in coal_tree.nodes:
                cnode = coal_tree.nodes[node.name]
            else:
                cnode = walk_up(node)
            return -coal_times[cnode], depths[node]

    # sort by node times
    # 1) larger age (smaller dist from root) are earlier in sort
    # 2) if equal dist, then smaller depths are earlier in sort
    for by_locus in order.values():
        for lst in by_locus.values():
            lst.sort(key=get_time)

    #========================================
    # put everything together

    return gene_tree, LabeledRecon(species_map, locus_map, order)