def sample_dlcoal_no_ifix(stree, n, freq, duprate, lossrate, freqdup, freqloss,
                          forcetime, namefunc=lambda x: x,
                          remove_single=True, name_internal="n", minsize=0):
    """Sample a gene tree from the DLCoal model using the new simulator.

    A locus tree is drawn with ``sim_DLILS_gene_tree`` and re-drawn until it
    has at least ``minsize`` leaves.  Unless the locus tree has a single node
    (total extinction), it is expanded with ``locus_to_logged_tree`` and a
    coalescent tree is sampled inside that logged tree.

    ``stree``, ``n``, ``freq``, ``duprate``, ``lossrate``, ``freqdup``,
    ``freqloss`` and ``forcetime`` are passed unchanged to
    ``sim_DLILS_gene_tree``; ``n`` is additionally passed as ``popsize`` to
    ``locus_to_logged_tree``.  ``namefunc`` is accepted but is not used by
    this function.

    remove_single -- if true, strip single-child nodes from the coal tree
                     and subset ``coal_recon`` accordingly
    name_internal -- if true, passed to ``dlcoal.rename_nodes`` for both the
                     coal tree and the locus tree

    Returns ``(coal_tree, extra)`` where ``extra`` is a dict with the keys
    "locus_tree", "locus_recon", "locus_events", "coal_tree", "coal_recon"
    and "daughters".
    """
    # keep drawing locus trees until one is large enough
    while True:
        locus_tree, locus_extras = sim_DLILS_gene_tree(
            stree, n, freq, duprate, lossrate, freqdup, freqloss, forcetime)
        if len(locus_tree.leaves()) >= minsize:
            break

    if len(locus_tree.nodes) > 1:  # TODO: check 1 value
        # simulate coalescence inside the new (expanded) locus tree
        logged_locus_tree, logged_extras = locus_to_logged_tree(
            locus_tree, popsize=n)
        daughters, pops, log_recon = [logged_extras[k] for k in (0, 1, 2)]

        def _logged_name(lognamex):
            # name is the log_recon entry joined to the argument itself
            return log_recon[lognamex] + '_' + str(lognamex)

        # (locus_tree_copy is no longer passed here)
        coal_tree, coal_recon = dlcoal.sample_locus_coal_tree(
            logged_locus_tree, n=pops, daughters=daughters,
            namefunc=_logged_name)
        treelib.assert_tree(coal_tree)
    else:
        # total extinction: the coal tree is a bare root mapped to the
        # locus tree root
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()

    # clean up coal tree
    if remove_single:
        treelib.remove_single_children(coal_tree)
        phylo.subset_recon(coal_tree, coal_recon)

    if name_internal:
        for tree in (coal_tree, locus_tree):
            dlcoal.rename_nodes(tree, name_internal)

    # store extra information
    # TODO: update this now that we're using logged locus tree,
    #       new sample function
    extra = dict(locus_tree=locus_tree,
                 locus_recon=locus_extras['recon'],
                 locus_events=locus_extras['events'],
                 coal_tree=coal_tree,
                 coal_recon=coal_recon,
                 daughters=daughters)

    return coal_tree, extra
def recon_helper(self, nsearch=1000): """Perform reconciliation""" self.maxp = -util.INF self.maxrecon = None proposal = self.proposer.init_proposal() init_proposal = proposal.copy() for i in xrange(nsearch): if i % 10 == 0: print "search", i # evaluate the probability of proposal util.tic("eval") p = self.eval_proposal(proposal) util.toc() # evaluate the search, then keep or discard the proposal util.tic("prop") self.eval_search(p, proposal) proposal = self.proposer.next_proposal() # set the next proposal util.toc() # all proposals bad, use initial proposal if not self.maxrecon: self.maxrecon = init_proposal # rename locus tree nodes dlcoal.rename_nodes(self.maxrecon.locus_tree, self.name_internal) # how about coal_tree names? return self.maxrecon
def next_proposal(self):
    """Advance the search and return the next reconciliation proposal.

    * If the current coal tree has two or fewer leaves, the current
      reconciliation ``self._recon`` is returned unchanged.
    * If ``self._i_coal_recons`` has reached ``self._num_coal_recons``, a new
      coal tree is proposed through ``self._coal_search`` (reverting the
      previous proposal first if it was never accepted), rerooted against
      ``self._locus_tree``, renamed, and reconciled with ``self._recon_lca``.
    * Otherwise the counter is incremented and ``self._coal_recon_enum`` is
      advanced.  When that enumerator is exhausted the counter is set to its
      limit and the method is retried, so the retry goes straight to the
      "new coal tree" branch.

    Returns ``self._recon``.
    """
    # if leaves <= 2, no need to propose another tree
    if len(self._coal_search.get_tree().leaves()) <= 2:
        return self._recon

    # original author's note: this branch is always taken now because
    # _num_coal_recons is set to 0 (set elsewhere; not visible here)
    if self._i_coal_recons >= self._num_coal_recons:
        # if the previous proposal has not been accepted, revert it
        if not self._accept_coal:
            self._coal_search.revert()

        # propose a new coal tree using _coal_search
        self._coal_search.propose()
        self._accept_coal = False
        self._i_coal_recons = 1

        # work on a copy of the proposed tree
        coal_tree = self._coal_search.get_tree().copy()

        # make recon root optimal
        phylo.recon_root(coal_tree, self._locus_tree, newCopy=False)
        dlcoal.rename_nodes(coal_tree)

        # propose remaining parts of dlcoal recon
        # reconciliation is given by lca
        self._recon = self._recon_lca(coal_tree)
    else:
        # modify coal_recon
        try:
            self._i_coal_recons += 1
            next(self._coal_recon_enum)
        except StopIteration:
            # Enumerator exhausted: force the counter to its limit so the
            # retry proposes a new tree.  (This used to assign the
            # misspelled attribute `_i_coal_recon`, leaving the real counter
            # untouched and recursing once per remaining count.)
            self._i_coal_recons = self._num_coal_recons
            return self.next_proposal()

    return self._recon
def recon(self, nsearch=1000): """Perform reconciliation""" self.init_search() proposal = self.proposer.init_proposal() self.maxrecon = proposal.copy() for i in xrange(nsearch): if i % 10 == 0: print "search", i util.tic("eval") p = self.eval_proposal(proposal) util.toc() util.tic("prop") self.eval_search(p, proposal) proposal = self.proposer.next_proposal() util.toc() # rename locus tree nodes dlcoal.rename_nodes(self.maxrecon.locus_tree, self.name_internal) return self.maxrecon
def next_proposal(self):
    """Advance the search and return the next reconciliation proposal.

    * If the current locus tree has two or fewer leaves, the current
      reconciliation ``self._recon`` is returned unchanged.
    * If ``self._i_coal_recons`` has reached ``self._num_coal_recons``, a new
      locus tree is proposed through ``self._locus_search`` (reverting the
      previous proposal first if it was never accepted), rerooted against
      ``self._stree`` using ``self._gene2species``, renamed, and reconciled
      with ``self._recon_lca``.
    * Otherwise the counter is incremented and ``self._coal_recon_enum`` is
      advanced.  When that enumerator is exhausted the counter is set to its
      limit and the method is retried, so the retry goes straight to the
      "new locus tree" branch.

    Returns ``self._recon``.
    """
    if len(self._locus_search.get_tree().leaves()) <= 2:
        return self._recon

    if self._i_coal_recons >= self._num_coal_recons:
        # propose new locus_tree

        # if locus_tree has not yet been accepted, then revert it
        if not self._accept_locus:
            self._locus_search.revert()

        self._locus_search.propose()
        self._accept_locus = False
        self._i_coal_recons = 0

        # work on a copy of the proposed tree
        locus_tree = self._locus_search.get_tree().copy()

        # TODO: make recon root optional
        phylo.recon_root(locus_tree, self._stree, self._gene2species,
                         newCopy=False)
        dlcoal.rename_nodes(locus_tree)

        # propose remaining parts of dlcoal recon
        self._recon = self._recon_lca(locus_tree)
    else:
        # modify coal_recon
        try:
            self._i_coal_recons += 1
            next(self._coal_recon_enum)
        except StopIteration:
            # Enumerator exhausted: force the counter to its limit so the
            # retry proposes a new tree.  (This used to assign the
            # misspelled attribute `_i_coal_recon`, leaving the real counter
            # untouched and recursing once per remaining count.)
            self._i_coal_recons = self._num_coal_recons
            return self.next_proposal()

    return self._recon
def sample_dlcoal(stree, n, duprate, lossrate,
                  leaf_counts=None,
                  namefunc=lambda x: x,
                  remove_single=True, name_internal="n",
                  minsize=0, reject=False):
    """Sample a gene tree from the DLCoal model.

    A locus tree is drawn with ``birthdeath.sample_birth_death_gene_tree``
    (re-drawn until it has at least ``minsize`` leaves).  Unless the locus
    tree has a single node (total extinction), one child of every "dup" node
    is picked at random as the daughter and a coalescent tree is sampled
    within the locus tree.

    n             -- passed on to the multilocus sampler; if it is a dict it
                     is first re-keyed by locus node name (see below)
    leaf_counts   -- same treatment as ``n``, restricted to locus leaves
    namefunc      -- forwarded to the multilocus sampler
    remove_single -- if true, strip single-child nodes from the coal tree
                     and subset ``coal_recon`` accordingly
    name_internal -- if true, passed to ``dlcoal.rename_nodes`` for both the
                     coal tree and the locus tree
    reject        -- if true, use ``sample_multilocus_tree_reject`` (slow
                     rejection sampling, for testing) instead of
                     ``sample_multilocus_tree``

    Returns ``(coal_tree, extra)`` where ``extra`` is a dict with the keys
    "locus_tree", "locus_recon", "locus_events", "coal_tree", "coal_recon"
    and "daughters".
    """
    # generate the locus tree, retrying until it is large enough
    # TODO: does this take a namefunc?
    while True:
        locus_tree, locus_recon, locus_events = (
            birthdeath.sample_birth_death_gene_tree(stree, duprate, lossrate))
        if len(locus_tree.leaves()) >= minsize:
            break

    # a dict `n` is indexed by the name of the node each locus node maps to
    # in locus_recon; re-key it by the locus node's own name
    if isinstance(n, dict):
        n2 = dict((node.name, n[snode.name])
                  for node, snode in locus_recon.iteritems())
    else:
        n2 = n

    # same re-keying for a dict `leaf_counts`, over locus tree leaves only
    # TODO: how to handle copy number polymorphism?
    if isinstance(leaf_counts, dict):
        leaf_counts2 = dict(
            (leaf.name, leaf_counts[locus_recon[leaf].name])
            for leaf in locus_tree.leaves())
    else:
        leaf_counts2 = leaf_counts

    if len(locus_tree.nodes) > 1:
        # simulate coalescence

        # choose daughter duplications: one random child per dup node
        daughters = set(node.children[random.randint(0, 1)]
                        for node in locus_tree
                        if locus_events[node] == "dup")

        # rejection sampling is slow and meant for testing only
        sampler = (sample_multilocus_tree_reject if reject
                   else sample_multilocus_tree)
        coal_tree, coal_recon = sampler(
            locus_tree, n2, leaf_counts=leaf_counts2,
            daughters=daughters, namefunc=namefunc)
    else:
        # total extinction: the coal tree is a bare root mapped to the
        # locus tree root
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()

    # clean up coal tree
    if remove_single:
        treelib.remove_single_children(coal_tree)
        phylo.subset_recon(coal_tree, coal_recon)

    if name_internal:
        for tree in (coal_tree, locus_tree):
            dlcoal.rename_nodes(tree, name_internal)

    # store extra information
    extra = dict(locus_tree=locus_tree,
                 locus_recon=locus_recon,
                 locus_events=locus_events,
                 coal_tree=coal_tree,
                 coal_recon=coal_recon,
                 daughters=daughters)

    return coal_tree, extra
def sample_dlcoal_hem(stree, n, duprate, lossrate,
                      freq, freqdup, freqloss, steptime,
                      namefunc=lambda x: x,
                      keep_extinct=False,
                      remove_single=True, name_internal="n", minsize=0):
    """Sample a gene tree from the DLCoal model with hemiplasy.

    A locus tree is drawn with ``sample_locus_tree_hem`` and re-drawn until
    it has at least ``minsize`` leaves.  Unless the locus tree has a single
    node (total extinction), it is expanded with ``locus_to_logged_tree`` and
    a coalescent tree is sampled inside that logged tree.

    ``stree``, ``n``, ``duprate``, ``lossrate``, ``freq``, ``freqdup``,
    ``freqloss``, ``steptime`` and ``keep_extinct`` are passed unchanged to
    ``sample_locus_tree_hem``; ``n`` is additionally passed as ``popsize`` to
    ``locus_to_logged_tree``.  ``namefunc`` is accepted but is not used by
    this function.

    remove_single -- if true, strip single-child nodes from the coal tree
                     and subset ``coal_recon`` accordingly
    name_internal -- if true, passed to ``dlcoal.rename_nodes`` for both the
                     coal tree and the locus tree

    Returns ``(coal_tree, extra)`` where ``extra`` is a dict with the keys
    "locus_tree", "locus_recon", "locus_events", "coal_tree", "coal_recon"
    and "daughters", plus "full_locus_tree" when ``keep_extinct`` is true.
    """
    # keep drawing locus trees until one is large enough
    while True:
        locus_tree, locus_extras = sample_locus_tree_hem(
            stree, n, duprate, lossrate,
            freq, freqdup, freqloss, steptime,
            keep_extinct=keep_extinct)
        if len(locus_tree.leaves()) >= minsize:
            break

    if len(locus_tree.nodes) > 1:  # TODO: check 1 value
        # simulate coalescence inside the new (expanded) locus tree
        logged_locus_tree, logged_extras = locus_to_logged_tree(
            locus_tree, popsize=n)
        daughters, pops, log_recon = [logged_extras[k] for k in (0, 1, 2)]

        def _logged_name(lognamex):
            # name is the log_recon entry joined to the argument itself
            return log_recon[lognamex]+'_'+str(lognamex)

        # (locus_tree_copy is no longer passed here)
        coal_tree, coal_recon = dlcoal.sim.sample_multilocus_tree(
            logged_locus_tree, n=pops, daughters=daughters,
            namefunc=_logged_name)
        treelib.assert_tree(coal_tree)
    else:
        # total extinction: the coal tree is a bare root mapped to the
        # locus tree root
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()

    # clean up coal tree
    if remove_single:
        treelib.remove_single_children(coal_tree)
        phylo.subset_recon(coal_tree, coal_recon)

    if name_internal:
        for tree in (coal_tree, locus_tree):
            dlcoal.rename_nodes(tree, name_internal)

    # store extra information
    extra = dict(locus_tree=locus_tree,
                 locus_recon=locus_extras['recon'],
                 locus_events=locus_extras['events'],
                 coal_tree=coal_tree,
                 coal_recon=coal_recon,
                 daughters=daughters)

    # the simulator only provides the full tree when asked to keep
    # extinct lineages
    if keep_extinct:
        extra["full_locus_tree"] = locus_extras["full_locus_tree"]

    return coal_tree, extra