def sample_dlcoal_no_ifix(stree, n, freq, duprate, lossrate, freqdup, freqloss,
                          forcetime, namefunc=lambda x: x,
                          remove_single=True, name_internal="n", minsize=0):
    """Sample a gene tree from the DLCoal model using the new simulator.

    A locus tree is drawn with ``sim_DLILS_gene_tree`` and redrawn until it
    has at least ``minsize`` leaves.  Unless the locus tree has at most one
    node, it is expanded with ``locus_to_logged_tree`` and a coalescent tree
    is sampled inside the expanded tree.

    Args:
        stree: species tree, forwarded to ``sim_DLILS_gene_tree``.
        n: forwarded to ``sim_DLILS_gene_tree`` and also used as ``popsize``
            when building the logged locus tree.
        freq, duprate, lossrate, freqdup, freqloss, forcetime: forwarded
            unchanged to ``sim_DLILS_gene_tree``.
        namefunc: accepted but never used by this function; a naming function
            built from the logged tree's recon is passed to the coalescent
            sampler instead.
        remove_single: if true, single-child nodes are removed from the
            sampled coalescent tree and its recon is subset accordingly.
        name_internal: if truthy, passed to ``dlcoal.rename_nodes`` for both
            the coalescent tree and the locus tree.
        minsize: minimum number of locus-tree leaves required to accept a
            draw.

    Returns:
        A pair ``(coal_tree, extra)``; ``extra`` is a dict with the keys
        ``locus_tree``, ``locus_recon``, ``locus_events``, ``coal_tree``,
        ``coal_recon`` and ``daughters``.
    """
    # Keep drawing locus trees until one is large enough.
    accepted = False
    while not accepted:
        locus_tree, locus_extras = sim_DLILS_gene_tree(
            stree, n, freq, duprate, lossrate, freqdup, freqloss, forcetime)
        accepted = len(locus_tree.leaves()) >= minsize

    if len(locus_tree.nodes) > 1:  # TODO: check 1 value
        # Simulate coalescence inside the expanded ("logged") locus tree.
        logged_locus_tree, logged_extras = locus_to_logged_tree(
            locus_tree, popsize=n)
        daughters, pops, log_recon = (
            logged_extras[0], logged_extras[1], logged_extras[2])

        def logged_name(lognamex):
            # Name is the recon entry for the logged name, '_', then the name.
            return log_recon[lognamex] + '_' + str(lognamex)

        coal_tree, coal_recon = dlcoal.sample_locus_coal_tree(
            logged_locus_tree, n=pops, daughters=daughters,
            namefunc=logged_name)
        treelib.assert_tree(coal_tree)

        # Clean up the coalescent tree.
        if remove_single:
            treelib.remove_single_children(coal_tree)
            phylo.subset_recon(coal_tree, coal_recon)
    else:
        # Total extinction: the coalescent tree is a lone root mapped onto
        # the locus-tree root.
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()

    if name_internal:
        for tree in (coal_tree, locus_tree):
            dlcoal.rename_nodes(tree, name_internal)

    # Store extra information.
    # TODO: update this now that we're using logged locus tree, new sample
    # function.
    extra = {
        "locus_tree": locus_tree,
        "locus_recon": locus_extras['recon'],
        "locus_events": locus_extras['events'],
        "coal_tree": coal_tree,
        "coal_recon": coal_recon,
        "daughters": daughters,
    }
    return coal_tree, extra
def sample_dlcoal(stree, n, duprate, lossrate, namefunc=lambda x: x,
                  remove_single=True, name_internal="n", minsize=0):
    """Sample a gene tree from the DLCoal model.

    A locus tree is drawn with ``birthdeath.sample_birth_death_gene_tree``
    and redrawn until it has at least ``minsize`` leaves.  Unless the locus
    tree has at most one node, one child of every duplication node is picked
    at random as the daughter and a coalescent tree is sampled with
    ``sample_multicoal_tree``.

    Args:
        stree: species tree, forwarded to the birth-death sampler.
        n: forwarded to ``sample_multicoal_tree``.
        duprate, lossrate: forwarded to the birth-death sampler.
        namefunc: forwarded to ``sample_multicoal_tree``.
        remove_single: if true, single-child nodes are removed from the
            sampled coalescent tree and its recon is subset accordingly.
        name_internal: if truthy, passed to ``rename_nodes`` for both the
            coalescent tree and the locus tree.
        minsize: minimum number of locus-tree leaves required to accept a
            draw.

    Returns:
        A pair ``(coal_tree, extra)``; ``extra`` is a dict with the keys
        ``locus_tree``, ``locus_recon``, ``locus_events``, ``coal_tree``,
        ``coal_recon`` and ``daughters``.
    """
    # Keep drawing locus trees until one is large enough.
    accepted = False
    while not accepted:
        locus_tree, locus_recon, locus_events = (
            birthdeath.sample_birth_death_gene_tree(stree, duprate, lossrate))
        accepted = len(locus_tree.leaves()) >= minsize

    if len(locus_tree.nodes) > 1:
        # Choose daughter duplications: one random child (index 0 or 1) of
        # each node whose event is "dup".
        daughters = {
            dup_node.children[random.randint(0, 1)]
            for dup_node in locus_tree
            if locus_events[dup_node] == "dup"
        }

        # Simulate coalescence.
        coal_tree, coal_recon = sample_multicoal_tree(
            locus_tree, n, daughters=daughters, namefunc=namefunc)

        # Clean up the coalescent tree.
        if remove_single:
            treelib.remove_single_children(coal_tree)
            phylo.subset_recon(coal_tree, coal_recon)
    else:
        # Total extinction: the coalescent tree is a lone root mapped onto
        # the locus-tree root.
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()

    if name_internal:
        for tree in (coal_tree, locus_tree):
            rename_nodes(tree, name_internal)

    # Store extra information.
    extra = {
        "locus_tree": locus_tree,
        "locus_recon": locus_recon,
        "locus_events": locus_events,
        "coal_tree": coal_tree,
        "coal_recon": coal_recon,
        "daughters": daughters,
    }
    return coal_tree, extra
def eval_proposal(self, proposal):
    """Compute probability of proposal.

    Calls ``phylo.add_implied_spec_nodes`` on the proposal's locus tree,
    evaluates ``prob_dlcoal_recon_topology`` for it, and afterwards removes
    single-child nodes from the locus tree and subsets its recon again.

    Args:
        proposal: mapping with the keys ``locus_tree``, ``locus_recon``,
            ``locus_events``, ``coal_recon`` and ``daughters``.  The locus
            tree, recon and events are modified in place during the call.

    Returns:
        The value returned by ``prob_dlcoal_recon_topology``.
    """
    locus_tree = proposal["locus_tree"]
    locus_recon = proposal["locus_recon"]
    locus_events = proposal["locus_events"]

    # Add implied speciation nodes before computing the recon probability.
    phylo.add_implied_spec_nodes(
        locus_tree, self.stree, locus_recon, locus_events)

    # add_spec=False: presumably because the implied nodes were already
    # added just above -- confirm against prob_dlcoal_recon_topology.
    prob = prob_dlcoal_recon_topology(
        self.coal_tree,
        proposal["coal_recon"],
        locus_tree,
        locus_recon,
        locus_events,
        proposal["daughters"],
        self.stree,
        self.n,
        self.duprate,
        self.lossrate,
        self.pretime,
        self.premean,
        maxdoom=self.maxdoom,
        nsamples=self.nsamples,
        add_spec=False)

    # Strip the single-child nodes again and drop their recon entries.
    treelib.remove_single_children(locus_tree)
    phylo.subset_recon(locus_tree, locus_recon)

    return prob
def sample_dlcoal(stree, n, duprate, lossrate,
                  leaf_counts=None, namefunc=lambda x: x,
                  remove_single=True, name_internal="n",
                  minsize=0, reject=False):
    """Sample a gene tree from the DLCoal model.

    A locus tree is drawn with ``birthdeath.sample_birth_death_gene_tree``
    and redrawn until it has at least ``minsize`` leaves.  Unless the locus
    tree has at most one node, one child of every duplication node is picked
    at random as the daughter and a coalescent tree is sampled inside the
    locus tree.

    Args:
        stree: species tree, forwarded to the birth-death sampler.
        n: either a single value passed straight to the coalescent sampler,
            or a dict keyed by species-node name; a dict is re-keyed by
            locus-node name through ``locus_recon``.
        duprate, lossrate: forwarded to the birth-death sampler.
        leaf_counts: either passed straight to the coalescent sampler, or a
            dict keyed by species-node name; a dict is re-keyed by the names
            of the locus-tree leaves through ``locus_recon``.
        namefunc: forwarded to the coalescent sampler.
        remove_single: if true, single-child nodes are removed from the
            sampled coalescent tree and its recon is subset accordingly.
        name_internal: if truthy, passed to ``dlcoal.rename_nodes`` for both
            the coalescent tree and the locus tree.
        minsize: minimum number of locus-tree leaves required to accept a
            draw.
        reject: if true, use ``sample_multilocus_tree_reject`` (slow
            rejection sampling, for testing) instead of
            ``sample_multilocus_tree``.

    Returns:
        A pair ``(coal_tree, extra)``; ``extra`` is a dict with the keys
        ``locus_tree``, ``locus_recon``, ``locus_events``, ``coal_tree``,
        ``coal_recon`` and ``daughters``.
    """
    # Generate the locus tree, redrawing until it is large enough.
    while True:
        # TODO: does this take a namefunc?
        locus_tree, locus_recon, locus_events = \
            birthdeath.sample_birth_death_gene_tree(stree, duprate, lossrate)
        if len(locus_tree.leaves()) >= minsize:
            break

    # If n is a dict, re-key it with gene names from the locus tree.
    # .items() is used rather than .iteritems() so this also runs on
    # Python 3, where dict.iteritems() no longer exists.
    if isinstance(n, dict):
        n2 = {node.name: n[snode.name]
              for node, snode in locus_recon.items()}
    else:
        n2 = n

    # If leaf_counts is a dict, re-key it with gene names from the locus tree.
    # TODO: how to handle copy number polymorphism?
    if isinstance(leaf_counts, dict):
        leaf_counts2 = {leaf.name: leaf_counts[locus_recon[leaf].name]
                        for leaf in locus_tree.leaves()}
    else:
        leaf_counts2 = leaf_counts

    if len(locus_tree.nodes) <= 1:
        # Total extinction: the coalescent tree is a lone root mapped onto
        # the locus-tree root.
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()
    else:
        # Choose daughter duplications: one random child (index 0 or 1) of
        # each node whose event is "dup".
        daughters = set()
        for node in locus_tree:
            if locus_events[node] == "dup":
                daughters.add(node.children[random.randint(0, 1)])

        # Simulate coalescence; both samplers take the same arguments, so
        # only the choice of sampler depends on `reject`.
        if reject:
            # use slow rejection sampling (for testing)
            sampler = sample_multilocus_tree_reject
        else:
            sampler = sample_multilocus_tree
        coal_tree, coal_recon = sampler(
            locus_tree, n2, leaf_counts=leaf_counts2,
            daughters=daughters, namefunc=namefunc)

        # Clean up the coalescent tree.
        if remove_single:
            treelib.remove_single_children(coal_tree)
            phylo.subset_recon(coal_tree, coal_recon)

    if name_internal:
        dlcoal.rename_nodes(coal_tree, name_internal)
        dlcoal.rename_nodes(locus_tree, name_internal)

    # Store extra information.
    extra = {"locus_tree": locus_tree,
             "locus_recon": locus_recon,
             "locus_events": locus_events,
             "coal_tree": coal_tree,
             "coal_recon": coal_recon,
             "daughters": daughters}

    return coal_tree, extra
def sample_dlcoal_hem(stree, n, duprate, lossrate,
                      freq, freqdup, freqloss, steptime,
                      namefunc=lambda x: x, keep_extinct=False,
                      remove_single=True, name_internal="n", minsize=0):
    """Sample a gene tree from the DLCoal model with hemiplasy.

    A locus tree is drawn with ``sample_locus_tree_hem`` and redrawn until it
    has at least ``minsize`` leaves.  Unless the locus tree has at most one
    node, it is expanded with ``locus_to_logged_tree`` and a coalescent tree
    is sampled inside the expanded tree.

    Args:
        stree: species tree, forwarded to ``sample_locus_tree_hem``.
        n: forwarded to ``sample_locus_tree_hem`` and also used as
            ``popsize`` when building the logged locus tree.
        duprate, lossrate, freq, freqdup, freqloss, steptime: forwarded
            unchanged to ``sample_locus_tree_hem``.
        namefunc: accepted but never used by this function; a naming function
            built from the logged tree's recon is passed to the coalescent
            sampler instead.
        keep_extinct: forwarded to ``sample_locus_tree_hem``; when truthy the
            returned ``extra`` also carries ``full_locus_tree``.
        remove_single: if true, single-child nodes are removed from the
            sampled coalescent tree and its recon is subset accordingly.
        name_internal: if truthy, passed to ``dlcoal.rename_nodes`` for both
            the coalescent tree and the locus tree.
        minsize: minimum number of locus-tree leaves required to accept a
            draw.

    Returns:
        A pair ``(coal_tree, extra)``; ``extra`` is a dict with the keys
        ``locus_tree``, ``locus_recon``, ``locus_events``, ``coal_tree``,
        ``coal_recon`` and ``daughters``, plus ``full_locus_tree`` when
        ``keep_extinct`` is truthy.
    """
    # Keep drawing locus trees until one is large enough.
    accepted = False
    while not accepted:
        locus_tree, locus_extras = sample_locus_tree_hem(
            stree, n, duprate, lossrate,
            freq, freqdup, freqloss, steptime,
            keep_extinct=keep_extinct)
        accepted = len(locus_tree.leaves()) >= minsize

    if len(locus_tree.nodes) > 1:  # TODO: check 1 value
        # Simulate coalescence inside the expanded ("logged") locus tree.
        logged_locus_tree, logged_extras = locus_to_logged_tree(
            locus_tree, popsize=n)
        daughters, pops, log_recon = (
            logged_extras[0], logged_extras[1], logged_extras[2])

        def logged_name(lognamex):
            # Name is the recon entry for the logged name, '_', then the name.
            return log_recon[lognamex]+'_'+str(lognamex)

        coal_tree, coal_recon = dlcoal.sim.sample_multilocus_tree(
            logged_locus_tree, n=pops, daughters=daughters,
            namefunc=logged_name)
        treelib.assert_tree(coal_tree)

        # Clean up the coalescent tree.
        if remove_single:
            treelib.remove_single_children(coal_tree)
            phylo.subset_recon(coal_tree, coal_recon)
    else:
        # Total extinction: the coalescent tree is a lone root mapped onto
        # the locus-tree root.
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()

    if name_internal:
        for tree in (coal_tree, locus_tree):
            dlcoal.rename_nodes(tree, name_internal)

    # Store extra information.
    extra = {
        "locus_tree": locus_tree,
        "locus_recon": locus_extras['recon'],
        "locus_events": locus_extras['events'],
        "coal_tree": coal_tree,
        "coal_recon": coal_recon,
        "daughters": daughters,
    }
    if keep_extinct:
        extra["full_locus_tree"] = locus_extras["full_locus_tree"]

    return coal_tree, extra