Exemple #1
0
def sample_dlcoal_no_ifix(stree, n, freq, duprate, lossrate, freqdup,
                          freqloss, forcetime, namefunc=lambda x: x,
                          remove_single=True, name_internal="n", minsize=0):
    """Sample a gene tree from the DLCoal model using the new simulator.

    A locus tree is drawn with sim_DLILS_gene_tree until it has at least
    `minsize` leaves.  Unless the locus tree has at most one node, it is
    converted with locus_to_logged_tree and a coalescent tree is sampled
    inside the resulting logged tree.

    Returns a pair (coal_tree, extra), where `extra` is a dict with the keys
    "locus_tree", "locus_recon", "locus_events", "coal_tree", "coal_recon"
    and "daughters".

    NOTE: `namefunc` is accepted but never used in this function; the naming
    function handed to the coalescent sampler is built from the logged
    reconciliation instead.
    """

    # keep drawing locus trees until one is large enough
    enough_leaves = False
    while not enough_leaves:
        locus_tree, locus_extras = sim_DLILS_gene_tree(
            stree, n, freq, duprate, lossrate, freqdup, freqloss, forcetime)
        enough_leaves = len(locus_tree.leaves()) >= minsize

    if len(locus_tree.nodes) > 1:  # TODO: check 1 value
        # build the new (expanded) locus tree and unpack its extras
        logged_locus_tree, logged_extras = locus_to_logged_tree(
            locus_tree, popsize=n)
        daughters, pops, log_recon = (
            logged_extras[0], logged_extras[1], logged_extras[2])

        def _logged_name(logged_name):
            # "<log_recon entry>_<logged name>"
            return log_recon[logged_name] + '_' + str(logged_name)

        # simulate coalescence within the logged locus tree
        coal_tree, coal_recon = dlcoal.sample_locus_coal_tree(
            logged_locus_tree, n=pops, daughters=daughters,
            namefunc=_logged_name)

        treelib.assert_tree(coal_tree)

        # clean up coal tree
        if remove_single:
            treelib.remove_single_children(coal_tree)
            phylo.subset_recon(coal_tree, coal_recon)
    else:
        # total extinction: a root-only coal tree mapped to the locus root
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()

    if name_internal:
        for tree in (coal_tree, locus_tree):
            dlcoal.rename_nodes(tree, name_internal)

    # bundle the extra information returned next to the coal tree
    ### TODO: update this now that we're using logged locus tree, new sample function
    extra = {
        "locus_tree": locus_tree,
        "locus_recon": locus_extras['recon'],
        "locus_events": locus_extras['events'],
        "coal_tree": coal_tree,
        "coal_recon": coal_recon,
        "daughters": daughters,
    }
    return coal_tree, extra
Exemple #2
0
def sample_dlcoal(stree, n, duprate, lossrate, namefunc=lambda x: x,
                  remove_single=True, name_internal="n",
                  minsize=0):
    """Sample a gene tree from the DLCoal model.

    A locus tree is drawn with birthdeath.sample_birth_death_gene_tree until
    it has at least `minsize` leaves.  Unless the locus tree has at most one
    node, one child of every duplication node is picked at random as the
    daughter and a coalescent tree is sampled with sample_multicoal_tree.

    Returns a pair (coal_tree, extra), where `extra` is a dict with the keys
    "locus_tree", "locus_recon", "locus_events", "coal_tree", "coal_recon"
    and "daughters".
    """

    # keep drawing locus trees until one is large enough
    enough_leaves = False
    while not enough_leaves:
        drawn = birthdeath.sample_birth_death_gene_tree(
            stree, duprate, lossrate)
        locus_tree, locus_recon, locus_events = drawn
        enough_leaves = len(locus_tree.leaves()) >= minsize

    if len(locus_tree.nodes) > 1:
        # choose daughter duplications: child 0 or 1 of each "dup" node
        daughters = {
            dup_node.children[random.randint(0, 1)]
            for dup_node in locus_tree
            if locus_events[dup_node] == "dup"
        }

        # simulate coalescence
        coal_tree, coal_recon = sample_multicoal_tree(
            locus_tree, n, daughters=daughters, namefunc=namefunc)

        # clean up coal tree
        if remove_single:
            treelib.remove_single_children(coal_tree)
            phylo.subset_recon(coal_tree, coal_recon)
    else:
        # total extinction: a root-only coal tree mapped to the locus root
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()

    if name_internal:
        for tree in (coal_tree, locus_tree):
            rename_nodes(tree, name_internal)

    # bundle the extra information returned next to the coal tree
    extra = {
        "locus_tree": locus_tree,
        "locus_recon": locus_recon,
        "locus_events": locus_events,
        "coal_tree": coal_tree,
        "coal_recon": coal_recon,
        "daughters": daughters,
    }
    return coal_tree, extra
Exemple #3
0
    def eval_proposal(self, proposal):
        """Compute the probability of a proposal.

        `proposal` is indexed by the keys "locus_tree", "locus_recon",
        "locus_events", "coal_recon" and "daughters".  Implied speciation
        nodes are added to the proposal's locus tree before the probability
        is evaluated (hence add_spec=False in the call below); afterwards
        single-child nodes are removed again and the locus reconciliation is
        subset to the remaining tree.

        Returns the value produced by prob_dlcoal_recon_topology.
        """
        locus_tree = proposal["locus_tree"]
        locus_recon = proposal["locus_recon"]
        locus_events = proposal["locus_events"]

        # expand the locus tree in place with implied speciation nodes
        phylo.add_implied_spec_nodes(locus_tree, self.stree,
                                     locus_recon, locus_events)

        # compute recon probability
        prob = prob_dlcoal_recon_topology(
            self.coal_tree, proposal["coal_recon"],
            locus_tree, locus_recon, locus_events,
            proposal["daughters"],
            self.stree, self.n,
            self.duprate, self.lossrate,
            self.pretime, self.premean,
            maxdoom=self.maxdoom,
            nsamples=self.nsamples,
            add_spec=False)

        # undo the expansion so the proposal is left without single-child nodes
        treelib.remove_single_children(locus_tree)
        phylo.subset_recon(locus_tree, locus_recon)

        return prob
Exemple #4
0
def sample_dlcoal(stree, n, duprate, lossrate,
                  leaf_counts=None,
                  namefunc=lambda x: x,
                  remove_single=True, name_internal="n",
                  minsize=0, reject=False):
    """Sample a gene tree from the DLCoal model.

    Parameters:
      stree         -- species tree passed to the birth-death locus sampler
      n             -- either a dict indexed by species node name, which is
                       re-keyed by locus node name before sampling, or any
                       other value, which is passed through unchanged
      duprate       -- passed to birthdeath.sample_birth_death_gene_tree
      lossrate      -- passed to birthdeath.sample_birth_death_gene_tree
      leaf_counts   -- either a dict indexed by species node name, which is
                       re-keyed by locus leaf name, or any other value
                       (including None), which is passed through unchanged
      namefunc      -- forwarded to the multilocus coalescent sampler
      remove_single -- if true, single-child nodes are removed from the coal
                       tree and coal_recon is subset accordingly
      name_internal -- if true-ish, passed to dlcoal.rename_nodes for both
                       the coal tree and the locus tree
      minsize       -- locus trees with fewer leaves than this are redrawn
      reject        -- if true, use sample_multilocus_tree_reject (slow
                       rejection sampling, for testing) instead of
                       sample_multilocus_tree

    Returns a pair (coal_tree, extra), where `extra` is a dict with the keys
    "locus_tree", "locus_recon", "locus_events", "coal_tree", "coal_recon"
    and "daughters".
    """

    # generate the locus tree, redrawing until it has enough leaves
    while True:
        # TODO: does this take a namefunc?
        locus_tree, locus_recon, locus_events = \
                    birthdeath.sample_birth_death_gene_tree(
                        stree, duprate, lossrate)
        if len(locus_tree.leaves()) >= minsize:
            break

    # if n is a dict, update it with gene names from locus tree
    # (.items() rather than .iteritems() so this runs on Python 2 and 3)
    if isinstance(n, dict):
        n2 = {}
        for node, snode in locus_recon.items():
            n2[node.name] = n[snode.name]
    else:
        n2 = n

    # if leaf_counts is a dict, update it with gene names from locus tree
    # TODO: how to handle copy number polymorphism?
    if isinstance(leaf_counts, dict):
        leaf_counts2 = {}
        for node in locus_tree.leaves():
            snode = locus_recon[node]
            leaf_counts2[node.name] = leaf_counts[snode.name]
    else:
        leaf_counts2 = leaf_counts

    if len(locus_tree.nodes) <= 1:
        # total extinction: a root-only coal tree mapped to the locus root
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()
    else:
        # simulate coalescence

        # choose daughter duplications: child 0 or 1 of each "dup" node
        daughters = set()
        for node in locus_tree:
            if locus_events[node] == "dup":
                daughters.add(node.children[random.randint(0, 1)])

        # both samplers are called with the same arguments; only the
        # implementation differs (rejection sampling is slow, for testing)
        if reject:
            sampler = sample_multilocus_tree_reject
        else:
            sampler = sample_multilocus_tree
        coal_tree, coal_recon = sampler(
            locus_tree, n2, leaf_counts=leaf_counts2,
            daughters=daughters, namefunc=namefunc)

        # clean up coal tree
        if remove_single:
            treelib.remove_single_children(coal_tree)
            phylo.subset_recon(coal_tree, coal_recon)

    if name_internal:
        dlcoal.rename_nodes(coal_tree, name_internal)
        dlcoal.rename_nodes(locus_tree, name_internal)

    # store extra information
    extra = {"locus_tree": locus_tree,
             "locus_recon": locus_recon,
             "locus_events": locus_events,
             "coal_tree": coal_tree,
             "coal_recon": coal_recon,
             "daughters": daughters}

    return coal_tree, extra
Exemple #5
0
def sample_dlcoal_hem(stree, n, duprate, lossrate,
                      freq, freqdup, freqloss, steptime,
                      namefunc=lambda x: x,
                      keep_extinct=False,
                      remove_single=True,
                      name_internal="n", minsize=0):
    """Sample a gene tree from the DLCoal model with hemiplasy.

    A locus tree is drawn with sample_locus_tree_hem until it has at least
    `minsize` leaves.  Unless the locus tree has at most one node, it is
    converted with locus_to_logged_tree and a coalescent tree is sampled
    inside the resulting logged tree.

    Returns a pair (coal_tree, extra), where `extra` is a dict with the keys
    "locus_tree", "locus_recon", "locus_events", "coal_tree", "coal_recon"
    and "daughters", plus "full_locus_tree" when `keep_extinct` is true.

    NOTE: `namefunc` is accepted but never used in this function; the naming
    function handed to the coalescent sampler is built from the logged
    reconciliation instead.
    """

    # keep drawing locus trees until one is large enough
    enough_leaves = False
    while not enough_leaves:
        locus_tree, locus_extras = sample_locus_tree_hem(
            stree, n, duprate, lossrate,
            freq, freqdup, freqloss, steptime,
            keep_extinct=keep_extinct)
        enough_leaves = len(locus_tree.leaves()) >= minsize

    if len(locus_tree.nodes) > 1:  # TODO: check 1 value
        # build the new (expanded) locus tree and unpack its extras
        logged_locus_tree, logged_extras = locus_to_logged_tree(
            locus_tree, popsize=n)
        daughters, pops, log_recon = (
            logged_extras[0], logged_extras[1], logged_extras[2])

        def _logged_name(logged_name):
            # "<log_recon entry>_<logged name>"
            return log_recon[logged_name] + '_' + str(logged_name)

        # simulate coalescence within the logged locus tree
        coal_tree, coal_recon = dlcoal.sim.sample_multilocus_tree(
            logged_locus_tree, n=pops, daughters=daughters,
            namefunc=_logged_name)

        treelib.assert_tree(coal_tree)

        # clean up coal tree
        if remove_single:
            treelib.remove_single_children(coal_tree)
            phylo.subset_recon(coal_tree, coal_recon)
    else:
        # total extinction: a root-only coal tree mapped to the locus root
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()

    if name_internal:
        for tree in (coal_tree, locus_tree):
            dlcoal.rename_nodes(tree, name_internal)

    # bundle the extra information returned next to the coal tree
    extra = {
        "locus_tree": locus_tree,
        "locus_recon": locus_extras['recon'],
        "locus_events": locus_extras['events'],
        "coal_tree": coal_tree,
        "coal_recon": coal_recon,
        "daughters": daughters,
    }

    # the full locus tree is only forwarded when it was requested
    if keep_extinct:
        extra["full_locus_tree"] = locus_extras["full_locus_tree"]

    return coal_tree, extra