예제 #1
0
    def test_top(self):
        """
        Compare topology histograms of the two bounded multicoal samplers.

        Draws 'nsamples' trees from coal.sample_bounded_multicoal_tree_reject
        (rejection sampling) and from coal.sample_bounded_multicoal_tree,
        tallies them by phylo.hash_tree, and asserts that the log counts of
        the two samplers have a correlation above 0.9.  A scatter plot of
        the log counts is saved as 'plot.png' in the output directory.
        """

        outdir = 'test/tmp/test_coal/BMC_test_top/'
        make_clean_dir(outdir)

        stree = treelib.parse_newick(
            "(((A:200, E:200):800, B:1000):500, (C:700, D:700):800);")
        n = 500
        T = 2000
        nsamples = 4000

        def identity(name):
            return name

        # topology hash -> [count, example tree, example recon]
        reject_hist = {}
        direct_hist = {}

        for _ in xrange(nsamples):
            # rejection sampler is always drawn first, then the direct one
            reject_tree, reject_recon = (
                coal.sample_bounded_multicoal_tree_reject(
                    stree, n, T, namefunc=identity))
            direct_tree, direct_recon = coal.sample_bounded_multicoal_tree(
                stree, n, T, namefunc=identity)

            reject_top = phylo.hash_tree(reject_tree)
            direct_top = phylo.hash_tree(direct_tree)

            # Count each topology under the sampler that produced it, and
            # register it (with a zero count) in the other histogram so that
            # both histograms always hold the same set of keys.
            reject_entry = reject_hist.setdefault(
                reject_top, [0, reject_tree, reject_recon])
            reject_entry[0] += 1
            reject_hist.setdefault(
                direct_top, [0, direct_tree, direct_recon])

            direct_entry = direct_hist.setdefault(
                direct_top, [0, direct_tree, direct_recon])
            direct_entry[0] += 1
            direct_hist.setdefault(
                reject_top, [0, reject_tree, reject_recon])

        # log counts of both samplers, listed in the same topology order
        topologies = list(reject_hist)
        x = [safelog(reject_hist[top][0], default=0) for top in topologies]
        y = [safelog(direct_hist[top][0], default=0) for top in topologies]

        self.assertTrue(stats.corr(x, y) > .9)

        # scatter plot of the log counts plus a y = x reference line
        lo = min(x)
        hi = max(x)
        plot = Gnuplot()
        plot.enableOutput(False)
        plot.plot(x, y)
        plot.plot([lo, hi], [lo, hi], style="lines")
        plot.enableOutput(True)
        plot.save(outdir + 'plot.png')
예제 #2
0
    def walk(node):
        """
        Fill in 'doomtable' for 'node' and for every node below it.

        A leaf is assigned -util.INF.  An internal node is assigned
        util.safelog (base e, default -util.INF) of the product, over its
        children, of

            sum_{d=0..maxdoom} prob_birth_death1(d, child.dist, birth, death)
                               * exp(doomtable[child]) ** d

        Entries are indexed through 'nodelookup'.
        """
        if node.is_leaf():
            doomtable[nodelookup[node]] = -util.INF
            return

        # children must be filled in before their parent can be computed
        for child in node.children:
            walk(child)

        index = nodelookup[node]
        product = 1.0
        for child in node:
            product *= sum(
                birthdeath.prob_birth_death1(ndoom, child.dist, birth, death)
                * exp(doomtable[nodelookup[child]]) ** ndoom
                for ndoom in range(maxdoom + 1))
        doomtable[index] = util.safelog(product, e, -util.INF)
예제 #3
0
    def walk(node):
        """
        Recursively compute the 'doomtable' entry of 'node' (post-order).

        Leaves get -util.INF.  Every other node gets the natural-log
        (util.safelog with base e, falling back to -util.INF) of the product
        of one factor per child; see 'child_factor' below for the factor.
        """
        if node.is_leaf():
            doomtable[nodelookup[node]] = -util.INF
            return

        # descend first so that every child entry exists before it is read
        for child in node.children:
            walk(child)

        def child_factor(child):
            # sum over d = 0..maxdoom of
            #   prob_birth_death1(d, child.dist, birth, death)
            #   * exp(doomtable entry of child) ** d
            return sum(
                birthdeath.prob_birth_death1(d, child.dist, birth, death) *
                exp(doomtable[nodelookup[child]]) ** d
                for d in range(maxdoom + 1))

        slot = nodelookup[node]
        prob = 1.0
        for child in node:
            prob *= child_factor(child)
        doomtable[slot] = util.safelog(prob, e, -util.INF)
예제 #4
0
def prob_locus_coal_recon_topology_samples(
        coal_tree, coal_recon,
        locus_tree, locus_recon, locus_events, popsizes,
        stree, stimes,
        daughters, duprate, lossrate, nsamples,
        pretime=None, premean=None):
    """
    Estimate the log probability of a coalescent tree topology by sampling
    duplication times on the locus tree.

    When 'dlcoalc' is true the computation is delegated to
    coal.prob_locus_coal_recon_topology_samples with the same arguments.
    Otherwise 'nsamples' sets of duplication times are drawn with
    duploss.sample_dup_times; for each one the locus tree dists are reset
    and exp(prob_locus_coal_recon_topology(...)) is accumulated.  The
    result is util.safelog of the average.

    Note: 'locus_tree' is modified in both cases (its dists are reset from
    the sampled times).
    """

    def resample_locus_dists():
        # draw duplication times and apply them to the locus tree
        times = duploss.sample_dup_times(
            locus_tree, stree, locus_recon, duprate, lossrate, pretime,
            premean,
            events=locus_events)
        treelib.set_dists_from_timestamps(locus_tree, times)

    if not dlcoalc:
        # python backup
        total = 0.0
        for _ in xrange(nsamples):
            resample_locus_dists()

            # coal topology probability for this sample (log scale)
            sample_lnp = prob_locus_coal_recon_topology(
                coal_tree, coal_recon, locus_tree, popsizes, daughters)
            total += exp(sample_lnp)

        return util.safelog(total / nsamples)

    # sample some reasonable branch lengths just for logging purposes
    resample_locus_dists()

    # use C code
    return coal.prob_locus_coal_recon_topology_samples(
        coal_tree, coal_recon,
        locus_tree, locus_recon, locus_events, popsizes,
        stree, stimes,
        daughters, duprate, lossrate, nsamples, pretime, premean)
예제 #5
0
def prob_coal_recon_topology(tree, recon, locus_tree, n, daughters):
    """
    Returns the log probability of a reconciled gene tree ('tree', 'recon')
    from the coalescent model given a locus tree 'locus_tree',
    population sizes 'n', and daughters set 'daughters'

    tree       -- coalescent (gene) tree
    recon      -- dict mapping nodes of 'tree' to nodes of 'locus_tree'
    locus_tree -- locus tree; its branch lengths (dist) are used
    n          -- population sizes, expanded with coal.init_popsizes
    daughters  -- container of locus tree nodes (tested with 'in'); for
                  these branches the coalescent-count term is skipped and
                  exactly one lineage must leave the branch

    Raises an Exception if 'recon' contains a node that is not in 'tree'.
    """

    # init population sizes
    popsizes = coal.init_popsizes(locus_tree, n)

    # log probability
    lnp = 0.0

    nodes = set(tree.postorder())

    # init reverse reconciliation (locus node -> list of gene nodes)
    rev_recon = {}
    for node, snode in recon.iteritems():
        if node not in nodes:
            raise Exception("node '%s' not in tree" % node.name)
        rev_recon.setdefault(snode, []).append(node)

    # init lineage counts: leaves start with their gene leaves, internal
    # branches start empty and are filled in from their children below
    lineages = {}
    for snode in locus_tree:
        if snode.is_leaf():
            lineages[snode] = len([x for x in rev_recon[snode]
                                   if x.is_leaf()])
        else:
            lineages[snode] = 0

    # iterate through locus tree branches (children before parents)
    for snode in locus_tree.postorder():
        if snode.parent:
            # non root branch
            u = lineages[snode]

            # subtract number of coals in branch
            v = u - len([x for x in rev_recon.get(snode, [])
                         if not x.is_leaf()])
            lineages[snode.parent] += v

            if snode not in daughters:
                try:
                    lnp += util.safelog(
                        coal.prob_coal_counts(u, v, snode.dist,
                                              popsizes[snode.name]))
                except Exception:
                    # dump the offending arguments, then let the error
                    # propagate unchanged
                    print("%s %s %s %s" % (u, v, snode.dist,
                                           popsizes[snode.name]))
                    raise
            else:
                assert v == 1
            lnp -= util.safelog(coal.num_labeled_histories(u, v))
        else:
            # normal coalescent
            u = lineages[snode]
            lnp -= util.safelog(coal.num_labeled_histories(u, 1))

    # correct for topologies H(T)
    # find connected subtrees that are in the same locus tree branch
    subtrees = []
    subtree_root = {}
    for node in tree.preorder():
        if node.parent and recon[node] == recon[node.parent]:
            subtree_root[node] = subtree_root[node.parent]
        else:
            subtrees.append(node)
            subtree_root[node] = node

    # find leaves through recursion
    def walk(node, subtree, leaves):
        if node.is_leaf():
            leaves.append(node)
        elif (subtree_root[node.children[0]] != subtree and
              subtree_root[node.children[1]] != subtree):
            # both children belong to other subtrees, so this node is a
            # leaf of the current subtree
            leaves.append(node)
        else:
            for child in node.children:
                walk(child, subtree, leaves)

    # apply correction for each subtree
    for subtree in subtrees:
        # Walk the subtree exactly once.  The previous code repeated the
        # same walk once per child of the subtree root, which appended
        # every leaf twice and so let two-leaf subtrees pass the
        # 'more than two leaves' test below.
        # NOTE(review): a two-leaf subtree has a single internal node and
        # should therefore contribute log(1) = 0 -- confirm against
        # birthdeath.num_topology_histories.
        leaves = []
        walk(subtree, subtree, leaves)
        if len(leaves) > 2:
            lnp += util.safelog(
                birthdeath.num_topology_histories(subtree, leaves))

    return lnp
예제 #6
0
def prob_dlcoal_recon_topology(coal_tree, coal_recon,
                               locus_tree, locus_recon, locus_events,
                               daughters,
                               stree, n, duprate, lossrate,
                               pretime=None, premean=None,
                               maxdoom=20, nsamples=100,
                               add_spec=True):
    """
    Probability of a reconciled gene tree in the DLCoal model.

    coal_tree    -- coalescent tree
    coal_recon   -- reconciliation of coalescent tree to locus tree
    locus_tree   -- locus tree (has dup-loss)
    locus_recon  -- reconciliation of locus tree to species tree
    locus_events -- events dict for locus tree
    daughters    -- daughters set, passed to prob_coal_recon_topology
    stree        -- species tree
    n            -- population sizes in species tree
    duprate      -- duplication rate
    lossrate     -- loss rate
    maxdoom      -- passed to spidir.calc_birth_death_prior
    nsamples     -- number of duplication-time samples to average over
    add_spec     -- if true, phylo.add_implied_spec_nodes is called on the
                    locus tree before anything else is computed from it

    You must also specify one of the following
    pretime      -- starting time before species tree
    premean      -- mean starting time before species tree

    The return value is the sum of three log terms: the birth-death prior
    of the locus tree, log(.5) per duplication, and util.safelog of the
    average of exp(prob_coal_recon_topology(...)) over the samples.

    Note: locus tree must have implied speciation nodes present
    """

    # duplications are counted before any implied speciations are added
    ndups = phylo.count_dup(locus_tree, locus_events)

    # ensure implicit speciations are present
    if add_spec:
        phylo.add_implied_spec_nodes(locus_tree, stree,
                                     locus_recon, locus_events)

    # init popsizes for locus tree: each locus node takes the population
    # size of the species node it is reconciled to
    species_popsizes = coal.init_popsizes(stree, n)
    popsizes = dict(
        (lnode.name, species_popsizes[locus_recon[lnode].name])
        for lnode in locus_tree)

    # duploss probability
    util.tic("top")
    dl_prob = spidir.calc_birth_death_prior(locus_tree, stree, locus_recon,
                                            duprate, lossrate,
                                            maxdoom=maxdoom)
    util.toc()

    # daughters probability
    d_prob = ndups * log(.5)

    # integrate over duplication times using sampling
    total = 0.0
    for _ in xrange(nsamples):
        # sample duplication times
        locus_times = spidir.topology_prior.sample_dup_times(
            locus_tree, stree, locus_recon, duprate, lossrate, pretime,
            premean,
            events=locus_events)
        assert len(locus_times) == len(locus_tree.nodes), (
            len(locus_times), len(locus_tree.nodes))
        treelib.set_dists_from_timestamps(locus_tree, locus_times)

        # coal topology probability
        sample_lnp = prob_coal_recon_topology(coal_tree, coal_recon,
                                              locus_tree, popsizes, daughters)

        total += exp(sample_lnp)
        # NOTE(review): the per-sample value is written to stdout; this
        # looks like leftover debugging output -- confirm before removing
        print(sample_lnp)

    return dl_prob + d_prob + util.safelog(total / nsamples)