Example #1
0
def test_dlcoal_tree(locus_tree, n, daughters, nsamples):
    """Compare sampled coal-tree topology frequencies with computed probabilities.

    Draws `nsamples` trees with dlcoal.sample_locus_coal_tree, groups them
    by their phylo.hash_tree topology, and tabulates for every topology
    the observed fraction of samples next to
    exp(dlcoal.prob_locus_coal_recon_topology(...)) evaluated on the first
    tree/reconciliation sampled with that topology.

    Arguments:
      locus_tree -- locus tree handed to both dlcoal calls
      n          -- handed to both dlcoal calls unchanged (presumably the
                    population size(s); confirm against dlcoal)
      daughters  -- handed to both dlcoal calls unchanged
      nsamples   -- number of trees to sample

    Returns (tab, tops):
      tab  -- Table with columns "top", "simple_top" (hash after
              treelib.remove_single_children on a copy), "percent"
              (count / nsamples, i.e. a fraction) and "prob", sorted by
              decreasing "prob"
      tops -- dict mapping topology hash -> [count, coal_tree, coal_recon]
              where the tree and recon are those of the first sample seen
    """
    tops = {}

    # Progress is reported roughly 100 times per run.  The step is clamped
    # to 1 because nsamples // 100 is 0 for nsamples < 100, which made the
    # modulus below raise ZeroDivisionError.
    progress_step = max(nsamples // 100, 1)

    for i in xrange(nsamples):
        if i % progress_step == 0:
            # single-argument form prints the same with or without
            # print_function
            print(i)

        coal_tree, coal_recon = dlcoal.sample_locus_coal_tree(
            locus_tree, n, leaf_counts=None,
            daughters=daughters,
            namefunc=lambda x: x)

        # count the topology; the first tree/recon seen is kept as the
        # representative used for the probability computation below
        top = phylo.hash_tree(coal_tree)
        tops.setdefault(top, [0, coal_tree, coal_recon])[0] += 1

    tab = Table(headers=["top", "simple_top", "percent", "prob"])
    for top, (num, tree, recon) in tops.items():
        # hash a copy with single-child nodes removed so the stored
        # representative tree itself is left untouched
        tree2 = tree.copy()
        treelib.remove_single_children(tree2)
        tab.add(top=top,
                simple_top=phylo.hash_tree(tree2),
                percent=num/float(nsamples),
                prob=exp(dlcoal.prob_locus_coal_recon_topology(
                    tree, recon, locus_tree, n, daughters)))
    tab.sort(col="prob", reverse=True)

    return tab, tops
Example #2
0
def sample_dlcoal_no_ifix(stree, n, freq, duprate, lossrate, freqdup, freqloss,
                          forcetime, namefunc=lambda x: x,
                          remove_single=True, name_internal="n", minsize=0):
    """Draw one coal (gene) tree using the new locus-tree simulator.

    A locus tree is simulated with sim_DLILS_gene_tree, repeating until it
    has at least `minsize` leaves.  If that locus tree has more than one
    node it is expanded with locus_to_logged_tree and a coal tree is
    sampled inside the expanded tree; otherwise a coal tree consisting of
    a bare root is returned.

    Arguments:
      stree .. forcetime -- forwarded positionally to sim_DLILS_gene_tree;
                            `n` is also passed as `popsize` to
                            locus_to_logged_tree
      namefunc           -- accepted but not used by this body: coal
                            leaves are always named
                            log_recon[name] + '_' + str(name)
      remove_single      -- if true, remove single-child nodes from the
                            sampled coal tree and subset its recon
                            (not applied in the single-root case)
      name_internal      -- if true-ish, passed to dlcoal.rename_nodes for
                            both the coal tree and the locus tree
      minsize            -- minimum number of locus-tree leaves to accept

    Returns (coal_tree, extra) where `extra` is a dict with keys
    "locus_tree", "locus_recon", "locus_events", "coal_tree",
    "coal_recon" and "daughters".
    """
    # resimulate the locus tree until it is large enough
    while True:
        locus_tree, locus_extras = sim_DLILS_gene_tree(
            stree, n, freq, duprate, lossrate, freqdup, freqloss, forcetime)
        if len(locus_tree.leaves()) >= minsize:
            break

    if len(locus_tree.nodes) > 1:  # TODO: check 1 value
        # coalescence is simulated in the expanded ("logged") locus tree
        logged_locus_tree, logged_extras = locus_to_logged_tree(
            locus_tree, popsize=n)
        daughters, pops, log_recon = (
            logged_extras[0], logged_extras[1], logged_extras[2])

        def logged_leaf_name(lognamex):
            # prefix each logged-tree name with its log_recon entry
            return log_recon[lognamex] + '_' + str(lognamex)

        coal_tree, coal_recon = dlcoal.sample_locus_coal_tree(
            logged_locus_tree, n=pops, daughters=daughters,
            namefunc=logged_leaf_name)
        treelib.assert_tree(coal_tree)

        # tidy the coal tree and keep its recon in step with it
        if remove_single:
            treelib.remove_single_children(coal_tree)
            phylo.subset_recon(coal_tree, coal_recon)
    else:
        # total extinction: the coal tree is just a root mapped to the
        # locus root
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()

    if name_internal:
        for tree in (coal_tree, locus_tree):
            dlcoal.rename_nodes(tree, name_internal)

    # bundle the by-products of the simulation
    ### TODO: update this now that we're using logged locus tree, new sample function
    extra = dict(locus_tree=locus_tree,
                 locus_recon=locus_extras['recon'],
                 locus_events=locus_extras['events'],
                 coal_tree=coal_tree,
                 coal_recon=coal_recon,
                 daughters=daughters)

    return coal_tree, extra
Example #3
0
def debug_test3():
    """Debug driver for the locus-tree -> logged-tree -> coal-tree pipeline.

    Reads examples/nbin.stree, simulates a locus tree on it with
    sim_DLILS_gene_tree, expands it with locus_to_logged_tree, samples a
    coal tree with dlcoal.sample_locus_coal_tree, and checks the result
    with treelib.assert_tree.  Node listings are printed along the way;
    on an AssertionError extra diagnostics about the coal tree's node
    names are printed instead of re-raising.
    """
    stree = treelib.read_tree('examples/nbin.stree') # run from ../ of this directory
    # rescale every branch length of the species tree
    for node in stree:
        node.dist *= 1e7 # gen per myr
    # simulation parameters, in the argument order sim_DLILS_gene_tree is
    # called with below; dr and lr sit in the positions that
    # sample_dlcoal_no_ifix names duprate and lossrate
    popsize = 2e7
    freq = 1e0
    dr = .0000012 / 1e7 #.0012/1e7
    lr = .0000011 / 1e7 #.0006/1e7
    freqdup = freqloss = .05
    forcetime = 1e7
    
    # dump the (rescaled) species tree: name, branch length, child count
    for node in stree:
        print node.name, node.dist, len(node.children)
    print
    
    locus_tree, locus_extras = sim_DLILS_gene_tree(stree, popsize, freq, \
                                                        dr, lr, \
                                                        freqdup, freqloss, \
                                                        forcetime)
    
    # dump the simulated locus tree in the same format
    for node in locus_tree:
        print node.name, node.dist, len(node.children)
    print
    
    # logged_extras is indexed positionally: [0] daughters, [1] value passed
    # as n to the sampler, [2] mapping used below to build coal leaf names
    logged_locus_tree, logged_extras = locus_to_logged_tree(locus_tree, popsize)
    daughters = logged_extras[0]
    pops = logged_extras[1]
    
    coal_tree, coal_recon = dlcoal.sample_locus_coal_tree(logged_locus_tree,
                                    n=pops, daughters=daughters,
                                    namefunc=lambda x: logged_extras[2][x] + '_' + str(x))
    
    #begin debug
    print coal_tree.leaf_names()
    try:
#        print set(coal_tree) - set(coal_tree.postorder())
        treelib.assert_tree(coal_tree)
    except AssertionError:
        print 'assertion error thrown on coal_tree being a proper tree'
        from rasmus import util
        # presumably hist_dict maps each node name to its number of
        # occurrences in the postorder walk -- confirm against rasmus.util
        hd= util.hist_dict(x.name for x in coal_tree.postorder())
        # print every key whose value exceeds 1 on one line; other keys
        # contribute an empty string (so only a separating space)
        for key in hd.keys():
            print key if hd[key]>1 else '',
        print
        # number of nodes in coal_tree.nodes minus the number visited by
        # the postorder walk
        print len(coal_tree.nodes) - len(list(coal_tree.postorder()))