예제 #1
0
파일: cluster.py 프로젝트: xtmgah/SVclone
def calc_lik_with_clonal(combo, si, di, phi_i, pi, ni):
    """Binomial log-likelihoods for each copy-number combo, evaluated both
    at the supplied phi (phi_i) and at a clonal phi of 1.

    Returns an array laid out as [[pvs, lls], [pvs_cl, lls_cl]].
    """
    def _pvs_lls(phi):
        # Expected variant probabilities for every combo at this phi, and
        # their binomial log-likelihoods. The tiny constant is a precision
        # fudge factor to avoid zero-probability errors when pv = 1.
        pv = np.array([get_pv(phi, c, pi, ni) for c in combo])
        ll = np.array([pm.binomial_like(si, di, p) for p in pv]) - 0.00000001
        return pv, ll

    pvs, lls = _pvs_lls(phi_i)
    pvs_cl, lls_cl = _pvs_lls(np.array(1))
    return np.array([[pvs, lls], [pvs_cl, lls_cl]])
예제 #2
0
    def test_withzeros(self):
        """Makes sure no NaN's happen when some probabilities are zero."""
        q = np.zeros(npix)
        q[0] = .99

        # Fortran and pure-python log-distributions of the number of
        # positive pixels must agree exactly.
        lpf = anopheles.utils.ubl(q)
        lpp = unequal_binomial_lp(q)
        pp = np.exp(lpp)
        pf = np.exp(lpf)
        assert_equal(pp, pf)

        lpb = anopheles.utils.bin_ubl(3, 8, .1, q)

        assert not np.isnan(lpb)
        assert not np.any(np.isnan(pf))

        # Binomial mixture from the Fortran routine.
        pbf = np.exp([anopheles.utils.bin_ubl(x, n_obs, prob_detect, q)
                      for x in xrange(n_obs + 1)])

        # Same mixture assembled by hand: condition on i positive pixels.
        pbp = np.zeros(n_obs + 1)
        for i in xrange(npix + 1):
            log_weight = lpf[i]
            pbp += np.exp([pm.binomial_like(x, 8, prob_detect * float(i) / npix) + log_weight
                           for x in xrange(n_obs + 1)])

        assert_almost_equal(pbf, pbp)
예제 #3
0
def obs(pi=pi, phi=phi):
    """Zero-inflated binomial log-likelihood.

    Nonzero observations contribute a binomial term plus log(1 - phi) per
    row; zero observations contribute a mixture of a structural zero (phi)
    and a sampling zero ((1 - phi) * (1 - pi_i)^n_i).

    BUG FIX: the original looped `for n_i in n[~nonzeros]` but added the
    full vectorized `.sum()` on every iteration (and never used `n_i`),
    over-counting the zero-observation term once per zero row. The sum is
    now added exactly once.
    """
    logp = pl.log(1 - phi) * num_nonzeros + mc.binomial_like(
        r[nonzeros] * n[nonzeros], n[nonzeros], pi[nonzeros])
    logp += pl.log(phi + (1 - phi) *
                   pl.exp(pl.log(1 - pi[~nonzeros]) * n[~nonzeros])).sum()
    return logp
예제 #4
0
파일: model.py 프로젝트: apatil/worms
 def N_pos_now(
     value=pm.utils.round_array(pos[this_slice]),
     n=pm.utils.round_array(pos[this_slice] + neg[this_slice]),
     eps_p_f=eps_p_f_now,
     a1=a1,
     a2=a2,
 ):
     # Map the latent field through Stukel's generalized inverse-logit to
     # get a probability, then score the positives with a binomial.
     p_now = pm.flib.stukel_invlogit(eps_p_f, a1, a2)
     return pm.binomial_like(value, n=n, p=p_now)
예제 #5
0
    def test_binomial_case(self):
        """Checks for correspondence with the binomial distribution in the case of equal
        presence probabilities."""

        q = np.ones(5) * .2
        lpf, lpp, pbf, pbp = standard_things(q)

        # With identical per-pixel probabilities, the number of positive
        # pixels follows Binomial(npix, q[0]) exactly.
        lpo = np.array([pm.binomial_like(k, npix, q[0]) for k in range(npix + 1)])

        assert_almost_equal(lpf, lpo)
        assert_almost_equal(lpp, lpo)
예제 #6
0
def standard_things(q, prob_detect=prob_detect, n_obs=n_obs):
    """Compute the positive-pixel log-distribution two ways and the binomial
    mixture two ways, cross-checking each pair, and return all four."""
    npix = len(q)

    # Log-distribution of the number of pixels positive: Fortran vs python.
    lpf = anopheles.utils.ubl(q)
    lpp = unequal_binomial_lp(q)
    assert_equal(lpf, lpp)

    # Binomial mixture from the Fortran routine.
    pbf = np.exp([anopheles.utils.bin_ubl(k, n_obs, prob_detect, q)
                  for k in xrange(n_obs + 1)])

    # The same mixture assembled by hand: condition on i positive pixels,
    # weight each binomial by the log-probability of that pixel count.
    pbp = np.zeros(n_obs + 1)
    for i in xrange(npix + 1):
        pbp += np.exp([pm.binomial_like(k, n_obs, prob_detect * float(i) / npix) + lpf[i]
                       for k in xrange(n_obs + 1)])

    assert_almost_equal(pbf, pbp)

    return lpf, lpp, pbf, pbp
예제 #7
0
 def like(self,
          fdia=256,
          D=14.0**2 * pi,
          r=0.078,
          x0=29.7,
          y0=91.7,
          oo=-9.0,
          pe=0.414,
          dinvasion=296.4,
          Kbaldio=0.0029,
          Dbaldio=14.0**2 * pi,
          per=0.0,
          mxr=1.0,
          vparams=False,
          hilosgpu=32,
          v=0.0024,
          vermapa=False):
     """Run the simulation with the given parameters and return the binomial
     log-likelihood of the observed data (self.mosquitos[:, 4]) against the
     simulated expectations (self.esperados), stored in self.likelihood."""
     self.simula(fdia, D, r, x0, y0, oo, pe, dinvasion, Kbaldio, Dbaldio,
                 per, mxr, hilosgpu=hilosgpu, v=v, vermapa=vermapa)
     # Clip expectations away from 0/1 so the binomial log-pmf is finite.
     expected = clip(self.esperados, 1e-6, 1 - 1e-6)
     l0 = binomial_like(self.mosquitos[:, 4], 1., expected)
     # Guard the caller against NaN/inf log-likelihoods.
     if isnan(l0) or isinf(l0):
         l0 = -1e9
     self.likelihood = l0
     return l0
예제 #8
0
파일: cluster.py 프로젝트: xtmgah/SVclone
def calc_lik(combo, si, di, phi_i, pi, ni):
    """Expected variant probabilities and binomial log-likelihoods for each
    copy-number combo at phi_i.

    The tiny subtraction is a precision fudge factor guarding against
    zero-probability errors when a pv equals 1.
    """
    pvs = np.array([get_pv(phi_i, c, pi, ni) for c in combo])
    lls = np.array([pm.binomial_like(si, di, p) for p in pvs]) - 0.00000001
    return np.array([pvs, lls])
예제 #9
0
 def data_vivax(value = vivax_pos[where_vivax], splrep = None, p = p_vivax, n = np.sum(cur_obs,axis=1)):
     # Binomial log-likelihood of the vivax-positive counts out of n examined.
     return pm.binomial_like(x=value, p=p, n=n)
예제 #10
0
def obs(pi=pi):
    # Binomial log-likelihood of r*n successes out of n trials at rate pi.
    successes = r * n
    return mc.binomial_like(successes, n, pi)
예제 #11
0
# The 50% quantile (median) of pop_C_k, scaled to prevalence per 1000
# (Python 2 print statements).
pop_C_prev = pop_C_k.stats()["quantiles"][50] / float(pop_C_N)
pop_C_prev_per_1000 = "%.0f" % (pop_C_prev * 1000)
print pop_C_prev_per_1000

# 95% HPD interval on the prevalence, formatted per 1000.
pop_C_ui = pop_C_k.stats()["95% HPD interval"] / float(pop_C_N)
pop_C_ui_per_1000 = "[%.0f, %.0f]" % tuple(pop_C_ui * 1000)
print pop_C_ui_per_1000


### @export 'binomial-model-ppc'
# Binomial model data: observed rates r, sample sizes n, counts k.
r = pl.array(schiz["r"])
n = pl.array(schiz["n"], dtype=int)
k = r * n

pi = mc.Uninformative("pi", value=0.5)
# NOTE(review): the return value of this call is discarded — presumably a
# sanity check that the likelihood is computable at the initial value;
# confirm it is intentional.
mc.binomial_like(k, n, pi)


@mc.potential
def obs(pi=pi):
    # Binomial log-likelihood of the observed counts k out of n at rate pi.
    log_like = mc.binomial_like(k, n, pi)
    return log_like


@mc.deterministic
def pred(pi=pi):
    # Posterior-predictive draw: binomial samples at the current pi.
    draws = mc.rbinomial(n, pi)
    return draws


# Draw 20000 samples, discarding 10000 as burn-in and thinning by 10.
mc.MCMC([pi, obs, pred]).sample(20000, 10000, 10, verbose=False, progress_bar=False)

pl.figure(**book_graphics.quarter_page_params)
예제 #12
0
def survey_likelihood(sp_sub, survey_plan, data, i, a1, a2):
    """Binomial log-likelihood of survey observation i, with detection
    probability given by Stukel's inverse-logit of the latent value sp_sub
    (the observed count is broadcast to sp_sub's shape)."""
    observed = np.ones_like(sp_sub) * data[i]
    detect_p = pm.stukel_invlogit(sp_sub, a1, a2)
    return pm.binomial_like(observed, survey_plan.n[i], detect_p)
예제 #13
0
# The 50% quantile (median) of pop_C_k, scaled to prevalence per 1000
# (Python 2 print statements).
pop_C_prev = pop_C_k.stats()['quantiles'][50] / float(pop_C_N)
pop_C_prev_per_1000 = '%.0f' % (pop_C_prev * 1000)
print pop_C_prev_per_1000

# 95% HPD interval on the prevalence, formatted per 1000.
pop_C_ui = pop_C_k.stats()['95% HPD interval'] / float(pop_C_N)
pop_C_ui_per_1000 = '[%.0f, %.0f]' % tuple(pop_C_ui * 1000)
print pop_C_ui_per_1000

### @export 'binomial-model-ppc'
# Binomial model data: observed rates r, sample sizes n, counts k.
r = pl.array(schiz['r'])
n = pl.array(schiz['n'], dtype=int)
k = r * n

pi = mc.Uninformative('pi', value=.5)
# NOTE(review): the return value of this call is discarded — presumably a
# sanity check that the likelihood is computable at the initial value;
# confirm it is intentional.
mc.binomial_like(k, n, pi)


@mc.potential
def obs(pi=pi):
    # Score the observed counts k out of n under a binomial at rate pi.
    ll = mc.binomial_like(k, n, pi)
    return ll


@mc.deterministic
def pred(pi=pi):
    # Posterior-predictive binomial draws at the current pi.
    simulated = mc.rbinomial(n, pi)
    return simulated


mc.MCMC([pi, obs, pred]).sample(20000,
                                10000,
                                10,
예제 #14
0
 def p_obs(value=p, pi=pi, n=n):
     # Observed successes are value*n; the 1e-9 offset keeps pi strictly
     # positive to avoid log-of-zero errors.
     successes = value * n
     return mc.binomial_like(successes, n, pi + 1.0e-9)
예제 #15
0
 def p_obs(value=p, pi=pi, n=n):
     # Binomial log-likelihood of value*n successes out of n trials; nudge
     # pi away from zero so the log-pmf stays finite.
     k_obs = value * n
     return mc.binomial_like(k_obs, n, pi + 1.e-9)
예제 #16
0
파일: model.py 프로젝트: apatil/dufvax
 def d_now(value = vivax_pos[i], splrep = splreps[i_vivax], p = p, n = np.sum(cur_obs)):
     # Binomial log-likelihood of the vivax-positive count at this location.
     return pm.binomial_like(x=value, p=p, n=n)
예제 #17
0
# Observed rate per row.
r = k/n

# MCMC sampling settings and result accumulators.
# NOTE: `iter` shadows the builtin of the same name.
iter = 20000
burn = 10000
thin = 10
results = {}
xmax = .07

### @export 'distribution-comparison'
pl.figure(**book_graphics.quarter_page_params)

ax = pl.axes([.1, .3, .85, .65])
x = pl.arange(0, n_small*pi_true*4, .1)

# plot binomial distribution
# (probability mass recovered by exponentiating the PyMC log-likelihood)
y1 = [pl.exp(mc.binomial_like(x_i, n_small, pi_true)) for x_i in x]
# NOTE(review): linestyle='step:' is not a standard matplotlib linestyle
# string (cf. 'steps--' below) — confirm it renders as intended.
pl.step(x, y1, 'k',
        linewidth=1, linestyle='step:', alpha=.8,
        label='Binomial')

# plot poisson distribution
y2 = [pl.exp(mc.poisson_like(x_i, n_small*pi_true)) for x_i in x]
pl.plot(x, y2, 'k',
        linewidth=1, linestyle='steps--', alpha=.8,
        label='Poisson')

pl.legend(loc='upper right', fancybox=True, shadow=True)
pl.yticks([0, .05])
pl.xticks([25, 50, 75], ['','',''])
pl.axis([-.1, n_small*pi_true*4, -.02, 1.1*max(y1)])
pl.xlabel('Count')
예제 #18
0
 def data_vivax(value=vivax_pos[where_vivax], splrep=None, p=p_vivax, n=np.sum(cur_obs, axis=1)):
     # Binomial log-likelihood of the vivax-positive counts out of n examined.
     return pm.binomial_like(x=value, p=p, n=n)
예제 #19
0
# Observed rate per row.
r = k / n

# MCMC sampling settings and result accumulators.
# NOTE: `iter` shadows the builtin of the same name.
iter = 20000
burn = 10000
thin = 10
results = {}
xmax = .07

### @export 'distribution-comparison'
pl.figure(**book_graphics.quarter_page_params)

ax = pl.axes([.1, .3, .85, .65])
x = pl.arange(0, n_small * pi_true * 4, .1)

# plot binomial distribution
# (probability mass recovered by exponentiating the PyMC log-likelihood)
y1 = [pl.exp(mc.binomial_like(x_i, n_small, pi_true)) for x_i in x]
# NOTE(review): linestyle='step:' is not a standard matplotlib linestyle
# string (cf. 'steps--' below) — confirm it renders as intended.
pl.step(x, y1, 'k', linewidth=1, linestyle='step:', alpha=.8, label='Binomial')

# plot poisson distribution
y2 = [pl.exp(mc.poisson_like(x_i, n_small * pi_true)) for x_i in x]
pl.plot(x,
        y2,
        'k',
        linewidth=1,
        linestyle='steps--',
        alpha=.8,
        label='Poisson')

pl.legend(loc='upper right', fancybox=True, shadow=True)
pl.yticks([0, .05])
pl.xticks([25, 50, 75], ['', '', ''])
예제 #20
0
파일: __init__.py 프로젝트: apatil/worms
def survey_likelihood(x, survey_plan, data, i):
    """Binomial log-likelihood of survey observation i with detection
    probability invlogit(x); the observed count is broadcast to x's shape."""
    counts = np.ones_like(x) * data[i]
    return pm.binomial_like(counts, survey_plan.n[i], pm.invlogit(x))
예제 #21
0
 def p_obs(value=p, pi=pi_latent, n=n):
     # Flatten pi (presumably a PyMC container — confirm) before fancy
     # indexing, then score only the rows selected by i_nonzero.
     pi_flat = pl.array(pi)
     successes = (value * n)[i_nonzero]
     return mc.binomial_like(successes, n[i_nonzero], pi_flat[i_nonzero])
def obs(value=r, n=n, logit_p=logit_p):
    """Binomial log-likelihood of the observed data under p = invlogit(logit_p).

    BUG FIX: the original ignored its ``value`` parameter and read the
    global ``r`` directly. Under PyMC's convention the likelihood should be
    a function of ``value``; since ``value`` defaults to ``r``, the default
    behavior is unchanged.
    """
    return mc.binomial_like(value, n, mc.invlogit(logit_p))
예제 #23
0
 def p_obs(value=p, pi=pi_latent, n=n):
     # Materialise pi as an array before fancy indexing, then compute the
     # binomial log-likelihood over the rows selected by i_nonzero.
     pi_flat = pl.array(pi)
     k_obs = (value * n)[i_nonzero]
     return mc.binomial_like(k_obs, n[i_nonzero], pi_flat[i_nonzero])
예제 #24
0
파일: run_clus.py 프로젝트: xtmgah/SVclone
def post_process_clusters(mcmc, sv_df, snv_df, clus_out_dir, sup, dep, norm,
                          cn_states, sparams, cparams, output_params, map_):
    """Post-process an MCMC variant-clustering run.

    Assigns each variant (SNVs first, then SVs, in the stacked order of the
    sup/dep/norm/cn_states arrays) to its most frequently sampled cluster,
    summarises per-cluster CCF ("phi") estimates with HPD intervals,
    optionally adjusts and/or merges clusters, computes a model-fit table
    when a MAP estimate (map_) is supplied, and writes traces and result
    tables under clus_out_dir via write_output.

    Returns None; results are written to disk. Returns None early (with a
    warning) if no cluster can be recovered from the trace.
    """

    merge_clusts = cparams['merge_clusts']
    # NOTE(review): subclone_diff and phi_limit are read but never used in
    # this function body — confirm whether they are dead code.
    subclone_diff = cparams['subclone_diff']
    phi_limit = cparams['phi_limit']
    # NOTE(review): duplicate assignment — merge_clusts was already set above.
    merge_clusts = cparams['merge_clusts']
    cnv_pval = cparams['clonal_cnv_pval']
    hpd_alpha = cparams['hpd_alpha']
    adjust_phis = cparams['adjust_phis']
    clus_penalty = output_params['cluster_penalty']
    smc_het = output_params['smc_het']
    plot = output_params['plot']

    # Drop simulated SVs when the classification column exists.
    try:
        sv_df = sv_df[sv_df.classification.values != 'SIMU_SV']
    except AttributeError:
        pass
    npoints = len(snv_df) + len(sv_df)
    # Truncate the stacked per-variant arrays to the retained variants.
    sup, dep, norm, cn_states = sup[:
                                    npoints], dep[:
                                                  npoints], norm[:
                                                                 npoints], cn_states[:
                                                                                     npoints]

    z_trace = mcmc.trace('z')[:]

    # assign points to highest probability cluster
    clus_counts = [np.bincount(z_trace[:, i]) for i in range(npoints)]
    clus_max_prob = [index_max(c) for c in clus_counts]
    clus_mp_counts = np.bincount(clus_max_prob)
    clus_idx = np.nonzero(clus_mp_counts)[0]
    clus_mp_counts = clus_mp_counts[clus_idx]

    # cluster distribution
    clus_info = pd.DataFrame(clus_idx, columns=['clus_id'])
    clus_info['size'] = clus_mp_counts

    if len(clus_info) < 1:
        print(
            "Warning! Could not converge on any major SV clusters. Skipping.\n"
        )
        return None

    # Per-cluster CCF estimate (mean + HPD bounds) from the phi_k trace.
    center_trace = mcmc.trace("phi_k")[:]
    phis = np.array([
        mean_confidence_interval(center_trace[:, cid], hpd_alpha)
        for cid in clus_idx
    ])
    original_phis = phis.copy()
    adjusted_phis = get_adjusted_phis(clus_info, center_trace, cparams)

    # Column labels like "95_HPD_lo" / "95_HPD_hi".
    hpd_lo = '_'.join([str(int(100 - (100 * hpd_alpha))), 'HPD', 'lo'])
    hpd_hi = '_'.join([str(int(100 - (100 * hpd_alpha))), 'HPD', 'hi'])

    # Use adjusted phis as primary when requested; keep the other variant
    # in *_unadjusted / *_adjusted columns below.
    phis = adjusted_phis if adjust_phis else phis
    clus_info['phi'] = phis[:, 0]
    clus_info[hpd_lo] = phis[:, 1]
    clus_info[hpd_hi] = phis[:, 2]

    if adjust_phis:
        clus_info['phi_unadjusted'] = original_phis[:, 0]
        clus_info['%s_unadjusted' % hpd_lo] = original_phis[:, 1]
        clus_info['%s_unadjusted' % hpd_hi] = original_phis[:, 2]
    else:
        clus_info['phi_adjusted'] = adjusted_phis[:, 0]
        clus_info['%s_adjusted' % hpd_lo] = adjusted_phis[:, 1]
        clus_info['%s_adjusted' % hpd_hi] = adjusted_phis[:, 2]

    clus_ids = clus_info.clus_id.values
    clus_members = np.array(
        [np.where(np.array(clus_max_prob) == i)[0] for i in clus_ids])

    # Per-variant cluster membership probabilities (row-normalised counts).
    col_names = map(lambda x: 'cluster' + str(x), clus_ids)
    df_probs = pd.DataFrame(clus_counts, dtype=float)[clus_ids].fillna(0)
    df_probs = df_probs.apply(lambda x: x / sum(x), axis=1)
    df_probs.columns = col_names

    # cluster certainty
    clus_max_df = pd.DataFrame(clus_max_prob,
                               columns=['most_likely_assignment'])
    phi_cols = ["average_ccf", hpd_lo, hpd_hi]
    phi_matrix = pd.DataFrame(phis[:], index=clus_ids,
                              columns=phi_cols).loc[clus_max_prob]
    phi_matrix.index = range(len(phi_matrix))
    ccert = clus_max_df.join(phi_matrix)
    clus_info.index = range(len(clus_info))

    print('\n\n')
    print(clus_info[['clus_id', 'size', 'phi']])
    print('Compiling and writing output...')

    # Dump traces (to an snvs/ subdirectory when only SNVs are present).
    dump_out_dir = clus_out_dir
    if len(snv_df) > 0 and len(sv_df) == 0:
        # snvs only trace output
        dump_out_dir = '%s/snvs' % clus_out_dir
    trace_out = '%s/' % (dump_out_dir)
    write_output.dump_trace(center_trace, trace_out + 'phi_trace.txt')
    write_output.dump_trace(z_trace, trace_out + 'z_trace.txt')

    # The alpha trace only exists for some model configurations.
    try:
        alpha_trace = mcmc.trace('alpha')[:]
        write_output.dump_trace(alpha_trace, trace_out + 'alpha_trace.txt')
    except KeyError:
        pass

    # cluster plotting
    if plot:
        plot_clusters(mcmc.trace, clus_idx, clus_max_prob, sup, dep,
                      clus_out_dir, cparams)

    # merge clusters
    if len(clus_info) > 1 and merge_clusts:
        clus_merged = pd.DataFrame(columns=clus_info.columns,
                                   index=clus_info.index)
        clus_merged, clus_members, merged_ids  = merge_clusters(clus_out_dir,clus_info,clus_merged,\
                clus_members,[],sup,dep,norm,cn_states,sparams,cparams)

        if len(clus_merged) != len(clus_info):
            clus_info = clus_merged
            df_probs, ccert = merge_results(clus_merged, merged_ids, df_probs,
                                            ccert)

    snv_probs = pd.DataFrame()
    snv_ccert = pd.DataFrame()
    snv_members = np.empty(0)

    z_phi = get_per_variant_phi(z_trace, center_trace)

    # compile run fit statistics
    run_fit = pd.DataFrame()
    if map_ is not None:
        nclus = len(clus_info)
        # bic = -2 * map_.lnL + (1 + npoints + nclus * 2) + (nclus * clus_penalty) * np.log(npoints)
        phis = ccert.average_ccf.values
        cns, pvs = cluster.get_most_likely_cn_states(cn_states, sup, dep, phis,
                                                     sparams['pi'], cnv_pval,
                                                     norm)
        # Information criterion built from per-variant binomial
        # log-likelihoods, penalised by point and cluster counts.
        lls = []
        for si, di, pvi in zip(sup, dep, pvs):
            lls.append(pm.binomial_like(si, di, pvi))
        svc_ic = -2 * np.sum(lls) + (npoints +
                                     nclus * clus_penalty) * np.log(npoints)

        run_fit = pd.DataFrame([['svc_IC', svc_ic], ['BIC', map_.BIC],
                                ['AIC', map_.AIC], ['AICc', map_.AICc],
                                ['lnL', map_.lnL], ['logp', map_.logp],
                                ['logp_at_max', map_.logp_at_max],
                                ['param_len', map_.len],
                                ['data_len', map_.data_len]])

    # SNV rows occupy the first len(snv_df) positions of the stacked arrays.
    if len(snv_df) > 0:
        snv_pos = ['chrom', 'pos']
        snv_probs = df_probs.loc[:len(snv_df) - 1]
        snv_probs = snv_df[snv_pos].join(snv_probs)

        snv_ccert = ccert.loc[:len(snv_df) - 1]
        snv_ccert = snv_df[snv_pos].join(snv_ccert)

        snv_max_probs = np.array(clus_max_prob)[:len(snv_df)]
        snv_members = np.array(
            [np.where(np.array(snv_max_probs) == i)[0] for i in clus_ids])

        snv_sup = sup[:len(snv_df)]
        snv_dep = dep[:len(snv_df)]
        snv_norm = norm[:len(snv_df)]
        snv_cn_states = cn_states[:len(snv_df)]
        snv_z_phi = z_phi[:len(snv_df)]
        write_output.write_out_files(snv_df,
                                     clus_info.copy(),
                                     snv_members,
                                     snv_probs,
                                     snv_ccert,
                                     clus_out_dir,
                                     sparams['sample'],
                                     sparams['pi'],
                                     snv_sup,
                                     snv_dep,
                                     snv_norm,
                                     snv_cn_states,
                                     run_fit,
                                     smc_het,
                                     cnv_pval,
                                     snv_z_phi,
                                     are_snvs=True)

    # SV rows follow the SNV rows (offset lb) in the stacked arrays.
    sv_probs = pd.DataFrame()
    sv_ccert = pd.DataFrame()
    sv_members = np.empty(0)
    if len(sv_df) > 0:
        lb = len(snv_df) if len(snv_df) > 0 else 0

        sv_pos = ['chr1', 'pos1', 'dir1', 'chr2', 'pos2', 'dir2']
        sv_probs = df_probs.loc[lb:lb + len(sv_df) - 1]
        sv_probs.index = sv_df.index
        sv_probs = sv_df[sv_pos].join(sv_probs)

        sv_ccert = ccert.loc[lb:lb + len(sv_df) - 1]
        sv_ccert.index = sv_df.index
        sv_ccert = sv_df[sv_pos].join(sv_ccert)

        # NOTE(review): this takes the FIRST len(sv_df) assignments rather
        # than [lb:lb + len(sv_df)] like the other sv_* slices below —
        # looks wrong whenever SNVs are present; confirm against upstream.
        sv_max_probs = np.array(clus_max_prob)[:len(sv_df)]
        sv_members = np.array(
            [np.where(np.array(sv_max_probs) == i)[0] for i in clus_ids])

        sv_sup = sup[lb:lb + len(sv_df)]
        sv_dep = dep[lb:lb + len(sv_df)]
        sv_norm = norm[lb:lb + len(sv_df)]
        sv_cn_states = cn_states[lb:lb + len(sv_df)]
        sv_z_phi = z_phi[lb:lb + len(sv_df)]
        write_output.write_out_files(sv_df, clus_info.copy(), sv_members,
                                     sv_probs, sv_ccert, clus_out_dir,
                                     sparams['sample'], sparams['pi'], sv_sup,
                                     sv_dep, sv_norm, sv_cn_states, run_fit,
                                     smc_het, cnv_pval, sv_z_phi)
예제 #25
0
def obs(pi=pi):
    # Binomial log-likelihood of k successes in n trials at rate pi.
    loglik = mc.binomial_like(k, n, pi)
    return loglik
예제 #26
0
파일: zero_forest.py 프로젝트: aflaxman/gbd
def obs(pi=pi, phi=phi):
    """Zero-inflated binomial log-likelihood.

    Nonzero observations contribute a binomial term plus log(1 - phi) per
    row; zero observations contribute a mixture of a structural zero (phi)
    and a sampling zero ((1 - phi) * (1 - pi_i)^n_i).

    BUG FIX: the original looped `for n_i in n[~nonzeros]` but added the
    full vectorized `.sum()` on every iteration (and never used `n_i`),
    over-counting the zero-observation term once per zero row. The sum is
    now added exactly once.
    """
    logp = pl.log(1-phi)*num_nonzeros + mc.binomial_like(r[nonzeros]*n[nonzeros], n[nonzeros], pi[nonzeros])
    logp += pl.log(phi + (1-phi) * pl.exp(pl.log(1-pi[~nonzeros]) * n[~nonzeros])).sum()
    return logp