Example #1
import os

import numpy as np
import pymc3 as pm
import theano.tensor as tt
from scipy.stats import logistic


def pathway_prediction(landa, a_init, mu, gamma, eta, tau, observed_weight_vector, pathway_dict,
                       record_samples=True):
    number_of_pathways = np.size(eta, 0)
    number_of_metabolites = np.size(eta, 1)
    myModel = pm.Model()
    with myModel:

        landa_value = pm.Beta('landa_value', alpha=1, beta=1)
        # define prior
        a = pm.Bernoulli('a', p=landa_value, shape=number_of_pathways)  # 1 x p
        # define likelihood: p(w | a)
        l = pm.math.dot(a, eta)  # 1xf: number of pathways that can generate each metabolite f
        phi = 1 - tt.exp(tt.log(1 - mu) * l)  # 1xf: p(m_j = 1| a)
        psi = 1 - tt.exp(tt.dot(tt.log(1 - (gamma * phi)), tau))  # 1xk: p(w_k=1 | a)
        w = pm.Bernoulli('w', p=psi, observed=observed_weight_vector, shape=observed_weight_vector.shape)

        start_point = {'landa_value': landa, 'a': a_init.astype(np.int32)}
        step1 = pm.Metropolis([landa_value])
        step2 = pm.BinaryGibbsMetropolis([a])
        trace = pm.sample(draws=1000, step=[step1, step2], start=start_point, random_seed=42)

    # varnames[0] holds the log-odds transform of landa_value; map it back to
    # (0, 1) with the inverse logit, i.e. the logistic CDF
    landa_value_samples_logodds = trace.get_values(trace.varnames[0], burn=100)
    landa_value_samples = logistic.cdf(landa_value_samples_logodds)
    pathways_samples = trace.get_values(trace.varnames[1], burn=100)

    mean_pathways_activity = np.mean(pathways_samples, axis=0)
    if record_samples:
        outdata_dir = os.environ['PUMA_OUTPUT_DATA']
        pathway_prediction_output = os.path.join(outdata_dir, 'pathway_prediction_output.xlsx')
        mean_pathways_activity_in_samples = np.squeeze(mean_pathways_activity).reshape(1, -1)
        # write_data is a project-local helper that writes an xlsx sheet
        write_data(mean_pathways_activity_in_samples, pathway_prediction_output, sheetname="samples",
                   header=pathway_dict["pathway"])

    print("mean_pathways_activity_PUMA_detected:", list(mean_pathways_activity))
    n_active_pathways = np.count_nonzero(mean_pathways_activity >= 0.5)
    print("number_active_pathways [PUMA detected]:", n_active_pathways)
    active_pathways_indices = np.nonzero(mean_pathways_activity >= 0.5)[0]
    active_pathways_ID = [pathway_dict["pathway"][index] for index in active_pathways_indices]
    print("active_pathways_PUMA_detected:", active_pathways_ID)
    not_active_pathways_indices = np.nonzero(mean_pathways_activity < 0.5)[0]
    not_active_pathways_ID = [pathway_dict["pathway"][index] for index in not_active_pathways_indices]
    print("not_active_pathways_PUMA_detected:", not_active_pathways_ID)
    return pathways_samples
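
# A hypothetical smoke test for pathway_prediction (not part of the original
# script): eta, tau, and the observed weights below are tiny illustrative
# arrays, and record_samples=False avoids the PUMA_OUTPUT_DATA dependency.
if __name__ == "__main__":
    eta = np.array([[1, 0, 1, 0],
                    [0, 1, 1, 0],
                    [0, 0, 1, 1]])        # 3 pathways x 4 metabolites
    tau = np.array([[1, 0],
                    [0, 1],
                    [1, 1],
                    [0, 1]])              # 4 metabolites x 2 observed weights
    pathway_prediction(landa=0.5,
                       a_init=np.ones(3),
                       mu=0.1, gamma=0.9, eta=eta, tau=tau,
                       observed_weight_vector=np.array([1, 0]),
                       pathway_dict={"pathway": ["P1", "P2", "P3"]},
                       record_samples=False)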
Example #2
import numpy as np
import pandas as pd
import pymc3 as pm


def main(argv=None):
    niter = 10000
    tune = 5000

    model = pm.Model()

    with model:
        tv = [1]
        rain = pm.Bernoulli('rain', 0.2, shape=1, testval=tv)
        sprinkler_p = pm.Deterministic('sprinkler_p',
                                       pm.math.switch(rain, 0.01, 0.40))
        sprinkler = pm.Bernoulli('sprinkler', sprinkler_p, shape=1, testval=tv)
        grass_wet_p = pm.Deterministic(
            'grass_wet_p',
            pm.math.switch(rain, pm.math.switch(sprinkler, 0.99, 0.80),
                           pm.math.switch(sprinkler, 0.90, 0.0)))
        grass_wet = pm.Bernoulli('grass_wet',
                                 grass_wet_p,
                                 observed=np.array([1]),
                                 shape=1)

        trace = pm.sample(niter,
                          step=[pm.BinaryGibbsMetropolis([rain, sprinkler])],
                          tune=tune,
                          random_seed=124)

    # pm.traceplot(trace)

    dictionary = {
        'Rain': [1 if ii[0] else 0 for ii in trace['rain'].tolist()],
        'Sprinkler': [1 if ii[0] else 0 for ii in trace['sprinkler'].tolist()],
        'Sprinkler Probability':
        [ii[0] for ii in trace['sprinkler_p'].tolist()],
        'Grass Wet Probability':
        [ii[0] for ii in trace['grass_wet_p'].tolist()],
    }
    df = pd.DataFrame(dictionary)

    # posterior probabilities given the observation grass_wet = 1
    p_rain = df[df['Rain'] == 1].shape[0] / df.shape[0]
    print('P(rain | grass wet) =', p_rain)

    p_sprinkler = df[df['Sprinkler'] == 1].shape[0] / df.shape[0]
    print('P(sprinkler | grass wet) =', p_sprinkler)
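
    # Analytic cross-check, added for illustration: enumerate the four
    # (rain, sprinkler) configurations using the same CPTs defined above.
    # The MCMC estimates should be close to these exact values.
    p_wet_given = {(1, 1): 0.99, (1, 0): 0.80, (0, 1): 0.90, (0, 0): 0.0}
    joint = {(r, s): (0.2 if r else 0.8)
             * ((0.01 if s else 0.99) if r else (0.40 if s else 0.60))
             * p_wet_given[(r, s)]
             for r in (0, 1) for s in (0, 1)}
    p_wet = sum(joint.values())
    print('exact P(rain | grass wet) =',
          (joint[(1, 0)] + joint[(1, 1)]) / p_wet)        # ~0.358
    print('exact P(sprinkler | grass wet) =',
          (joint[(0, 1)] + joint[(1, 1)]) / p_wet)        # ~0.647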
Example #3
                                    states=states1,
                                    observed=dataset[4])

    # HMMStatesN and HMMGaussianEmissions are custom distributions
    # defined earlier in this script (not shown in the excerpt)
    states2 = HMMStatesN('states2', P=P, PA=PA, shape=len(dataset[205]))
    
    emission2 = HMMGaussianEmissions('emission2',
                                     A1=A1,
                                     A2=A2,
                                     S1=S1,
                                     S2=S2,
                                     states=states2,
                                     observed=dataset[205])

    start = pm.find_MAP(fmin=optimize.fmin_powell)
    step1 = pm.Metropolis(vars=[P, PA, A1, A2, S1, S2, emission1, emission2])
    step2 = pm.BinaryGibbsMetropolis(vars=[states1, states2])
    trace = pm.sample(10000, start=start, step=[step1, step2])

pm.traceplot(trace)
pm.summary(trace[500:])

sample1_avg = np.average(trace['states1'][500:], axis=0)
sample2_avg = np.average(trace['states2'][500:], axis=0)

plt.figure()
plt.plot(dataset[4])
plt.plot(sample1_avg * 0.6)

plt.figure()
plt.plot(dataset[205])
plt.plot(sample2_avg * 0.6)
Example #4
                              observed=masked_values(mother_hs, value=-999))

    s = pm.HalfCauchy("s", 5.0, testval=5)
    beta = pm.Laplace("beta", 0.0, 100.0, shape=7, testval=0.1)

    expected_score = (beta[0] + beta[1] * male + beta[2] * siblings_imp +
                      beta[3] * disability_imp + beta[4] * age +
                      beta[5] * mother_imp + beta[6] * early_ident)

    observed_score = pm.Normal("observed_score",
                               expected_score,
                               s,
                               observed=score)

with model:
    start = pm.find_MAP()
    step1 = pm.NUTS([beta, s, p_disab, p_mother, sib_mean], scaling=start)
    step2 = pm.BinaryGibbsMetropolis(
        [mother_imp.missing_values, disability_imp.missing_values])


def run(n=5000):
    if n == "short":
        n = 100
    with model:
        pm.sample(n, step=[step1, step2], start=start)


if __name__ == "__main__":
    run()
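
# A minimal sketch of the masked-value imputation pattern used above, on
# hypothetical toy data (not part of the original script). Entries equal to
# -999 become latent Bernoulli variables that PyMC3 imputes, which is why
# the BinaryGibbsMetropolis step above targets *.missing_values.
import numpy as np
import pymc3 as pm
from numpy.ma import masked_values

toy = np.array([0, 1, -999, 1, -999])
with pm.Model():
    p = pm.Beta("p", 1.0, 1.0)
    x = pm.Bernoulli("x", p, observed=masked_values(toy, value=-999))
    # PyMC3 auto-assigns a sampler for p and combines it with this step
    pm.sample(500, step=pm.BinaryGibbsMetropolis([x.missing_values]), chains=1)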
Example #5
import numpy as np
import pymc3 as pm
import theano.tensor as tt


def mixture_model_boolean_vnm(
        data_2d,
        N,  # noqa: N803
        M,
        std,
        lam_backg,
        nsteps,
        nchains
):
    """Define the mixture model and sample from it.

    This version of the model was contributed by
    V N Manoharan

    Parameters
    ----------
    data_2d : ndarray of floats
        2D intensity distribution of the collected light
    N : integer
        number of lattice sites along one axis
    M : integer
        number of pixels per lattice site along one axis
    std : float
        Gaussian width of the point spread function
    lam_backg: integer
        Expected value of the Poissonian background
    nsteps : integer
        number of steps taken by each walker in the pymc3 sampling
    nchains : integer
        number of walkers in the pymc3 sampling

    Returns
    -------
    traces : pymc3 MultiTrace
        An object that contains the samples.
    df : dataframe
        Samples converted into a dataframe object

    """
    # x-pixel locations for one lattice site
    x = np.arange(-M/2, M/2)
    # X, Y meshgrid of pixel locations
    X, Y = np.meshgrid(x, x)  # noqa: N806

    # in future gen instead of passing N, use
    # opticalLatticeShape = tuple((np.array(pixel_grid.shape)/M).astype(int))

    with pm.Model() as mixture_model:  # noqa: F841

        # Prior
        # Use an informative prior for P based on what
        # you would know in a real experiment.
        # A Uniform(0,1) prior causes severe problems
        # and probably doesn't represent your
        # true state of knowledge prior to the experiment.
        # I use a Gamma distribution (rather than a Normal)
        # so that P stays positive and the sampler doesn't diverge.
        # You can adjust the width to match what you would
        # know in a typical experiment.

        P = pm.Gamma('P', mu=0.5, sd=0.05)  # noqa: N806
        q = pm.Bernoulli('q', p=P, shape=(N, N), testval=np.ones((N, N)))

        # Here again you need more informative priors.
        # Previously these were Uniform, with limits determined by the data.
        # But priors should not be based on the data.
        # They should be based on what you know prior to to experiment.
        # I use a Gamma distribution for both
        # because that constrains the values to be positive.
        # Adjust mu and sd to match what you
        # would know before a typical experiment.
        aa = pm.Gamma('Aa', mu=3, sd=0.5)
        ab = pm.Gamma('Ab', mu=0.5, sd=0.1)

        # Again, replaced Uniform priors by Gamma priors.
        # Adjust mu and sd to match what you
        # would know before a typical experiment
        sigma_a = pm.Gamma('sigma_a', mu=1, sd=0.1)
        sigma_b = pm.Gamma('sigma_b', mu=1, sd=0.1)

        # Replaced Normal by Gamma distribution to keep atom_std positive
        # atom_std = pm.Normal('std', mu = std, sd = 0.2)
        atom_std = pm.Gamma('std', mu=std, sd=0.1)

        # Removed atom_back as a parameter and
        # assumed background in presence of atom is the
        # same as that without the atom.
        # If you want to keep this, don't use a Uniform prior.
        # atom_back = pm.Uniform('A_back', lower=0, upper=20)

        # Model (Gaussian + uniform)
        single_background = ab * np.ones((M, M))
        # Replaced the background amplitude with ab rather than atom_back.
        single_atom = aa * np.exp(
            -((X - 0)**2 + (Y - 0)**2) / (2 * atom_std**2)) \
            + ab * np.ones((M, M))

        atom = tt.slinalg.kron(q, single_atom)
        background = tt.slinalg.kron(1-q, single_background)
        # Log-likelihood
        good_data = pm.Normal.dist(mu=atom, sd=sigma_a).logp(data_2d)
        bad_data = pm.Normal.dist(mu=background, sd=sigma_b).logp(data_2d)
        log_like = good_data + bad_data

        # Here I added a binomial log-likelihood term.
        # I used the normal approximation to the
        # binomial (please check my math).
        # This term accounts for deviations from the expected
        # occupancy fraction. If the mean of the q_i are
        # signficantly different from P, the
        # configuration is penalized.
        # This is why you shouldn't put a uniform prior on P.

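        # Normal approximation to the binomial: mean(q) is approximately
        # Binomial(N*N, P) / (N*N), which has mean P and variance
        # P*(1-P)/(N*N), hence the precision tau = N*N / (P*(1-P)) below.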
        log_add = pm.Normal.dist(mu=P, tau=N*N/(P*(1-P))).logp(q.mean())
        pm.Potential('logp', log_like.sum() + log_add)

        # Sample
        # We'll explicitly set the two sampling steps
        # (rather than let pymc3 do it for us), so that
        # we can tune each step.
        # We use binary Gibbs Metropolis for the q and NUTS for everything
        # else.  Note that if you add a variable to the model,
        # you should explicitly add it to the
        # sampling step below.
        steps = [
            pm.BinaryGibbsMetropolis([q], transit_p=0.8),
            pm.NUTS(
                [atom_std, sigma_b, sigma_a, ab, aa, P],
                target_accept=0.8
            )
        ]

        # Sample
        # draw samples using the step methods defined above
        traces = pm.sample(tune=nsteps, draws=nsteps, step=steps, chains=nchains)

    # convert the PyMC3 traces into a dataframe
    df = pm.trace_to_dataframe(traces)

    return traces, df
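
# Hypothetical smoke test with synthetic data (shapes and values are
# illustrative; a real run would use measured intensities and more steps):
if __name__ == "__main__":
    N, M = 2, 4
    rng = np.random.default_rng(0)
    occupancy = rng.integers(0, 2, size=(N, N))
    x = np.arange(-M / 2, M / 2)
    X, Y = np.meshgrid(x, x)
    psf = 3.0 * np.exp(-(X**2 + Y**2) / 2.0)      # toy point spread function
    synthetic = np.kron(occupancy, psf) + 0.5     # occupied sites + background
    traces, df = mixture_model_boolean_vnm(synthetic, N, M, std=1.0,
                                           lam_backg=1, nsteps=100, nchains=1)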
Example #6
    sym = pm.Bernoulli('sym', sym_p, shape=1)

    # If dis_a is true and dis_b is true, probability of test a = 0.97
    # If dis_a is true and dis_b is false, probability of test a = 0.85
    # If dis_a is false and dis_b is true, probability of test a = 0.2
    # If dis_a is false and dis_b is false, probability of test a = 0.08
    test_a_p = pm.Deterministic(
        'test_a_p',
        pm.math.switch(dis_a, pm.math.switch(dis_b, 0.97, 0.85),
                       pm.math.switch(dis_b, 0.2, 0.08)))
    test_a = pm.Bernoulli('test_a', test_a_p, shape=1)

    # Starts MCMC
    trace = pm.sample(niter,
                      step=[
                          pm.BinaryGibbsMetropolis(
                              [exposure, risk, dis_b, dis_a, sym, test_a])
                      ],
                      tune=tune,
                      random_seed=123)

pm.summary(trace)  # Prints MCMC statistics

# Extract info from trace data structure into dictionary
results_dict = {
    'Exposure': [1 if ii[0] else 0 for ii in trace['exposure'].tolist()],
    'Risk Factors': [1 if ii[0] else 0 for ii in trace['risk'].tolist()],
    'Disease A Prob': [ii[0] for ii in trace['dis_a_p'].tolist()],
    'Disease A': [1 if ii[0] else 0 for ii in trace['dis_a'].tolist()],
    'Disease B Prob': [ii[0] for ii in trace['dis_b_p'].tolist()],
    'Disease B': [1 if ii[0] else 0 for ii in trace['dis_b'].tolist()],
    'Sym Prob': [ii[0] for ii in trace['sym_p'].tolist()],
    'Symptoms': [1 if ii[0] else 0 for ii in trace['sym'].tolist()],
    'Test A Prob': [ii[0] for ii in trace['test_a_p'].tolist()],
    'Test A': [1 if ii[0] else 0 for ii in trace['test_a'].tolist()],
}