import os

import numpy as np
import pymc3 as pm
import theano.tensor as tt
from scipy.stats import logistic


def pathway_prediction(landa, a_init, mu, gamma, eta, tau, observed_weight_vector,
                       pathway_dict, record_samples=True):
    number_of_pathways = np.size(eta, 0)
    number_of_metabolites = np.size(eta, 1)

    myModel = pm.Model()
    with myModel:
        # prior over the pathway-activity probability
        landa_value = pm.Beta('landa_value', alpha=1, beta=1)
        # pathway activity indicators, 1 x p
        a = pm.Bernoulli('a', p=landa_value, shape=number_of_pathways)

        # likelihood p(w | a)
        # l: 1 x f, number of active pathways that can generate each metabolite
        l = pm.math.dot(a, eta)
        # phi: 1 x f, p(m_j = 1 | a)
        phi = 1 - tt.exp(tt.log(1 - mu) * l)
        # psi: 1 x k, p(w_k = 1 | a)
        psi = 1 - tt.exp(tt.dot(tt.log(1 - (gamma * phi)), tau))
        w = pm.Bernoulli('w', p=psi, observed=observed_weight_vector,
                         shape=observed_weight_vector.shape)

        start_point = {'landa_value': landa, 'a': a_init.astype(np.int32)}
        step1 = pm.Metropolis([landa_value])
        step2 = pm.BinaryGibbsMetropolis([a])
        trace = pm.sample(draws=1000, step=[step1, step2], start=start_point,
                          random_seed=42)

    # pymc3 stores the transformed Beta variable on the log-odds scale; map it
    # back to (0, 1) with the logistic sigmoid, i.e. logistic.cdf, not .pdf
    landa_value_samples_logodds = trace.get_values(trace.varnames[0], burn=100)
    landa_value_samples = logistic.cdf(landa_value_samples_logodds)
    pathways_samples = trace.get_values(trace.varnames[1], burn=100)
    mean_pathways_activity = np.mean(pathways_samples, axis=0)

    if record_samples:
        outdata_dir = os.environ['PUMA_OUTPUT_DATA']
        pathway_prediction_output = os.path.join(outdata_dir,
                                                 'pathway_prediction_output.xlsx')
        mean_pathways_activity_in_samples = \
            np.squeeze(mean_pathways_activity).reshape(1, -1)
        # write_data is a project-local helper imported elsewhere
        write_data(mean_pathways_activity_in_samples, pathway_prediction_output,
                   sheetname="samples", header=pathway_dict["pathway"])

    print("mean_pathways_activity_PUMA_detected:", list(mean_pathways_activity))

    n_active_pathways = len([pathway_activity
                             for pathway_activity in mean_pathways_activity
                             if pathway_activity >= 0.5])
    print("number_active_pathways [PUMA detected]:", n_active_pathways)

    active_pathways_indices = np.nonzero(mean_pathways_activity >= 0.5)[0]
    active_pathways_ID = [pathway_dict["pathway"][index]
                          for index in active_pathways_indices]
    print("active_pathways_PUMA_detected:", active_pathways_ID)

    not_active_pathways_indices = np.nonzero(mean_pathways_activity < 0.5)[0]
    not_active_pathways_ID = [pathway_dict["pathway"][index]
                              for index in not_active_pathways_indices]
    print("not_active_pathways_PUMA_detected:", not_active_pathways_ID)

    return pathways_samples
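# The two tt.exp/tt.log expressions above are noisy-OR links: phi is
# 1 - (1 - mu)^l per metabolite, and psi composes the same trick across
# metabolites. A minimal numpy sketch of that arithmetic, with made-up
# shapes and values (3 pathways, 4 metabolites, 2 observed weights):

import numpy as np

rng = np.random.default_rng(0)
eta_demo = rng.integers(0, 2, size=(3, 4))   # pathway -> metabolite incidence
tau_demo = rng.integers(0, 2, size=(4, 2))   # metabolite -> weight incidence
a_demo = np.array([1, 0, 1])                 # pathway activity indicators
mu_demo, gamma_demo = 0.9, 0.8

l_demo = a_demo @ eta_demo                   # active generating pathways per metabolite
phi_demo = 1 - (1 - mu_demo) ** l_demo       # equals 1 - exp(l * log(1 - mu))
psi_demo = 1 - np.exp(np.log(1 - gamma_demo * phi_demo) @ tau_demo)
print(phi_demo, psi_demo)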
import numpy as np
import pandas as pd
import pymc3 as pm


def main(argv=None):
    niter = 10000
    tune = 5000

    model = pm.Model()
    with model:
        tv = [1]
        rain = pm.Bernoulli('rain', 0.2, shape=1, testval=tv)
        sprinkler_p = pm.Deterministic('sprinkler_p',
                                       pm.math.switch(rain, 0.01, 0.40))
        sprinkler = pm.Bernoulli('sprinkler', sprinkler_p, shape=1, testval=tv)
        grass_wet_p = pm.Deterministic(
            'grass_wet_p',
            pm.math.switch(rain,
                           pm.math.switch(sprinkler, 0.99, 0.80),
                           pm.math.switch(sprinkler, 0.90, 0.0)))
        grass_wet = pm.Bernoulli('grass_wet', grass_wet_p,
                                 observed=np.array([1]), shape=1)

        trace = pm.sample(20000,
                          step=[pm.BinaryGibbsMetropolis([rain, sprinkler])],
                          tune=tune, random_seed=124)

    # pm.traceplot(trace)

    dictionary = {
        'Rain': [1 if ii[0] else 0 for ii in trace['rain'].tolist()],
        'Sprinkler': [1 if ii[0] else 0 for ii in trace['sprinkler'].tolist()],
        'Sprinkler Probability': [ii[0] for ii in trace['sprinkler_p'].tolist()],
        'Grass Wet Probability': [ii[0] for ii in trace['grass_wet_p'].tolist()],
    }
    df = pd.DataFrame(dictionary)

    p_rain = df[(df['Rain'] == 1)].shape[0] / df.shape[0]
    print(p_rain)
    p_sprinkler = df[(df['Sprinkler'] == 1)].shape[0] / df.shape[0]
    print(p_sprinkler)
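    # Since grass_wet = 1 is observed, p_rain and p_sprinkler above are
    # already posterior probabilities given wet grass. Other conditionals can
    # be read off the same DataFrame; a minimal sketch, continuing inside
    # main() (hypothetical follow-up query, not in the original):
    # estimate P(Rain = 1 | Sprinkler = 1) by filtering the samples.
    given_sprinkler = df[df['Sprinkler'] == 1]
    p_rain_given_sprinkler = (given_sprinkler['Rain'] == 1).mean()
    print(p_rain_given_sprinkler)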
    # ... inside `with model:`; the priors P, PA, A1, A2, S1, S2 and the
    # states1 variable are defined above this excerpt
    emission1 = HMMGaussianEmissions('emission1', A1=A1, A2=A2, S1=S1, S2=S2,
                                     states=states1, observed=dataset[4])
    states2 = HMMStatesN('states2', P=P, PA=PA, shape=len(dataset[205]))
    emission2 = HMMGaussianEmissions('emission2', A1=A1, A2=A2, S1=S1, S2=S2,
                                     states=states2, observed=dataset[205])

    start = pm.find_MAP(fmin=optimize.fmin_powell)
    # only the free parameters get a Metropolis step; emission1/emission2 are
    # observed variables and are not sampled
    step1 = pm.Metropolis(vars=[P, PA, A1, A2, S1, S2])
    step2 = pm.BinaryGibbsMetropolis(vars=[states1, states2])
    trace = pm.sample(10000, start=start, step=[step1, step2])

pm.traceplot(trace)
pm.summary(trace[500:])

# posterior mean of the binary state sequences, after a 500-sample burn-in
sample1_avg = np.average(trace['states1'][500:], axis=0)
sample2_avg = np.average(trace['states2'][500:], axis=0)

plt.figure()
plt.plot(dataset[4])
plt.plot(sample1_avg * 0.6)

plt.figure()
plt.plot(dataset[205])
plt.plot(sample2_avg * 0.6)
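# The plots overlay the posterior mean occupancy (scaled by 0.6 for
# visibility) on the raw traces. A possible follow-up (hypothetical, not in
# the original): threshold the posterior means into a hard 0/1 state path.
hard_path1 = (sample1_avg > 0.5).astype(int)
print("fraction of time in the high state:", hard_path1.mean())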
    # ... inside `with model:`; priors for sib_mean, p_disab, p_mother and the
    # sibling/disability imputations are defined above this excerpt
    mother_imp = pm.Bernoulli("mother_imp", p_mother,
                              observed=masked_values(mother_hs, value=-999))

    s = pm.HalfCauchy("s", 5.0, testval=5)
    beta = pm.Laplace("beta", 0.0, 100.0, shape=7, testval=0.1)

    expected_score = (beta[0] + beta[1] * male + beta[2] * siblings_imp +
                      beta[3] * disability_imp + beta[4] * age +
                      beta[5] * mother_imp + beta[6] * early_ident)

    observed_score = pm.Normal("observed_score", expected_score, s,
                               observed=score)

with model:
    start = pm.find_MAP()
    step1 = pm.NUTS([beta, s, p_disab, p_mother, sib_mean], scaling=start)
    # the imputed missing entries are binary, so they get a Gibbs step
    step2 = pm.BinaryGibbsMetropolis(
        [mother_imp.missing_values, disability_imp.missing_values])


def run(n=5000):
    if n == "short":
        n = 100
    with model:
        pm.sample(n, step=[step1, step2], start=start)


if __name__ == "__main__":
    run()
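# Toy illustration of the sentinel masking used above (made-up data):
# masked_values comes from numpy.ma, and passing a masked array as `observed`
# is what creates the *_imp.missing_values variables sampled in step2.
import numpy as np
from numpy.ma import masked_values

mother_hs_demo = np.array([1, 0, -999, 1, -999])
masked = masked_values(mother_hs_demo, value=-999)
print(masked)       # [1 0 -- 1 --]
print(masked.mask)  # [False False True False True]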
import numpy as np
import pymc3 as pm
import theano.tensor as tt


def mixture_model_boolean_vnm(data_2d, N, M, std, lam_backg,  # noqa: N803
                              nsteps, nchains):
    """Define the mixture model and sample from it.

    This version of the model was contributed by V N Manoharan.

    Parameters
    ----------
    data_2d : ndarray of floats
        2D intensity distribution of the collected light
    N : integer
        number of lattice sites along one axis
    M : integer
        number of pixels per lattice site along one axis
    std : float
        Gaussian width of the point spread function
    lam_backg : integer
        expected value of the Poissonian background
    nsteps : integer
        number of steps taken by each walker in the pymc3 sampling
    nchains : integer
        number of walkers in the pymc3 sampling

    Returns
    -------
    traces : pymc3 MultiTrace
        An object that contains the samples.
    df : dataframe
        Samples converted into a dataframe object.
    """
    # x-pixel locations for one lattice site
    x = np.arange(-M / 2, M / 2)
    # X, Y meshgrid of pixel locations
    X, Y = np.meshgrid(x, x)  # noqa: N806

    # in a future generation, instead of passing N, use
    # opticalLatticeShape = tuple((np.array(pixel_grid.shape)/M).astype(int))

    with pm.Model() as mixture_model:  # noqa: F841
        # Prior
        # Use an informative prior for P based on what you would know in a
        # real experiment. A Uniform(0, 1) prior causes severe problems and
        # probably doesn't represent your true state of knowledge prior to
        # the experiment. A Gamma distribution (rather than a Normal) keeps
        # P positive so the sampler doesn't diverge; adjust the width to
        # match what you would know in a typical experiment.
        P = pm.Gamma('P', mu=0.5, sd=0.05)  # noqa: N806

        q = pm.Bernoulli('q', p=P, shape=(N, N), testval=np.ones((N, N)))

        # These also need more informative priors. Previously they were
        # Uniform, with limits determined by the data, but priors should be
        # based on what you know before the experiment, not on the data.
        # A Gamma distribution constrains the values to be positive; adjust
        # mu and sd to match what you would know before a typical experiment.
        aa = pm.Gamma('Aa', mu=3, sd=0.5)
        ab = pm.Gamma('Ab', mu=0.5, sd=0.1)

        # Again, Uniform priors replaced by Gamma priors.
        sigma_a = pm.Gamma('sigma_a', mu=1, sd=0.1)
        sigma_b = pm.Gamma('sigma_b', mu=1, sd=0.1)

        # Gamma rather than Normal keeps atom_std positive.
        # atom_std = pm.Normal('std', mu=std, sd=0.2)
        atom_std = pm.Gamma('std', mu=std, sd=0.1)

        # atom_back was removed as a parameter: the background in the
        # presence of an atom is assumed to be the same as without it.
        # If you want to keep it, don't use a Uniform prior.
        # atom_back = pm.Uniform('A_back', lower=0, upper=20)

        # Model (gaussian + uniform); the background amplitude is ab
        # rather than atom_back
        single_background = ab * np.ones((M, M))
        single_atom = aa * np.exp(
            -((X - 0)**2 + (Y - 0)**2) / (2 * atom_std**2)) \
            + ab * np.ones((M, M))
        atom = tt.slinalg.kron(q, single_atom)
        background = tt.slinalg.kron(1 - q, single_background)

        # Log-likelihood
        good_data = pm.Normal.dist(mu=atom, sd=sigma_a).logp(data_2d)
        bad_data = pm.Normal.dist(mu=background, sd=sigma_b).logp(data_2d)
        log_like = good_data + bad_data

        # A binomial log-likelihood term, using the normal approximation to
        # the binomial (please check the math). This term accounts for
        # deviations from the expected occupancy fraction: if the mean of
        # the q_i is significantly different from P, the configuration is
        # penalized.
        # This is why you shouldn't put a uniform prior on P.
        log_add = pm.Normal.dist(mu=P, tau=N * N / (P * (1 - P))).logp(q.mean())
        pm.Potential('logp', log_like.sum() + log_add)

        # Sample
        # Set the two sampling steps explicitly (rather than letting pymc3
        # pick them), so that each step can be tuned: binary Gibbs Metropolis
        # for q and NUTS for everything else. Note that if you add a variable
        # to the model, you should explicitly add it to the sampling step
        # below.
        steps = [
            pm.BinaryGibbsMetropolis([q], transit_p=0.8),
            pm.NUTS([atom_std, sigma_b, sigma_a, ab, aa, P],
                    target_accept=0.8)
        ]

        # sample from the log-likelihood, using the step methods above
        traces = pm.sample(tune=nsteps, draws=nsteps, step=steps,
                           chains=nchains)

    # convert the PyMC3 traces into a dataframe
    df = pm.trace_to_dataframe(traces)

    return traces, df
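# Hypothetical invocation sketch with synthetic data (shapes and parameter
# values made up; real usage would pass an actual camera image): a 4x4
# lattice with 10 pixels per site gives a 40x40 input image.
N_demo, M_demo = 4, 10
rng = np.random.default_rng(1)
fake_image = rng.normal(loc=0.5, scale=0.1, size=(N_demo * M_demo,
                                                  N_demo * M_demo))
traces, df = mixture_model_boolean_vnm(fake_image, N_demo, M_demo, std=1.0,
                                       lam_backg=1, nsteps=500, nchains=2)
print(df.describe())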
    # ... inside `with model:`; exposure, risk, dis_a, dis_b and the
    # deterministic probabilities dis_a_p, dis_b_p, sym_p are defined above
    # this excerpt
    sym = pm.Bernoulli('sym', sym_p, shape=1)

    ### If dis_a is true and dis_b is true, probability of test a = 0.97
    ### If dis_a is true and dis_b is false, probability of test a = 0.85
    ### If dis_a is false and dis_b is true, probability of test a = 0.2
    ### If dis_a is false and dis_b is false, probability of test a = 0.08
    test_a_p = pm.Deterministic(
        'test_a_p',
        pm.math.switch(dis_a,
                       pm.math.switch(dis_b, 0.97, 0.85),
                       pm.math.switch(dis_b, 0.2, 0.08)))
    test_a = pm.Bernoulli('test_a', test_a_p, shape=1)

    # Start MCMC
    trace = pm.sample(niter,
                      step=[pm.BinaryGibbsMetropolis(
                          [exposure, risk, dis_b, dis_a, sym, test_a])],
                      tune=tune, random_seed=123)

pm.summary(trace)  # Print MCMC statistics

# Extract info from the trace data structure into a dictionary
results_dict = {
    'Exposure': [1 if ii[0] else 0 for ii in trace['exposure'].tolist()],
    'Risk Factors': [1 if ii[0] else 0 for ii in trace['risk'].tolist()],
    'Disease A Prob': [ii[0] for ii in trace['dis_a_p'].tolist()],
    'Disease A': [1 if ii[0] else 0 for ii in trace['dis_a'].tolist()],
    'Disease B Prob': [ii[0] for ii in trace['dis_b_p'].tolist()],
    'Disease B': [1 if ii[0] else 0 for ii in trace['dis_b'].tolist()],
    'Sym Prob': [ii[0] for ii in trace['sym_p'].tolist()],
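# The excerpt ends mid-dictionary. Assuming it is completed in the same style
# (e.g., a 'Test A' column extracted like the others, which is hypothetical
# and not shown above) and that pandas is imported as pd as in the sprinkler
# example, posterior conditionals follow by filtering a sample DataFrame:
df = pd.DataFrame(results_dict)
p_dis_a_given_test = df[df['Test A'] == 1]['Disease A'].mean()
print(p_dis_a_given_test)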