def _noise_variance(hyper_params, verbose=0):
    dist_std_noise = hyper_params.dist_std_noise

    if dist_std_noise == 'tr_normal':
        h1 = pm.HalfNormal('h1', tau=1.0)
        h2 = pm.HalfNormal('h2', tau=1.0)
        if 10 <= verbose:
            print('Truncated normal for prior scales')
    elif dist_std_noise == 'log_normal':
        h1 = pm.Lognormal('h1', tau=1.0)
        h2 = pm.Lognormal('h2', tau=1.0)
        if 10 <= verbose:
            print('Log normal for prior scales')
    elif dist_std_noise == 'uniform':
        h1 = pm.Uniform('h1', upper=1.0)
        h2 = pm.Uniform('h2', upper=1.0)
        if 10 <= verbose:
            print('Uniform for prior scales')
    else:
        raise ValueError(
            "Invalid value of dist_std_noise: %s" % dist_std_noise)

    return h1, h2
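# Usage sketch for _noise_variance (an illustration, not from the source):
# the HalfNormal/Lognormal/Uniform variables register on the innermost model
# context, so the helper must be called inside a `with pm.Model()` block.
# The HyperParams stand-in below is hypothetical.
import pymc3 as pm

class HyperParams:
    dist_std_noise = 'log_normal'

with pm.Model():
    h1, h2 = _noise_variance(HyperParams(), verbose=10)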
def __init__(self, signal, err, init_period=None, predict_at=None, psd_at=None):
    super().__init__(signal, err, init_period)

    with pm.Model() as model:
        # The mean flux of the time series
        mean = pm.Normal("mean", mu=self.mean, sd=self.sigma)

        # A jitter term describing excess white noise
        log_jitter = pm.Normal("log_jitter", mu=np.log(self.jitter), sd=2.0)

        # The parameters of the BrownianTerm kernel
        sigma = pm.Lognormal("sigma", mu=np.log(self.sigma), sd=2.0)
        period = pm.Lognormal(
            "period", mu=np.log(self.init_period), sd=self.sigma_period
        )
        log_tau = pm.Uniform("log_tau", lower=0.0, upper=np.log(10))
        tau = pm.math.exp(log_tau) * period
        mix = pm.Uniform("mix", lower=0.0, upper=0.5)

        Q = 0.01
        sigma_1 = sigma * pm.math.sqrt(mix)
        f = pm.math.sqrt(1 - 4 * Q ** 2)
        w0 = 2 * Q / (tau * (1 - f))
        S0 = (1 - mix) * sigma ** 2 / (0.5 * w0 * Q * (1 + 1 / f))

        # Set up the Gaussian Process model
        kernel1 = celerite2.theano.terms.SHOTerm(sigma=sigma_1, tau=tau, rho=period)
        kernel2 = celerite2.theano.terms.SHOTerm(S0=S0, w0=w0, Q=Q)
        kernel = kernel1 + kernel2
        gp = celerite2.theano.GaussianProcess(kernel, mean=mean)
        gp.compute(self.t, diag=self.err ** 2 + pm.math.exp(log_jitter), quiet=True)
        gp.marginal("obs", observed=self.y)

        if predict_at is not None:
            pm.Deterministic("pred", gp.predict(self.y, predict_at))
        if psd_at is not None:
            pm.Deterministic("psd", kernel.get_psd(2 * np.pi * psd_at))

    self.model = model
def __init__(self, signal, err, init_period=None, predict_at=None, psd_at=None):
    super().__init__(signal, err, init_period)

    with pm.Model() as model:
        # The mean flux of the time series
        mean = pm.Normal("mean", mu=self.mean, sd=self.sigma)

        # A jitter term describing excess white noise
        log_jitter = pm.Normal("log_jitter", mu=np.log(self.jitter), sd=2.0)

        # The parameters of the RotationTerm kernel
        sigma = pm.Lognormal("sigma", mu=np.log(self.sigma), sd=2.0)
        period = pm.Lognormal(
            "period", mu=np.log(self.init_period), sd=self.sigma_period
        )
        Q0 = pm.Lognormal("Q0", mu=1.0, sd=5.0)
        dQ = pm.Lognormal("dQ", mu=2.0, sd=5.0)
        f = pm.Uniform("f", lower=0.0, upper=1.0)

        # Set up the Gaussian Process model
        kernel = celerite2.theano.terms.RotationTerm(
            sigma=sigma,
            period=period,
            Q0=Q0,
            dQ=dQ,
            f=f,
        )
        gp = celerite2.theano.GaussianProcess(kernel, mean=mean)
        gp.compute(self.t, diag=self.err ** 2 + pm.math.exp(log_jitter), quiet=True)
        gp.marginal("obs", observed=self.y)

        if predict_at is not None:
            pm.Deterministic("pred", gp.predict(self.y, predict_at))
        if psd_at is not None:
            pm.Deterministic("psd", kernel.get_psd(2 * np.pi * psd_at))

    self.model = model
def construct_model(config, tree, sequence_dict):
    topology = TreeTopology(tree)
    sequence_dict_encoded = pylo.transform.encode_sequences(sequence_dict)
    pattern_dict, pattern_counts = pylo.transform.group_sequences(
        sequence_dict_encoded)
    pattern_counts = tt.as_tensor_variable(pattern_counts)
    child_patterns = tt.as_tensor_variable(
        topology.build_sequence_table(pattern_dict))

    def get_lognormal_params(var):
        return {
            'mu': config['prior_params'][var]['m'],
            'sd': config['prior_params'][var]['s']
        }

    with pm.Model() as model:
        pop_size = pm.Lognormal('pop_size', **get_lognormal_params('pop_size'))
        pop_func = ConstantPopulationFunction(topology, pop_size)
        tree_heights = CoalescentTree('tree', topology, pop_func)

        kappa = pm.Lognormal('kappa', **get_lognormal_params('kappa'))
        pi = pm.Dirichlet('pi', a=np.ones(4))
        substitution_model = HKYSubstitutionModel(kappa, pi)

        branch_lengths = topology.get_child_branch_lengths(tree_heights)
        sequences = LeafSequences('sequences', topology, substitution_model,
                                  branch_lengths, child_patterns, pattern_counts)
    return model
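# The config dict is expected to carry lognormal prior parameters keyed by
# variable name, as read by get_lognormal_params above. A minimal
# illustrative example (the values are placeholders, not from the source):
config = {
    'prior_params': {
        'pop_size': {'m': 0.0, 's': 1.0},
        'kappa': {'m': 1.0, 's': 1.25},
    }
}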
def test_scalar_ode_1_param(self):
    """Test running model for a scalar ODE with 1 parameter"""

    def system(y, t, p):
        return np.exp(-t) - p[0] * y[0]

    times = np.array([
        0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5
    ])
    yobs = np.array([
        0.31, 0.57, 0.51, 0.55, 0.47, 0.42, 0.38, 0.3, 0.26, 0.22, 0.22, 0.14,
        0.14, 0.09, 0.1
    ])[:, np.newaxis]

    ode_model = DifferentialEquation(func=system, t0=0, times=times,
                                     n_states=1, n_theta=1)

    with pm.Model() as model:
        alpha = pm.HalfCauchy("alpha", 1)
        y0 = pm.Lognormal("y0", 0, 1)
        sigma = pm.HalfCauchy("sigma", 1)
        forward = ode_model(theta=[alpha], y0=[y0])
        y = pm.Lognormal("y", mu=pm.math.log(forward), sd=sigma, observed=yobs)

        trace = pm.sample(100, tune=0, chains=1)

    assert trace["alpha"].size > 0
    assert trace["y0"].size > 0
    assert trace["sigma"].size > 0
def v2_model(observations, nulls, null_sd, null_b, null_dispersed_prob,
             iter_count=2000, tune_iters=2000):
    with pm.Model() as model:
        # Probability of being a DE gene
        de_prob = pm.Beta('de_prob', alpha=1., beta=5.)

        # Probability of being downregulated
        down_prob = pm.Beta('down_prob', alpha=1., beta=1.)

        dispersed_prob = null_dispersed_prob

        mu_pos = pm.Lognormal('mu_pos', mu=-3, sd=1.)
        mu_neg = pm.Lognormal('mu_neg', mu=-3, sd=1.)
        sd_pos = pm.Gamma('sd_pos', alpha=0.01, beta=1.)
        sd_neg = pm.Gamma('sd_neg', alpha=0.01, beta=1.)
        nu_pos = pm.Gamma('nu_pos', alpha=5., beta=1.)
        nu_neg = pm.Gamma('nu_neg', alpha=5., beta=1.)

        spike_component = pm.Normal.dist(mu=0., sd=null_sd)
        slab_component = pm.Laplace.dist(mu=0., b=null_b)

        # Sample from a Gaussian-Laplace (spike-and-slab) mixture for the nulls
        pm.Mixture('null',
                   comp_dists=[spike_component, slab_component],
                   w=tt.as_tensor([1. - dispersed_prob, dispersed_prob]),
                   observed=nulls)

        pos_component = pm.Bound(pm.StudentT, lower=0.).dist(
            mu=mu_pos, sd=sd_pos, nu=nu_pos)
        neg_component = pm.Bound(pm.StudentT, upper=0.).dist(
            mu=-mu_neg, sd=sd_neg, nu=nu_neg)

        pm.Mixture('obs',
                   w=tt.as_tensor([(1. - de_prob) * (1. - dispersed_prob),
                                   (1. - de_prob) * dispersed_prob,
                                   de_prob * (1. - down_prob),
                                   de_prob * down_prob]),
                   comp_dists=[
                       spike_component,
                       slab_component,
                       pos_component,
                       neg_component
                   ],
                   observed=observations)

        pm.Deterministic('log_prob', model.logpt)

        for RV in model.basic_RVs:
            print(RV.name, RV.logp(model.test_point))

        trace = pm.sample(iter_count, tune=tune_iters, chains=4)
        ppc = pm.sample_ppc(trace, samples=iter_count, model=model)

    return {'trace': trace, 'ppc': ppc}
def build(self):
    """The PyMC model that incorporates Bayesian Statistics in order to
    store what the likelihood of the model is for a given point."""
    M = pm.Model()

    with M:
        kfwd, endo, activeEndo, kRec, kDeg, sortF = commonTraf(
            trafficking=self.traf)
        rxnrates = pm.Lognormal("rxn", sigma=0.5, shape=6)  # 6 reverse rxn rates for IL2/IL15
        nullRates = T.ones(4, dtype=np.float64)  # k27rev, k31rev, k33rev, k35rev
        Rexpr_2Ra = pm.Lognormal("Rexpr_2Ra", sigma=0.5, shape=1)  # Expression: IL2Ra
        Rexpr_2Rb = pm.Lognormal("Rexpr_2Rb", sigma=0.5, shape=1)  # Expression: IL2Rb
        Rexpr_15Ra = pm.Lognormal("Rexpr_15Ra", sigma=0.5, shape=1)  # Expression: IL15Ra
        Rexpr_gc = pm.Lognormal("Rexpr_gc", sigma=0.5, shape=1)  # Expression: gamma chain

        unkVec = T.concatenate(
            (kfwd, rxnrates, nullRates, endo, activeEndo, sortF, kRec, kDeg,
             Rexpr_2Ra, Rexpr_2Rb, Rexpr_gc, Rexpr_15Ra, nullRates * 0.0))

        # Fit the data based on dst15.calc for the given parameters
        Y_15 = self.dst15.calc(unkVec)
        # Bound the stderr to help force the fitting solution
        sd_15 = T.minimum(T.std(Y_15), 0.03)
        pm.Deterministic("Y_15", T.sum(T.square(Y_15)))
        pm.Normal("fitD_15", sigma=sd_15, observed=Y_15)  # experimentally derived stderr is used

        if self.traf:
            # Fit the data based on IL2Rb surface data
            Y_int = self.IL2Rb.calc(unkVec)
            sd_int = T.minimum(T.std(Y_int), 0.02)
            pm.Deterministic("Y_int", T.sum(T.square(Y_int)))
            pm.Normal("fitD_int", sigma=sd_int, observed=Y_int)

            # Fit the data using IL2Ra- cells
            Y_gc = self.gc.calc(unkVec)
            sd_gc = T.minimum(T.std(Y_gc), 0.02)
            pm.Deterministic("Y_gc", T.sum(T.square(Y_gc)))
            pm.Normal("fitD_gc", sigma=sd_gc, observed=Y_gc)

        # Save likelihood
        pm.Deterministic("logp", M.logpt)

    return M
def build_model(X1, X2, timeV, conv0=0.1, confl=None, apop=None, dna=None):
    """ Builds then returns the PyMC model. """
    assert X1.shape == X2.shape

    M = pm.Model()

    with M:
        conversions = conversionPriors(conv0)
        d, apopfrac = deathPriors(1)

        # Parameters for drugs 1, 2; assumed to be the same for both phenotypes
        hill = pm.Lognormal("hill", shape=2)
        IC50 = pm.Lognormal("IC50", shape=2)
        EmaxGrowth = pm.Beta("EmaxGrowth", 1.0, 1.0, shape=2)
        EmaxDeath = pm.Lognormal("EmaxDeath", -2.0, 0.5, shape=2)

        # E_con values; first death then growth
        GrowthCon = pm.Lognormal("GrowthCon", np.log10(0.03), 0.1)

        # Calculate the death rate
        death_rates = blissInteract(X1, X2, hill, IC50, EmaxDeath, justAdd=True)  # pylint: disable=unsubscriptable-object

        # Calculate the growth rate
        growth_rates = GrowthCon * (1 - blissInteract(X1, X2, hill, IC50, EmaxGrowth))  # pylint: disable=unsubscriptable-object
        pm.Deterministic("EmaxGrowthEffect", GrowthCon * EmaxGrowth)

        # Test the dimension of growth_rates
        growth_rates = T.opt.Assert("growth_rates did not match X1 size")(
            growth_rates, T.eq(growth_rates.size, X1.size))

        lnum, eap, deadapop, deadnec = theanoCore(timeV, growth_rates,
                                                  death_rates, apopfrac, d)

        # Test the size of lnum
        lnum = T.opt.Assert("lnum did not match X1*timeV size")(
            lnum, T.eq(lnum.size, X1.size * timeV.size))

        confl_exp, apop_exp, dna_exp = convSignal(lnum, eap, deadapop,
                                                  deadnec, conversions)

        # Compare to experimental observation
        if confl is not None:
            confl_obs = T.flatten(confl_exp - confl)
            pm.Normal("confl_fit", sd=T.std(confl_obs), observed=confl_obs)
            conflmean = T.mean(confl, axis=1)
            confl_exp_mean = T.mean(confl_exp, axis=1)
            pm.Deterministic("conflResid", (confl_exp_mean - conflmean) / conflmean[0])

        if apop is not None:
            apop_obs = T.flatten(apop_exp - apop)
            pm.Normal("apop_fit", sd=T.std(apop_obs), observed=apop_obs)

        if dna is not None:
            dna_obs = T.flatten(dna_exp - dna)
            pm.Normal("dna_fit", sd=T.std(dna_obs), observed=dna_obs)

    return M
def commonTraf(trafficking=True):
    """ Set the common trafficking parameter priors. """
    kfwd = pm.Lognormal("kfwd", mu=np.log(0.001), sigma=0.5, shape=1)

    if trafficking:
        endo = pm.Lognormal("endo", mu=np.log(0.1), sigma=0.1, shape=1)
        activeEndo = pm.Lognormal("activeEndo", sigma=0.1, shape=1)
        kRec = pm.Lognormal("kRec", mu=np.log(0.1), sigma=0.1, shape=1)
        kDeg = pm.Lognormal("kDeg", mu=np.log(0.01), sigma=0.2, shape=1)
        sortF = pm.Beta("sortF", alpha=12, beta=80, shape=1)
    else:
        # Assigning trafficking to zero to fit without trafficking
        endo = activeEndo = kRec = kDeg = T.zeros(1, dtype=np.float64)
        sortF = T.ones(1, dtype=np.float64) * 0.5

    return kfwd, endo, activeEndo, kRec, kDeg, sortF
def fit(self, X, B, T):
    n, k = X.shape

    with pymc3.Model() as m:
        # Weak prior for the regression coefficients
        beta_sd = pymc3.Exponential('beta_sd', 1.0)
        # Regression coefficients
        beta = pymc3.Normal('beta', mu=0, sd=beta_sd, shape=(k,))
        # Conversion rates for each example
        c = sigmoid(dot(X, beta))
        k = pymc3.Lognormal('k', mu=0, sd=1.0)  # Weak prior around k=1
        lambd = pymc3.Exponential('lambd', 0.1)  # Weak prior

        # PDF of Weibull: k * lambda * (x * lambda)^(k-1) * exp(-(t * lambda)^k)
        LL_observed = (log(c) + log(k) + log(lambd)
                       + (k - 1) * (log(T) + log(lambd)) - (T * lambd)**k)
        # CDF of Weibull: 1 - exp(-(t * lambda)^k)
        LL_censored = log((1 - c) + c * exp(-(T * lambd)**k))

        # We need to implement the likelihood using pymc3.Potential (custom likelihood)
        # https://github.com/pymc-devs/pymc3/issues/826
        logp = B * LL_observed + (1 - B) * LL_censored
        logpvar = pymc3.Potential('logpvar', logp.sum())

        self.trace = pymc3.sample(draws=500, tune=500,
                                  discard_tuned_samples=True, njobs=1)
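# Usage sketch with synthetic data (an illustration, not from the source).
# It assumes sigmoid/dot/log/exp above are the theano-aware versions (e.g.
# those in pymc3.math) and that fit() lives on some wrapper class, here the
# hypothetical WeibullConversionModel.
import numpy as np

n, k = 200, 3
X = np.random.randn(n, k)                    # covariates
T = 10.0 * np.random.weibull(1.0, size=n)    # observed / censoring times
B = (np.random.rand(n) < 0.3).astype(float)  # 1 = converted, 0 = censored

m = WeibullConversionModel()  # hypothetical class exposing fit() above
m.fit(X, B, T)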
def track_1var_2par_ode_ess(self):
    def freefall(y, t, p):
        return 2.0 * p[1] - p[0] * y[0]

    # Times for observation
    times = np.arange(0, 10, 0.5)
    y = np.array([
        -2.01, 9.49, 15.58, 16.57, 27.58, 32.26, 35.13, 38.07, 37.36, 38.83,
        44.86, 43.58, 44.59, 42.75, 46.9, 49.32, 44.06, 49.86, 46.48, 48.18
    ]).reshape(-1, 1)

    ode_model = pm.ode.DifferentialEquation(func=freefall, times=times,
                                            n_states=1, n_theta=2, t0=0)

    with pm.Model() as model:
        # Specify prior distributions for some of our model parameters
        sigma = pm.HalfCauchy("sigma", 1)
        gamma = pm.Lognormal("gamma", 0, 1)

        # If we know one of the parameter values, we can simply pass the value.
        ode_solution = ode_model(y0=[0], theta=[gamma, 9.8])
        # The ode_solution has a shape of (n_times, n_states)
        Y = pm.Normal("Y", mu=ode_solution, sd=sigma, observed=y)

        t0 = time.time()
        trace = pm.sample(500, tune=1000, chains=2, cores=2, random_seed=0)
        tot = time.time() - t0

    ess = pm.ess(trace)
    return np.mean([ess.sigma, ess.gamma]) / tot
def create_model(change_points, params_model):
    with cov19.Cov19Model(**params_model) as model:
        lambda_t_log = cov19.lambda_t_with_sigmoids(
            pr_median_lambda_0=0.4,
            pr_sigma_lambda_0=0.5,
            change_points_list=change_points,
        )
        mu = pm.Lognormal(name="mu", mu=np.log(1 / 8), sigma=0.2)
        pr_median_delay = 10

        prior_I = cov19.make_prior_I(lambda_t_log, mu,
                                     pr_median_delay=pr_median_delay)
        new_I_t = cov19.SIR(lambda_t_log, mu, pr_I_begin=prior_I)

        new_cases_inferred_raw = cov19.delay_cases(
            new_I_t,
            pr_median_delay=pr_median_delay,
            pr_median_scale_delay=0.3)

        new_cases_inferred = cov19.week_modulation(new_cases_inferred_raw)
        cov19.student_t_likelihood(new_cases_inferred)

    return model
def create_model(change_points, params_model):
    with cov19.Cov19Model(**params_model) as model:
        lambda_t_log = cov19.model.lambda_t_with_sigmoids(
            pr_median_lambda_0=0.4,
            pr_sigma_lambda_0=0.5,
            change_points_list=change_points,
            name_lambda_t="lambda_t",
        )
        mu = pm.Lognormal(name="mu", mu=np.log(1 / 8), sigma=0.2)
        pr_median_delay = 10

        prior_I = cov19.model.uncorrelated_prior_I(
            lambda_t_log=lambda_t_log, mu=mu, pr_median_delay=pr_median_delay)
        new_I_t = cov19.model.SIR(lambda_t_log=lambda_t_log, mu=mu,
                                  pr_I_begin=prior_I)

        new_cases = cov19.model.delay_cases(
            cases=new_I_t,
            pr_mean_of_median=pr_median_delay,
            name_cases="delayed_cases",
        )
        new_cases = cov19.model.week_modulation(new_cases, name_cases="new_cases")
        cov19.model.student_t_likelihood(new_cases)

    return model
def test_sample_find_MAP_does_not_modify_start():
    # see https://github.com/pymc-devs/pymc3/pull/4458
    with pm.Model():
        pm.Lognormal("untransformed")

        # make sure find_MAP does not modify the start dict
        start = {"untransformed": 2}
        pm.find_MAP(start=start)
        assert start == {"untransformed": 2}

        # make sure sample does not modify the start dict
        start = {"untransformed": 0.2}
        pm.sample(draws=10, step=pm.Metropolis(), tune=5, start=start, chains=3)
        assert start == {"untransformed": 0.2}

        # make sure sample does not modify the start when passed as a list of dicts
        start = [{"untransformed": 2}, {"untransformed": 0.2}]
        pm.sample(draws=10, step=pm.Metropolis(), tune=5, start=start, chains=2)
        assert start == [{"untransformed": 2}, {"untransformed": 0.2}]
def build_model(conv0, doses, timeV, expTable):
    """ Builds then returns the pyMC model. """
    growth_model = pm.Model()

    with growth_model:
        conversions = conversionPriors(conv0)
        d, apopfrac = deathPriors(len(doses))

        # Specify vectors of prior distributions
        # Growth rate
        div = pm.Uniform("div", lower=0.0, upper=0.035, shape=len(doses))

        # Rate of entering apoptosis or skipping straight to death
        deathRate = pm.Lognormal("deathRate", np.log(0.001), 0.5, shape=len(doses))

        lnum, eap, deadapop, deadnec = theanoCore(timeV, div, deathRate, apopfrac, d)

        # Convert model calculations to experimental measurement units
        confl_exp, apop_exp, dna_exp = convSignal(lnum, eap, deadapop,
                                                  deadnec, conversions)

        # Observed error values for confl
        confl_obs = T.reshape(confl_exp, (-1,)) - expTable["confl"]
        pm.Normal("dataFit", sd=T.std(confl_obs), observed=confl_obs)

        # Observed error values for apop
        apop_obs = T.reshape(apop_exp, (-1,)) - expTable["apop"]
        pm.Normal("dataFita", sd=T.std(apop_obs), observed=apop_obs)

        # Observed error values for dna
        dna_obs = T.reshape(dna_exp, (-1,)) - expTable["dna"]
        pm.Normal("dataFitd", sd=T.std(dna_obs), observed=dna_obs)

    return growth_model
def make_model(spec: ModelSpec, dat, basis, unfold, aPosterior):
    with pm.Model() as model:
        # Prior for alpha
        if isinstance(spec.alphaPrior, float):
            alpha = spec.alphaPrior
        elif isinstance(spec.alphaPrior, AlphaLognormal):
            alphaLN = aPosterior.lognormal(scale=spec.alphaPrior.scale)
            alpha = pm.Lognormal('alpha', mu=alphaLN.mu, sd=alphaLN.sig)
        elif spec.alphaPrior is None:
            alpha = pm.HalfFlat('alpha')
        else:
            raise Exception("Unknown prior for alpha")

        # Prior for phi
        nPhi = len(basis)
        om = unfold.omegas[0].mat
        chol = np.linalg.cholesky(np.linalg.inv(om))
        low = np.repeat(0, nPhi)
        if spec.phiPrior == "positive":
            phiDistr = pm.Bound(pm.MvNormal, lower=low)
        elif spec.phiPrior == "any":
            phiDistr = pm.MvNormal
        phi = phiDistr('phi', mu=np.zeros(nPhi),
                       chol=chol / np.sqrt(alpha), shape=nPhi)

        # Experimental data
        f = pm.Normal(
            'f',
            mu=pm.math.dot(unfold.K, phi),
            sd=dat['err'].values,
            shape=len(dat),
            observed=dat['cnt'].values,
        )
    return model
def _sample_pymc3(cls, dist, size):
    """Sample from PyMC3."""
    import pymc3

    pymc3_rv_map = {
        'BetaDistribution': lambda dist: pymc3.Beta(
            'X', alpha=float(dist.alpha), beta=float(dist.beta)),
        'CauchyDistribution': lambda dist: pymc3.Cauchy(
            'X', alpha=float(dist.x0), beta=float(dist.gamma)),
        'ChiSquaredDistribution': lambda dist: pymc3.ChiSquared(
            'X', nu=float(dist.k)),
        'ExponentialDistribution': lambda dist: pymc3.Exponential(
            'X', lam=float(dist.rate)),
        'GammaDistribution': lambda dist: pymc3.Gamma(
            'X', alpha=float(dist.k), beta=1 / float(dist.theta)),
        'LogNormalDistribution': lambda dist: pymc3.Lognormal(
            'X', mu=float(dist.mean), sigma=float(dist.std)),
        'NormalDistribution': lambda dist: pymc3.Normal(
            'X', float(dist.mean), float(dist.std)),
        'GaussianInverseDistribution': lambda dist: pymc3.Wald(
            'X', mu=float(dist.mean), lam=float(dist.shape)),
        'ParetoDistribution': lambda dist: pymc3.Pareto(
            'X', alpha=float(dist.alpha), m=float(dist.xm)),
        'UniformDistribution': lambda dist: pymc3.Uniform(
            'X', lower=float(dist.left), upper=float(dist.right)),
    }

    dist_list = pymc3_rv_map.keys()

    if dist.__class__.__name__ not in dist_list:
        return None

    with pymc3.Model():
        pymc3_rv_map[dist.__class__.__name__](dist)
        return pymc3.sample(size, chains=1, progressbar=False)[:]['X']
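# Dispatch sketch (illustrative, not from the source): the mapper keys on the
# class name of the incoming distribution object, so any object whose class
# name matches a key and carries the expected attributes will sample. The
# stand-in class below is hypothetical.
class LogNormalDistribution:
    mean = 0.0
    std = 1.0

# If _sample_pymc3 is a @classmethod on some backend class (the decorator is
# not shown above), the call would look like:
#     draws = Backend._sample_pymc3(LogNormalDistribution(), size=1000)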
def main():
    n = 4
    mu1 = np.ones(n) * (1. / 2)

    with pm.Model() as ATMIP_test:
        X = pm.Uniform('X',
                       shape=n,
                       lower=-2. * np.ones_like(mu1),
                       upper=2. * np.ones_like(mu1),
                       testval=-1. * np.ones_like(mu1))
        kd4 = pm.Lognormal('kd4', mu=np.log(0.3), sd=1)
        # k_d4 = pm.Lognormal('k_d4', mu=np.log(6E-3), sd=9)
        # kSOCSon = pm.Lognormal('kSOCSon', mu=np.log(1E-6), sd=2)
        # kpa = pm.Lognormal('kpa', mu=np.log(1E-6), sd=2)
        # R1 = pm.Uniform('R1', lower=900, upper=5000)
        # R2 = pm.Uniform('R2', lower=900, upper=5000)
        # gamma = pm.Uniform('gamma', lower=2, upper=30)
        llk = pm.Potential('llk', two_gaussians(X, kd4))

    with ATMIP_test:
        trace = pm.sample(100, chains=50, step=pm.SMC())

    plt.figure()
    pm.traceplot(trace)
    plt.savefig("mc_testing.pdf")

    s = pm.stats.summary(trace)
    s.to_csv('mcmc_parameter_summary.csv')
def create_model(change_points, params_model):
    with cov19.model.Cov19Model(**params_model) as model:
        # Create an array of the time-dependent infection rate lambda
        lambda_t_log = cov19.model.lambda_t_with_sigmoids(
            pr_median_lambda_0=0.4,
            pr_sigma_lambda_0=0.5,
            change_points_list=change_points,  # The change point priors we constructed earlier
            name_lambda_t="lambda_t",  # Name for the variable in the trace (see later)
        )

        # Set the prior distribution for the recovery rate
        mu = pm.Lognormal(name="mu", mu=np.log(1 / 8), sigma=0.01)

        # This builds a decorrelated prior for I_begin for faster inference.
        # It is not necessary to use it; one can simply remove it and use the
        # default argument for pr_I_begin in cov19.SIR.
        prior_I = cov19.model.uncorrelated_prior_I(
            lambda_t_log=lambda_t_log,
            mu=mu,
            pr_median_delay=10,
            name_I_begin="I_begin",
            name_I_begin_ratio_log="I_begin_ratio_log",
            pr_sigma_I_begin=2,
            n_data_points_used=5,
        )

        # Use lambda_t_log and mu to run the SIR model
        new_cases = cov19.model.SIR(
            lambda_t_log=lambda_t_log,
            mu=mu,
            name_new_I_t="new_I_t",
            name_I_t="I_t",
            name_I_begin="I_begin",
            pr_I_begin=prior_I,
        )

        # Delay the cases by a lognormal reporting delay
        new_cases = cov19.model.delay_cases(
            cases=new_cases,
            name_cases="delayed_cases",
            name_delay="delay",
            name_width="delay-width",
            pr_mean_of_median=10,
            pr_sigma_of_median=0.2,
            pr_median_of_width=0.5,
        )

        # Modulate the inferred cases by an abs(sin(x)) function to account for
        # weekend effects. Also adds the "new_cases" variable to the trace,
        # which has all model features.
        new_cases = cov19.model.week_modulation(
            cases=new_cases,
            name_cases="new_cases",
        )

        # Define the likelihood; uses the new_cases_obs set as a model parameter
        cov19.model.student_t_likelihood(new_cases)

    return model
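# Usage sketch (an illustration: the exact keys of change_points and
# params_model follow the covid19_inference package's examples and are
# assumptions here, not taken from the source above).
import datetime

change_points = [
    dict(pr_mean_date_transient=datetime.datetime(2020, 3, 9),
         pr_median_lambda=0.2,
         pr_sigma_lambda=0.5),
]
params_model = dict(
    new_cases_obs=new_cases_obs,  # array of observed daily case counts
    data_begin=datetime.datetime(2020, 3, 1),
    fcast_len=14,
    diff_data_sim=16,
    N_population=83e6,
)

model = create_model(change_points, params_model)
with model:
    trace = pm.sample(tune=500, draws=500)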
def test_vector_ode_1_param(self):
    """Test running model for a vector ODE with 1 parameter"""

    def system(y, t, p):
        ds = -p[0] * y[0] * y[1]
        di = p[0] * y[0] * y[1] - y[1]
        return [ds, di]

    times = np.array([0.0, 0.8, 1.6, 2.4, 3.2, 4.0, 4.8, 5.6, 6.4, 7.2, 8.0])
    yobs = np.array([
        [1.02, 0.02], [0.86, 0.12], [0.43, 0.37], [0.14, 0.42], [0.05, 0.43],
        [0.03, 0.14], [0.02, 0.08], [0.02, 0.04], [0.02, 0.01], [0.02, 0.01],
        [0.02, 0.01],
    ])

    ode_model = DifferentialEquation(func=system, t0=0, times=times,
                                     n_states=2, n_odeparams=1)

    with pm.Model() as model:
        R = pm.Lognormal("R", 1, 5)
        sigma = pm.HalfCauchy("sigma", 1, shape=2)
        forward = ode_model(odeparams=[R], y0=[0.99, 0.01]).reshape(yobs.shape)
        y = pm.Lognormal("y", mu=pm.math.log(forward), sd=sigma, observed=yobs)

        trace = pm.sample(100, tune=0, chains=1)

    assert trace["R"].size > 0
    assert trace["sigma"].size > 0
def deathPriors(numApop):
    """ Setup priors for cell death parameters. """
    # Rate of moving from apoptosis to death, assumed invariant wrt. treatment
    d = pm.Lognormal("d", np.log(0.01), 0.5)

    # Fraction of dying cells that go through apoptosis
    apopfrac = pm.Beta("apopfrac", 1.0, 1.0, shape=numApop)

    return d, apopfrac
def nurse_model_p1(df):
    xo = df['pos'].values.copy()
    yo = df['Ns_size'].values.copy()

    ind, _ = pmu.multilabel_encoder(df, ['i_ind'])
    nind = len(np.unique(ind))
    trt, _ = pmu.multilabel_encoder(df, ['i_treatday'])
    ntrt = len(np.unique(trt))
    trt_in_ind = pmu.embeded_index(df, 'i_ind', 'i_treatday')

    with pm.Model() as ns_p1_mod:
        # single
        Vmax_n_mu = pm.Normal('Vmax_n_mu', mu=np.log(80), sd=0.15)
        Vmax_n_l = pm.Deterministic('Vmax_n_l', Vmax_n_mu * tt.ones(nind))
        Vmax_n = pm.Deterministic('Vmax_n', tt.exp(Vmax_n_l))

        r_n_mu = pm.Normal('r_n_mu', mu=np.log(3), sd=0.25)  # location of grand mean
        r_n_sd = pm.HalfNormal('r_n_sd', sd=0.1)
        r_n_vn = pm.Normal('r_n_vn', mu=0, sd=1, shape=nind)
        r_n_l = pm.Deterministic('r_n_l', r_n_mu + r_n_sd * r_n_vn)
        r_n = pm.Deterministic('r_n', tt.exp(r_n_l))

        t_n_mu = pm.Normal('t_n_mu', mu=3, sd=3)
        t_n_sd = pm.HalfNormal('t_n_sd', sd=3)
        t_n_vn = pm.Normal('t_n_vn', mu=0, sd=1, shape=nind)
        t_n = pm.Deterministic('t_n', t_n_mu + t_n_sd * t_n_vn)

        r_d_mu = pm.Normal('r_d_mu', mu=np.log(0.85), sd=0.20)  # location of grand mean
        r_d_sd = pm.HalfNormal('r_d_sd', sd=0.25)
        r_d_vn = pm.Normal('r_d_vn', mu=0, sd=1, shape=nind)
        r_d_l = pm.Deterministic('r_d_l', r_d_mu + r_d_sd * r_d_vn)
        r_d = pm.Deterministic('r_d', tt.exp(r_d_l))

        t_d_mu = pm.Normal('t_d_mu', mu=np.log(1), sd=0.25)
        t_d_sd = pm.HalfNormal('t_d_sd', sd=0.25)
        t_d_vn = pm.Normal('t_d_vn', mu=0, sd=1, shape=nind)
        t_d = pm.Deterministic('t_d', -2.50 + tt.exp(t_d_mu + t_d_sd * t_d_vn))

        Vmin_n_mu = pm.Normal('Vmin_n_mu', mu=np.log(10), sd=0.25)
        Vmin_n = pm.Deterministic('Vmin_n', tt.exp(Vmin_n_mu) * tt.ones(nind))

        nu_n = pm.Deterministic('nu_n', tt.ones(nind))
        nu_d = pm.Deterministic('nu_d', tt.ones(nind))

        f = bu.rise_and_fall_r(
            xo, Vmax_n[ind], t_n[ind], r_n[ind], nu_n[ind],
            t_d[ind], r_d[ind], nu_d[ind], Vmin_n[ind])

        sdo_o = pm.Lognormal('sdo_o', mu=np.log(5.), sd=0.25)
        yobs = pm.Normal('yobs', f, sd=sdo_o, observed=yo)

    return ns_p1_mod
def graded_response_model(dataset, n_categories):
    """Defines the mcmc model for the graded response model.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        n_categories: number of polytomous values (i.e. number of Likert levels)

    Returns:
        model: PyMC3 model to run
    """
    n_items, n_people = dataset.shape
    n_levels = n_categories - 1

    # Need a small deviation in offset to fit into the pymc framework
    mu_value = linspace(-0.1, 0.1, n_levels)

    # Run through 0, K - 1
    observed = dataset - dataset.min()

    graded_mcmc_model = pm.Model()

    with graded_mcmc_model:
        # Ability Parameters
        ability = pm.Normal("Ability", mu=0, sigma=1, shape=n_people)

        # Discrimination multilevel prior
        rayleigh_scale = pm.Lognormal("Rayleigh_Scale", mu=0, sigma=1 / 4, shape=1)
        discrimination = pm.Bound(Rayleigh, lower=0.25)(
            name='Discrimination', beta=rayleigh_scale, offset=0.25, shape=n_items)

        # Threshold multilevel prior
        sigma_difficulty = pm.HalfNormal('Difficulty_SD', sigma=1, shape=1)
        for ndx in range(n_items):
            thresholds = pm.Normal(
                f"Thresholds{ndx}",
                mu=mu_value,
                sigma=sigma_difficulty,
                shape=n_levels,
                transform=pm.distributions.transforms.ordered)

            # Compute the log likelihood
            kernel = discrimination[ndx] * ability
            probabilities = pm.OrderedLogistic(
                f'Log_Likelihood{ndx}',
                cutpoints=thresholds,
                eta=kernel,
                observed=observed[ndx])

    return graded_mcmc_model
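# Usage sketch with synthetic Likert data (an illustration, not from the
# source; sampling settings are arbitrary defaults):
import numpy as np

n_items, n_people, n_categories = 5, 100, 4
dataset = np.random.randint(1, n_categories + 1, size=(n_items, n_people))

model = graded_response_model(dataset, n_categories)
with model:
    trace = pm.sample(draws=1000, tune=1000)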
def build(self):
    with pm.Model() as model:
        w = pm.Lognormal('lengthscale', 0, 4)
        h2 = pm.Lognormal('variance', 0, 4)
        # sigma = pm.Lognormal('sigma', 0, 4)
        p = pm.Lognormal('p', 5, 4)

        f_cov = h2 * pm.gp.cov.Periodic(1, period=p, ls=w)
        gp = pm.gp.Latent(cov_func=f_cov)
        f = gp.prior('f', X=self.X_train)

        s2 = pm.Lognormal('Gaussian_noise', -4, 4)
        y_ = pm.StudentT('y', mu=f, nu=s2, observed=self.Y_train)

        # start = pm.find_MAP()
        step = pm.Metropolis()
        db = pm.backends.Text('trace')
        trace = pm.sample(2000, step, chains=1, njobs=1)  # start=start

    pm.traceplot(trace, varnames=['lengthscale', 'variance', 'Gaussian_noise'])
    plt.show()
    return trace
def build(self):
    """The PyMC model that incorporates Bayesian Statistics in order to
    store what the likelihood of the model is for a given point."""
    M = pm.Model()

    with M:
        kfwd, endo, activeEndo, kRec, kDeg, sortF = commonTraf()
        nullRates = T.ones(6, dtype=np.float64)  # associated with IL2 and IL15
        Tone = T.ones(1, dtype=np.float64)
        k27rev = pm.Lognormal("k27rev", mu=np.log(0.1), sigma=1, shape=1)  # associated with IL7
        k33rev = pm.Lognormal("k33rev", mu=np.log(0.1), sigma=1, shape=1)  # associated with IL4

        # Constant according to measured number per cell: gc, blank, IL7R, blank, IL4R
        Rexpr = (np.array([0.0, 0.0, 328.0, 0.0, 2591.0, 0.0, 254.0, 0.0]) * endo) / (
            1.0 + ((kRec * (1.0 - sortF)) / (kDeg * sortF)))

        # Indexing same as in model.hpp
        unkVec = T.concatenate(
            (kfwd, nullRates, k27rev, Tone, k33rev, Tone, endo, activeEndo,
             sortF, kRec, kDeg, Rexpr))

        # Fit the data based on act.calc for the given parameters
        self.act.calc(unkVec, M)

        if self.pretreat is True:
            Y_cross = self.cross.calc(unkVec)  # fit the data based on cross.calc
            pm.Deterministic("Y_cross", T.sum(T.square(Y_cross)))
            sd_cross = T.minimum(T.std(Y_cross), 0.1)
            # The stderr is definitely less than 0.2
            pm.Normal("fitD_cross", sigma=sd_cross, observed=Y_cross)

        # Save likelihood
        pm.Deterministic("logp", M.logpt)

    return M
def build_model(self):
    """ Builds then returns the pyMC model. """
    M = pm.Model()

    with M:
        # The three values here are div and deathrate
        # Assume just one IC50 for simplicity
        lIC50 = pm.Normal("IC50s", 2.0)

        Emin_growth = pm.Uniform("Emin_growth", lower=0.0, upper=self.Emax_growth)
        Emax_death = pm.Lognormal("Emax_death", -2.0, 2.0)

        # Import drug concentrations into theano vector
        drugCs = T._shared(self.drugCs)

        # Drug term since we're using constant IC50 and hill slope
        drugTerm = 1.0 / (1.0 + T.pow(10.0, (lIC50 - drugCs) * pm.Lognormal("hill")))

        # Do actual conversion to parameters for each drug condition
        growthV = self.Emax_growth + (Emin_growth - self.Emax_growth) * drugTerm

        # Calculate the growth rate
        # _Assuming deathrate in the absence of drug is zero
        GR = growthV - Emax_death * drugTerm

        # Calculate the number of live cells
        lnum = T.exp(GR * self.time)

        # Normalize live cell data to control, as is similar to measurements
        # Residual between model prediction and measurement
        residual = self.lObs - (lnum / lnum[0])
        pm.Normal("dataFitlnum", sd=T.std(residual), observed=residual)

    return M
def multidimensional_twopl_model(dataset, n_factors):
    """Defines the mcmc model for multidimensional 2PL logistic estimation.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        n_factors: (int) number of factors to extract

    Returns:
        model: PyMC3 model to run
    """
    if n_factors < 2:
        raise AssertionError("Multidimensional 2PL model requires "
                             "two or more factors specified!")

    n_items, n_people = dataset.shape
    observed = dataset.astype('int')

    diagonal_indices, lower_indices = get_discrimination_indices(n_items, n_factors)
    lower_length = lower_indices[0].shape[0]

    twopl_pymc_model = pm.Model()

    with twopl_pymc_model:
        # Ability Parameters (Standardized Normal)
        ability = pm.Normal("Ability", mu=0, sigma=1, shape=(n_factors, n_people))

        # Difficulty multilevel prior
        sigma_difficulty = pm.HalfNormal('Difficulty_SD', sigma=1, shape=1)
        difficulty = pm.Normal("Difficulty", mu=0, sigma=sigma_difficulty,
                               shape=n_items)

        # The main diagonal must be non-negative
        discrimination = tt.zeros((n_items, n_factors), dtype=theano.config.floatX)
        diagonal_discrimination = pm.Lognormal('Diagonal Discrimination',
                                               mu=0, sigma=0.25, shape=n_factors)
        lower_discrimination = pm.Normal('Lower Discrimination', sigma=1,
                                         shape=lower_length)
        discrimination = tt.set_subtensor(discrimination[diagonal_indices],
                                          diagonal_discrimination)
        discrimination = tt.set_subtensor(discrimination[lower_indices],
                                          lower_discrimination)

        # Compute the probabilities
        kernel = pm.math.dot(discrimination, ability)
        kernel += difficulty[:, None]
        probabilities = pm.Deterministic("PL_Kernel", pm.math.invlogit(kernel))

        # Compute the log likelihood
        log_likelihood = pm.Bernoulli("Log_Likelihood", p=probabilities,
                                      observed=observed)

    return twopl_pymc_model
def make_model(scale=1e-3, dims=slice(None), prior=10):
    if is_multico is False:
        A = data['A'][index, dims]
        Ainv = data['Ainv'][dims, index]
        b = data['b'][index]
        s_Ainv = theano.shared(Ainv)
        s_A = theano.shared(A)
        s_b = theano.shared(b)
        samp_mapping, dev_start = project(t_samp, A, Ainv, b)

    with pm.Model() as model:
        decomp = pm.Dirichlet('decomp', ct_prior * prior, shape=ct_prior.shape,
                              testval=ct_start)  # , transform=StickBreaking5(ct_prior.shape[0])
        ct_expr = list()
        if samp_scale is True:
            scale = pm.Lognormal('scale', testval=10)

        for i, cell_type in enumerate(cell_types):
            if cell_type == 'background':
                dev_samp = pm.Normal('comb ' + cell_type,
                                     mu=background['mean'][index],
                                     sigma=background['std'][index] / scale,
                                     shape=(1, n_features), testval=t_samp)
                ct_expr.append(dev_samp)
                continue

            if is_multico is True:
                A = chars[cell_type]['A'][index, dims]
                Ainv = chars[cell_type]['Ainv'][dims, index]
                b = chars[cell_type]['b'][index]
                s_Ainv = theano.shared(Ainv)
                s_A = theano.shared(A)
                s_b = theano.shared(b)
                samp_mapping, dev_start = project(t_samp, A, Ainv, b)
                n = A.shape[1]
                samp = pm.Normal(cell_type, sigma=scale, shape=(1, n))
            else:
                samp = pm.MvNormal(cell_type,
                                   chars[cell_type]['mean'][dims],
                                   cov=chars[cell_type]['sigma'][dims, dims] * scale,
                                   shape=(1, A.shape[1]),
                                   testval=chars[cell_type]['mean'][dims])

            if sample_deviation is True:
                deviation = pm.Normal('deviation ' + cell_type,
                                      mu=var[cell_type]['mean'][index],
                                      sigma=var[cell_type]['std'][index] * scale,
                                      shape=(1, n_features), testval=dev_start)
            else:
                deviation = theano.shared(dev_start)

            dev_samp = pm.Deterministic('comb ' + cell_type,
                                        combine_and_embed(samp, deviation,
                                                          s_A, s_Ainv, s_b))
            ct_expr.append(dev_samp)

        ct_expr = tt.concatenate(ct_expr, axis=0)
        transcriptome = pm.Deterministic('trans', mix(ct_expr, decomp))
        pot = pm.Potential('obs', dist.logp(transcriptome))
        # obs = pm.Multinomial('obs', seq_depth, transcriptome, observed=sample, dtype='int64')

    return model
def model_gp(self):
    """ TODO - Need to describe what is happening here.

    Complete docs when model is settled on. Probably quite long docs are
    needed to explain.
    """
    warnings.warn('This model is developmental - use carefully')

    dnu = 10**self.asy_result['summary'].loc['mean'].dnu
    self.pm_model = pm.Model()

    dnu_fac = 0.03  # Prior on mode frequency has width 3% of Dnu.
    height_fac = 0.4  # Lognormal prior on height has std=0.4.
    back_fac = 0.5  # Lognormal prior on back has std=0.5.

    with self.pm_model:
        l0 = pm.Normal('l0', self.start['l0'], dnu * dnu_fac,
                       shape=len(self.start['l0']))
        l2 = pm.Normal('l2', self.start['l2'], dnu * dnu_fac,
                       shape=len(self.start['l2']))

        # Place a GP over the l=0 mode widths ...
        m0 = pm.Normal('gradient0', 0, 10)
        c0 = pm.Normal('intercept0', 0, 10)
        sigma0 = pm.Lognormal('sigma0', np.log(1.0), 1.0)
        ls = pm.Lognormal('ls', np.log(0.3), 1.0)
        mean_func0 = pm.gp.mean.Linear(coeffs=m0, intercept=c0)
        cov_func0 = sigma0 * pm.gp.cov.ExpQuad(1, ls=ls)
        self.gp0 = pm.gp.Latent(cov_func=cov_func0, mean_func=mean_func0)
        ln_width0 = self.gp0.prior('ln_width0', X=self.n)
        width0 = pm.Deterministic('width0', pm.math.exp(ln_width0))

        # ... and on the l=2 mode widths
        m2 = pm.Normal('gradient2', 0, 10)
        c2 = pm.Normal('intercept2', 0, 10)
        sigma2 = pm.Lognormal('sigma2', np.log(1.0), 1.0)
        mean_func2 = pm.gp.mean.Linear(coeffs=m2, intercept=c2)
        cov_func2 = sigma2 * pm.gp.cov.ExpQuad(1, ls=ls)
        self.gp2 = pm.gp.Latent(cov_func=cov_func2, mean_func=mean_func2)
        ln_width2 = self.gp2.prior('ln_width2', X=self.n)
        width2 = pm.Deterministic('width2', pm.math.exp(ln_width2))

        # Carry on
        height0 = pm.Lognormal('height0', np.log(self.start['height0']),
                               height_fac, shape=len(self.start['l0']))
        height2 = pm.Lognormal('height2', np.log(self.start['height2']),
                               height_fac, shape=len(self.start['l2']))
        back = pm.Lognormal('back', np.log(1.0), back_fac,
                            shape=len(self.start['l2']))

        limit = self.model(l0, l2, width0, width2, height0, height2, back)
        yobs = pm.Gamma('yobs', alpha=1, beta=1.0 / limit, observed=self.ladder_p)
def conversionPriors(conv0):
    """ Sets the various fluorescence conversion priors. """
    # Set up conversion rates
    confl_conv = pm.Lognormal("confl_conv", np.log(conv0), 0.1)
    apop_conv = pm.Lognormal("apop_conv", np.log(conv0) - 2.06, 0.1)
    dna_conv = pm.Lognormal("dna_conv", np.log(conv0) - 1.85, 0.1)

    # Priors on conv factors
    pm.Lognormal("confl_apop", -2.06, 0.0647, observed=apop_conv / confl_conv)
    pm.Lognormal("confl_dna", -1.85, 0.125, observed=dna_conv / confl_conv)
    pm.Lognormal("apop_dna", 0.222, 0.141, observed=dna_conv / apop_conv)

    # Offset values for apop and dna
    apop_offset = pm.Lognormal("apop_offset", np.log(0.1), 0.1)
    dna_offset = pm.Lognormal("dna_offset", np.log(0.1), 0.1)

    return ((confl_conv, apop_conv, dna_conv), (apop_offset, dna_offset))
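# Prior helpers like conversionPriors and deathPriors register their random
# variables on the innermost model context, so they must be called inside a
# `with pm.Model()` block. A minimal sketch (conv0=0.1 follows the default
# used in build_model above; numApop=3 is an arbitrary illustration):
with pm.Model() as demo_model:
    conversions, offsets = conversionPriors(0.1)
    d, apopfrac = deathPriors(3)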