def test_zeroinflatedpoisson(self):
    """Prior-predictive draws from a zero-inflated Poisson model have the expected shapes."""
    n_draws = 5000
    n_suppliers = 20

    with pm.Model():
        theta = pm.Beta("theta", alpha=1, beta=1)
        psi = pm.HalfNormal("psi", sd=1)
        pm.ZeroInflatedPoisson("suppliers", psi=psi, theta=theta, shape=n_suppliers)
        gen_data = pm.sample_prior_predictive(samples=n_draws)

    # Scalar priors yield one value per draw; the vector RV adds its own axis.
    assert gen_data["theta"].shape == (n_draws,)
    assert gen_data["psi"].shape == (n_draws,)
    assert gen_data["suppliers"].shape == (n_draws, n_suppliers)

def test_n_obj_mc(self):
    """Fitting with ``obj_n_mc=10`` on a Beta-Binomial model runs without raising.

    ``self.inference`` and ``self.optimizer`` are supplied by the test class.
    """
    n_samples = 100
    observed_xs = np.random.binomial(n=1, p=0.2, size=n_samples)

    with pm.Model():
        success_prob = pm.Beta('p', alpha=1, beta=1)
        pm.Binomial('xs', n=1, p=success_prob, observed=observed_xs)
        inference = self.inference(scale_cost_to_minibatch=True)
        # No assertion on the result: the test only requires that fit() succeeds.
        inference.fit(10, obj_n_mc=10, obj_optimizer=self.optimizer)

def deathPriors(numApop):
    """Set up the priors for the cell-death parameters.

    Registers two random variables, ``"d"`` and ``"apopfrac"``, with the
    PyMC model that is active when this function is called.

    :param numApop: shape passed to the ``apopfrac`` prior.
    :return: tuple ``(d, apopfrac)`` of the created random variables.
    """
    # Rate of moving from apoptosis to death, assumed invariant wrt. treatment
    death_rate = pm.Lognormal("d", np.log(0.001), 0.5)

    # Fraction of dying cells that go through apoptosis
    apoptosis_fraction = pm.Beta("apopfrac", alpha=1.0, beta=1.0, shape=numApop)

    return death_rate, apoptosis_fraction

def hierarchical_beta(name, name_sigma, pr_mean, pr_sigma, len_L2):
    """Create a Beta prior, optionally with a second hierarchical level.

    When ``len_L2`` is falsy a single Beta variable called ``name`` is created.
    Otherwise a level-1 Beta (``name + "_L1"``), a HalfCauchy spread
    (``name_sigma + "_L2"``) and a level-2 Beta of shape ``len_L2``
    (``name + "_L2"``) are created.

    :return: ``(Y, X)`` -- ``Y`` is the variable to use downstream, ``X`` is
        the level-1 variable or ``None`` in the non-hierarchical case.
    """
    # Both levels share the same (mean, sigma) -> (alpha, beta) mapping.
    prior_alpha = pr_mean / pr_sigma
    prior_beta = 1 / pr_sigma * (1 - pr_mean)

    if not len_L2:
        # not hierarchical
        return pm.Beta(name, alpha=prior_alpha, beta=prior_beta), None

    spread = pm.HalfCauchy(name_sigma + "_L2", beta=pr_sigma)
    level_one = pm.Beta(name + "_L1", alpha=prior_alpha, beta=prior_beta)
    level_two = pm.Beta(
        name + "_L2",
        alpha=level_one / spread,
        beta=1 / spread * (1 - level_one),
        shape=len_L2,
    )
    return level_two, level_one

def test_zeroinflatedpoisson(self):
    """Check the shapes of prior-predictive samples for a zero-inflated Poisson model."""
    model = pm.Model()
    with model:
        theta = pm.Beta('theta', alpha=1, beta=1)
        psi = pm.HalfNormal('psi', sd=1)
        pm.ZeroInflatedPoisson('suppliers', psi=psi, theta=theta, shape=20)
        gen_data = pm.sample_prior_predictive(samples=5000)

    # Check in the same order as the variables were declared.
    expected_shapes = [
        ('theta', (5000, )),
        ('psi', (5000, )),
        ('suppliers', (5000, 20)),
    ]
    for var_name, shape in expected_shapes:
        assert gen_data[var_name].shape == shape

def test_pymc3_convert_dists():
    """Just a basic check that all PyMC3 RVs will convert to and from Theano RVs."""
    tt.config.compute_test_value = "ignore"
    theano.config.cxx = ""

    # One observed RV per supported distribution; only their registration in
    # `model` matters, so the return values are not bound to names.
    with pm.Model() as model:
        pm.Normal("norm_rv", 0.0, 1.0, observed=1.0)
        pm.MvNormal("mvnorm_rv", np.r_[0.0], np.c_[1.0], shape=1, observed=np.r_[1.0])
        pm.Cauchy("cauchy_rv", 0.0, 1.0, observed=1.0)
        pm.HalfCauchy("halfcauchy_rv", 1.0, observed=1.0)
        pm.Uniform("uniform_rv", observed=1.0)
        pm.Gamma("gamma_rv", 1.0, 1.0, observed=1.0)
        pm.InverseGamma("invgamma_rv", 1.0, 1.0, observed=1.0)
        pm.Exponential("exp_rv", 1.0, observed=1.0)
        pm.HalfNormal("halfnormal_rv", 1.0, observed=1.0)
        pm.Beta("beta_rv", 2.0, 2.0, observed=1.0)
        pm.Binomial("binomial_rv", 10, 0.5, observed=5)
        pm.Dirichlet("dirichlet_rv", np.r_[0.1, 0.1], observed=np.r_[0.1, 0.1])
        pm.Poisson("poisson_rv", 10, observed=5)
        pm.Bernoulli("bernoulli_rv", 0.5, observed=0)
        pm.BetaBinomial("betabinomial_rv", 0.1, 0.1, 10, observed=5)
        pm.Categorical("categorical_rv", np.r_[0.5, 0.5], observed=1)
        pm.Multinomial("multinomial_rv", 5, np.r_[0.5, 0.5], observed=np.r_[2])

    # Convert to a Theano `FunctionGraph`
    fgraph = model_graph(model)

    rvs_by_name = {
        n.owner.inputs[1].name: n.owner.inputs[1] for n in fgraph.outputs
    }

    pymc_rv_names = {n.name for n in model.observed_RVs}
    assert all(
        isinstance(rvs_by_name[n].owner.op, RandomVariable)
        for n in pymc_rv_names)

    # Now, convert back to a PyMC3 model
    pymc_model = graph_model(fgraph)

    new_pymc_rv_names = {n.name for n in pymc_model.observed_RVs}
    # The original compared the two sets without asserting, so the
    # round-trip check was silently discarded.
    assert pymc_rv_names == new_pymc_rv_names

def ab_test(obs, return_p, dnase_sense, dnase_antisense, naoh_sense, naoh_antisense):
    '''
    Model an empirical beta distribution and use it as a prior,
    add new evidence (NaOH or DNase),
    sample the strand delta and
    calculate a bayes factor for DNase > NaOH by at least 10%.

    :param obs: observations used to fit the empirical Beta ('beta_prior').
    :param return_p: if truthy, return ``(bayes_factor, mean_delta)`` and
        suppress the sampler progress bar; otherwise return the trace.
    :param dnase_sense, dnase_antisense: values added to the alpha / beta
        parameters of the 'dnase_sense' Beta.
    :param naoh_sense, naoh_antisense: values added to the alpha / beta
        parameters of the 'naoh_sense' Beta.
    '''
    with pm.Model():
        # fit beta binom
        naoh_empirical_alpha = pm.Exponential('alpha', 1)
        naoh_empirical_beta = pm.Exponential('beta', 1)
        pm.Beta('beta_prior', naoh_empirical_alpha, naoh_empirical_beta,
                observed=obs)

        alpha = pm.Normal('alpha1', mu=naoh_empirical_alpha, sd=1)
        beta = pm.Normal('beta1', mu=naoh_empirical_beta, sd=1)

        # inference
        dnase_rate = pm.Beta('dnase_sense',
                             alpha=alpha + dnase_sense,
                             beta=beta + dnase_antisense)
        naoh_rate = pm.Beta('naoh_sense',
                            alpha=alpha + naoh_sense,
                            beta=beta + naoh_antisense)
        pm.Deterministic('delta', dnase_rate - naoh_rate)

        step = pm.NUTS()
        # Previously computed but never passed on (progressbar=True was
        # hard-coded), so the bar was always shown.
        progressbar = not return_p
        trace = pm.sample(1000, step, tune=1000, progressbar=progressbar, cores=24)

    if not return_p:
        return trace

    delta = trace['delta']
    # H1: DNase exceeds NaOH by at least 0.1; H0: DNase does not exceed NaOH.
    h1 = np.sum(delta >= 0.1)
    h0 = np.sum(delta <= 0)
    p_h0 = h0 / len(delta)
    p_h1 = h1 / len(delta)
    # NOTE(review): when no draw supports H0 the factor falls back to EPSILON
    # (defined elsewhere in the file). If EPSILON is a small number this
    # reports weak evidence exactly when H0 has no support -- confirm intent.
    bf = p_h1 / p_h0 if p_h0 > 0 else EPSILON
    return bf, delta.mean()

def stickbreak_prior(name, a, shape):
    """truncated stick-breaking construction

    Creates Gamma variables ``gamma_<name>`` and ``delta_<name>`` plus
    ``shape`` Beta variables ``beta_prime_<name>_<k>``, and returns
    ``GEM(stacked betas) * delta``. ``GEM`` is defined outside this block.
    """
    gamma = pm.Gamma('gamma_{}'.format(name), 1., 1.)
    delta = pm.Gamma('delta_{}'.format(name), 1., a)

    # One stick fraction per truncation level, created in index order.
    stick_fractions = []
    for k in range(shape):
        fraction = pm.Beta('beta_prime_{}_{}'.format(name, k), 1., gamma)
        stick_fractions.append(fraction)
    beta_prime = tt.stack(stick_fractions)

    weights = GEM(beta_prime)
    return weights * delta

def __init__(self, bandit, policy, ts=True):
    """Set up the agent with a Beta prior over the arms.

    :param bandit: bandit instance; its ``n`` attribute is copied to ``self.n``.
    :param policy: forwarded to the parent initialiser.
    :param ts: flag stored on ``self.ts``.
    """
    super(BetaAgent, self).__init__(bandit, policy)
    self.n = bandit.n
    self.ts = ts

    # self.k is not set in this method -- presumably by the parent __init__.
    n_arms = self.k
    self.model = pm.Model()
    with self.model:
        self._prior = pm.Beta(
            'prior',
            alpha=np.ones(n_arms),
            beta=np.ones(n_arms),
            shape=(1, n_arms),
            transform=None,
        )
    self._value_estimates = np.zeros(n_arms)

def sample_posterior(self, t, T, n_samp, n_burnin=None):
    """
    Get samples from the posterior, e.g. for posterior inference or computing Bayesian credible intervals.
    This routine samples via the random walk Metropolis (RWM) algorithm using the ``pymc3`` library.

    The function returns a ``pymc3.MultiTrace`` object that can be operated on simply like a ``numpy.array``.
    Furthermore, ``pymc3`` can be used to create "traceplots". For example via

    .. code-block:: python

        from matplotlib import pyplot as plt
        import pymc3

        trace = uvb.sample_posterior(t, T, n_samp=1000)
        pymc3.traceplot(trace["mu"])

        plt.plot(trace["mu"], trace["alpha"])

    .. note::

        ``n_burnin`` is used both as the sampler's tuning length (tuned
        samples are discarded) and as the number of leading draws sliced off
        the resulting trace, so the returned trace holds
        ``n_samp - n_burnin`` draws.

    :param numpy.array[float] t: Observation timestamps of the process up to time T. 1-d array of timestamps.
        must be sorted (asc)
    :param T: maximum time; passed through ``self._prep_t_T`` together with ``t``
    :type T: float or None
    :param int n_samp: number of draws requested from the sampler
    :param int n_burnin: number of samples to discard (as the burn-in samples);
        defaults to ``int(n_samp / 5)``

    :rtype: pymc3.MultiTrace
    :return: the posterior samples for mu, alpha and theta as a trace object
    """
    t, T = self._prep_t_T(t, T)

    if n_burnin is None:
        n_burnin = int(n_samp / 5)

    with pm.Model():
        mu = pm.Gamma("mu", alpha=self.mu_hyp[0], beta=1. / self.mu_hyp[1])
        theta = pm.Gamma("theta", alpha=self.theta_hyp[0], beta=1. / self.theta_hyp[1])
        alpha = pm.Beta("alpha", alpha=self.alpha_hyp[0], beta=self.alpha_hyp[1])

        # The log-likelihood op's output is exposed as deterministic 'a' and
        # added to the model log-probability through a Potential.
        op = HPLLOp(t, T)
        a = pm.Deterministic('a', op(mu, alpha, theta))
        pm.Potential('ll', a)

        # `chains` replaces the deprecated `nchains` keyword (this call
        # already uses the matching newer `cores` keyword).
        trace = pm.sample(n_samp, step=pm.Metropolis(), cores=1, chains=1,
                          tune=n_burnin, discard_tuned_samples=True)

    return trace[n_burnin:]

def test_transformed(self):
    """Prior-predictive sampling of a hierarchical Beta-Binomial model.

    Checks the sample shapes and that the transformed variable
    ``thetas_logodds__`` is present in the output.
    """
    n_players = 18
    draws = 50
    at_bats = 45 * np.ones(n_players, dtype=int)
    hits = np.random.randint(1, 40, size=n_players, dtype=int)

    with pm.Model() as model:
        phi = pm.Beta("phi", alpha=1.0, beta=1.0)

        log_kappa = pm.Exponential("logkappa", lam=5.0)
        kappa = pm.Deterministic("kappa", aet.exp(log_kappa))

        thetas = pm.Beta(
            "thetas",
            alpha=phi * kappa,
            beta=(1.0 - phi) * kappa,
            shape=n_players,
        )

        y = pm.Binomial("y", n=at_bats, p=thetas, observed=hits)
        gen = pm.sample_prior_predictive(draws)

    assert gen["phi"].shape == (draws,)
    assert gen["y"].shape == (draws, n_players)
    assert "thetas_logodds__" in gen

def build_model(X1, X2, timeV, conv0=0.1, confl=None, apop=None, dna=None):
    """Build and return the PyMC model.

    :param X1, X2: drug inputs; their shapes must match.
    :param timeV: time vector passed to ``theanoCore``.
    :param conv0: argument forwarded to ``conversionPriors``.
    :param confl, apop, dna: optional experimental observations. Each one
        that is not ``None`` adds a Normal likelihood on the flattened
        residual between the model prediction and the observation.
    :return: the constructed ``pm.Model``.
    """
    assert X1.shape == X2.shape

    def _observe_residual(label, predicted, measured):
        # Zero-centred Normal on the flattened residual, with the residual's
        # own standard deviation used as the scale.
        residual = T.flatten(predicted - measured)
        pm.Normal(label, sd=T.std(residual), observed=residual)

    M = pm.Model()

    with M:
        conversions = conversionPriors(conv0)
        d, apopfrac = deathPriors(1)

        # parameters for drug 1, 2; assumed to be the same for both phenotypes
        hill = pm.Lognormal("hill", shape=2)
        IC50 = pm.Lognormal("IC50", shape=2)
        EmaxGrowth = pm.Beta("EmaxGrowth", 1.0, 1.0, shape=2)
        EmaxDeath = pm.Lognormal("EmaxDeath", -2.0, 0.5, shape=2)

        # E_con values; first death then growth
        # NOTE(review): this prior uses np.log10 for its first argument while
        # deathPriors uses np.log for the analogous one -- confirm intent.
        GrowthCon = pm.Lognormal("GrowthCon", np.log10(0.03), 0.1)

        # Calculate the death rate
        death_rates = blissInteract(X1, X2, hill, IC50, EmaxDeath, justAdd=True)  # pylint: disable=unsubscriptable-object

        # Calculate the growth rate
        growth_effect = blissInteract(X1, X2, hill, IC50, EmaxGrowth)  # pylint: disable=unsubscriptable-object
        growth_rates = GrowthCon * (1 - growth_effect)
        pm.Deterministic("EmaxGrowthEffect", GrowthCon * EmaxGrowth)

        # Test the dimension of growth_rates
        growth_size_ok = T.eq(growth_rates.size, X1.size)
        growth_rates = T.opt.Assert("growth_rates did not match X1 size")(growth_rates, growth_size_ok)

        lnum, eap, deadapop, deadnec = theanoCore(timeV, growth_rates, death_rates, apopfrac, d)

        # Test the size of lnum
        lnum_size_ok = T.eq(lnum.size, X1.size * timeV.size)
        lnum = T.opt.Assert("lnum did not match X1*timeV size")(lnum, lnum_size_ok)

        confl_exp, apop_exp, dna_exp = convSignal(lnum, eap, deadapop, deadnec, conversions)

        # Compare to experimental observation
        if confl is not None:
            _observe_residual("confl_fit", confl_exp, confl)
            # Mean residual along axis 1, normalised by the first entry of the observed mean.
            conflmean = T.mean(confl, axis=1)
            confl_exp_mean = T.mean(confl_exp, axis=1)
            pm.Deterministic("conflResid", (confl_exp_mean - conflmean) / conflmean[0])

        if apop is not None:
            _observe_residual("apop_fit", apop_exp, apop)

        if dna is not None:
            _observe_residual("dna_fit", dna_exp, dna)

    return M

def test_1latent(self):
    """Run ``_test`` on a Beta-Bernoulli model with a single latent variable ``p``."""
    with self.test_session():
        x_obs = theano.shared(np.zeros(1))

        pm_model = pm.Model()
        with pm_model:
            p = pm.Beta('p', 1, 1, transform=None)
            x = pm.Bernoulli('x', p, observed=x_obs)

        wrapped = PyMC3Model(pm_model)
        observations = {x_obs: np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])}
        latent_values = {'p': np.array(0.5)}
        _test(wrapped, observations, latent_values)

def hierarchical_beta(name, name_sigma, pr_mean, pr_sigma, len_L2, model=None):
    """Create a Beta prior, hierarchical if the model context says so.

    The model is resolved with ``modelcontext(model)``; its
    ``is_hierarchical`` attribute selects between a single Beta called
    ``name`` and a two-level construction whose variables carry the
    ``_hc_L1`` / ``_hc_L2`` suffixes.

    :return: ``(Y, X)`` -- ``X`` is ``None`` in the non-hierarchical case.
    """
    model = modelcontext(model)

    # Shared (mean, sigma) -> (alpha, beta) mapping for the top-level Beta.
    top_alpha = pr_mean / pr_sigma
    top_beta = 1 / pr_sigma * (1 - pr_mean)

    if model.is_hierarchical:
        sigma_Y = pm.HalfCauchy(name_sigma + "_hc_L2", beta=pr_sigma)
        X = pm.Beta(name + "_hc_L1", alpha=top_alpha, beta=top_beta)
        Y = pm.Beta(
            name + "_hc_L2",
            alpha=X / sigma_Y,
            beta=1 / sigma_Y * (1 - X),
            shape=len_L2,
        )
    else:
        # not hierarchical
        Y = pm.Beta(name, alpha=top_alpha, beta=top_beta)
        X = None

    return Y, X

def build_model(self):
    """Build the zero-inflated Poisson occupancy model for ``self.y``.

    :return: the constructed ``pm.Model``.
    """
    with pm.Model() as model:
        # Estimated occupancy
        psi = pm.Beta('psi', 1, 1)

        # Latent variable for occupancy.
        # `shape` is passed by keyword: positionally it would bind to
        # whatever parameter follows `p` in the Bernoulli signature.
        pm.Bernoulli('z', psi, shape=self.y.shape)

        # Estimated mean count
        theta = pm.Uniform('theta', 0, 100)

        # Poisson likelihood.
        # Keywords are used because the positional (theta, psi) order swaps
        # the two parameters under the (psi, theta) signature.
        pm.ZeroInflatedPoisson('y', psi=psi, theta=theta, observed=self.y)

    return model

def case_count_model_us_states(df):
    """Build the case-count model from a per-region dataframe.

    Reads the columns ``people_per_test_7_days_ago``, ``num_pos_7_days_ago``
    and ``death`` and, as a side effect, writes a new column
    ``people_per_test_normalized`` into ``df``.

    :return: the constructed (unsampled) ``pm.Model``.
    """
    # Normalize inputs in a way that is sensible:
    # People per test: normalize to South Korea,
    # assuming S.K. testing is "saturated"
    ppt_sk = np.log10(51500000. / 250000)
    df['people_per_test_normalized'] = (
        np.log10(df['people_per_test_7_days_ago']) - ppt_sk)

    n_rows = len(df)

    # Per row:
    #   c_obs  = number of observed cases
    #   c_star = number of true cases (derived inside the model)
    #   d_obs  = number of observed deaths, capped at the observed cases
    c_obs = df['num_pos_7_days_ago'].values
    d_obs = df[['death', 'num_pos_7_days_ago']].min(axis=1).values
    people_per_test = df['people_per_test_normalized'].values

    non_negative_normal = pm.Bound(pm.Normal, lower=0.0)
    non_positive_normal = pm.Bound(pm.Normal, upper=0.0)

    covid_case_count_model = pm.Model()
    with covid_case_count_model:
        # Priors:
        mu_0 = pm.Beta('mu_0', alpha=1, beta=100, testval=0.01)
        alpha = non_negative_normal('alpha', mu=8, sigma=3, shape=1)
        beta = non_positive_normal('beta', mu=-1, sigma=1, shape=1)
        sigma = pm.HalfNormal('sigma', sigma=0.5, testval=0.1)

        # Model probability of case under-reporting as logistic regression:
        mu_model_logit = alpha + beta * people_per_test
        tau_logit = pm.Normal(
            'tau_logit', mu=mu_model_logit, sigma=sigma, shape=n_rows)
        tau = np.exp(tau_logit) / (np.exp(tau_logit) + 1)

        c_star = c_obs / tau

        # Binomial likelihood:
        pm.Binomial('d', n=c_star, p=mu_0, observed=d_obs)

    return covid_case_count_model

def build_model(self):
    """Build the zero-inflated Poisson occupancy model for ``self.y``.

    :return: the constructed ``pm.Model``.
    """
    observed_counts = self.y
    model = pm.Model()

    with model:
        # Estimated occupancy
        psi = pm.Beta("psi", 1, 1)

        # Latent variable for occupancy
        pm.Bernoulli("z", psi, shape=observed_counts.shape)

        # Estimated mean count
        theta = pm.Uniform("theta", 0, 100)

        # Poisson likelihood
        pm.ZeroInflatedPoisson("y", psi, theta, observed=observed_counts)

    return model

def test_sample_vp(self):
    """``sample_vp`` hides or exposes the transformed variable as requested."""
    n_samples = 100
    observed_xs = np.random.binomial(n=1, p=0.2, size=n_samples)

    with pm.Model():
        success_prob = pm.Beta('p', alpha=1, beta=1)
        pm.Binomial('xs', n=1, p=success_prob, observed=observed_xs)
        v_params = advi(n=1000)

        # Only the untransformed variable is reported when transforms are hidden.
        hidden = sample_vp(v_params, draws=1, hide_transformed=True)
        self.assertListEqual(hidden.varnames, ['p'])

        # The log-odds variable is listed as well when they are not hidden.
        exposed = sample_vp(v_params, draws=1, hide_transformed=False)
        self.assertListEqual(sorted(exposed.varnames), ['p', 'p_logodds_'])

def test_transformed(self):
    """Prior-predictive sampling of a hierarchical Beta-Binomial model.

    Checks the sample shapes and that the transformed variable
    ``thetas_logodds__`` is part of the generated samples.
    """
    n = 18
    draws = 50
    at_bats = 45 * np.ones(n, dtype=int)
    hits = np.random.randint(1, 40, size=n, dtype=int)

    model = pm.Model()
    with model:
        phi = pm.Beta('phi', alpha=1., beta=1.)

        kappa_log = pm.Exponential('logkappa', lam=5.)
        kappa = pm.Deterministic('kappa', tt.exp(kappa_log))

        # Beta parameters from the mean phi and concentration kappa.
        thetas_alpha = phi*kappa
        thetas_beta = (1.0-phi)*kappa
        thetas = pm.Beta('thetas', alpha=thetas_alpha, beta=thetas_beta, shape=n)

        y = pm.Binomial('y', n=at_bats, p=thetas, observed=hits)
        gen = pm.sample_prior_predictive(draws)

    assert gen['phi'].shape == (draws,)
    assert gen['y'].shape == (draws, n)
    assert 'thetas_logodds__' in gen

def runModel(df_Train, df_Val, i, t, param, smpls, burns):
    """Fit a hierarchical Bernoulli model per hour and score it on validation data.

    :param df_Train, df_Val: containers indexed by ``i`` yielding dataframes
        with an ``Hour`` column and a ``param`` column.
    :param i: index selecting the training / validation dataframe.
    :param t: shape of the per-hour ``theta`` prior.
    :param param: name of the observed column.
    :param smpls: number of draws passed to ``pm.sample``.
    :param burns: number of tuning steps passed to ``pm.sample``.
    :return: ``(trace, ppc_all, out_smry, [err_y, err_int])``.
    """
    def _hourly_profile(hours, values):
        # Mean value per hour plus its rounded counterpart in column 'int'.
        frame = pd.DataFrame(np.transpose(np.array([hours, values])),
                             columns=['hr', 'y'])
        frame = frame.groupby('hr').mean()
        frame['int'] = np.round(frame.y.values)
        return frame

    train = df_Train[i]
    observed = train[param].values
    t_idx = train.Hour.astype(int).values

    holdout = df_Val[i]
    validate = _hourly_profile(holdout.Hour, holdout[param].values)

    # define bernoulli hierarchical model
    with pm.Model() as model:
        # define the hyperparameters
        mu = pm.Beta('mu', 2, 2)
        kappa = pm.Gamma('kappa', 1, 0.1)
        # define the prior
        theta = pm.Beta('theta', mu * kappa, (1 - mu) * kappa, shape=t)
        # define the likelihood
        pm.Bernoulli('y_like', p=theta[t_idx], observed=observed)

        # Generate a MCMC chain
        trace = pm.sample(smpls, chains=4, tune=burns, cores=1)
        ppc = pm.sample_posterior_predictive(trace)

    out_smry = pd.DataFrame(pm.summary(trace))

    draws = ppc['y_like']
    ppc_mean = np.mean(draws, axis=0)
    # NOTE: the per-observation std is computed but is not part of the return value.
    ppc_std = np.array((t_idx, np.std(draws, axis=0)))
    ppc_all = np.append(np.reshape(t_idx, (-1, 1)), draws.T, axis=1)

    predVals = _hourly_profile(t_idx, ppc_mean)

    # Calculate SMAPE Error
    err_y = np.round(SMAPE(validate.y, predVals.y), 4)
    err_int = np.round(SMAPE(validate['int'], predVals['int']), 4)

    print('\n Error: ', (err_y, err_int), '\n')

    return trace, ppc_all, out_smry, [err_y, err_int]

def gev0_shift_1(dataset):
    """Fit a GEV model whose three parameters all switch at one change point.

    Two parameter sets (``c``, ``loc``, ``scale``) are created; a discrete
    change point ``tau`` selects, per observation index, which set applies.

    :param dataset: observations; must provide ``mean()``, ``std()`` and ``len``.
    :return: ``(summary, posterior)`` -- ``pm.summary`` of the trace and the
        trace converted to a dataframe.
    """
    n_obs = len(dataset)
    data_sd = dataset.std()

    # Prior centres and spreads derived from the data.
    locm = dataset.mean()
    locs = data_sd / (np.sqrt(n_obs))
    scalem = data_sd
    scales = data_sd / (np.sqrt(2 * (n_obs - 1)))

    with pm.Model() as model:
        # Priors for unknown model parameters.
        # c = x - 0.5: the Beta domain is (0, 1), so gev_logp shifts it.
        c1 = pm.Beta('c1', alpha=6, beta=9)
        loc1 = pm.Normal('loc1', mu=locm, sd=locs)
        scale1 = pm.Normal('scale1', mu=scalem, sd=scales)

        c2 = pm.Beta('c2', alpha=6, beta=9)
        loc2 = pm.Normal('loc2', mu=locm, sd=locs)
        scale2 = pm.Normal('scale2', mu=scalem, sd=scales)

        # NOTE(review): n_count_data is not defined in this function; it must
        # exist at module level and presumably equals len(dataset) -- confirm.
        tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data - 1)
        idx = np.arange(n_count_data)
        before_change = tau > idx
        c_ = pm.math.switch(before_change, c1, c2)
        loc_ = pm.math.switch(before_change, loc1, loc2)
        scale_ = pm.math.switch(before_change, scale1, scale2)

        def gev_logp(value):
            xi = c_ - 0.5
            scaled = (value - loc_) / scale_
            logp = -(tt.log(scale_) +
                     ((xi + 1) / xi * tt.log1p(xi * scaled) +
                      (1 + xi * scaled)**(-1 / xi)))
            bound1 = loc_ - scale_ / xi
            bounds = tt.switch(xi > 0, value > bound1, value < bound1)
            # NOTE(review): `c_ != 0` looks like a plain Python comparison on
            # the unshifted variable rather than a symbolic test of the
            # shifted shape -- confirm whether that is intended.
            return bound(logp, bounds, c_ != 0)

        gev = pm.DensityDist('gev', gev_logp, observed=dataset)
        trace = pm.sample(1000, chains=1, progressbar=True)

    posterior = pm.trace_to_dataframe(trace)
    summary = pm.summary(trace)
    return summary, posterior

def model_uncertainty(splits, stakes, actions, temp=1., sd=1.):
    """Fit the 'repeated' and 'fairness' models with ADVI and compare them.

    :param splits, stakes: predictors entering the odds expressions.
    :param actions: observed binary actions.
    :param temp: accepted but not used by this function.
    :param sd: standard deviation of the Normal prior on ``f`` in the
        fairness model.
    :return: ``(trace_fairness, trace_repeated, comp)`` where ``comp`` is the
        result of ``pm.compare``.
    """
    n_posterior_draws = 2000

    with pm.Model() as repeated_model:
        r = pm.Gamma('r', alpha=1, beta=1)
        rejection_weight = pm.Gamma('p', alpha=1, beta=1)
        t = pm.Beta('t', alpha=2, beta=5)
        st = pm.Beta('st', alpha=1, beta=1)
        c = pm.Gamma('c', alpha=1, beta=1)

        odds_a = np.exp(2 * r * splits + c * stakes**st)
        odds_r = np.exp(rejection_weight * (splits < 0.5 - t / 2))
        accept_prob = odds_a / (odds_r + odds_a)
        pm.Binomial('a', 1, accept_prob, observed=actions)

        approx = pm.fit(method='advi')
        trace_repeated = approx.sample(n_posterior_draws)

    with pm.Model() as fairness_model:
        r = pm.Gamma('r', alpha=1, beta=1)
        t = pm.Beta('t', alpha=2, beta=5)
        f = pm.Normal('f', mu=0, sd=sd)
        st = pm.Beta('st', alpha=1, beta=1)
        c = pm.Gamma('c', alpha=1, beta=1)

        odds = np.exp(c * stakes**st + splits * r - f * (splits < 0.5 - t / 2))
        accept_prob = odds / (1 + odds)
        pm.Binomial('a', 1, accept_prob, observed=actions)

        approx = pm.fit(method='advi')
        trace_fairness = approx.sample(n_posterior_draws)

    # Names are assigned after fitting.
    fairness_model.name = 'fair'
    repeated_model.name = 'repeated'

    model_dict = {
        fairness_model: trace_fairness,
        repeated_model: trace_repeated,
    }
    comp = pm.compare(model_dict, ic='LOO', method='BB-pseudo-BMA')
    return trace_fairness, trace_repeated, comp

def test_bernoulli_process(self):
    """Testing the Bridge Sampler with a Beta-Bernoulli-Process model"""

    # prior parameters
    alpha = np.random.gamma(1.0, 2.0)
    beta = np.random.gamma(1.0, 2.0)
    n = 100

    draws = 10000
    tune = 1000

    print("Testing with alpha = ", alpha, "and beta = ", beta)

    # random data
    p0 = np.random.random()
    expected_error = np.sqrt(p0 * (1 - p0) / n)  # reasonable approximation

    observations = (np.random.random(n) <= p0).astype("int")

    with pm.Model() as BernoulliBeta:
        # theta is the probability of category 0, so an observation of 1
        # has probability 1 - theta.
        theta = pm.Beta('pspike', alpha=alpha, beta=beta)
        pm.Categorical('obs', p=pm.math.stack([theta, 1.0 - theta]),
                       observed=observations)
        trace = pm.sample(draws=draws, tune=tune)

    # calculate exact marginal likelihood
    n = len(observations)
    k = sum(observations)
    print(n, k)
    # The likelihood is theta**(n - k) * (1 - theta)**k, so the zeros (n - k)
    # update alpha and the ones (k) update beta. The original added the
    # counts the other way round, which is only correct when alpha == beta.
    exact_log_marg_ll = spf.betaln(alpha + (n - k), beta + k) - spf.betaln(alpha, beta)

    # estimate with bridge sampling
    logml_dict = marginal_llk(trace, model=BernoulliBeta, maxiter=10000)
    expected_p = 1.0 - trace["pspike"].mean()

    # should be true in 95% of the runs
    self.assertTrue(
        np.abs(expected_p - p0) < 2 * expected_error,
        msg=
        "Estimated probability is {0:5.3f}, exact is {1:5.3f}, estimated standard deviation is {2:5.3f}. Is this OK?"
        .format(expected_p, p0, expected_error))

    estimated_log_marg_ll = logml_dict["logml"]

    # 3.2 corresponds to a bayes factor of 'Not worth more than a bare mention'
    self.assertTrue(
        np.abs(estimated_log_marg_ll - exact_log_marg_ll) < np.log(3.2),
        msg=
        "Estimated marginal log likelihood {0:2.5f}, exact marginal log likelihood {1:2.5f}. Is this OK?"
        .format(estimated_log_marg_ll, exact_log_marg_ll))

def test_1d():
    """Run ``_test`` on a Beta-Bernoulli model with one and with four latent samples."""
    x_obs = theano.shared(np.zeros(1))

    pm_model = pm.Model()
    with pm_model:
        beta = pm.Beta('beta', 1, 1, transform=None)
        x = pm.Bernoulli('x', beta, observed=x_obs)

    model = PyMC3Model(pm_model)
    data = {x_obs: np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])}

    # First a single latent sample, then a batch of four.
    latent_batches = (
        np.array([[0.5]]),
        np.array([[0.4], [0.2], [0.2351], [0.6213]]),
    )
    for zs in latent_batches:
        _test(model, data, zs)

def fit_spindle_density_prior():
    """Fit a Beta distribution to a digitised spindle-density curve and plot it.

    The curve points are converted with ``get_target_curve``, 1000 values are
    resampled from it, and a Beta likelihood with HalfNormal priors on its two
    parameters is fitted. The posterior-mean parameters then define a second
    Beta from which samples are drawn and plotted. The function shows the
    plots, prints the summary and histogram values, and returns nothing.
    """
    # data from purcell
    data = [[85, 177], [89, 148], [93, 115], [98, 71], [105, 42], [117, 20],
            [134, 17], [148, 27], [157, 39], [165, 53], [170, 68], [174, 84],
            [180, 102], [184, 123], [190, 143], [196, 156], [202, 165],
            [210, 173], [217, 176], [222, 177]]
    xscale = [0, 4]
    yscale = [0, 800]
    data_df = get_target_curve(data, xscale, yscale, scale=False)

    # Resample x values with the curve's y values as probabilities.
    sample_data = np.random.choice(a=data_df['x'], p=data_df['y'], size=1000)

    with pm.Model():
        a = pm.HalfNormal('a', 100 * 10)
        b = pm.HalfNormal('b', 100 * 10)
        pm.Beta('spindle_density', alpha=a, beta=b, observed=sample_data)
        trace = pm.sample(2000)

    summary_df = pm.summary(trace)
    a_est = summary_df.loc['a', 'mean']
    b_est = summary_df.loc['b', 'mean']

    n_samples = 10000
    with pm.Model():
        pm.Beta('spindle_density_mean_params', alpha=a_est, beta=b_est)
        # `cores` / `chains` replace the deprecated `njobs` / `nchains`
        # keywords of pm.sample.
        outcome = pm.sample(n_samples, cores=1, chains=1)

    samples = outcome['spindle_density_mean_params']
    sns.distplot(samples, kde=True)
    x = data_df['x']
    # Rescale the target curve by sample count and x spacing before overlaying it.
    y = data_df['y'] * len(samples) * (x[1] - x[0])
    sns.lineplot(x, y)
    plt.show()
    print(summary_df)

    sp_per_epoch = xscale[1] * outcome['spindle_density_mean_params'] * 25 / 60
    counts, bins, patches = plt.hist(sp_per_epoch, np.arange(0, 8) - 0.5, density=True)
    sns.distplot(sp_per_epoch, kde=True, hist=False)
    plt.show()
    print(counts, bins)

def bayesian_inference_SEIR(day_array, cluster_vel_cases_array, N_SAMPLES):
    """Sample a discrete-time SEIR model and draw posterior predictions.

    Based on
    https://discourse.pymc.io/t/how-to-sample-efficiently-from-time-series-data/4928

    :param day_array: currently unused.
    :param cluster_vel_cases_array: currently unused; the observed series
        ``C`` and ``D`` are hard-coded below.
    :param N_SAMPLES: number of posterior draws requested from ``pm.sample``.
        It was previously overwritten with 1000 inside the function, so the
        caller's value had no effect.
    """
    s0, e0, i0 = 100., 50., 25.
    st0, et0, it0 = [theano.shared(x) for x in [s0, e0, i0]]

    C = np.array([3, 5, 8, 13, 21, 26, 10, 3], dtype=np.float64)
    D = np.array([1, 2, 3, 7, 9, 11, 5, 1], dtype=np.float64)

    def seir_one_step(st0, et0, it0, beta, gamma, delta):
        # Flows between compartments for one step: S -> E, E -> I, out of I.
        bt0 = st0 * beta
        ct0 = et0 * gamma
        dt0 = it0 * delta

        st1 = st0 - bt0
        et1 = et0 + bt0 - ct0
        it1 = it0 + ct0 - dt0
        return st1, et1, it1

    with pm.Model() as model:
        beta = pm.Beta('beta', 2, 10)
        gamma = pm.Beta('gamma', 2, 10)
        delta = pm.Beta('delta', 2, 10)

        # Unroll the one-step update over the length of the observed series.
        (st, et, it), _updates = theano.scan(
            fn=seir_one_step,
            outputs_info=[st0, et0, it0],
            non_sequences=[beta, gamma, delta],
            n_steps=len(C))

        pm.Binomial('c_t', et, gamma, observed=C)
        pm.Binomial('d_t', it, delta, observed=D)

        trace = pm.sample(N_SAMPLES)

    print(trace)
    visualize_trace(trace["beta"][:, None], trace["gamma"][:, None],
                    trace["delta"][:, None], N_SAMPLES)

    with model:
        # b_t is added after sampling, for posterior prediction only.
        pm.Binomial('b_t', st, beta, shape=len(C))
        ppc_trace = pm.sample_posterior_predictive(trace, var_names=['b_t'])

def make_model(Gd, Cd, *P):
    """Build a model linking observed goal/context indicators to premises.

    :param Gd: observed data for the 'goal' Bernoulli; ``Gd[0]`` must be 1-d.
    :param Cd: observed data for the 'context' Bernoulli.
    :param P: one observed array per premise.
    :return: the constructed ``pm.Model``.
    """
    (n,) = Gd[0].shape

    with pm.Model() as model:
        # NOTE(review): the Beta priors are given alpha=zeros(n); Beta
        # normally requires alpha > 0 -- confirm this is intended.
        theta1 = pm.Beta('theta1', alpha=zeros(n), beta=ones(n), shape=n)
        theta2 = pm.Beta('theta2', alpha=zeros(n), beta=ones(n), shape=n)

        Gamma = pm.Bernoulli('goal', p=theta1, shape=n, observed=Gd)
        C = pm.Bernoulli('context', p=theta2, shape=n, observed=Cd)

        # Results are degree of adjacency
        # NOTE(review): `m` is not defined in this function; it must come
        # from module scope. Only beta_i[0] and beta_i[1] are used.
        for i, Pi in enumerate(P):
            coef_name = 'beta {}'.format(i)
            premise_name = 'premise {}'.format(i)
            beta_i = DUMvNormal(coef_name, mu=zeros(m), cov=identity(m), shape=m)
            premise_mean = beta_i[0] * Gamma + beta_i[1] * C
            DUMvNormal(premise_name, mu=premise_mean,
                       cov=identity(n), shape=n, observed=Pi)

    return model

def __init__(self, n_to_sample=1000, *args, **kwargs):
    """Set up the Beta model with uniform priors on both shape parameters.

    :param n_to_sample: stored on ``self.n_to_sample``.
    :param args, kwargs: forwarded to the parent initialiser.
    """
    super(BetaBayesianSolver, self).__init__(*args, **kwargs)
    self.n_to_sample = n_to_sample

    # Shared container for the observed data, initialised to a single 0.5.
    self.shared_data = theano.shared(np.ones(1) * 0.5, borrow=True)

    self.model = pm.Model()
    with self.model:
        alpha_prior = pm.Uniform('alpha', lower=1.0, upper=7.0)
        beta_prior = pm.Uniform('beta', lower=1.0, upper=7.0)
        self.alpha_dist = alpha_prior
        self.beta_dist = beta_prior

        pm.Beta('obs', alpha=alpha_prior, beta=beta_prior,
                observed=self.shared_data)
        self.step = pm.Metropolis()

def test_sample(self):
    """Sampling from the approximation honours ``draws`` and ``include_transformed``."""
    n_samples = 100
    observed_xs = np.random.binomial(n=1, p=0.2, size=n_samples)

    with pm.Model():
        success_prob = pm.Beta('p', alpha=1, beta=1)
        pm.Binomial('xs', n=1, p=success_prob, observed=observed_xs)
        approx = self.inference().approx

        # Without transformed variables only 'p' is reported.
        plain = approx.sample(draws=1, include_transformed=False)
        assert plain.varnames == ['p']
        assert len(plain) == 1

        # With them, the log-odds variable is listed as well.
        full = approx.sample(draws=10, include_transformed=True)
        assert sorted(full.varnames) == ['p', 'p_logodds__']
        assert len(full) == 10

def elicit(name, data):
    """Create a Beta-shaped prior spanning the range of ``data``.

    A scipy Beta distribution with fixed location ``min(data)`` and fixed
    scale ``max(data) - min(data)`` is fitted to the data. Its mean and
    standard deviation, mapped onto the unit interval, parameterise a Beta
    variable ``"<name>_scaled__"``, which is mapped back to the data range
    as the deterministic variable ``name``.

    :param name: name of the resulting deterministic variable.
    :param data: array-like of observations; must not be constant.
    :return: the deterministic variable, with the fitted frozen scipy
        distribution attached as its ``var`` attribute.
    :raises ValueError: if all values in ``data`` are equal.
    """
    y = np.sort(data)
    lower = y.min()
    width = y.max() - lower
    if not width > 0:
        raise ValueError("elicit() needs data spanning a non-zero range")

    # The scale is the width of the support, not its upper end; passing
    # y.max() as fscale was only correct when the minimum was 0.
    # The two extreme points are left out of the fit.
    par = stats.beta.fit(y[1:-1], floc=lower, fscale=width)
    var = stats.beta(*par)

    # Map the fitted moments onto (0, 1): remove the location from the mean
    # before dividing by the width (the std is unaffected by location).
    scaled_mu = (var.mean() - lower) / width
    scaled_sd = var.std() / width

    scaled = mc.Beta(f"{name}_scaled__", mu=scaled_mu, sd=scaled_sd)
    dist = mc.Deterministic(name, lower + (scaled * width))
    dist.var = var
    return dist