def create_changepoint_model(spike_array, states, fit, samples): """ spike_array :: Shape : tastes, trials, neurons, time_bins states :: number of states to include in the model fit :: number of iterations to fit for samples :: number of samples to generate from the fit model """ # If model already doesn't exist, then create new one #spike_array = this_dat_binned # Unroll arrays along taste axis spike_array_long = np.reshape(spike_array, (-1, *spike_array.shape[-2:])) # Find mean firing for initial values tastes = spike_array.shape[0] split_list = np.array_split(spike_array, states, axis=-1) # Cut all to the same size min_val = min([x.shape[-1] for x in split_list]) split_array = np.array([x[..., :min_val] for x in split_list]) mean_vals = np.mean(split_array, axis=(2, -1)).swapaxes(0, 1) mean_vals += 0.01 # To avoid zero starting prob mean_nrn_vals = np.mean(mean_vals, axis=(0, 1)) # Find evenly spaces switchpoints for initial values idx = np.arange(spike_array.shape[-1]) # Index array_idx = np.broadcast_to(idx, spike_array_long.shape) idx_range = idx.max() - idx.min() even_switches = np.linspace(0, idx.max(), states + 1) even_switches_normal = even_switches / np.max(even_switches) taste_label = np.repeat([0, 1, 2, 3], 30) trial_num = array_idx.shape[0] # Being constructing model with pm.Model() as model: # Hierarchical firing rates # Refer to model diagram # Mean firing rate of neuron AT ALL TIMES lambda_nrn = pm.Exponential('lambda_nrn', 1 / mean_nrn_vals, shape=(mean_vals.shape[-1])) # Priors for each state, derived from each neuron # Mean firing rate of neuron IN EACH STATE (averaged across tastes) lambda_state = pm.Exponential('lambda_state', lambda_nrn, shape=(mean_vals.shape[1:])) # Mean firing rate of neuron PER STATE PER TASTE lambda_latent = pm.Exponential('lambda', lambda_state[np.newaxis, :, :], testval=mean_vals, shape=(mean_vals.shape)) # Changepoint time variable # INDEPENDENT TAU FOR EVERY TRIAL a = pm.HalfNormal('a_tau', 3., shape=states - 1) b = pm.HalfNormal('b_tau', 3., shape=states - 1) # Stack produces states x trials --> That gets transposed # to trials x states and gets sorted along states (axis=-1) # Sort should work the same way as the Ordered transform --> # see rv_sort_test.ipynb tau_latent = pm.Beta('tau_latent', a, b, shape = (trial_num, states-1), testval = \ tt.tile(even_switches_normal[1:(states)], (array_idx.shape[0],1))).sort(axis=-1) tau = pm.Deterministic( 'tau', idx.min() + (idx.max() - idx.min()) * tau_latent) # Sigmoing to create transitions based off tau # Hardcoded 3-5 states weight_1_stack = tt.nnet.sigmoid(\ array_idx - tau[:,0][...,np.newaxis,np.newaxis]) weight_2_stack = tt.nnet.sigmoid(\ array_idx - tau[:,1][...,np.newaxis,np.newaxis]) if states > 3: weight_3_stack = tt.nnet.sigmoid(\ array_idx - tau[:,2][...,np.newaxis,np.newaxis]) if states > 4: weight_4_stack = tt.nnet.sigmoid(\ array_idx - tau[:,3][...,np.newaxis,np.newaxis]) # Generate firing rates from lambda and sigmoid weights if states == 3: # 3 states lambda_ = np.multiply(1 - weight_1_stack, lambda_latent[taste_label,0][:,:,np.newaxis]) + \ np.multiply(weight_1_stack * (1 - weight_2_stack), lambda_latent[taste_label][:,1][:,:,np.newaxis]) + \ np.multiply(weight_2_stack, lambda_latent[taste_label,2][:,:,np.newaxis]) elif states == 4: # 4 states lambda_ = np.multiply(1 - weight_1_stack, lambda_latent[taste_label,0][:,:,np.newaxis]) + \ np.multiply(weight_1_stack * (1 - weight_2_stack), lambda_latent[taste_label][:,1][:,:,np.newaxis]) + \ np.multiply(weight_2_stack * (1 - 
weight_3_stack), lambda_latent[taste_label][:,2][:,:,np.newaxis]) + \ np.multiply(weight_3_stack, lambda_latent[taste_label,3][:,:,np.newaxis]) elif states == 5: # 5 states lambda_ = np.multiply(1 - weight_1_stack, lambda_latent[taste_label,0][:,:,np.newaxis]) + \ np.multiply(weight_1_stack * (1 - weight_2_stack), lambda_latent[taste_label][:,1][:,:,np.newaxis]) + \ np.multiply(weight_2_stack * (1 - weight_3_stack), lambda_latent[taste_label][:,2][:,:,np.newaxis]) +\ np.multiply(weight_3_stack * (1 - weight_4_stack), lambda_latent[taste_label][:,3][:,:,np.newaxis])+ \ np.multiply(weight_4_stack, lambda_latent[taste_label,4][:,:,np.newaxis]) # Add observations observation = pm.Poisson("obs", lambda_, observed=spike_array_long) return model
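# A hypothetical usage sketch (not part of the original file): the `fit` and
# `samples` arguments are documented but never used inside the function, so one
# plausible way to honour them is to fit the returned model with ADVI. The
# synthetic spike_array below only illustrates the documented
# (tastes, trials, neurons, time_bins) layout; four tastes x 30 trials matches
# the hard-coded taste_label above.
import numpy as np
import pymc3 as pm

spike_array = np.random.poisson(1.0, size=(4, 30, 10, 120))
model = create_changepoint_model(spike_array, states=4, fit=40000, samples=2000)
with model:
    approx = pm.fit(n=40000, method='advi')
    trace = approx.sample(2000)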
    0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1], value=-999)
year = np.arange(1851, 1962)

'''Model generation'''
HansModel = pm.Model()
with HansModel:
    switchpoint = pm.DiscreteUniform('switchpoint', lower=year.min(),
                                     upper=year.max(), testval=1900)
    # prior
    early_rate = pm.Exponential('early_rate', 1)
    late_rate = pm.Exponential('late_rate', 1)
    # Allocate rate
    rate = pm.math.switch(switchpoint >= year, early_rate, late_rate)
    # Likelihood
    disaster = pm.Poisson('disaster', mu=rate, observed=disaster_data)

''' MCMC setting '''
with HansModel:
    # Step1 = pm.Slice(vars=[early_rate, late_rate, switchpoint, disaster.missing_values[0]])
    trace = pm.sample(1000, step=pm.NUTS())

pm.traceplot(trace)
print(pm.summary(trace))
plt.show()
from pandas_datareader import data
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np

returns = data.get_data_google('SPY', start='2008-5-1',
                               end='2009-12-1')['Close'].pct_change()
print(returns)

with pm.Model() as sp500_model:
    nu = pm.Exponential('nu', 1. / 10, testval=5.)
    sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
    s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = pm.Deterministic('volatility_process',
                                          pm.math.exp(-2 * s))
    r = pm.StudentT('r', nu, lam=volatility_process, observed=returns)

with sp500_model:
    trace = pm.sample(2000)

pm.traceplot(trace, [nu, sigma])

fig, ax = plt.subplots(figsize=(15, 8))
returns.plot(ax=ax)
ax.plot(returns.index, 1 / np.exp(trace['s', ::5].T), 'r', alpha=.03)
ax.set(title='volatility_process', xlabel='time', ylabel='volatility')
ax.legend(['S&P500', 'stochastic volatility process'])
plt.show()
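# The Google Finance endpoint used above has since been retired; a hedged
# alternative (assuming pandas_datareader's generic DataReader with the Yahoo
# source is still available) would be:
#
#     returns = data.DataReader('SPY', 'yahoo', start='2008-5-1',
#                               end='2009-12-1')['Close'].pct_change().dropna()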
    predict = tt.set_subtensor(predict[counter:counter + 1], th_g_pred_s)
    return predict


# In[ ]:

model_C = pm.Model()
alpha1 = 3.
beta1 = 0.05
alpha2 = 1.0

# define the distribution
with model_C:
    sigma2s = pm.InverseGamma('sigma2s', alpha=alpha1, beta=beta1, shape=1)
    sigma2 = pm.Deterministic('sigma2', tt.tile(sigma2s, th.shape[0]))
    gamma2 = pm.Exponential(name='gamma2', lam=alpha2)
    ln_k_guess = pm.Normal(name='ln_k_guess', mu=0, sigma=tt.sqrt(gamma2), shape=1)
    y_mean = pm.Deterministic('y_mean', Solver(ln_k_guess))
    y = pm.Normal(name='y', mu=y_mean, sigma=tt.sqrt(sigma2), observed=thg)


# In[12]:

with model_C:
    mcmc_res_C = pm.sample(draws=5000, step=pm.NUTS())

# _ = pm.plot_posterior(mcmc_res_C, var_names=['ln_k_guess'])


# In[ ]:
    plt.plot(x_values, x_pdf, label=r'$\nu$ = {}'.format(df))

x_pdf = stats.norm.pdf(x_values)
plt.plot(x_values, x_pdf, label=r'$\nu = \infty$')
plt.xlabel('$x$')
plt.ylabel('$p(x)$')
plt.legend(loc=0, fontsize=14)
plt.xlim(-7, 7)
plt.savefig('img306.png', dpi=300, figsize=(5.5, 5.5))
plt.figure()

with pm.Model() as model_t:
    mu = pm.Uniform('mu', 40, 75)
    sigma = pm.HalfNormal('sigma', sd=10)
    nu = pm.Exponential('nu', 1/30)
    y = pm.StudentT('y', mu=mu, sd=sigma, nu=nu, observed=data)
    trace_t = pm.sample(1100)

chain_t = trace_t[100:]
pm.traceplot(chain_t)
plt.savefig('img308.png', dpi=300, figsize=(5.5, 5.5))
plt.figure()

# pm.df_summary(chain_t)
pm.summary(chain_t)

y_pred = pm.sample_ppc(chain_t, 100, model_t, size=len(data))
sns.kdeplot(data, c='b')
for i in y_pred['y']:
def set_likelihood(self): """ Convert any bilby likelihoods to PyMC3 distributions. """ # create theano Op for the log likelihood if not using a predefined model pymc3, STEP_METHODS, floatX = self._import_external_sampler() theano, tt, as_op = self._import_theano() class LogLike(tt.Op): itypes = [tt.dvector] otypes = [tt.dscalar] def __init__(self, parameters, loglike, priors): self.parameters = parameters self.likelihood = loglike self.priors = priors # set the fixed parameters for key in self.priors.keys(): if isinstance(self.priors[key], float): self.likelihood.parameters[key] = self.priors[key] self.logpgrad = LogLikeGrad(self.parameters, self.likelihood, self.priors) def perform(self, node, inputs, outputs): theta, = inputs for i, key in enumerate(self.parameters): self.likelihood.parameters[key] = theta[i] outputs[0][0] = np.array(self.likelihood.log_likelihood()) def grad(self, inputs, g): theta, = inputs return [g[0] * self.logpgrad(theta)] # create theano Op for calculating the gradient of the log likelihood class LogLikeGrad(tt.Op): itypes = [tt.dvector] otypes = [tt.dvector] def __init__(self, parameters, loglike, priors): self.parameters = parameters self.Nparams = len(parameters) self.likelihood = loglike self.priors = priors # set the fixed parameters for key in self.priors.keys(): if isinstance(self.priors[key], float): self.likelihood.parameters[key] = self.priors[key] def perform(self, node, inputs, outputs): theta, = inputs # define version of likelihood function to pass to derivative function def lnlike(values): for i, key in enumerate(self.parameters): self.likelihood.parameters[key] = values[i] return self.likelihood.log_likelihood() # calculate gradients grads = derivatives(theta, lnlike, abseps=1e-5, mineps=1e-12, reltol=1e-2) outputs[0][0] = grads with self.pymc3_model: # check if it is a predefined likelhood function if isinstance(self.likelihood, GaussianLikelihood): # check required attributes exist if (not hasattr(self.likelihood, 'sigma') or not hasattr(self.likelihood, 'x') or not hasattr(self.likelihood, 'y')): raise ValueError( "Gaussian Likelihood does not have all the correct attributes!" ) if 'sigma' in self.pymc3_priors: # if sigma is suppled use that value if self.likelihood.sigma is None: self.likelihood.sigma = self.pymc3_priors.pop('sigma') else: del self.pymc3_priors['sigma'] for key in self.pymc3_priors: if key not in self.likelihood.function_keys: raise ValueError( "Prior key '{}' is not a function key!".format( key)) model = self.likelihood.func(self.likelihood.x, **self.pymc3_priors) # set the distribution pymc3.Normal('likelihood', mu=model, sd=self.likelihood.sigma, observed=self.likelihood.y) elif isinstance(self.likelihood, PoissonLikelihood): # check required attributes exist if (not hasattr(self.likelihood, 'x') or not hasattr(self.likelihood, 'y')): raise ValueError( "Poisson Likelihood does not have all the correct attributes!" ) for key in self.pymc3_priors: if key not in self.likelihood.function_keys: raise ValueError( "Prior key '{}' is not a function key!".format( key)) # get rate function model = self.likelihood.func(self.likelihood.x, **self.pymc3_priors) # set the distribution pymc3.Poisson('likelihood', mu=model, observed=self.likelihood.y) elif isinstance(self.likelihood, ExponentialLikelihood): # check required attributes exist if (not hasattr(self.likelihood, 'x') or not hasattr(self.likelihood, 'y')): raise ValueError( "Exponential Likelihood does not have all the correct attributes!" 
) for key in self.pymc3_priors: if key not in self.likelihood.function_keys: raise ValueError( "Prior key '{}' is not a function key!".format( key)) # get mean function model = self.likelihood.func(self.likelihood.x, **self.pymc3_priors) # set the distribution pymc3.Exponential('likelihood', lam=1. / model, observed=self.likelihood.y) elif isinstance(self.likelihood, StudentTLikelihood): # check required attributes exist if (not hasattr(self.likelihood, 'x') or not hasattr(self.likelihood, 'y') or not hasattr(self.likelihood, 'nu') or not hasattr(self.likelihood, 'sigma')): raise ValueError( "StudentT Likelihood does not have all the correct attributes!" ) if 'nu' in self.pymc3_priors: # if nu is suppled use that value if self.likelihood.nu is None: self.likelihood.nu = self.pymc3_priors.pop('nu') else: del self.pymc3_priors['nu'] for key in self.pymc3_priors: if key not in self.likelihood.function_keys: raise ValueError( "Prior key '{}' is not a function key!".format( key)) model = self.likelihood.func(self.likelihood.x, **self.pymc3_priors) # set the distribution pymc3.StudentT('likelihood', nu=self.likelihood.nu, mu=model, sd=self.likelihood.sigma, observed=self.likelihood.y) elif isinstance( self.likelihood, (GravitationalWaveTransient, BasicGravitationalWaveTransient)): # set theano Op - pass _search_parameter_keys, which only contains non-fixed variables logl = LogLike(self._search_parameter_keys, self.likelihood, self.pymc3_priors) parameters = dict() for key in self._search_parameter_keys: try: parameters[key] = self.pymc3_priors[key] except KeyError: raise KeyError( "Unknown key '{}' when setting GravitationalWaveTransient likelihood" .format(key)) # convert to theano tensor variable values = tt.as_tensor_variable(list(parameters.values())) pymc3.DensityDist('likelihood', lambda v: logl(v), observed={'v': values}) else: raise ValueError("Unknown likelihood has been provided")
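# For orientation, a stripped-down sketch of the same "black box likelihood"
# pattern used above (illustrative only; the toy names ToyLogLike / my_loglike
# are not from the source): wrap an arbitrary Python log-likelihood in a theano
# Op and hand it to pm.DensityDist, sampling with a gradient-free step method.
import numpy as np
import pymc3 as pm
import theano.tensor as tt


class ToyLogLike(tt.Op):
    itypes = [tt.dvector]   # parameter vector
    otypes = [tt.dscalar]   # scalar log-likelihood

    def __init__(self, loglike):
        self.loglike = loglike

    def perform(self, node, inputs, outputs):
        theta, = inputs
        outputs[0][0] = np.array(self.loglike(theta))


def my_loglike(theta):
    # toy Gaussian log-likelihood centred on 0.5
    return -0.5 * np.sum((theta - 0.5) ** 2)


logl = ToyLogLike(my_loglike)
with pm.Model():
    m = pm.Uniform('m', -1.0, 1.0)
    theta = tt.as_tensor_variable([m])
    pm.DensityDist('likelihood', lambda v: logl(v), observed={'v': theta})
    trace = pm.sample(500, step=pm.Slice(), chains=1)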
def __init__( self, cell_state_mat: np.ndarray, X_data: np.ndarray, n_comb: int = 50, data_type: str = "float32", n_iter=20000, learning_rate=0.005, total_grad_norm_constraint=200, verbose=True, var_names=None, var_names_read=None, obs_names=None, fact_names=None, sample_id=None, cell_number_prior={ "cells_per_spot": 8, "factors_per_spot": 7, "combs_per_spot": 2.5 }, cell_number_var_prior={ "cells_mean_var_ratio": 1, "factors_mean_var_ratio": 1, "combs_mean_var_ratio": 1 }, phi_hyp_prior={ "mean": 3, "sd": 1 }, spot_fact_mean_var_ratio=5, exper_gene_level_mean_var_ratio=10, ): ############# Initialise parameters ################ super().__init__( cell_state_mat, X_data, data_type, n_iter, learning_rate, total_grad_norm_constraint, verbose, var_names, var_names_read, obs_names, fact_names, sample_id, ) self.phi_hyp_prior = phi_hyp_prior self.n_comb = n_comb self.spot_fact_mean_var_ratio = spot_fact_mean_var_ratio self.exper_gene_level_mean_var_ratio = exper_gene_level_mean_var_ratio # generate parameters for samples self.spot2sample_df = pd.get_dummies(sample_id) # convert to np.ndarray self.spot2sample_mat = self.spot2sample_df.values self.n_exper = self.spot2sample_mat.shape[1] # assign extra data to dictionary with (1) shared parameters (2) input data self.extra_data_tt = { "spot2sample": theano.shared(self.spot2sample_mat.astype(self.data_type)) } self.extra_data = { "spot2sample": self.spot2sample_mat.astype(self.data_type) } cell_number_prior["factors_per_combs"] = ( cell_number_prior["factors_per_spot"] / cell_number_prior["combs_per_spot"]) for k in cell_number_var_prior.keys(): cell_number_prior[k] = cell_number_var_prior[k] self.cell_number_prior = cell_number_prior ############# Define the model ################ self.model = pm.Model() with self.model: # =====================Gene expression level scaling======================= # # scale cell state factors by gene_level self.gene_factors = pm.Deterministic("gene_factors", self.cell_state) # self.gene_factors = self.cell_state # tt.printing.Print('gene_factors sum')(gene_factors.sum(0).shape) # tt.printing.Print('gene_factors sum')(gene_factors.sum(0)) # =====================Spot factors======================= # # prior on spot factors reflects the number of cells, fraction of their cytoplasm captured, # times heterogeniety in the total number of mRNA between individual cells with each cell type self.cells_per_spot = pm.Gamma( "cells_per_spot", mu=cell_number_prior["cells_per_spot"], sigma=np.sqrt(cell_number_prior["cells_per_spot"] / cell_number_prior["cells_mean_var_ratio"]), shape=(self.n_obs, 1), ) self.comb_per_spot = pm.Gamma( "combs_per_spot", mu=cell_number_prior["combs_per_spot"], sigma=np.sqrt(cell_number_prior["combs_per_spot"] / cell_number_prior["combs_mean_var_ratio"]), shape=(self.n_obs, 1), ) shape = self.comb_per_spot / np.array(self.n_comb).reshape((1, 1)) rate = tt.ones((1, 1)) / self.cells_per_spot * self.comb_per_spot self.combs_factors = pm.Gamma("combs_factors", alpha=shape, beta=rate, shape=(self.n_obs, self.n_comb)) self.factors_per_combs = pm.Gamma( "factors_per_combs", mu=cell_number_prior["factors_per_combs"], sigma=np.sqrt(cell_number_prior["factors_per_combs"] / cell_number_prior["factors_mean_var_ratio"]), shape=(self.n_comb, 1), ) c2f_shape = self.factors_per_combs / np.array(self.n_fact).reshape( (1, 1)) self.comb2fact = pm.Gamma("comb2fact", alpha=c2f_shape, beta=self.factors_per_combs, shape=(self.n_comb, self.n_fact)) self.spot_factors = pm.Gamma( "spot_factors", 
mu=pm.math.dot(self.combs_factors, self.comb2fact), sigma=pm.math.sqrt( pm.math.dot(self.combs_factors, self.comb2fact) / self.spot_fact_mean_var_ratio), shape=(self.n_obs, self.n_fact), ) # =====================Spot-specific additive component======================= # # molecule contribution that cannot be explained by cell state signatures # these counts are distributed between all genes not just expressed genes self.spot_add_hyp = pm.Gamma("spot_add_hyp", 1, 1, shape=2) self.spot_add = pm.Gamma("spot_add", self.spot_add_hyp[0], self.spot_add_hyp[1], shape=(self.n_obs, 1)) # =====================Gene-specific additive component ======================= # # per gene molecule contribution that cannot be explained by cell state signatures # these counts are distributed equally between all spots (e.g. background, free-floating RNA) self.gene_add_hyp = pm.Gamma("gene_add_hyp", 1, 1, shape=2) self.gene_add = pm.Gamma("gene_add", self.gene_add_hyp[0], self.gene_add_hyp[1], shape=(self.n_exper, self.n_var)) # =====================Gene-specific overdispersion ======================= # self.phi_hyp = pm.Gamma("phi_hyp", mu=phi_hyp_prior["mean"], sigma=phi_hyp_prior["sd"], shape=(1, 1)) self.gene_E = pm.Exponential("gene_E", self.phi_hyp, shape=(self.n_exper, self.n_var)) # =====================Expected expression ======================= # # expected expression self.mu_biol = ( pm.math.dot(self.spot_factors, self.gene_factors.T) + pm.math.dot(self.extra_data_tt["spot2sample"], self.gene_add) + self.spot_add) # tt.printing.Print('mu_biol')(self.mu_biol.shape) # =====================DATA likelihood ======================= # # Likelihood (sampling distribution) of observations & add overdispersion via NegativeBinomial / Poisson self.data_target = pm.NegativeBinomial( "data_target", mu=self.mu_biol, alpha=pm.math.dot(self.extra_data_tt["spot2sample"], 1 / tt.pow(self.gene_E, 2)), observed=self.x_data, total_size=self.X_data.shape, ) # =====================Compute nUMI from each factor in spots ======================= # self.nUMI_factors = pm.Deterministic("nUMI_factors", (self.spot_factors * (self.gene_factors).sum(0)))
def test_pymc3_convert_dists(): """Just a basic check that all PyMC3 RVs will convert to and from Theano RVs.""" tt.config.compute_test_value = "ignore" theano.config.cxx = "" with pm.Model() as model: norm_rv = pm.Normal("norm_rv", 0.0, 1.0, observed=1.0) mvnorm_rv = pm.MvNormal("mvnorm_rv", np.r_[0.0], np.c_[1.0], shape=1, observed=np.r_[1.0]) cauchy_rv = pm.Cauchy("cauchy_rv", 0.0, 1.0, observed=1.0) halfcauchy_rv = pm.HalfCauchy("halfcauchy_rv", 1.0, observed=1.0) uniform_rv = pm.Uniform("uniform_rv", observed=1.0) gamma_rv = pm.Gamma("gamma_rv", 1.0, 1.0, observed=1.0) invgamma_rv = pm.InverseGamma("invgamma_rv", 1.0, 1.0, observed=1.0) exp_rv = pm.Exponential("exp_rv", 1.0, observed=1.0) halfnormal_rv = pm.HalfNormal("halfnormal_rv", 1.0, observed=1.0) beta_rv = pm.Beta("beta_rv", 2.0, 2.0, observed=1.0) binomial_rv = pm.Binomial("binomial_rv", 10, 0.5, observed=5) dirichlet_rv = pm.Dirichlet("dirichlet_rv", np.r_[0.1, 0.1], observed=np.r_[0.1, 0.1]) poisson_rv = pm.Poisson("poisson_rv", 10, observed=5) bernoulli_rv = pm.Bernoulli("bernoulli_rv", 0.5, observed=0) betabinomial_rv = pm.BetaBinomial("betabinomial_rv", 0.1, 0.1, 10, observed=5) categorical_rv = pm.Categorical("categorical_rv", np.r_[0.5, 0.5], observed=1) multinomial_rv = pm.Multinomial("multinomial_rv", 5, np.r_[0.5, 0.5], observed=np.r_[2]) negbinomial_rv = pm.NegativeBinomial("negbinomial_rv", 10.2, 0.5, observed=5) # Convert to a Theano `FunctionGraph` fgraph = model_graph(model) rvs_by_name = { n.owner.inputs[1].name: n.owner.inputs[1] for n in fgraph.outputs } pymc_rv_names = {n.name for n in model.observed_RVs} assert all( isinstance(rvs_by_name[n].owner.op, RandomVariable) for n in pymc_rv_names) # Now, convert back to a PyMC3 model pymc_model = graph_model(fgraph) new_pymc_rv_names = {n.name for n in pymc_model.observed_RVs} pymc_rv_names == new_pymc_rv_names
with pm.Model() as model:
    # Community's prior.
    community_prior = pm.HalfCauchy('community_diric', beta=1, shape=93)
    # Community distribution for this user
    community_weight = pm.Dirichlet('community_weight', a=community_prior, shape=93)

    # Action's prior.
    action_prior = pm.HalfCauchy('action_diric', beta=1, shape=3)
    # Action distribution for this user
    action_weight = pm.Dirichlet('action_weight', a=action_prior, shape=3)

    # Score Prior
    score_sd = pm.Exponential('score_sd', lam=1)
    # Score for this action
    score_numeral = pm.Lognormal('score_numeral', mu=data[:, 2].astype(float),
                                 sd=score_sd, shape=len(data))

    # Numerize community and action
    community_numeral = tt.dot(community_matrix, community_weight)
    action_numeral = tt.dot(action_matrix, action_weight)

    # Draw coefficient of community, action, score and intercept
    community_coef = pm.Normal('community_coef', mu=0, sd=1)
    action_coef = pm.Normal('action_coef', mu=0, sd=1)
    score_coef = pm.Normal('score_coef', mu=0, sd=1)
    intercept = pm.Normal('intercept', mu=0, sd=1)
    return std_series


# %%
data["Divorce_std"] = standardize(data["Divorce"])
data["Marriage_std"] = standardize(data["Marriage"])
data["MedianAgeMarriage_std"] = standardize(data["MedianAgeMarriage"])

# %%
data["MedianAgeMarriage"].std()

# %%
with pm.Model() as m_5_1:
    a = pm.Normal("a", 0, 0.2)
    bA = pm.Normal("bA", 0, 0.5)
    sigma = pm.Exponential("sigma", 1)
    mu = pm.Deterministic("mu", a + bA * data["MedianAgeMarriage_std"])
    divorce_rate_std = pm.Normal("divorce_rate_std", mu=mu, sigma=sigma,
                                 observed=data["Divorce_std"].values)
    prior_samples = pm.sample_prior_predictive()
    m_5_1_trace = pm.sample()

# %%
az.plot_trace(m_5_1_trace, var_names=["a", "bA"])

# %%
fig, ax = plt.subplots()
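# A short, hedged sketch (not part of the source notebook, assuming numpy and
# matplotlib are already imported as np/plt): use the prior samples drawn
# above to visualize the regression lines implied by the priors of m_5_1,
# the usual prior-predictive check for this model.
x_seq = np.linspace(-2, 2, 50)
for a_s, bA_s in zip(prior_samples["a"][:50], prior_samples["bA"][:50]):
    ax.plot(x_seq, a_s + bA_s * x_seq, c="k", alpha=0.3)
ax.set_xlabel("Median age of marriage (std)")
ax.set_ylabel("Divorce rate (std)")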
# chain2 = trace2
# varnames1 = ['beta', 'beta1', 'beta2', 'beta3', 'beta4']
# pm.traceplot(chain2, varnames1)
# plt.show()
#
# # Plot the autocorrelation curves
# pm.autocorrplot(chain2)
# plt.show()

# ======================================================================
# The Student's t model works fairly well, but some parameters do not converge well
# Added an error term
with pm.Model() as mulpartial_model:
    # define priors
    sigma = pm.HalfCauchy('sigma', 10)
    nu = pm.Exponential('nu', 1 / 10)
    mu_a = pm.Uniform('mu_a', -10, 10)
    sigma_a = pm.HalfNormal('sigma_a', sd=20)
    sigma_a1 = pm.HalfCauchy('sigma_a1', 10)
    beta = pm.Normal('beta', mu=mu_a, sd=sigma_a, shape=companiesABC)
    beta1 = pm.Normal('beta1', 0, 5)
    beta2 = pm.Normal('beta2', 0, 12)
    beta3 = pm.Normal('beta3', 0, 20)
    beta4 = pm.Normal('beta4', 0, sd=sigma_a1)

    # define likelihood: build the time-dependent regression function
    theta = beta[companyABC] + beta1 * elec_year1 + beta2 * elec_tem1 \
        + beta3 * elec_RH1 + beta4 * elec_tem1 * elec_RH1
    Observed = pm.StudentT("Observed", mu=theta,
def find_self_ref_increases_for_spec(refcounts_main, stats_table, expecrefs, provs_and_specs, specialty=None): ## takes referrals data with "dater", "self_ref", "ref_spec" and returns potential change increases in ## change points. if specialty == None: provs_and_specs = provs_and_specs else: provs_and_specs = provs_and_specs[provs_and_specs['ref_spec'].isin( specialty)] provs = list(set(provs_and_specs['ref_prov'])) length = len(provs) counter = 0 for spec in specialty: provs = list( set(provs_and_specs[provs_and_specs['ref_spec'] == spec] ['ref_prov'])) for idx, prov in enumerate(provs): counter += 1 print('{0:0.4f} complete'.format(counter / length)) ## assign lambdas and tau to stochastic variables refcounts = np.array( refcounts_main.loc[np.in1d(refcounts_main['ref_prov'], prov), 'self_ref']) n_refcounts = len(refcounts) with pm.Model() as model: alpha = 1.0 / refcounts.mean() # Recall count_data is the # variable that holds our txt counts lambda_1 = pm.Exponential("lambda_1", alpha) lambda_2 = pm.Exponential("lambda_2", alpha) tau = pm.DiscreteUniform("tau", lower=0, upper=n_refcounts) ## create a combined function for lambda (it is still a RV) with model: idx = np.arange(n_refcounts) # Index lambda_ = pm.math.switch(tau >= idx, lambda_1, lambda_2) ## combine the data with our proposed data generation scheme with model: observation = pm.Poisson("obs", lambda_, observed=refcounts) ## inference with model: step = pm.Metropolis() trace = pm.sample(25, tune=2500, step=step) lambda_1_samples = trace['lambda_1'] lambda_2_samples = trace['lambda_2'] tau_samples = trace['tau'] N = tau_samples.shape[0] expected_refs_per_week = np.zeros(n_refcounts) for week in range(0, n_refcounts): ix = week < tau_samples expected_refs_per_week[week] = ( lambda_1_samples[ix].sum() + lambda_2_samples[~ix].sum()) / N expecrefs[prov] = expected_refs_per_week stats_table.loc[prov, 'specialty'] = spec stats_table.loc[prov, 'tau_mean'] = np.mean(tau_samples) stats_table.loc[prov, 'tau_std'] = np.std(tau_samples) stats_table.loc[prov, 'mean1'] = np.mean(lambda_1_samples) stats_table.loc[prov, 'mean2'] = np.mean(lambda_2_samples) stats_table.loc[prov, 'mean_diff'] = st.ttest_ind( lambda_1_samples, lambda_2_samples)[1] return stats_table, expecrefs
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as stats
import pymc3 as pm

plt.figure(figsize=(8.5, 4.5))

with pm.Model() as model:
    parameter = pm.Exponential("poisson_param", 1)
    data_generator = pm.Poisson("data_generator", parameter)
    data_plus_one = data_generator + 1

print(parameter.tag.test_value)

with pm.Model() as model:
    theta = pm.Exponential("theta", 2)
    data_generator = pm.Poisson("data_generator", theta)

print(theta.tag.test_value)

with pm.Model() as ab_testing:
    p_A = pm.Uniform("P(A)", 0, 1)
    p_B = pm.Uniform("P(B)", 0, 1)

print(theta.random)
print("parameter.tag.test_value =", parameter.tag.test_value)
print("data_generator.tag.test_value =", data_generator.tag.test_value)
print("data_plus_one.tag.test_value =", data_plus_one.tag.test_value)

with pm.Model() as model:
with pm.Model() as model:
    # -------------------------------------------------------------------------
    # Priors
    # -------------------------------------------------------------------------
    beta = pm.Normal('beta', mu=0, sd=10)
    beta_day = pm.Normal('beta_day', mu=0, sd=10)

    # -------------------------------------------------------------------------
    # Likelihood
    # -------------------------------------------------------------------------
    loglamb_observed = beta + beta_day * day_within_period1
    lamb_observed = np.exp(loglamb_observed)
    # Y_hat_observed = pm.Exponential('Y_hat_observed', lam=lamb_observed,
    #                                 observed=time_to_next_event[~censored])
    Y_latent = pm.Exponential('Y_latent', lam=lamb_observed,
                              shape=len(test_obs1), testval=test_obs1)
    Y_observed = pm.Potential(
        'Y_observed',
        selfreport_mem(Y_latent, time_to_next_event1, windowmin1, windowmax1))

    # Switched model to 1 parameter for both censored/uncensored
    # (makes sense if final obs is "real")
    loglamb_censored = beta + beta_day * day_within_period[censored]
    lamb_censored = np.exp(loglamb_censored)
    Y_hat_censored = pm.Potential(
        'Y_hat_censored',
        exponential_log_complementary_cdf(x=time_to_next_event[censored],
                                          lam=lamb_censored))

#%%
# Sample from posterior distribution
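# For reference, one plausible implementation of the helper assumed above
# (a sketch, not taken from the source): the log complementary CDF (log
# survival function) of an Exponential(lam) distribution, which is exactly
# the term a censored observation contributes via pm.Potential.
def exponential_log_complementary_cdf(x, lam):
    # P(X > x) = exp(-lam * x)  =>  log P(X > x) = -lam * x
    return -lam * x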
def exponential_beta(n=2):
    with pm.Model() as model:
        x = pm.Beta('x', 3, 1, shape=n, transform=None)
        y = pm.Exponential('y', 1, shape=n, transform=None)
    return model.test_point, model, None
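# Hypothetical usage (an illustrative sketch, not in the source): build the
# untransformed model and inspect the starting point PyMC3 would use. With
# transform=None the test point holds the raw Beta/Exponential values rather
# than log- or logodds-transformed ones.
test_point, model, _ = exponential_beta(n=3)
print(test_point)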
# deaths
total_deaths = deaths.groupby("day").sum()
dt_d = total_deaths.index
xx_d = np.arange(len(total_cases.index))
yy_d = total_deaths['deaths']

n_samples = 300
n_tune = 300
SEED = 1
N_COMPS = 2
N_DATA = len(xx)

xx2 = np.stack([xx, xx]).T
yy2 = np.stack([yy, yy]).T

with pm.Model() as model:
    k = pm.TruncatedNormal('k', mu=2 * yy[-1], sigma=yy[-1], lower=0, shape=N)
    sigma = pm.Exponential('sigma', lam=1 / 1e5, shape=N)
    dt = pm.Normal('dt', mu=30, sd=10, shape=N)
    tm = pm.Uniform('tm', lower=xx[0], upper=xx[-1], shape=N)

    yhat = k * pm.math.invlogit(np.log(81) / dt * (xx2 - tm))
    comps = pm.Normal.dist(mu=yhat, sigma=sigma, shape=(N, len(xx)))

    w = pm.Dirichlet('w', np.ones(N))
    obs = pm.Mixture('obs', w=w, comp_dists=comps, observed=yy)

    trace = pm.sample(draws=n_samples, tune=n_tune, random_seed=SEED, cores=3)

ALPHA = 0.05
params = np.vstack([trace['k'], trace['dt'], trace['tm']])


def plot_projection(ax, p=0.05, **kwargs):
    extended_xx = np.arange(len(xx) * 2)
# 50%      52.875000
# 75%      54.960000
# max      68.580000

# normal
with pm.Model() as model_g:
    mu = pm.Uniform('mu', lower=40, upper=70)
    sigma = pm.HalfNormal('sigma', sd=10)
    y = pm.Normal('y', mu=mu, sd=sigma, observed=data)
    trace_g = pm.sample(1000)

# students t
with pm.Model() as model_t:
    mu = pm.Uniform('mu', 40, 70)
    sigma = pm.HalfNormal('sigma', sd=10)
    v = pm.Exponential('v', 1 / 30)
    y = pm.StudentT('y', mu=mu, sd=sigma, nu=v, observed=data)
    trace_t = pm.sample(1000)

data2 = Series(data, copy=True)
data2[48] = 65
data2[49] = 63
data2[50] = 69
data2.loc[data2 < 60].describe()
data2.describe()

# add some outliers
with pm.Model() as model_g2:
    mu = pm.Uniform('mu', lower=40, upper=70)
    sigma = pm.HalfNormal('sigma', sd=10)
    distri = stats.t(df)
    x_pdf = distri.pdf(x_values)
    plt.plot(x_values, x_pdf, label=fr'$\nu = {df}$', lw=3)

x_pdf = stats.norm.pdf(x_values)
plt.plot(x_values, x_pdf, 'k--', label=r'$\nu = \infty$')
plt.xlabel('x')
plt.yticks([])
plt.legend()
plt.xlim(-5, 5)

# %%
with pm.Model() as model_t:
    μ = pm.Uniform('μ', 40, 75)
    σ = pm.HalfNormal('σ', sd=10)
    ν = pm.Exponential('ν', 1/30)
    y = pm.StudentT('y', mu=μ, sd=σ, nu=ν, observed=data)
    trace_t = pm.sample(1000)

az.plot_trace(trace_t)

# %%
az.summary(trace_t)

# %%
y_ppc_t = pm.sample_posterior_predictive(
    trace_t, 100, model_t, random_seed=123)
y_pred_t = az.from_pymc3(trace=trace_t, posterior_predictive=y_ppc_t)
ax = az.plot_ppc(y_pred_t, figsize=(12, 6), mean=False)
ax[0].legend(fontsize=15)
def model(): global data alpha_prior = 10. beta_prior = 0.1 alpha_init = np.ones((N_GROUPS,1)) noise_init = np.ones((N_GROUPS,1))*1e-2 parts_ones = np.ones((TOTAL_PARTS)) data_ones = np.ones(len(data[0])) hds = store_hds_old(paren_lst,filt) ns = np.sum(data, axis=1) m_ass = np.where(assignments == 0) k_ass = np.where(assignments == 1) t_ass = np.where(assignments==2) a_ass = np.where(assignments==3) n_monk = len(m_ass[0]) n_kid = len(k_ass[0]) n_tsim = len(t_ass[0]) n_adult = len(a_ass[0]) smooth = np.ones((TOTAL_PARTS,N_ALGS)) * beta_prior #bias in choice of starting parenthesis start_p = store_start_p(paren_lst, n=TOTAL_PARTS, lst = ["("]) start_np = 1 - start_p with pm.Model() as m: alpha = pm.Exponential('alpha', alpha_prior, shape=(N_GROUPS,1)) alpha = np.ones((N_GROUPS, 1)) * 10. beta = pm.Dirichlet('beta', np.ones((N_GROUPS, N_ALGS))*beta_prior, # testval=np.ones(N_ALGS), shape=(N_GROUPS,N_ALGS)) theta = pm.Dirichlet('theta', alpha[assignments] * beta[assignments], shape=(TOTAL_PARTS,N_ALGS)) #noise_pr_a = pm.Exponential('n_pr_a', 1.,shape=N_GROUPS) #noise_pr_b = pm.Exponential('n_pr_b', 1.,shape=N_GROUPS) #noise_pr_a = np.ones(N_GROUPS) * 10. #noise_pr_b = np.ones(N_GROUPS) * 10. noise = pm.Beta("noise", 1,2, shape=TOTAL_PARTS, testval=0.1) #noise = pm.Beta("noise", noise_pr_a[assignments],noise_pr_b[assignments], # shape=TOTAL_PARTS) # noise = pm.Beta("noise", 1,1, shape=N_GROUPS, testval=0.1) new_algs = map(lambda x: theta[x].dot(format_algs_theano(hds, noise[x])), np.arange(TOTAL_PARTS)) theta_resp = tt.concatenate([new_algs], axis=0) #theta_resp = theta.dot(algorithms) """ noise = pm.Beta("noise", 1,9, shape=N_GROUPS, testval=0.1) noise_alg = algorithms + noise[assignments] new_algs = format_algs_theano_bypart(hds, noise, total_parts=TOTAL_PARTS, n_algs=N_ALGS,max_hd=max_hd) theta_resp = theta.dot(new_algs) #theta_resp = theta.dot(algorithms) monkey_theta = theta[m_ass] kid_theta = theta[k_ass] tsim_theta = theta[t_ass] adult_theta = theta[a_ass] new_algs_monkey = format_algs_theano(hds, noise[0]) new_algs_kid = format_algs_theano(hds, noise[1]) new_algs_tsim = format_algs_theano(hds, noise[2]) new_algs_adult = format_algs_theano(hds, noise[3]) monkey_algs = monkey_theta.dot(new_algs_monkey) kid_algs = kid_theta.dot(new_algs_kid) tsim_algs = tsim_theta.dot(new_algs_tsim) adult_algs = adult_theta.dot(new_algs_adult) theta_resp = tt.concatenate([monkey_algs, kid_algs, tsim_algs, adult_algs], axis=0) """ bias = pm.Beta("bias", 1,1,shape=(TOTAL_PARTS,1)) biased_theta_resps = start_p * bias * theta_resp + start_np * (1.-bias) * theta_resp sum_norm = biased_theta_resps.sum(axis=1).reshape((TOTAL_PARTS,1)) biased_theta_resps = biased_theta_resps / sum_norm #biased_theta_resps = theta_resp pm.Multinomial('resp', n=ns, p = biased_theta_resps, shape=(TOTAL_PARTS, N_RESPS), observed=data) #db = Text('trace') trace = pm.sample(MCMC_STEPS, tune=BURNIN,target_accept=0.9, thin=MCMC_THIN) print_star("Model Finished!") if MCMC_CHAINS > 1: print pm.gelman_rubin(trace) summary = pm.df_summary(trace) which = 45 samp =100 return trace, summary
import pymc3 as pm
from scipy.stats import poisson
import seaborn as sns

# Config
os.chdir("/home/jovyan/work")
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
plt.rcParams["figure.figsize"] = (12, 3)

# Preparation
N = 1000
true_lams = [20, 50]
true_tau = 300
data = np.hstack([
    poisson(true_lams[0]).rvs(true_tau),
    poisson(true_lams[1]).rvs(N - true_tau),
])

# Modeling
with pm.Model() as model:
    lam_1 = pm.Exponential("lam_1", data.mean())
    lam_2 = pm.Exponential("lam_2", data.mean())
    tau = pm.DiscreteUniform("tau", lower=0, upper=N-1)

    idx = np.arange(N)
    lam = pm.math.switch(tau > idx, lam_1, lam_2)

    female = pm.Poisson("target", lam, observed=data)

    step = pm.Metropolis()
    trace = pm.sample(20000, tune=5000, step=step, chains=10)

pm.traceplot(trace[1000:], grid=True)
plt.savefig("./results/3-15-c-inference.png")
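# A brief follow-up sketch (not in the original notebook): inspect the
# posterior of the changepoint itself and compare it with the true value
# used to generate the data.
tau_samples = trace["tau"][1000:]
plt.figure()
plt.hist(tau_samples, bins=50, density=True)
plt.axvline(true_tau, color="r", linestyle="--", label="true tau")
plt.legend()
plt.show()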
def __init__(self, cell_state_mat: np.ndarray, X_data: np.ndarray, Y_data: np.ndarray, n_comb: int = 50, data_type: str = 'float32', n_iter=20000, learning_rate=0.005, total_grad_norm_constraint=200, verbose=True, var_names=None, var_names_read=None, obs_names=None, fact_names=None, sample_id=None, gene_level_prior={ 'mean': 1 / 2, 'sd': 1 / 4 }, gene_level_var_prior={'mean_var_ratio': 1}, cell_number_prior={ 'cells_per_spot': 8, 'factors_per_spot': 7, 'combs_per_spot': 2.5 }, cell_number_var_prior={ 'cells_mean_var_ratio': 1, 'factors_mean_var_ratio': 1, 'combs_mean_var_ratio': 1 }, phi_hyp_prior={ 'mean': 3, 'sd': 1 }, spot_fact_mean_var_ratio=0.5): ############# Initialise parameters ################ super().__init__(cell_state_mat, X_data, data_type, n_iter, learning_rate, total_grad_norm_constraint, verbose, var_names, var_names_read, obs_names, fact_names, sample_id) self.Y_data = Y_data self.n_npro = Y_data.shape[1] self.y_data = theano.shared(Y_data.astype(self.data_type)) self.n_rois = Y_data.shape[0] # Total number of gene counts in each region of interest, divided by 10^5: self.l_r = np.array([np.sum(X_data[i, :]) for i in range(self.n_rois) ]).reshape(self.n_rois, 1) * 10**(-5) for k in gene_level_var_prior.keys(): gene_level_prior[k] = gene_level_var_prior[k] self.gene_level_prior = gene_level_prior self.phi_hyp_prior = phi_hyp_prior self.n_comb = n_comb self.spot_fact_mean_var_ratio = spot_fact_mean_var_ratio cell_number_prior['factors_per_combs'] = ( cell_number_prior['factors_per_spot'] / cell_number_prior['combs_per_spot']) for k in cell_number_var_prior.keys(): cell_number_prior[k] = cell_number_var_prior[k] self.cell_number_prior = cell_number_prior ############# Define the model ################ self.model = pm.Model() with self.model: # ============================ Negative Probe Binding ===================== # # Negative probe counts scale linearly with the total number of counts in a region of interest. # The linear slope is drawn from a gamma distribution. Mean and variance are inferred from the data # and are the same for the non-specific binding term for gene probes further below. self.b_n_hyper = pm.Gamma('b_n_hyper', alpha=np.array((3, 1)), beta=np.array((1, 1)), shape=2) self.b_n = pm.Gamma('b_n', mu=self.b_n_hyper[0], sigma=self.b_n_hyper[1], shape=(1, self.n_npro)) self.y_rn = self.b_n * self.l_r # ===================== Non-specific binding additive component ======================= # # Additive term for non-specific binding of gene probes are drawn from a gamma distribution with # the same mean and variance as for negative probes above. 
self.gene_add = pm.Gamma('gene_add', mu=self.b_n_hyper[0], sigma=self.b_n_hyper[1], shape=(1, self.n_genes)) # =====================Gene expression level scaling======================= # # Explains difference in expression between genes and # how it differs in single cell and spatial technology # compute hyperparameters from mean and sd shape = gene_level_prior['mean']**2 / gene_level_prior['sd']**2 rate = gene_level_prior['mean'] / gene_level_prior['sd']**2 shape_var = shape / gene_level_prior['mean_var_ratio'] rate_var = rate / gene_level_prior['mean_var_ratio'] self.gene_level_alpha_hyp = pm.Gamma('gene_level_alpha_hyp', mu=shape, sigma=np.sqrt(shape_var), shape=(1, 1)) self.gene_level_beta_hyp = pm.Gamma('gene_level_beta_hyp', mu=rate, sigma=np.sqrt(rate_var), shape=(1, 1)) self.gene_level = pm.Gamma('gene_level', self.gene_level_alpha_hyp, self.gene_level_beta_hyp, shape=(self.n_genes, 1)) self.gene_factors = pm.Deterministic('gene_factors', self.cell_state) # =====================Spot factors======================= # # prior on spot factors reflects the number of cells, fraction of their cytoplasm captured, # times heterogeniety in the total number of mRNA between individual cells with each cell type self.cells_per_spot = pm.Gamma('cells_per_spot', mu=cell_number_prior['cells_per_spot'], sigma=np.sqrt(cell_number_prior['cells_per_spot'] \ / cell_number_prior['cells_mean_var_ratio']), shape=(self.n_cells, 1)) self.comb_per_spot = pm.Gamma('combs_per_spot', mu=cell_number_prior['combs_per_spot'], sigma=np.sqrt(cell_number_prior['combs_per_spot'] \ / cell_number_prior['combs_mean_var_ratio']), shape=(self.n_cells, 1)) shape = self.comb_per_spot / np.array(self.n_comb).reshape((1, 1)) rate = tt.ones((1, 1)) / self.cells_per_spot * self.comb_per_spot self.combs_factors = pm.Gamma('combs_factors', alpha=shape, beta=rate, shape=(self.n_cells, self.n_comb)) self.factors_per_combs = pm.Gamma('factors_per_combs', mu=cell_number_prior['factors_per_combs'], sigma=np.sqrt(cell_number_prior['factors_per_combs'] \ / cell_number_prior['factors_mean_var_ratio']), shape=(self.n_comb, 1)) c2f_shape = self.factors_per_combs / np.array(self.n_fact).reshape( (1, 1)) self.comb2fact = pm.Gamma('comb2fact', alpha=c2f_shape, beta=self.factors_per_combs, shape=(self.n_comb, self.n_fact)) self.spot_factors = pm.Gamma('spot_factors', mu=pm.math.dot(self.combs_factors, self.comb2fact), sigma=pm.math.sqrt(pm.math.dot(self.combs_factors, self.comb2fact) \ / self.spot_fact_mean_var_ratio), shape=(self.n_cells, self.n_fact)) # =====================Spot-specific additive component======================= # # molecule contribution that cannot be explained by cell state signatures # these counts are distributed between all genes not just expressed genes self.spot_add_hyp = pm.Gamma('spot_add_hyp', 1, 1, shape=2) self.spot_add = pm.Gamma('spot_add', self.spot_add_hyp[0], self.spot_add_hyp[1], shape=(self.n_cells, 1)) # =====================Gene-specific overdispersion ======================= # self.phi_hyp = pm.Gamma('phi_hyp', mu=phi_hyp_prior['mean'], sigma=phi_hyp_prior['sd'], shape=(1, 1)) self.gene_E = pm.Exponential('gene_E', self.phi_hyp, shape=(self.n_genes, 1)) # =====================Expected expression ======================= # # Expected counts for negative probes and gene probes concatenated into one array. Note that non-specific binding # scales linearly with the total number of counts (l_r) in this model. 
self.mu_biol = tt.concatenate([self.y_rn, pm.math.dot(self.spot_factors, self.gene_factors.T) * self.gene_level.T \ + self.gene_add * self.l_r + self.spot_add], axis = 1) # =====================DATA likelihood ======================= # # Likelihood (sampling distribution) of observations & add overdispersion via NegativeBinomial / Poisson self.data_target = pm.NegativeBinomial( 'data_target', mu=self.mu_biol, alpha=tt.concatenate([ np.repeat(10**10, self.n_npro).reshape(1, self.n_npro), 1 / (self.gene_E.T * self.gene_E.T) ], axis=1), observed=tt.concatenate([self.y_data, self.x_data], axis=1)) # =====================Compute nUMI from each factor in spots ======================= # self.nUMI_factors = pm.Deterministic( 'nUMI_factors', (self.spot_factors * (self.gene_factors * self.gene_level).sum(0)))
from IPython.core.pylabtools import figsize
import numpy as np
import pymc3 as pm
import theano.tensor as tt
import matplotlib.pyplot as plt

count_data = np.loadtxt("demos/bayesian-programming/014_poisson.csv")
n_count_data = len(count_data)

with pm.Model() as model:
    alpha = 1.0 / count_data.mean()  # Recall count_data is the
                                     # variable that holds our txt counts
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)
    tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data - 1)

with model:
    idx = np.arange(n_count_data)  # Index
    lambda_ = pm.math.switch(tau > idx, lambda_1, lambda_2)

with model:
    observation = pm.Poisson("obs", lambda_, observed=count_data)

with model:
    step = pm.Metropolis()
    trace = pm.sample(10000, tune=5000, step=step)

lambda_1_samples = trace['lambda_1']
lambda_2_samples = trace['lambda_2']
tau_samples = trace['tau']
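# A typical next step, sketched here for illustration (not part of the
# original file): combine the posterior samples of tau, lambda_1 and lambda_2
# into the expected count for each day.
N_draws = tau_samples.shape[0]
expected_counts_per_day = np.zeros(n_count_data)
for day in range(n_count_data):
    ix = day < tau_samples   # draws in which the switch has not yet happened
    expected_counts_per_day[day] = (lambda_1_samples[ix].sum()
                                    + lambda_2_samples[~ix].sum()) / N_draws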
def build(self): """ Builds and returns the Generative model. Also sets self.model """ p_delay = get_delay_distribution(incubation=self.delay) nonzero_days = self.observed.total.gt(0) len_observed = len(self.observed) convolution_ready_gt = self._get_convolution_ready_gt(len_observed) x = np.arange(len_observed)[:, None] coords = { "date": self.observed.index.values, "nonzero_date": self.observed.index.values[self.observed.total.gt(0)], } with pm.Model(coords=coords) as self.model: # Let log_r_t walk randomly with a fixed prior of ~0.035. Think # of this number as how quickly r_t can react. log_r_t = pm.GaussianRandomWalk("log_r_t", sigma=0.035, dims=["date"]) r_t = pm.Deterministic("r_t", pm.math.exp(log_r_t), dims=["date"]) # For a given seed population and R_t curve, we calculate the # implied infection curve by simulating an outbreak. While this may # look daunting, it's simply a way to recreate the outbreak # simulation math inside the model: # https://staff.math.su.se/hoehle/blog/2020/04/15/effectiveR0.html seed = pm.Exponential("seed", 1 / 0.02) y0 = tt.zeros(len_observed) y0 = tt.set_subtensor(y0[0], seed) outputs, _ = theano.scan( fn=lambda t, gt, y, r_t: tt.set_subtensor( y[t], tt.sum(r_t * y * gt)), sequences=[tt.arange(1, len_observed), convolution_ready_gt], outputs_info=y0, non_sequences=r_t, n_steps=len_observed - 1, ) infections = pm.Deterministic("infections", outputs[-1], dims=["date"]) # Convolve infections to confirmed positive reports based on a known # p_delay distribution. See patients.py for details on how we calculate # this distribution. test_adjusted_positive = pm.Deterministic( "test_adjusted_positive", conv2d( tt.reshape(infections, (1, len_observed)), tt.reshape(p_delay, (1, len(p_delay))), border_mode="full", )[0, :len_observed], dims=["date"]) # Picking an exposure with a prior that exposure never goes below # 0.1 * max_tests. The 0.1 only affects early values of Rt when # testing was minimal or when data errors cause underreporting # of tests. tests = pm.Data("tests", self.observed.total.values, dims=["date"]) exposure = pm.Deterministic("exposure", pm.math.clip( tests, self.observed.total.max() * 0.1, 1e9), dims=["date"]) # Test-volume adjust reported cases based on an assumed exposure # Note: this is similar to the exposure parameter in a Poisson # regression. positive = pm.Deterministic("positive", exposure * test_adjusted_positive, dims=["date"]) # Save data as part of trace so we can access in inference_data observed_positive = pm.Data("observed_positive", self.observed.positive.values, dims=["date"]) nonzero_observed_positive = pm.Data( "nonzero_observed_positive", self.observed.positive[nonzero_days.values].values, dims=["nonzero_date"]) positive_nonzero = pm.NegativeBinomial( "nonzero_positive", mu=positive[nonzero_days.values], alpha=pm.Gamma("alpha", mu=6, sigma=1), observed=nonzero_observed_positive, dims=["nonzero_date"]) return self.model
def generate_priors(self): """Set up the priors for the model.""" with self.model: if "sigma" not in self.priors: self.priors["sigma"] = pm.HalfCauchy("sigma_%s" % self.name, 10, testval=1.0) if "seasonality" not in self.priors and self.seasonality: self.priors["seasonality"] = pm.Laplace( "seasonality_%s" % self.name, 0, self.seasonality_prior_scale, shape=len(self.seasonality), ) if "holidays" not in self.priors and self.holidays: self.priors["holidays"] = pm.Laplace( "holidays_%s" % self.name, 0, self.holidays_prior_scale, shape=len(self.holidays), ) if "regressors" not in self.priors and self.regressors: if self.positive_regressors_coefficients: self.priors["regressors"] = pm.Exponential( "regressors_%s" % self.name, self.regressors_prior_scale, shape=len(self.regressors), ) else: self.priors["regressors"] = pm.Laplace( "regressors_%s" % self.name, 0, self.regressors_prior_scale, shape=len(self.regressors), ) if self.growth and "growth" not in self.priors: self.priors["growth"] = pm.Normal("growth_%s" % self.name, 0, 0.1) if (len(self.changepoints) and "changepoints" not in self.priors and len(self.changepoints)): if self.auto_changepoints: k = self.n_changepoints alpha = pm.Gamma("alpha", 1.0, 1.0) beta = pm.Beta("beta", 1.0, alpha, shape=k) w1 = pm.Deterministic( "w1", tt.concatenate( [[1], tt.extra_ops.cumprod(1 - beta)[:-1]]) * beta, ) w, _ = theano.map( fn=lambda x: tt.switch(tt.gt(x, 1e-4), x, 0), sequences=[w1]) self.w = pm.Deterministic("w", w) else: k = len(self.changepoints) w = 1 cgpt = pm.Deterministic( "cgpt", pm.Laplace("cgpt_inner", 0, self.changepoints_prior_scale, shape=k) * w, ) self.priors["changepoints"] = pm.Deterministic( "changepoints_%s" % self.name, cgpt) if self.intercept and "intercept" not in self.priors: self.priors["intercept"] = pm.Normal( "intercept_%s" % self.name, self.data["y"].mean(), self.data["y"].std() * 2, ) self.priors_names = {k: v.name for k, v in self.priors.items()}
               help='Toggle to print summary of trace')
p.add_argument('--samples', type=int, default=15000,
               help='Number of sampling iterations')
args = p.parse_args()

# Get the dataset
# Format:
# 10 rows of <T_{i}, X_{i}> pairs
dataset = np.genfromtxt('dataset.txt', delimiter=' ')

# Create the model
pumps_mcmc_model = pymc3.Model()
with pumps_mcmc_model:
    alpha = pymc3.Exponential('alpha', 1.0)
    beta = pymc3.Gamma('beta', 0.1, 1.0)
    for i in range(dataset.shape[0]):
        theta = pymc3.Gamma('theta{}'.format(i), alpha, beta)
        lambd = pymc3.Deterministic('lambda{}'.format(i), theta * dataset[i, 0])
        x = pymc3.Poisson('x{}'.format(i), lambd, observed=dataset[i, 1])

# Perform Metropolis-Hastings algorithm step
# and print the trace of variables
with pumps_mcmc_model:
    step = pymc3.Metropolis(proposal_dist=getattr(
        pymc3.step_methods.metropolis, args.proposal_dist))
    trace = pymc3.sample(args.samples, step=step)

if args.print_summary:
    print(pymc3.summary(trace))
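# An equivalent, more idiomatic formulation (a sketch, assuming the per-pump
# loop above exists only to name variables): give theta a shape instead of
# creating one scalar variable per pump.
with pymc3.Model() as pumps_vectorized_model:
    alpha_v = pymc3.Exponential('alpha', 1.0)
    beta_v = pymc3.Gamma('beta', 0.1, 1.0)
    theta_v = pymc3.Gamma('theta', alpha_v, beta_v, shape=dataset.shape[0])
    lambd_v = pymc3.Deterministic('lambda', theta_v * dataset[:, 0])
    x_v = pymc3.Poisson('x', lambd_v, observed=dataset[:, 1])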
from pymc3.distributions.timeseries import GaussianRandomWalk
from scipy import optimize
import pandas as pd

# load data
returns = pd.read_csv(
    'https://raw.githubusercontent.com/pymc-devs/pymc3/master/pymc3/examples/data/SP500.csv',
    index_col='date')['change']

## data exploration
#fig, ax = plt.subplots(figsize=(14, 8))
#returns.plot(label='S&P500')
#ax.set(xlabel='time', ylabel='returns')
#ax.legend();

with pm.Model() as model:
    step_size = pm.Exponential('step_size', 50.)
    s = GaussianRandomWalk('s', sd=step_size, shape=len(returns))
    nu = pm.Exponential('nu', .1)
    r = pm.StudentT('r', nu=nu, lam=pm.math.exp(-2 * s), observed=returns)

with model:
    trace = pm.sample(2000, cores=1, target_accept=0.9)

with model:
    pm.traceplot(trace, varnames=['step_size', 'nu'])

fig, ax = plt.subplots()
plt.plot(trace['s'].T, 'b', alpha=.03)
ax.set(title=str(s), xlabel='time', ylabel='log volatility')
def sim_prior_lin(model, x, vars=['α', 'β']):
    prior_ = pm.sample_prior_predictive(vars=vars, model=model)
    prior_array = pd.DataFrame(prior_).to_numpy()
    prior_mu = prior_array.dot(x)
    return prior_mu


def link(sim, x):
    sim_array = pd.DataFrame(sim).to_numpy()
    return sim_array.dot(x)


with pm.Model() as m1:
    α = pm.Normal('α', 1, 1)
    β = pm.Normal('β', 0, 1)
    σ = pm.Exponential('σ', 1)
    μi = α + β * (dA1.rugged_s.values - rbar)
    log_yi = pm.Normal('log_yi', μi, σ, observed=dA1.log_gdp_s)

rugged_seq = np.linspace(-0.1, 1.1, num=30).reshape(-1, 1)
rugged_seq.T.shape

x = np.r_[np.ones((1, 30)), rugged_seq.T]
m1_μ = sim_prior_lin(m1, x)
m1_μ.shape

with pm.Model() as m1i:
    α = pm.Normal('α', 1, 0.1)
    β = pm.Normal('β', 0, 0.3)
    σ = pm.Exponential('σ', 1)
    μi = α + β * (dA1.rugged_s.values - rbar)
def __init__(self, y1, y2): self.y1 = y1 = np.array(y1) self.y2 = y2 = np.array(y2) assert y1.ndim == 1 assert y2.ndim == 1 y_all = np.concatenate((y1, y2)) self.mu_loc = mu_loc = np.mean(y_all) self.mu_scale = mu_scale = np.std(y_all) * 1000 self.sigma_low = sigma_low = np.std(y_all) / 1000 self.sigma_high = sigma_high = np.std(y_all) * 1000 self.nu_min = nu_min = 2.5 self.nu_mean = nu_mean = 30 self._nu_param = nu_mean - nu_min with pm.Model() as self._model: # Note: the IDE might give a warning for these because it thinks # distributions like pm.Normal() don't have a string "name" argument, # but this is false – pm.Distribution redefined __new__, so the # first argument indeed is the name (a string). group1_mean = pm.Normal('Group 1 mean', mu=mu_loc, sd=mu_scale) group2_mean = pm.Normal('Group 2 mean', mu=mu_loc, sd=mu_scale) nu = pm.Exponential('nu - %g' % nu_min, 1 / (nu_mean - nu_min)) + nu_min _ = pm.Deterministic('Normality', nu) group1_logsigma = pm.Uniform('Group 1 log sigma', lower=np.log(sigma_low), upper=np.log(sigma_high)) group2_logsigma = pm.Uniform('Group 2 log sigma', lower=np.log(sigma_low), upper=np.log(sigma_high)) group1_sigma = pm.Deterministic('Group 1 sigma', np.exp(group1_logsigma)) group2_sigma = pm.Deterministic('Group 2 sigma', np.exp(group2_logsigma)) lambda1 = group1_sigma**(-2) lambda2 = group2_sigma**(-2) group1_sd = pm.Deterministic('Group 1 SD', group1_sigma * (nu / (nu - 2))**0.5) group2_sd = pm.Deterministic('Group 2 SD', group2_sigma * (nu / (nu - 2))**0.5) _ = pm.StudentT('Group 1 data', observed=y1, nu=nu, mu=group1_mean, lam=lambda1) _ = pm.StudentT('Group 2 data', observed=y2, nu=nu, mu=group2_mean, lam=lambda2) diff_of_means = pm.Deterministic('Difference of means', group1_mean - group2_mean) _ = pm.Deterministic('Difference of SDs', group1_sd - group2_sd) _ = pm.Deterministic( 'Effect size', diff_of_means / np.sqrt( (group1_sd**2 + group2_sd**2) / 2))
def __init__( self, cell_state_mat: np.ndarray, X_data: np.ndarray, n_comb: int = 50, data_type: str = 'float32', n_iter=20000, learning_rate=0.005, total_grad_norm_constraint=200, verbose=True, var_names=None, var_names_read=None, obs_names=None, fact_names=None, sample_id=None, gene_level_prior={'mean': 1 / 2, 'sd': 1 / 4}, gene_level_var_prior={'mean_var_ratio': 1}, cell_number_prior={'cells_per_spot': 8, 'factors_per_spot': 7, 'combs_per_spot': 2.5}, cell_number_var_prior={'cells_mean_var_ratio': 1, 'factors_mean_var_ratio': 1, 'combs_mean_var_ratio': 1}, phi_hyp_prior={'mean': 3, 'sd': 1}, spot_fact_mean_var_ratio=0.5 ): ############# Initialise parameters ################ super().__init__(cell_state_mat, X_data, data_type, n_iter, learning_rate, total_grad_norm_constraint, verbose, var_names, var_names_read, obs_names, fact_names, sample_id) for k in gene_level_var_prior.keys(): gene_level_prior[k] = gene_level_var_prior[k] self.gene_level_prior = gene_level_prior self.phi_hyp_prior = phi_hyp_prior self.n_comb = n_comb self.spot_fact_mean_var_ratio = spot_fact_mean_var_ratio cell_number_prior['factors_per_combs'] = (cell_number_prior['factors_per_spot'] / cell_number_prior['combs_per_spot']) for k in cell_number_var_prior.keys(): cell_number_prior[k] = cell_number_var_prior[k] self.cell_number_prior = cell_number_prior ############# Define the model ################ self.model = pm.Model() with self.model: # =====================Gene expression level scaling======================= # # Explains difference in expression between genes and # how it differs in single cell and spatial technology # compute hyperparameters from mean and sd shape = gene_level_prior['mean'] ** 2 / gene_level_prior['sd'] ** 2 rate = gene_level_prior['mean'] / gene_level_prior['sd'] ** 2 shape_var = shape / gene_level_prior['mean_var_ratio'] rate_var = rate / gene_level_prior['mean_var_ratio'] self.gene_level_alpha_hyp = pm.Gamma('gene_level_alpha_hyp', mu=shape, sigma=np.sqrt(shape_var), shape=(1, 1)) self.gene_level_beta_hyp = pm.Gamma('gene_level_beta_hyp', mu=rate, sigma=np.sqrt(rate_var), shape=(1, 1)) self.gene_level = pm.Gamma('gene_level', self.gene_level_alpha_hyp, self.gene_level_beta_hyp, shape=(self.n_genes, 1)) # scale cell state factors by gene_level self.gene_factors = pm.Deterministic('gene_factors', self.cell_state) # tt.printing.Print('gene_factors sum')(gene_factors.sum(0).shape) # tt.printing.Print('gene_factors sum')(gene_factors.sum(0)) # =====================Spot factors======================= # # prior on spot factors reflects the number of cells, fraction of their cytoplasm captured, # times heterogeniety in the total number of mRNA between individual cells with each cell type self.cells_per_spot = pm.Gamma('cells_per_spot', mu=cell_number_prior['cells_per_spot'], sigma=np.sqrt(cell_number_prior['cells_per_spot'] \ / cell_number_prior['cells_mean_var_ratio']), shape=(self.n_cells, 1)) self.comb_per_spot = pm.Gamma('combs_per_spot', mu=cell_number_prior['combs_per_spot'], sigma=np.sqrt(cell_number_prior['combs_per_spot'] \ / cell_number_prior['combs_mean_var_ratio']), shape=(self.n_cells, 1)) shape = self.comb_per_spot / np.array(self.n_comb).reshape((1, 1)) rate = tt.ones((1, 1)) / self.cells_per_spot * self.comb_per_spot self.combs_factors = pm.Gamma('combs_factors', alpha=shape, beta=rate, shape=(self.n_cells, self.n_comb)) self.factors_per_combs = pm.Gamma('factors_per_combs', mu=cell_number_prior['factors_per_combs'], sigma=np.sqrt(cell_number_prior['factors_per_combs'] \ / 
cell_number_prior['factors_mean_var_ratio']), shape=(self.n_comb, 1)) c2f_shape = self.factors_per_combs / np.array(self.n_fact).reshape((1, 1)) self.comb2fact = pm.Gamma('comb2fact', alpha=c2f_shape, beta=self.factors_per_combs, shape=(self.n_comb, self.n_fact)) self.spot_factors = pm.Gamma('spot_factors', mu=pm.math.dot(self.combs_factors, self.comb2fact), sigma=pm.math.sqrt(pm.math.dot(self.combs_factors, self.comb2fact) \ / self.spot_fact_mean_var_ratio), shape=(self.n_cells, self.n_fact)) # =====================Spot-specific additive component======================= # # molecule contribution that cannot be explained by cell state signatures # these counts are distributed between all genes not just expressed genes self.spot_add_hyp = pm.Gamma('spot_add_hyp', 1, 1, shape=2) self.spot_add = pm.Gamma('spot_add', self.spot_add_hyp[0], self.spot_add_hyp[1], shape=(self.n_cells, 1)) # =====================Gene-specific additive component ======================= # # per gene molecule contribution that cannot be explained by cell state signatures # these counts are distributed equally between all spots (e.g. background, free-floating RNA) self.gene_add_hyp = pm.Gamma('gene_add_hyp', 1, 1, shape=2) self.gene_add = pm.Gamma('gene_add', self.gene_add_hyp[0], self.gene_add_hyp[1], shape=(self.n_genes, 1)) # =====================Gene-specific overdispersion ======================= # self.phi_hyp = pm.Gamma('phi_hyp', mu=phi_hyp_prior['mean'], sigma=phi_hyp_prior['sd'], shape=(1, 1)) self.gene_E = pm.Exponential('gene_E', self.phi_hyp, shape=(self.n_genes, 1)) # =====================Expected expression ======================= # # expected expression self.mu_biol = pm.math.dot(self.spot_factors, self.gene_factors.T) * self.gene_level.T \ + self.gene_add.T + self.spot_add # tt.printing.Print('mu_biol')(self.mu_biol.shape) # =====================DATA likelihood ======================= # # Likelihood (sampling distribution) of observations & add overdispersion via NegativeBinomial / Poisson self.data_target = pm.NegativeBinomial('data_target', mu=self.mu_biol, alpha=1 / (self.gene_E.T * self.gene_E.T), observed=self.x_data, total_size=self.X_data.shape) # =====================Compute nUMI from each factor in spots ======================= # self.nUMI_factors = pm.Deterministic('nUMI_factors', (self.spot_factors * (self.gene_factors * self.gene_level).sum(0)))
ax.set_ylabel("y")
ax.set_title("The third Anscombe's quartet")
plt.show()

# center the x data
x = x - x.mean()

# ----------------------- specify a probabilistic model for the data ----------------------- #
with pm.Model() as model_t:
    # set the priors over the intercept and the coefficient
    alpha = pm.Normal("alpha", mu=y.mean(), sd=1)
    beta = pm.Normal("beta", mu=0, sd=1)
    # set the prior over the error variance
    sigma = pm.HalfNormal("sigma", 5)
    # set the prior on the degrees of freedom
    vu_ = pm.Exponential("vu_", 1 / 29)
    vu = pm.Deterministic("vu", vu_ + 1)
    # get the likelihood of the data
    obs = pm.StudentT("obs", mu=alpha + beta * x, sigma=sigma, nu=vu,
                      observed=y)
    # inference step
    trace = pm.sample(2000)

# ------------------ compare the result of a simple linear regression (which assumes
# gaussian errors) with the robust linear regression ------------------ #

# get the coefficient and intercept from a scipy linear regression
beta_c, alpha_c = ss.linregress(x, y)[:2]

# plot the non robust linear regression
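# A short continuation sketch (assumed, not from the source): overlay the
# posterior-mean robust fit on top of the least-squares fit.
alpha_m = trace["alpha"].mean()
beta_m = trace["beta"].mean()

plt.scatter(x, y, c="C0")
plt.plot(x, alpha_c + beta_c * x, "C1", label="non-robust (least squares)")
plt.plot(x, alpha_m + beta_m * x, "C2", label="robust (Student-t)")
plt.xlabel("x (centered)")
plt.ylabel("y")
plt.legend()
plt.show()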