def level_model(y_old, y_new):
    # PyMC3 level changepoint model
    # level is modeled by Poisson RVs
    # y_old: older data points since the last changepoint
    # y_new: last win(10) datapoints
    mean_new = y_new.mean() if len(y_new) > 0 else None
    mean_old = y_old.mean() if len(y_old) > 0 else mean_new

    y_ = np.concatenate((y_old, y_new))
    y_obs = theano.shared(y_)

    with pm.Model() as model:
        w = pm.Dirichlet('w', a=np.ones(2))
        lambda_ = pm.Exponential('lambda',
                                 lam=np.array([1.0 / mean_old, 1.0 / mean_new]),
                                 shape=(2,))
        components = pm.Poisson.dist(mu=lambda_, shape=(2,))
        diff = pm.Deterministic('diff', lambda_[0] - lambda_[1])
        obs = pm.Mixture('obs', w=w, comp_dists=components, observed=y_obs)
    return model
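# Hypothetical usage sketch for level_model (not part of the original source): builds
# synthetic Poisson counts with a level shift and samples from the returned model.
# The imports, seed, rates, window sizes, and draw counts are assumptions for illustration.
import numpy as np
import pymc3 as pm

np.random.seed(0)
y_old_demo = np.random.poisson(5.0, size=50).astype(float)   # pre-changepoint level
y_new_demo = np.random.poisson(9.0, size=10).astype(float)   # last window of points
level_m = level_model(y_old_demo, y_new_demo)
with level_m:
    level_trace = pm.sample(1000, tune=1000, cores=1)
# A posterior for 'diff' concentrated away from zero suggests a level change
print(level_trace['diff'].mean())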
def fit_logreturns(self, data):
    def likelihood(x):
        def _normal(x, sigma):
            # assumes a mu of 0
            return pm.Normal.dist(mu=0., sd=sigma).logp(x)

        nu_t = pm.math.dot(rhos, x[1:])
        err = tt.reshape(x[0] - nu_t, [-1])
        logps = (w[0] * pm.math.exp(_normal(err, pm.math.exp(s)))) + \
                (w[1] * pm.math.exp(_normal(x[0], float(1e-100))))
        return pm.math.log(logps)

    with pm.Model() as self.model:
        W = np.array([1., 1.])
        w = pm.Dirichlet('w', W)
        intercept = pm.Normal('intercept', mu=-5, sd=5., testval=-5.)
        theta = pm.Uniform('theta', lower=0.001, upper=1.)
        sigma = pm.Uniform('sigma', lower=0.001, upper=10.)
        rhos = pm.Uniform('rhos', lower=-1., upper=1., shape=self.num_lags)
        sde = lambda x, theta, mu: (theta * (mu - x), sigma)
        s = ts.EulerMaruyama('path', 1.0, sde, [theta, intercept],
                             shape=len(data) - self.num_lags,
                             testval=np.ones_like(data[self.num_lags:]))
        lagged_data = self._lags(data)
        pm.DensityDist('obs', likelihood, observed=lagged_data)
        self.trace = pm.sample(3000, tune=3000, nuts_kwargs=dict(target_accept=0.95))

    pm.traceplot(self.trace, varnames=['w', 'intercept', 'rhos', 'theta', 'sigma'])
    self.estimated_rhos = np.mean(self.trace['rhos'], axis=0)
    self.estimated_w = np.mean(self.trace['w'], axis=0)
    self.estimated_intercept = np.mean(self.trace['intercept'], axis=0)
    self.estimated_theta = np.mean(self.trace['theta'], axis=0)
    self.estimated_sigma = np.mean(self.trace['sigma'], axis=0)
    self.data = data
def run_regional_model(data, progressbar=False, db_file=None, burn=2000, samp=5000):
    # Setup masks
    r_dum = data.Region.str.get_dummies()
    regs = r_dum.columns
    r_mtx = r_dum.as_matrix()
    num_reg = r_mtx.shape[1]
    heads = [{'name': 'Region', 'values': regs.tolist()}]

    with pm.Model() as model:
        b0_mu = pm.Normal('b0_mu', mu=4, sd=3)
        sigma = pm.Uniform('sigma', lower=0.7, upper=70)
        thresh = pm.Dirichlet('thresh', a=np.ones(5))
        mu_reg = pm.Normal('mu_reg', mu=0, sd=3, shape=num_reg)
        reg_mu = b0_mu + mu_reg
        reg_range = tt.arange(num_reg)
        cat_ps, update = theano.scan(
            fn=lambda r_i: compute_ps(thresh, reg_mu[r_i], sigma),
            sequences=[reg_range])
        reg_ps = pm.Deterministic('reg_ps', cat_ps)
        nat_ps = pm.Deterministic('nat_ps', compute_ps(thresh, b0_mu, sigma))
        cat_r = theano.dot(r_mtx, cat_ps)
        resp = data.response - 1
        results = pm.Categorical('results', p=cat_r, observed=resp)

    with model:
        db = None
        if db_file is not None:
            db = pm.backends.Text(db_file)
        step = pm.Metropolis()
        burn = pm.sample(burn, step=step, progressbar=progressbar)
        trace = pm.sample(samp, step=step, start=burn[-1],
                          progressbar=progressbar, trace=db)

    return {'heads': heads, 'trace': trace}
def build_model(data, K):
    N = data.shape[0]
    d = data.shape[1]
    print('Building model with n=%d, d=%d, k=%d' % (N, d, K))

    with pm.Model() as gmm:
        # Prior over component weights
        if K > 1:
            p = pm.Dirichlet('p', a=np.array([1.] * K))

        # Prior over component means
        mus = [
            pm.MvNormal('mu_%d' % i,
                        mu=pm.floatX(np.zeros(d)),
                        tau=pm.floatX(0.1 * np.eye(d)),
                        shape=(d,))
            # testval=pm.floatX(np.ones(d)))
            for i in range(K)
        ]

        # Cholesky decomposed LKJ prior over component covariance matrices
        packed_L = [
            pm.LKJCholeskyCov('packed_L_%d' % i,
                              n=d,
                              eta=2.,
                              sd_dist=pm.HalfCauchy.dist(1))
            # testval=pm.floatX(np.ones(int(d*(d-1)/2+d))))
            for i in range(K)
        ]

        # Unpack packed_L into full array
        L = [pm.expand_packed_triangular(d, packed_L[i]) for i in range(K)]

        # Convert L to sigma and tau for convenience
        sigma = [
            pm.Deterministic('sigma_%d' % i, L[i].dot(L[i].T))
            for i in range(K)
        ]
        tau = [
            pm.Deterministic('tau_%d' % i, matrix_inverse(sigma[i]))
            for i in range(K)
        ]

        # Specify the likelihood
        if K > 1:
            mvnl = [pm.MvNormal.dist(mu=mus[i], chol=L[i]) for i in range(K)]
            Y_obs = pm.Mixture('Y_obs', w=p, comp_dists=mvnl, observed=data)
        else:
            Y_obs = pm.MvNormal('Y_obs', mu=mus[0], chol=L[0], observed=data)

    return gmm
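# Hypothetical usage sketch for build_model (not part of the original source): fits the
# Gaussian mixture to synthetic 2-D data. The seed, cluster layout, K=2, and the choice of
# an ADVI fit instead of MCMC are assumptions; the module-level imports (pm, np,
# matrix_inverse) are assumed to exist as in the original file.
import numpy as np
import pymc3 as pm

np.random.seed(1)
demo_data = np.vstack([np.random.randn(200, 2) + 4.0,
                       np.random.randn(200, 2) - 4.0])   # two well-separated clusters
gmm_demo = build_model(demo_data, K=2)
with gmm_demo:
    gmm_approx = pm.fit(n=10000, method='advi')   # variational fit; pm.sample would also work
    gmm_trace = gmm_approx.sample(500)
print(gmm_trace['p'].mean(axis=0))   # mixture weights should be near 0.5 / 0.5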
def test_categorical():
    k = 3
    ndata = 5000
    v = np.random.randint(0, k, ndata)

    with pymc3.Model() as model:
        p = pymc3.Dirichlet(name='p', a=np.array([1., 1., 1.]), shape=k)
        category = pymc3.Categorical(name='category', p=p, shape=ndata, observed=v)
        step = pymc3.Metropolis(vars=[p, category])
        trace = pymc3.sample(3000, step=step)

    pymc3.traceplot(trace)
    plt.show()
def dice_toss():
    n = 100
    h = np.array([6, 6, 6, 6, 6, 6])
    h = h / sum(h)
    a = np.ones(6)
    niter = 1000

    with pm.Model() as model:  # context management
        # define priors
        p = pm.Dirichlet('p', a=a)
        # define likelihood
        y = pm.Categorical('y', p=p, observed=h)
        # inference
        trace = pm.sample(niter, progressbar=True)

    pm.plots.traceplot(trace)
    plt.show()
def run_non_sparse_initialization(self):
    rat = self.allelic_counts / self.total_counts
    nans = np.isnan(rat)
    # Run bb-mf
    with pm.Model() as bb_glm:
        CONC = pm.HalfCauchy('CONC', beta=5, shape=(1, self.S), testval=self.conc_init)
        ALPHA = pm.HalfCauchy('ALPHA', beta=5, shape=(1, self.S))
        BETA = pm.Normal('BETA', mu=0, tau=(1.0 / 10.0), shape=(self.S, self.num_cov), testval=self.beta_init)
        U = pm.Normal('U', mu=0, tau=(1.0 / 1.0), shape=(self.N, self.K), testval=self.U_init)
        V = pm.Normal('V', mu=0, tau=(1.0 / 1.0), shape=(self.S, self.K), testval=self.V_init)
        MU_A = pm.Normal("MU_A", mu=0., sd=100**2, shape=(1, self.S), testval=self.mu_a_init)
        SIGMA_A = pm.HalfCauchy("SIGMA_A", beta=5.0, shape=(1, self.S), testval=self.sigma_a_init)
        mu_a_mat = pm.math.dot(np.ones((self.I, 1)), MU_A)
        sigma_a_mat = pm.math.dot(np.ones((self.I, 1)), SIGMA_A)
        A = pm.Normal('A', mu=mu_a_mat, sigma=sigma_a_mat, shape=(self.I, self.S), testval=self.A_init)

        p = pm.math.invlogit(pm.math.dot(self.cov, BETA.T) + pm.math.dot(U, V.T) + A[self.Z, :])
        conc_mat = pm.math.dot(np.ones((self.N, 1)), CONC)

        w = pm.Dirichlet('w', a=np.ones((self.S, 2)))
        beta_null_mat = pm.math.dot(np.ones((self.N, 1)), ALPHA)

        BB_GLM = pm.BetaBinomial.dist(alpha=(p * conc_mat), beta=((1.0 - p) * conc_mat), n=self.total_counts)
        BB_NULL = pm.BetaBinomial.dist(alpha=(np.ones((self.N, self.S))), beta=7.0 + beta_null_mat, n=self.total_counts)

        mixmod = pm.Mixture('mixmodel', w=w, comp_dists=[BB_GLM, BB_NULL], observed=self.allelic_counts, shape=2)

        approx = pm.fit(method='advi', n=2000)
        # pickle.dump(approx, open(self.output_root + '_model', 'wb'))
        # approx = pickle.load(open(self.output_root + '_model', "rb"))

    means_dict = approx.bij.rmap(approx.params[0].eval())

    y = means_dict['w_stickbreaking__'].T
    y = np.concatenate([y, -np.sum(y, 0, keepdims=True)])
    e_y = np.exp(y - np.max(y, 0, keepdims=True))
    w_learned = e_y / np.sum(e_y, 0, keepdims=True)

    self.conc_init = np.exp(means_dict['CONC_log__'])
    self.alpha_init = np.exp(means_dict['ALPHA_log__'])
    self.beta_init = means_dict['BETA']
    self.U_init = means_dict['U']
    self.V_init = means_dict['V']
    self.mu_a_init = means_dict['MU_A']
    self.sigma_a_init = np.exp(means_dict['SIGMA_A_log__'])
    self.A_init = means_dict['A']
    self.w_init = w_learned.T
def laser_late_trials(data, num_emissions):
    # Make the pymc3 model
    with pm.Model() as model:
        # Dirichlet prior on the emission/spiking probabilities - 4 states
        p = pm.Dirichlet('p', np.ones(num_emissions), shape=(4, num_emissions))

        # Discrete Uniform switch times
        # Switch from detection to identity firing
        t1 = pm.DiscreteUniform('t1', lower=20, upper=60)
        # Switch from identity to palatability firing
        t2 = pm.DiscreteUniform('t2', lower=t1 + 20, upper=120)
        # Switch from palatability firing to end
        t3 = pm.DiscreteUniform('t3', lower=t2 + 30, upper=150)

        # Add potentials to keep the switch times from coming too close to each other
        # t_pot1 = pm.Potential('t_pot1', tt.switch(t2 - t1 >= 20, 0, -np.inf))
        # t_pot2 = pm.Potential('t_pot2', tt.switch(t3 - t2 >= 20, 0, -np.inf))
        # t_pot3 = pm.Potential('t_pot3', tt.switch(t3 - t1 >= 40, 0, -np.inf))

        # Get the actual state numbers based on the switch times
        states1 = tt.switch(t1 >= np.arange(150), 0, 1)
        states2 = tt.switch(t2 >= np.arange(150), states1, 2)
        states = tt.switch(t3 >= np.arange(150), states2, 3)

        # Categorical observations
        obs = pm.Categorical('obs', p=p[states],
                             observed=np.append(data[:140], data[190:]))

    # Inference button :D
    with model:
        tr = pm.sample(300000, init=None, step=pm.Metropolis(), njobs=2,
                       start={'t1': 25, 't2': 75, 't3': 125},
                       progressbar=False)

    # Return the inference!
    return model, tr[250000:]
def run_national_model(data):
    with pm.Model() as model:
        mu = pm.Normal('mu', mu=4, sd=3)
        sigma = pm.Uniform('sigma', lower=0.7, upper=70)
        thresh = pm.Dirichlet('thresh', a=np.ones(5))
        cat_p = compute_ps(thresh, mu, sigma)
        resp = data.response - 1
        results = pm.Categorical('results', p=cat_p, observed=resp)

    with model:
        step = pm.Metropolis()
        burn = pm.sample(2000, step=step)
        trace = pm.sample(5000, step=step, start=burn[-1])

    return trace
def test_multivariate2(self):
    # Added test for issue #3271
    mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10)
    with pm.Model() as dm_model:
        probs = pm.Dirichlet("probs", a=np.ones(6))
        obs = pm.Multinomial("obs", n=100, p=probs, observed=mn_data)
        burned_trace = pm.sample(20, tune=10, cores=1,
                                 return_inferencedata=False,
                                 compute_convergence_checks=False)
    sim_priors = pm.sample_prior_predictive(samples=20, model=dm_model)
    sim_ppc = pm.sample_posterior_predictive(burned_trace, samples=20, model=dm_model)
    assert sim_priors["probs"].shape == (20, 6)
    assert sim_priors["obs"].shape == (20,) + mn_data.shape
    assert sim_ppc["obs"].shape == (20,) + mn_data.shape
def getAngelRate(data, n_sample=10000, n_chain=3, ax=None):
    # Organize the data
    data_0 = data.query('campaign != 1')
    data_1 = data.query('campaign == 1')
    d = np.array([[sum(data_0['angel'] == 0),
                   sum(data_0['angel'] == 1),
                   sum(data_0['angel'] == 2)],
                  [sum(data_1['angel'] == 0),
                   sum(data_1['angel'] == 1),
                   sum(data_1['angel'] == 2)]])
    weight = np.array([[1.0, 1.0, 1.0], [1.0, 0.0, 2.0]])

    # Parameter estimation
    with pm.Model() as model:
        alpha = [1., 1., 1.]  # hyper-parameter of DirichletDist.
        pi = pm.Dirichlet('pi', a=np.array(alpha))
        for i in np.arange(d.shape[0]):
            piw = pi * weight[i]
            m = pm.Multinomial('m_%s' % (i), n=np.sum(d[i]), p=piw, observed=d[i])
        trace = pm.sample(n_sample, chains=n_chain)

    np.savetxt('trace_pi.csv', trace['pi'], delimiter=',')

    # Silver
    hpd_l, hpd_u = pm.hpd(trace['pi'][:, 1])
    print('Silver : 95% HPD : {}-{}'.format(hpd_l, hpd_u))
    print('Silver ExpectedValue : {}'.format(trace['pi'][:, 1].mean()))
    # Gold
    hpd_l, hpd_u = pm.hpd(trace['pi'][:, 2])
    print('Gold : 95% HPD : {}-{}'.format(hpd_l, hpd_u))
    print('Gold ExpectedValue : {}'.format(trace['pi'][:, 2].mean()))

    # save fig
    if ax is not None:
        pm.plot_posterior(trace['pi'][:, 0], ax=ax[0])
        pm.plot_posterior(trace['pi'][:, 1], ax=ax[1])
        pm.plot_posterior(trace['pi'][:, 2], ax=ax[2])
        ax[0].set_title('Nothing')
        ax[1].set_title('SilverAngel')
        ax[2].set_title('GoldAngel')

    return trace
def fun_infer_model_learn(df, tune=100, samples=10, K=2, path="./", name="", run=1):
    print("Write: " + path + name + "learner_" + str(K) + ".txt")
    print("Write: " + path + name + "question_" + str(K) + ".txt")
    print("Write: " + path + name + "concentration_" + str(K) + ".txt")
    ch = 1
    N = df.shape[0]
    Q = df.shape[1]
    # for K in Krange:
    with pm.Model() as model:
        learner = pm.Uniform('learner', shape=(N, K))
        concentration = pm.Uniform('concentration', testval=.5)
        question = pm.Dirichlet('question', a=np.repeat(concentration, K), shape=(Q, K))
        # difficulty = pm.Uniform('difficulty', 0.1, 4, shape=(Q, 1), testval=np.repeat(.5, Q).reshape(Q, 1))
        x = pm.math.dot(learner, question.T)
        results = pm.Bernoulli('rezults', p=x, shape=(N, Q), observed=df)

    if run:
        with model:
            trace = pm.sample(samples, chains=ch, tune=tune, discard_tuned_samples=True)
        # a = pm.math.dot(trace['learner'].mean(0), trace['question'][:, :].mean(0).T)
        pd.DataFrame(trace['learner'].mean(0)).to_csv(
            path + name + "learner_" + str(K) + ".txt", sep="\t")
        pd.DataFrame(trace['question'].mean(0)).to_csv(
            path + name + "question_" + str(K) + ".txt", sep="\t")
        # pd.DataFrame(a.eval()).to_csv(path + name + "estim_" + str(K) + ".txt", sep="\t")
        pd.DataFrame(trace['concentration']).to_csv(
            path + name + "concentration_" + str(K) + ".txt", sep="\t")
        print("finished: " + str(K))
        return [model, trace]
    return [model, None]
def predict_proba(self, X, return_std=False):
    """
    Predicts probabilities of new data with a trained GaussianMixture Model

    Parameters
    ----------
    X : numpy array, shape [n_samples, n_features]

    cats : numpy array, shape [n_samples, ]

    return_std : Boolean flag of whether to return standard deviations with
        mean probabilities. Defaults to False.
    """
    if self.trace is None:
        raise NotFittedError('Run fit on the model before predict.')

    # num_samples = X.shape[0]

    if self.cached_model is None:
        self.cached_model = self.create_model()

    self._set_shared_vars({'model_input': X})

    K = self.num_components

    with self.cached_model:
        pi = pm.Dirichlet("probability", a=np.array([1.0, 1.0, 1.0]), shape=K)
        _vars = [pi]
        ppc = pm.sample_ppc(self.trace,
                            # model=self.cached_model,
                            vars=_vars,
                            samples=2000,
                            size=len(X))

    if return_std:
        return ppc['probability'].mean(axis=0), ppc['probability'].std(axis=0)
    else:
        return ppc['probability'].mean(axis=0)
def get_beta_bernoulli_mixture(X, params):
    n_doc, n_feat = X.shape
    n_comp = params['n_comp']
    with pm.Model() as model:
        pkw = pm.Beta('pkw',
                      alpha=params['pkw_beta_dist_alpha'],
                      beta=params['pkw_beta_dist_beta'],
                      shape=(n_comp, n_feat))
        p_comp = pm.Dirichlet('p_comp',
                              a=params['pcomp_dirichlet_dist_alpha'] * np.ones(n_comp))
        z = pm.Categorical('z', p=p_comp, shape=n_doc)
        x = pm.Bernoulli('x', p=pkw[z], shape=(n_doc, n_feat), observed=X)
    return model
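# Hypothetical usage sketch for get_beta_bernoulli_mixture (not in the original source):
# the params keys mirror the lookups inside the function; the synthetic data, prior
# values, and the sampling call are assumptions for illustration only.
import numpy as np
import pymc3 as pm

np.random.seed(2)
X_demo = (np.random.rand(100, 20) < 0.3).astype(int)   # binary document-feature matrix
demo_params = {
    'n_comp': 3,
    'pkw_beta_dist_alpha': 1.0,
    'pkw_beta_dist_beta': 1.0,
    'pcomp_dirichlet_dist_alpha': 1.0,
}
bb_mix_model = get_beta_bernoulli_mixture(X_demo, demo_params)
with bb_mix_model:
    # z is discrete, so PyMC3 assigns a compound step (NUTS + categorical Gibbs) automatically
    bb_mix_trace = pm.sample(500, tune=500, cores=1)
print(bb_mix_trace['p_comp'].mean(axis=0))   # posterior mean component weights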
def run_mcmc(self, n_generations, n_burn):
    logger.info("{} subpaths in total".format(len(self.traversome.all_sub_paths)))
    isomer_num = self.traversome.num_of_isomers
    with pm.Model() as isomer_model:
        isomer_percents = pm.Dirichlet(name="props", a=np.ones(isomer_num), shape=(isomer_num,))
        loglike_expression = self.traversome.get_multinomial_like_formula(
            isomer_percents=isomer_percents, log_func=tt.log).loglike_expression
        pm.Potential("likelihood", loglike_expression)
        # pm.Deterministic("likelihood", likes)
        # pm.DensityDist?
        # pm.Mixture(name="likelihood", w=np.ones(len(components)), comp_dists=components, observed=data)
        # pm.Binomial("path_last", n=n__num_reads_in_range, p=this_prob, observed=x__num_matched_reads)

        # sample from the distribution
        # uses the BFGS optimization algorithm to find the maximum of the log-posterior
        logger.info("Searching the maximum of the log-posterior ..")
        start = pm.find_MAP(model=isomer_model)
        # trace = pm.sample_smc(n_generations, parallel=False)

        # In an upcoming release, pm.sample will return an `arviz.InferenceData` object
        # instead of a `MultiTrace` by default
        logger.info("Using NUTS sampler ..")
        self.trace = pm.sample(n_generations,
                               tune=n_burn,
                               discard_tuned_samples=True,
                               cores=1,
                               init='adapt_diag',
                               start=start,
                               return_inferencedata=True)

        logger.info("Summarizing the MCMC traces ..")
        summary = az.summary(self.trace)
        logger.info("\n{}".format(summary))
        axes = az.plot_trace(self.trace)
        fig = axes.ravel()[0].figure
        fig.savefig(os.path.join(self.traversome.outdir, "mcmc.trace_plot.pdf"))

    return OrderedDict([(_go, _prop) for _go, _prop in enumerate(summary["mean"])])
def test_multivariate2(self):
    # Added test for issue #3271
    mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10)
    with pm.Model() as dm_model:
        probs = pm.Dirichlet("probs", a=np.ones(6), shape=6)
        obs = pm.Multinomial("obs", n=100, p=probs, observed=mn_data)
        burned_trace = pm.sample(20, tune=10, cores=1)
    sim_priors = pm.sample_prior_predictive(samples=20, model=dm_model)
    sim_ppc = pm.sample_posterior_predictive(burned_trace, samples=20, model=dm_model)
    assert sim_priors["probs"].shape == (20, 6)
    assert sim_priors["obs"].shape == (20,) + obs.distribution.shape
    assert sim_ppc["obs"].shape == (20,) + obs.distribution.shape

    sim_ppc = pm.fast_sample_posterior_predictive(burned_trace, samples=20, model=dm_model)
    assert sim_ppc["obs"].shape == (20,) + obs.distribution.shape
def csp_modeling(obs, templates, dbname, redo=False):
    """ Model a CSP with bayesian model. """
    if os.path.exists(dbname) and not redo:
        return dbname
    with pm.Model() as model:
        w = pm.Dirichlet("w", np.ones(len(templates)))
        bestfit = pm.math.dot(w.T, templates)
        sigma = pm.Exponential("sigma", lam=1)
        likelihood = pm.Normal('like', mu=bestfit, sd=sigma, observed=obs)
    with model:
        trace = pm.sample(1000, tune=1000)
    results = {'model': model, "trace": trace}
    with open(dbname, 'wb') as buff:
        pickle.dump(results, buff)
    return dbname
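# Hypothetical usage sketch for csp_modeling (not in the original source): synthetic
# templates and a noisy mock observation built as a convex combination of them. The
# template count, true weights, noise level, and output file name are assumptions.
import numpy as np

np.random.seed(3)
demo_templates = np.random.rand(5, 200)                 # 5 template spectra, 200 pixels
true_w = np.array([0.5, 0.2, 0.1, 0.1, 0.1])
demo_obs = true_w.dot(demo_templates) + np.random.normal(0, 0.01, 200)
csp_modeling(demo_obs, demo_templates, 'csp_trace.pkl')  # pickles {'model', 'trace'} to disk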
def test_DiscreteMarkovChain_point():
    test_Gammas = at.as_tensor_variable(np.array([[[1.0, 0.0], [0.0, 1.0]]]))

    with pm.Model():
        # XXX: `draw_values` won't use the `Deterministic`s values in the `point` map!
        # Also, `Constant` is only for integer types (?!), so we can't use that.
        test_gamma_0 = pm.Dirichlet("gamma_0", np.r_[1.0, 1000.0], shape=2)

        test_point = {"gamma_0": np.r_[1.0, 0.0]}

        assert np.all(
            DiscreteMarkovChain.dist(test_Gammas, test_gamma_0, shape=10).random(
                point=test_point
            )
            == 0
        )
        assert np.all(
            DiscreteMarkovChain.dist(test_Gammas, 1.0 - test_gamma_0, shape=10).random(
                point=test_point
            )
            == 1
        )
def run_factorization(self):
    rat = self.allelic_counts / self.total_counts
    nans = np.isnan(rat)
    # Run bb-mf
    with pm.Model() as bb_glm:
        CONC = pm.HalfCauchy('CONC', beta=5, shape=(1, self.S), testval=self.conc_init)
        ALPHA = pm.HalfCauchy('ALPHA', beta=5, shape=(1, self.S), testval=self.alpha_init)
        BETA = pm.Normal('BETA', mu=0, tau=(1.0 / 10.0), shape=(self.S, self.num_cov), testval=self.beta_init)
        gamma = pm.HalfCauchy('GAMMA', beta=5, shape=(self.N, self.K), testval=np.ones((self.N, self.K)))
        U = pm.Normal('U', mu=0, sigma=1.0 / gamma, shape=(self.N, self.K), testval=self.U_init)
        V = pm.Normal('V', mu=0, tau=(1.0 / 1.0), shape=(self.S, self.K), testval=self.V_init)
        MU_A = pm.Normal("MU_A", mu=0., sd=100**2, shape=(1, self.S), testval=self.mu_a_init)
        SIGMA_A = pm.HalfCauchy("SIGMA_A", beta=5.0, shape=(1, self.S), testval=self.sigma_a_init)
        mu_a_mat = pm.math.dot(np.ones((self.I, 1)), MU_A)
        sigma_a_mat = pm.math.dot(np.ones((self.I, 1)), SIGMA_A)
        A = pm.Normal('A', mu=mu_a_mat, sigma=sigma_a_mat, shape=(self.I, self.S), testval=self.A_init)

        p = pm.math.invlogit(pm.math.dot(self.cov, BETA.T) + pm.math.dot(U, V.T) + A[self.Z, :])
        conc_mat = pm.math.dot(np.ones((self.N, 1)), CONC)

        w = pm.Dirichlet('w', a=np.ones((self.S, 2)), testval=self.w_init)
        beta_null_mat = pm.math.dot(np.ones((self.N, 1)), ALPHA)

        BB_GLM = pm.BetaBinomial.dist(alpha=(p * conc_mat), beta=((1.0 - p) * conc_mat), n=self.total_counts)
        BB_NULL = pm.BetaBinomial.dist(alpha=(np.ones((self.N, self.S))), beta=7.0 + beta_null_mat, n=self.total_counts)

        mixmod = pm.Mixture('mixmodel', w=w, comp_dists=[BB_GLM, BB_NULL], observed=self.allelic_counts, shape=2)

        approx = pm.fit(method='advi', n=30000)
        # pickle.dump(approx, open(self.output_root + '_model', 'wb'))
        # approx = pickle.load(open(self.output_root + '_model', "rb"))

    means_dict = approx.bij.rmap(approx.params[0].eval())

    np.savetxt(self.output_root + '_temper_U.txt', means_dict['U'], fmt="%s", delimiter='\t')
    np.savetxt(self.output_root + '_temper_V.txt', means_dict['V'].T, fmt="%s", delimiter='\t')
    np.savetxt(self.output_root + '_temper_BETA.txt', means_dict['BETA'].T, fmt="%s", delimiter='\t')
    np.savetxt(self.output_root + '_temper_ALPHA.txt', np.exp(means_dict['ALPHA_log__']), fmt="%s", delimiter='\t')
    np.savetxt(self.output_root + '_temper_GAMMA.txt', np.exp(means_dict['GAMMA_log__']), fmt="%s", delimiter='\t')
    np.savetxt(self.output_root + '_temper_CONC.txt', np.exp(means_dict['CONC_log__']), fmt="%s", delimiter='\t')
    np.savetxt(self.output_root + '_temper_w_stick_breaking.txt', np.exp(means_dict['w_stickbreaking__']), fmt="%s", delimiter='\t')
    np.savetxt(self.output_root + '_temper_ELBO.txt', approx.hist, fmt="%s", delimiter='\t')
def SIR_training(self, sequence, totalpopulation):
    self.popu = totalpopulation
    self.data = sequence[:]
    acc_infect = sequence[:, 0] / totalpopulation
    basic_model = pm.Model()
    n = len(acc_infect)
    I = acc_infect[0]
    R = 0
    S = 1 - I
    with basic_model:
        BoundedNormal = pm.Bound(pm.Normal, lower=0.0, upper=1.0)
        BoundedNormal2 = pm.Bound(pm.Normal, lower=1.0, upper=10.0)
        theta = []
        r0 = BoundedNormal2('R_0', mu=self.r0, sigma=0.72)
        gamma = BoundedNormal('gamma', mu=self.gamma, sigma=0.02)
        beta = pm.Deterministic('beta', r0 * gamma)
        ka = pm.Gamma('ka', 2, 0.0001)
        Lambda1 = pm.Gamma('Lambda1', 2, 0.0001)
        qu = pm.Uniform('qu', lower=0.1, upper=1.0)
        theta.append(pm.Deterministic('theta_' + str(0), pm.math.stack([S, I, R])))
        for i in range(1, n):
            states = theta[i - 1]
            solve_theta = pm.Deterministic(
                'solve_theta_' + str(i),
                ka * pm.math.stack([
                    states[0] - qu * beta * states[0] * states[1],
                    states[1] + qu * beta * states[0] * states[1] - gamma * states[1],
                    states[2] + gamma * states[1]
                ]))
            theta.append(pm.Dirichlet('theta_' + str(i), a=solve_theta, shape=(3)))
            real_infect = pm.Beta('real_infect_' + str(i),
                                  Lambda1 * theta[i][1],
                                  Lambda1 * (1 - theta[i][1]),
                                  observed=acc_infect[i])
        step = pm.Metropolis()
        Trace = pm.sample(2000, cores=16, chains=1, init='auto', step=step)
    self.trace = Trace
def __init__(self, wave, flux, templates, adegree=None, mdegree=None, reddening=False):
    """ Model CSP with bayesian model. """
    self.wave = wave
    self.flux = flux
    self.templates = templates
    self.ntemplates = len(templates)
    self.adegree = adegree
    # Construct additive polynomial
    if self.adegree is not None:
        _ = np.linspace(-1, 1, len(self.wave))
        self.apoly = np.zeros((adegree + 1, len(_)))
        for i in range(adegree + 1):
            self.apoly[i] = legendre(i)(_)
    else:
        self.apoly = np.zeros(1)
    # Build statistical model
    with pm.Model() as self.model:
        self.flux0 = pm.Normal("f0", mu=1, sd=5)  # Multiplicative constant
        self.w = pm.Dirichlet("w", np.ones(self.ntemplates) / self.ntemplates)
        self.wpoly = pm.Deterministic("wpoly", pm.math.zeros_like(self.flux0)) \
            if self.adegree is None else \
            pm.Normal("wpoly", mu=0, sd=1, shape=self.adegree)
        self.bestfit = pm.Deterministic(
            "bestfit",
            self.__call__(self.w, wpoly=self.wpoly, f0=self.flux0, math=pm.math))
        self.sigma = pm.Exponential("sigma", lam=0.01)
        self.like = pm.Normal('like', mu=self.bestfit, sd=self.sigma, observed=flux)
def get_logisticnormal_bernoulli_mixture(X, params):
    n_doc, n_feat = X.shape
    n_comp = params['n_comp']
    with pm.Model() as model:
        theta = pm.MvNormal('theta',
                            mu=np.zeros(n_feat),
                            cov=np.identity(n_feat),
                            shape=(n_comp, n_feat))
        pkw = pm.Deterministic('pkw', 1 / (1 + tt.exp(-theta)))
        p_comp = pm.Dirichlet('p_comp',
                              a=params['pcomp_dirichlet_dist_alpha'] * np.ones(n_comp))
        z = pm.Categorical('z', p=p_comp, shape=n_doc)
        x = pm.Bernoulli('x', p=pkw[z], shape=(n_doc, n_feat), observed=X)
    return model
def _sample_pymc3(cls, dist, size):
    """Sample from PyMC3."""
    import pymc3
    pymc3_rv_map = {
        'MultivariateNormalDistribution': lambda dist: pymc3.MvNormal(
            'X',
            mu=matrix2numpy(dist.mu, float).flatten(),
            cov=matrix2numpy(dist.sigma, float),
            shape=(1, dist.mu.shape[0])),
        'MultivariateBetaDistribution': lambda dist: pymc3.Dirichlet(
            'X', a=list2numpy(dist.alpha, float).flatten()),
        'MultinomialDistribution': lambda dist: pymc3.Multinomial(
            'X',
            n=int(dist.n),
            p=list2numpy(dist.p, float).flatten(),
            shape=(1, len(dist.p)))
    }

    dist_list = pymc3_rv_map.keys()

    if dist.__class__.__name__ not in dist_list:
        return None

    with pymc3.Model():
        pymc3_rv_map[dist.__class__.__name__](dist)
        return pymc3.sample(size, chains=1, progressbar=False)[:]['X']
def create_dirac_zero_hmm(X, mu, xis, observed):
    S = 2
    z_tt = tt.stack([tt.dot(X, xis[..., s, :]) for s in range(S)], axis=1)
    Gammas_tt = pm.Deterministic("Gamma", multilogit_inv(z_tt))
    gamma_0_rv = pm.Dirichlet("gamma_0", np.ones((S,)))

    if type(observed) == np.ndarray:
        T = X.shape[0]
    else:
        T = X.get_value().shape[0]

    V_rv = DiscreteMarkovChain("V_t", Gammas_tt, gamma_0_rv, shape=T)
    if type(observed) == np.ndarray:
        V_rv.tag.test_value = (observed > 0) * 1
    else:
        V_rv.tag.test_value = (observed.get_value() > 0) * 1

    Y_rv = SwitchingProcess(
        "Y_t",
        [pm.Constant.dist(0), pm.Constant.dist(mu)],
        V_rv,
        observed=observed,
    )

    return Y_rv
def __init__(self, npersons, nitems, nlevels):
    super(FourPGRM, self).__init__(npersons, nitems, nlevels)
    with self.model:
        phi = pm.Dirichlet(name='phi',
                           a=np.ones(2 * nlevels - 1),
                           shape=(1, nitems, 2 * nlevels - 1))
        # 1. drop the last term, which would make the top term 1 after cumsum.
        # 2. reshape into a 2 x L matrix for each item, each row
        #    corresponding to gamma and sigma, respectively
        # 3. cumulatively sum across the gamma->sigma, to ensure sigma > gamma
        # 4. cumulative gammas and sigmas across levels to ensure monotone
        phi_star = phi[..., :-1]
        phi_star = phi_star.reshape((1, nitems, 2, nlevels - 1))
        phi_star = phi_star.cumsum(axis=-2)
        phi_star = phi_star.cumsum(axis=-1)
        # first row is gamma, second is sigma
        gamma = pm.Deterministic(name='gamma', var=phi_star[..., 0, :])
        sigma = pm.Deterministic(name='sigma', var=phi_star[..., 1, :])
    param_list = [gamma, sigma]
    self.params.update({var.name: var for var in param_list})
def _model_eval(self, X, K, n_samples):
    # setup model
    with pm.Model() as model:
        data_dim = X.shape[1]
        # prior of mixture ratio
        w = pm.Dirichlet('w', a=np.ones(K))
        # setup the likelihood
        init_mu = np.zeros(data_dim)
        components = [
            self._multivariate_normal_dist(init_mu, suffix=k) for k in range(K)
        ]
        like = pm.Mixture('like', w=w, comp_dists=components, observed=X)

    # fit model
    with model:
        trace = pm.sample(2000, step=pm.NUTS(), start=pm.find_MAP(), tune=1000)

    # store the result
    self.result['K=' + str(K)] = trace
def run_model(self, **kwargs):
    """Run Bayesian model using prefit Y's for each Gene and Dataset distribution"""
    # Importing here since Theano base_compiledir needs to be set prior to import
    import pymc3 as pm

    # Collect fits
    self.fits = self.t_fits()

    click.echo("Building model")
    with pm.Model() as self.model:
        # Convex model priors
        b = ([1] if len(self.backgrounds) == 1
             else pm.Dirichlet("b", a=np.ones(len(self.backgrounds))))
        # Model error
        eps = pm.InverseGamma("eps", 1, 1)

        # Convex model declaration
        for gene in tqdm(self.training_genes):
            y, norm_term = 0, 0
            for i, dataset in enumerate(self.backgrounds):
                name = f"{gene}={dataset}"
                fit = self.fits.loc[name]
                x = pm.StudentT(name, nu=fit.nu, mu=fit.mu, lam=fit.lam)
                y += (b[i] / fit.sd) * x
                norm_term += b[i] / fit.sd

            # y_g = \frac{\sum_d \frac{\beta * x}{\sigma} + \epsilon}{\sum_d \frac{\beta}{\sigma}}
            # Embed mu in laplacian distribution
            pm.Laplace(
                gene,
                mu=y / norm_term,
                b=eps / norm_term,
                observed=self.sample[gene],
            )

        # Sample
        self.trace = pm.sample(**kwargs)
def trend_model(y_old, y_new):
    # PyMC3 trend changepoint model
    # trend is modeled by Normal RVs
    # y_old: older data points since the last changepoint
    # y_new: last win(10) datapoints
    g_new = np.gradient(y_new)  # observed trend
    g_old = np.gradient(y_old) if len(y_old) > 1 else g_new
    mu_new = g_new.mean() if len(g_new) > 0 else None
    mu_old = g_old.mean() if len(g_old) > 0 else mu_new
    sigma_new = max(1.0, g_new.std()) if len(g_new) > 0 else None
    sigma_old = max(1.0, g_old.std()) if len(g_old) > 0 else sigma_new

    y_ = np.concatenate((y_old, y_new))
    y_obs = theano.shared(y_)
    ts = np.array(range(1, 1 + len(y_)))  # start from 1 to deal with intercept
    t_arr = np.array([ts, ts]).T

    with pm.Model() as model:
        w = pm.Dirichlet('w', a=np.ones(2))
        mu = pm.Normal('mu',
                       np.array([mu_old, mu_new]),
                       np.array([sigma_old, sigma_new]),
                       shape=(2,))
        mu_t = pm.Deterministic('mu_t', t_arr * mu)
        tau = pm.Gamma('tau', 1.0, 1.0, shape=2)
        diff = pm.Deterministic('diff', mu[1] - mu[0])  # needed for PyMC3 model
        obs = pm.NormalMixture('obs', w, mu_t, tau=tau, observed=y_obs)  # needed for PyMC3 model
    return model
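# Hypothetical usage sketch for trend_model (not part of the original source): synthetic
# series whose slope changes in the last window. The slopes, noise level, window sizes,
# and draw counts are assumptions for illustration.
import numpy as np
import pymc3 as pm

np.random.seed(4)
t_old = np.arange(50)
y_old_demo = 0.5 * t_old + np.random.normal(0, 1.0, 50)                       # old slope ~0.5
y_new_demo = y_old_demo[-1] + 3.0 * np.arange(1, 11) + np.random.normal(0, 1.0, 10)  # new slope ~3
trend_m = trend_model(y_old_demo, y_new_demo)
with trend_m:
    trend_trace = pm.sample(1000, tune=1000, cores=1)
# 'diff' is the posterior difference between the new- and old-trend components
print(trend_trace['diff'].mean())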
(D, W) = data.shape


def log_lda(theta, phi):
    def ll_lda(value):
        dixs, vixs = value.nonzero()
        vfreqs = value[dixs, vixs]
        ll = vfreqs * pm.math.logsumexp(
            t.log(theta[dixs]) + t.log(phi.T[vixs]), axis=1).ravel()
        return t.sum(ll)
    return ll_lda


with model1:
    theta = pm.Dirichlet("theta", a=alpha, shape=(D, K))
    phi = pm.Dirichlet("phi", a=beta, shape=(K, V))
    doc = pm.DensityDist('doc', log_lda(theta, phi), observed=data)

with model1:
    inference = pm.ADVI()
    approx = pm.fit(
        n=10000,
        method=inference,
        callbacks=[pm.callbacks.CheckParametersConvergence(diff='absolute')])  # inference

tr1 = approx.sample(draws=1000)
pm.plots.traceplot(tr1)
pm.plot_posterior(tr1, color='LightSeaGreen')
plt.plot(approx.hist)
data = data[0:1000]
truth = truth[0:1000]
data[np.where(data >= 1)] = 1

np.random.seed(12345)
alphaprime = 10
nclusters = 10
ncells = data.shape[0]
nsites = data.shape[1]

# without scaling
model = pm.Model()
with model:
    pi = pm.Dirichlet('pi', a=np.array([alphaprime] * nclusters), shape=nclusters)
    # Define priors
    pk = pm.Beta('pk', 1, 1, shape=(nclusters, nsites))
    z = pm.Categorical("z", p=pi, shape=ncells)
    # Define likelihood
    likelihood = pm.Bernoulli('likelihood', p=pk[z], observed=data, shape=(ncells))

with model:
    step1 = pm.Metropolis(vars=[pk, pi])
    step2 = pm.ElemwiseCategorical(vars=[z], values=list(range(nclusters)))
    tr = pm.sample(100, step=[step1, step2])

pm.traceplot(tr)