def posterior_mcmc(self, data):
    """Find the posterior distribution numerically via MCMC."""
    with pm.Model() as ab_model:
        # Priors
        mua = pm.Beta('muA', alpha=self.alpha_prior, beta=self.beta_prior)
        mub = pm.Beta('muB', alpha=self.alpha_prior, beta=self.beta_prior)
        # Likelihoods
        pm.Bernoulli('likelihoodA', mua, observed=data[0])
        pm.Bernoulli('likelihoodB', mub, observed=data[1])

        # Distribution of the difference
        pm.Deterministic('lift', mub - mua)

        # Distribution of the effect size (pm.math.sqrt keeps the graph symbolic)
        sigma_a = pm.Deterministic('sigmaA', pm.math.sqrt(mua * (1 - mua)))
        sigma_b = pm.Deterministic('sigmaB', pm.math.sqrt(mub * (1 - mub)))
        pm.Deterministic('effect_size',
                         (mub - mua) / pm.math.sqrt(0.5 * (sigma_a**2 + sigma_b**2)))

        start = pm.find_MAP()
        step = pm.Slice()
        trace = pm.sample(self.iterations, step=step, start=start)

    # Histogram the traces, dropping the first 500 draws as burn-in;
    # density=True replaces the long-deprecated normed=True.
    bins = np.linspace(0, 1, self.resolution)
    mua = np.histogram(trace['muA'][500:], bins=bins, density=True)
    mub = np.histogram(trace['muB'][500:], bins=bins, density=True)
    sigma_a = np.histogram(trace['sigmaA'][500:], bins=bins, density=True)
    sigma_b = np.histogram(trace['sigmaB'][500:], bins=bins, density=True)

    rvs = trace['lift'][500:]
    bins = np.linspace(np.min(rvs) - 0.2 * abs(np.min(rvs)),
                       np.max(rvs) + 0.2 * abs(np.max(rvs)),
                       self.resolution)
    lift = np.histogram(rvs, bins=bins, density=True)

    rvs = trace['effect_size'][500:]
    bins = np.linspace(np.min(rvs) - 0.2 * abs(np.min(rvs)),
                       np.max(rvs) + 0.2 * abs(np.max(rvs)),
                       self.resolution)
    pes = np.histogram(rvs, bins=bins, density=True)

    posterior = {'muA': mua, 'muB': mub,
                 'sigmaA': sigma_a, 'sigmaB': sigma_b,
                 'lift': lift, 'es': pes,
                 'prior': self.prior()}
    return posterior
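# Hedged usage sketch for posterior_mcmc: the attributes and prior() hook it
# expects on `self` are inferred from the method body, not a documented API.
import numpy as np

class ABTest:
    alpha_prior = 1.0   # Beta(1, 1) = uniform prior on each conversion rate
    beta_prior = 1.0
    iterations = 2000   # MCMC draws
    resolution = 100    # histogram bins for the posterior summaries

    def prior(self):
        return {'alpha': self.alpha_prior, 'beta': self.beta_prior}

    posterior_mcmc = posterior_mcmc  # attach the method defined above

data = [np.random.binomial(1, 0.05, size=500),   # conversions for variant A
        np.random.binomial(1, 0.07, size=500)]   # conversions for variant B
print(ABTest().posterior_mcmc(data)['lift'])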
def test_disaster_model(self):
    model = build_disaster_model(masked=False)
    with model:
        # Initial values for stochastic nodes
        start = {"early_mean": 2.0, "late_mean": 3.0}
        # Use slice sampler for means (other variables auto-selected)
        step = pm.Slice([model.early_mean_log__, model.late_mean_log__])
        tr = pm.sample(500, tune=50, start=start, step=step, chains=2)
        az.summary(tr)
def test_disaster_model_missing(self):
    model = build_disaster_model(masked=True)
    with model:
        # Initial values for stochastic nodes
        start = {'early_mean': 2., 'late_mean': 3.}
        # Use slice sampler for means (other variables auto-selected)
        step = pm.Slice([model.early_mean_log__, model.late_mean_log__])
        tr = pm.sample(500, tune=50, start=start, step=step)
        pm.summary(tr)
def test_disaster_model_missing(self):
    model = build_disaster_model(masked=True)
    with model:
        # Initial values for stochastic nodes
        start = {"early_mean": 2.0, "late_mean": 3.0}
        # Use slice sampler for means (other variables auto-selected)
        step = pm.Slice([model["early_mean_log__"], model["late_mean_log__"]])
        idata = pm.sample(500, tune=50, start=start, step=step, chains=2)
        az.summary(idata)
def run(n=5000):
    with model_1:
        xstart = pm.find_MAP()
        xstep = pm.Slice()
        # Use the n argument instead of a hard-coded draw count
        trace = pm.sample(n, step=xstep, start=xstart,
                          random_seed=123, progressbar=True)
        pm.summary(trace)
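# run() assumes a pre-built `model_1` in scope; a minimal stand-in for trying
# the runner out (the Normal/HalfNormal choice here is an assumption):
import numpy as np
import pymc3 as pm

observed = np.random.randn(100)
with pm.Model() as model_1:
    mu = pm.Normal('mu', mu=0., sd=10.)
    sd = pm.HalfNormal('sd', sd=1.)
    pm.Normal('obs', mu=mu, sd=sd, observed=observed)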
def run_model(steps=10000):
    model = pymc.Model()
    with model:
        α = 1 / count_data.mean()
        λ1 = pymc.Exponential("λ1", α)
        λ2 = pymc.Exponential("λ2", α)
        τ = pymc.DiscreteUniform("τ", lower=0.0, upper=len(count_data))
        process_mean = mean(τ, λ1, λ2)
        observation = pymc.Poisson("observation", process_mean,
                                   observed=count_data)
        start = {"λ1": 10.0, "λ2": 30.0}
        step1 = pymc.Slice([λ1, λ2])
        step2 = pymc.Metropolis([τ])
        trace = pymc.sample(steps, tune=500, start=start,
                            step=[step1, step2], cores=2)
    return pymc.trace_to_dataframe(trace)
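# run_model() relies on a helper `mean(τ, λ1, λ2)` that is not shown here; a
# minimal sketch of what it plausibly computes - λ1 before the switchpoint τ
# and λ2 after - assuming count_data is a 1-D array of daily counts:
import numpy as np

def mean(τ, λ1, λ2):
    idx = np.arange(len(count_data))
    return pymc.math.switch(τ > idx, λ1, λ2)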
def model_work(obs):
    model = pm.Model()
    with model:
        mu = pm.Uniform('mu', lower=-0.01, upper=0.01, shape=(1,))
        sigma = pm.Uniform('sigma', lower=0, upper=0.05, shape=(1,))
        y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=obs)
        res = pm.find_MAP()
        step = pm.Slice()
        # step = pm.Metropolis()
        trace = pm.sample(1000, step=step, start=res,
                          random_seed=123, progressbar=True)
        pm.traceplot(trace, [mu, sigma])
    del model
    return res['mu'][0], res['sigma'][0]
import pymc3 as pm
import numpy as np
import matplotlib.pylab as plt

data = np.ma.masked_values([42] * 100 + [-1] * 100 + [42] * 100, value=-1)

with pm.Model() as model:
    s = pm.GaussianRandomWalk('s', sd=1e-2, shape=len(data))
    n = pm.Normal('n', mu=pm.math.exp(s), observed=data)
    trace = pm.sample()
plt.plot(trace['s'].T, alpha=0.1)

with pm.Model() as model:
    s = pm.GaussianRandomWalk('s', sd=1e-2, shape=len(data))
    n = pm.Poisson('n', mu=pm.math.exp(s), observed=data)
    step = pm.Slice(vars=model.vars[1])
    trace = pm.sample(step=step)
pm.traceplot(trace)
plt.plot(trace['s'].T, alpha=0.1)
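# With masked observations, PyMC3 automatically adds an imputed variable for
# the missing values (here it would be named 'n_missing'), which is why
# model.vars has more than one entry; a quick way to see what Slice targets:
print(model.vars)  # e.g. [s, n_missing] - order follows declaration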
def dummy_bayesian_computation():
    # If the user would like a dummy calculation, we can do a simple y=f(x) model fitting.
    # NOTES:
    #   Metropolis is slow to converge
    #   Slice is fast
    #   NUTS is fast

    # The example function we are going to fit
    def f(intercept, slope, exponent, x):
        return intercept + slope * x + np.exp(x / exponent)

    # Establishing the model parameters and making data.
    true_slope = 0.9
    true_intercept = 1.1
    true_exponent = 2.0
    noise_strength = 1.0  # with noise_strength = 1.0, everything works; with 0.2, it doesn't fit well.
    xdata = np.linspace(0, 5, 100)
    ydata = [f(true_intercept, true_slope, true_exponent, i) +
             noise_strength * np.random.randn()
             for i in xdata]  # the actual function with noise

    # Setting up priors and doing MCMC sampling
    with pm.Model() as model:
        # Defining priors on the variables you want to estimate.
        # Careful: you can try to estimate a data noise model, sigma,
        # but you really should know what you're doing:
        # this step changes the behavior of the system dramatically.
        # sigma = pm.Normal('sigma', mu=0.3, sigma=0.1, testval=1.0)  # a data noise model.
        # Somehow it breaks if testval is too small.
        # If you just put sigma=constant, you can change the behavior too.
        # If it matches the noise strength, then you get great convergence.
        intercept = pm.Normal('intercept', mu=0, sigma=20)  # a wide-ranging prior
        slope = pm.Normal('slope', mu=0, sigma=10)  # another wide-ranging prior
        exponent = pm.Normal('exponent', mu=3, sigma=0.5)  # a prior that doesn't work very well
        # Other possible priors:
        # intercept = pm.Uniform('Intercept', -2, 3)  # a wide-ranging uniform prior
        # exponent = pm.Uniform('beta', 1.85, 2.20)  # another wide-ranging prior

        # Define likelihood
        likelihood = pm.Normal('y', mu=f(intercept, slope, exponent, xdata),
                               sigma=0.5, observed=ydata)

        # Sample the distribution
        method = pm.Slice(vars=[intercept, slope, exponent])  # slice sampling for all variables
        trace = pm.sample(4000, tune=500, step=method)  # if you remove step=method, then you'll do NUTS

    map_estimate = pm.find_MAP(model=model)  # returns a dictionary

    # Organize outputs
    est_intercept = map_estimate['intercept']
    est_exponent = map_estimate['exponent']
    est_slope = map_estimate['slope']
    intercept_std = np.std(trace['intercept'])
    exponent_std = np.std(trace['exponent'])
    slope_std = np.std(trace['slope'])
    est_y = f(est_intercept, est_slope, est_exponent, xdata)
    true_y = f(true_intercept, true_slope, true_exponent, xdata)

    # Plot model vs. data
    plt.figure()
    plt.plot(xdata, ydata, '.', label='Observations')
    plt.plot(xdata, true_y, '-g', linewidth=2, label='Actual')
    plt.plot(xdata, est_y, '-r', linewidth=2, label='MCMC Model')
    plt.legend()
    plt.savefig('example_line.png')
    plt.close()

    # Printing the results (I would also like to write these into a file):
    print("----- RESULTS ------")
    print("Actual Intercept: %.2f " % true_intercept)
    print("MAP Intercept: %.2f +/- %.2f" % (est_intercept, intercept_std))
    print("-----------")
    print("Actual Slope: %.2f " % true_slope)
    print("MAP Slope: %.2f +/- %.2f" % (est_slope, slope_std))
    print("-----------")
    print("Actual Exponent: %.2f " % true_exponent)
    print("MAP Exponent: %.2f +/- %.2f" % (est_exponent, exponent_std))
    print("-----------")
    output_functions.outputs_trace_plots(trace, '')
    return
def forecast(country, data, ftype='poly1', samples=10000, startdate=None,
             enddate=None, limit=0, targetdate=None, tune=2000, chains=20,
             cpu_cores=4, return_inis=False, **kwargs):
    """
    Monte Carlo fit of the posterior (also gives the point max estimate).

    :param country: country name if the data has a "countries" column - else use "World" or "" for all data
    :param data: dataframe with "dates" (datetime) and "cases" columns - cases is the number of daily new cases
    :param ftype: 'polyN' where N is a number between 0 and a few (don't try more than 10 or so - becomes
        quite slow), 'exp' for exponential, or 'logistic'/'scurve'/'sigmoid' for a logistic curve
    :param samples: number of samples to use
    :param startdate: start date of the data to use, as datetime.date
    :param enddate: end date of the data to use, as datetime.date
    :param limit: take the start date to be where the cumulative count exceeds this limit
    :param targetdate: datetime.date for the prediction, e.g.
        import datetime
        targetdate = datetime.datetime.strptime('2020-06-30', '%Y-%m-%d').date()
    :param return_inis: don't run; return the initial parameters instead
    :param **kwargs: model params if wanted, e.g. intercept=[int_mean, int_std]
    :return: fit results as a dataframe
    """
    import pymc3 as pm
    import datetime
    import pandas as pd
    from .utils import calculateStats, modelfit_eval_dates
    from .models import poly_model, exp_model, logistic_model

    if isinstance(startdate, str):
        startdate = pd.to_datetime(startdate)

    if country == "World" or country == "all" or len(country) == 0:
        temp = data.sort_values('dates')
        temp['cases'] = temp.groupby(['dates'])['cases'].transform('sum')
        temp['deaths'] = temp.groupby(['dates'])['deaths'].transform('sum')
        temp.drop_duplicates(subset=['dates'], inplace=True)
    else:
        temp = data[data.countries == country].sort_values('dates')

    temp['cumcases'] = temp.cases.cumsum().values
    if startdate is None:
        startdate = temp[temp.cumcases > limit].dates.dt.date.min()
    if enddate is None:
        enddate = temp[temp.cases > 0].dates.dt.date.max()
    temp_new = temp[(temp.dates.dt.date >= startdate) &
                    (temp.dates.dt.date <= enddate)]

    intercept = next((value for key, value in kwargs.items()
                      if key == 'intercept'), None)
    if intercept is None:
        intercept = temp_new.cumcases.values.min()
        kwargs['intercept'] = [intercept, intercept / 10 + 20]

    try:
        x0 = temp_new.dates.dt.date - startdate
    except Exception:
        x0 = temp_new.dates - startdate
    x = x0.dt.days
    y = temp_new.cumcases.values
    if targetdate is None:
        xTarget = None
    else:
        xTarget = (targetdate - startdate).days

    log = 'lin'
    if ftype == 'exp':
        slope = next((value for key, value in kwargs.items()
                      if key == 'slope'), None)
        if slope is None:
            a10 = (y.max() - y[0]) / x.max()
            kwargs['slope'] = [a10 / 2, a10 / 4 + 10]
        if return_inis:
            return kwargs
        model, varnames, modelfun = exp_model(x, y, **kwargs)
        log = 'log'
    elif 'poly' in ftype:
        order = int(ftype[4:])
        a1 = next((value for key, value in kwargs.items()
                   if key == 'a1'), None)
        if not a1:
            a10 = (y.max() - y[0]) / x.max()
            kwargs['a1'] = [a10, a10 / 4 + 20]
        if return_inis:
            return kwargs
        model, varnames, modelfun = poly_model(x, y, order, **kwargs)
    elif 'logis' in ftype or 'scurve' in ftype or 'sigmoid' in ftype:
        peak = next((value for key, value in kwargs.items()
                     if key == 'peak'), None)
        if peak is None:
            peak0 = y.max() * 1.5
            kwargs['peak'] = [peak0, peak0 / 4]
        shifted = next((value for key, value in kwargs.items()
                        if key == 'shifted'), None)
        if shifted is None:
            kwargs['shifted'] = [x[temp_new.cases.idxmax()], x.max() / 5]
        if return_inis:
            return kwargs
        model, varnames, modelfun = logistic_model(x, y, **kwargs)
    else:
        return None

    with model:
        step = pm.Slice()
        trace = pm.sample(samples, step=step, tune=tune,
                          chains=chains, cores=cpu_cores)

    varstats = []
    for va in varnames + ['sigma']:
        stats = calculateStats(trace[va])  # mean 2, std 3, 20% 5, 80% 7
        varstats.append([stats[2], stats[3], stats[5], stats[7]])
    sigma = sum(calculateStats(trace['sigma'])[2:4])  # mean + std

    plotstrs = ['%s COVID-19 cases %s model' % (country, ftype),
                '%s to %s' % (startdate.strftime('%d.%m.%Y'),
                              enddate.strftime('%d.%m.%Y')),
                'cumulative cases']
    df = modelfit_eval_dates(y, x, temp_new.dates, modelfun, varstats[0:-1],
                             sigma=sigma, target=xTarget, plotstrs=plotstrs,
                             log=log, varnames=varnames)
    for va in varnames + ['sigma']:
        stats = calculateStats(trace[va])
        df.loc[va + '_mean'] = stats[2]
        df.loc[va + '_std'] = stats[3]
        # df.loc[va + '_20%'] = stats[5]
        # df.loc[va + '_80%'] = stats[7]
    return df
model = pm.Model()
np.random.seed(9)

# Change this to reflect the iPhone users
algo_a = sp.stats.bernoulli(.5).rvs(300)  # 50% profitable days
# Change this to reflect the non-iPhone users
algo_b = sp.stats.bernoulli(.6).rvs(300)  # 60% profitable days

# This model treats each group's rate of profitable days as an unknown probability
with model:  # model specifications in PyMC3 are wrapped in a with-statement
    # Define random variables: priors on the per-group success probabilities
    # (the data are 0/1 outcomes, so the priors live on [0, 1] and the
    # likelihoods are Bernoulli, not Normal)
    theta_a = pm.Uniform('theta_a', lower=0, upper=1)  # prior
    theta_b = pm.Uniform('theta_b', lower=0, upper=1)  # prior

    # Define how data relates to unknown causes
    data_a = pm.Bernoulli('observed_A', p=theta_a, observed=algo_a)
    data_b = pm.Bernoulli('observed_B', p=theta_b, observed=algo_b)

    # Inference!
    start = pm.find_MAP()  # find a good starting point
    step = pm.Slice()  # instantiate the MCMC sampling algorithm
    trace = pm.sample(10000, step=step, start=start,
                      progressbar=False)  # draw posterior samples using slice sampling
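# One way to read an answer off the trace: the posterior probability that
# group B's success rate exceeds group A's (variable names as in the model):
diff = trace['theta_b'] - trace['theta_a']
print('P(theta_b > theta_a) =', (diff > 0).mean())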
def fitbayesianmodel(bayesian_model, ytrain, method=1, n_=3000, MAP=True,
                     chains=1, jobs=1, star='rrlyr', classifier='RL', PCA=False):
    print('chains: ', chains)
    print('jobs: ', jobs)
    if method == 4:
        print('------- Slice Sampling --------')
        with bayesian_model as model:
            map_est = 0
            step = pm.Slice()
            trace = pm.sample(n_, step=step, njobs=jobs)
        return trace, model, map_est
    if method == 5:
        print('------- HamiltonianMC --------')
        with bayesian_model as model:
            map_est = 0
            step = pm.HamiltonianMC()
            trace = pm.sample(n_, chain=chains, tune=2000, njobs=jobs,
                              step=step, init=None)
        return trace, model, map_est
    if method == 6:
        print('------- Default --------')
        with bayesian_model as model:
            map_est = 0
            trace = pm.sample(n_, chain=chains, njobs=jobs,
                              callbacks=[CheckParametersConvergence()])
        return trace, model, map_est
    if method == 7:
        print('------- Metropolis --------')
        with bayesian_model as model:
            map_est = 0
            step = pm.Metropolis()
            trace = pm.sample(n_, step=step, chain=chains, njobs=jobs,
                              callbacks=[CheckParametersConvergence()],
                              tune=1000, step_size=100)
        pm.traceplot(trace)
        name = star + '_' + classifier + '_PCA_' + str(PCA) + '2.png'
        plt.savefig(name)
        plt.clf()
        return trace, model, map_est
    if method == 8:
        print('------- NUTS --------')
        with bayesian_model as model:
            map_est = 0
            stds = np.ones(model.ndim)
            for _ in range(5):
                args = {'is_cov': True}
                trace = pm.sample(500, tune=1000, chains=1,
                                  init='advi+adapt_diag_grad',
                                  nuts_kwargs=args)
                samples = [model.dict_to_array(p) for p in trace]
                stds = np.array(samples).std(axis=0)
            traces = []
            for i in range(1):
                step = pm.NUTS(scaling=stds**2, is_cov=True, target_accept=0.8)
                # start = trace[-10 * i]
                trace_ = pm.sample(n_, cores=4, step=step, tune=1000,
                                   chain=chains, njobs=1,
                                   init='advi+adapt_diag_grad',
                                   callbacks=[CheckParametersConvergence()])
                trace = trace_
        return trace, model, map_est
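# The convergence callback used in fitbayesianmodel lives in pymc3's
# variational module (assuming the PyMC3 vintage this snippet targets):
from pymc3.variational.callbacks import CheckParametersConvergence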
tau1 = pm.Normal('tau1', 0, 1)
a_0 = pm.Normal('a_0', 0, 10, shape=Num_5, testval=.2)
Δ_a = rate_(a_0, tau1)
# σ_a = pm.HalfCauchy('σ_a', 5.)
a0 = pm.Normal('a0', 0., 20.)
δ_1 = pm.Gamma('δ_1', alpha=5, beta=1)
δ = pm.Normal('δ', 0, sd=(δ_1 * δ_1))
# δ = pm.Normal('δ', 0, sd=20)  # use this line for δ instead if the model converges poorly

theta1 = pm.Deterministic('theta1', a0 + (Δ_a).cumsum())
theta = Bx_.dot(theta1) + δ

Observed = pm.Normal('Observed', mu=theta, sd=sigma,
                     observed=elec_faults)  # observed values

# start = pm.find_MAP()
step1 = pm.Slice([tau1, a_0])
trace2 = pm.sample(1000, tune=500, step=step1)

chain2 = trace2
varnames1 = ['a0', 'δ', 'sigma', 'tau1']
pm.plot_posterior(chain2, varnames1, kde_plot=True)
plt.show()

pm.energyplot(chain2)  # energy plot comparison: the more the two curves overlap, the better the model
plt.show()

# Plot autocorrelation curves
varnames1 = ['a0', 'δ', 'sigma', 'tau1']
pm.autocorrplot(chain2, varnames1)
plt.show()
print(pm.summary(chain2, varnames1))

print(pm.waic(trace=trace2, model=partial_model))
from scipy import optimize
import matplotlib.pyplot as plt

with basic_model:
    # Priors for unknown model parameters
    alpha = pm.Normal('alpha', mu=0, sd=10)
    beta = pm.Normal('beta', mu=0, sd=10, shape=2)
    sigma = pm.HalfNormal('sigma', sd=1)

    # Expected value of outcome
    mu = alpha + beta[0] * X1 + beta[1] * X2

    # Likelihood (sampling distribution) of observations
    Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=Y)

    # Obtain starting values via MAP
    start = pm.find_MAP(fmin=optimize.fmin_powell)

    # Instantiate sampler
    step = pm.Slice()

    # Draw 5000 posterior samples
    trace = pm.sample(5000, step=step, start=start)

pm.traceplot(trace)
print(pm.summary(trace))
plt.show()
def test_run(self):
    with self.build_model():
        start = pm.find_MAP(method="Powell")
        pm.sample(50, pm.Slice(), start=start)
def test_run(self):
    with self.build_model():
        start = pm.find_MAP(fmin=opt.fmin_powell)
        pm.sample(50, pm.Slice(), start=start)
def test_run(self):
    with self.build_model():
        start = pm.find_MAP(fmin=opt.fmin_powell)
        trace = pm.sample(50, pm.Slice(), start=start)
        pm.glm.plot_posterior_predictive(trace)
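# The test_run variants above assume a build_model() helper on the test class;
# a minimal hedged stand-in so the pattern can be exercised in isolation:
def build_model(self):
    with pm.Model() as model:
        pm.Normal('x', mu=0., sd=1.)
    return model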
def train(self, df_train, first_feature):
    self.first_feature = first_feature
    # Get rid of identifiers and team record info
    index = list(df_train.columns).index(first_feature)
    features = df_train.iloc[:, index:]
    features = remove_stats(features, ['w', 'l', 'gp', 'min'])

    # Get rid of irrelevant quarter features
    features = get_quarter_features(features, self.period)
    for f in self.remove_features:
        features = remove_stats(features, [f])
    for f in self.restrict_features:
        features = restrict_stats(features, [f])

    print("feature classes", self.feature_classes)
    # Choose features here
    if self.feature_classes != 'all':
        features = restrict_stats(features, self.feature_classes)
    self.feature_cols = list(features.columns)

    if self.normalize:
        self.features_prenorm = features
        mm_scaler = MinMaxScaler()
        features = mm_scaler.fit_transform(features)
        features = pd.DataFrame(data=features, columns=self.feature_cols)

    if self.period == 0:
        Y = df_train['pts_a'] + df_train['pts_h']
    elif self.period == 5:
        Y = (df_train['pts_qtr1_a'] + df_train['pts_qtr2_a'] +
             df_train['pts_qtr1_h'] + df_train['pts_qtr2_h'])
    elif self.period == 6:
        Y = (df_train['pts_qtr3_a'] + df_train['pts_qtr4_a'] +
             df_train['pts_qtr3_h'] + df_train['pts_qtr4_h'])
    else:
        Y = (df_train[f'pts_qtr{self.period}_a'] +
             df_train[f'pts_qtr{self.period}_h'])

    if self.model_type == 'Lasso' or self.model_type == 'Ridge':
        self.reg.fit(features, Y)
    else:
        x = features.values
        model_split = self.model_type.split('-')
        cat = model_split[0]
        prior = model_split[1]
        self.prior = prior
        self.cat = cat
        if prior == 'basic':
            if cat == 'bayes':
                print("x shape", x.shape)
                self.x_shared = theano.shared(x)
                self.y_shared = theano.shared(Y.values)
                print("Y shape", Y.values.shape)
            self.basic_model = pm.Model()
            with self.basic_model:
                # Priors for unknown model parameters
                alpha = pm.Normal('alpha', mu=0, sigma=10)
                beta = pm.Normal('beta', mu=0, sigma=1, shape=x.shape[1])
                sigma = pm.HalfNormal('sigma', sigma=1)
                if cat == 'MAP':
                    mu = alpha + pm.math.dot(x, beta)
                    Y_obs = pm.Normal('Y_obs', mu=mu, sigma=sigma, observed=Y)
                elif cat == 'bayes':
                    mu = alpha + pm.math.dot(self.x_shared, beta)
                    Y_obs = pm.Normal('Y_obs', mu=mu, sigma=sigma,
                                      observed=self.y_shared)
                    self.trace = pm.sample(self.trace_samp, step=pm.NUTS(),
                                           chains=self.chains, cores=self.cores,
                                           tune=self.burn_in)
        elif cat == 'bayes' and prior == 'normal':
            print("x shape", x.shape)
            self.x_shared = theano.shared(x)
            self.y_shared = theano.shared(Y.values)
            print("Y shape", Y.values.shape)
            self.basic_model = pm.Model()
            with self.basic_model:
                alpha = pm.Normal('alpha', mu=0, sigma=10)
                beta_def = pm.Normal('beta_def', mu=-.25, sigma=.25, shape=8)
                beta_off = pm.Normal('beta_off', mu=.25, sigma=.25, shape=8)
                beta_pace = pm.Normal('beta_pace', mu=.25, sigma=.25, shape=8)
                sigma = pm.HalfNormal('sigma', sigma=1)
                mu = alpha
                for i in ['off', 'def', 'pace']:
                    if i == 'off':
                        off_col_list = [j * 3 for j in range(8)]
                        x_off = self.x_shared[:, off_col_list]
                        mu += theano.tensor.dot(x_off, beta_off)
                    elif i == 'def':
                        def_col_list = [j * 3 + 1 for j in range(8)]
                        x_def = self.x_shared[:, def_col_list]
                        mu += theano.tensor.dot(x_def, beta_def)
                    elif i == 'pace':
                        pace_col_list = [j * 3 + 2 for j in range(8)]
                        x_pace = self.x_shared[:, pace_col_list]
                        mu += theano.tensor.dot(x_pace, beta_pace)
                # Likelihood (sampling distribution) of observations
                Y_obs = pm.Normal('Y_obs', mu=mu, sigma=sigma,
                                  observed=self.y_shared)
                self.trace = pm.sample(self.trace_samp, step=pm.Slice(),
                                       chains=self.chains, cores=self.cores,
                                       tune=self.burn_in)
        elif cat == 'MAP':
            cols = features.columns
            print('building model with prior:', prior)
            self.prior_model = pm.Model()
            for i in range(len(cols)):
                print(i, cols[i])
            with self.prior_model:
                # Priors for unknown model parameters
                alpha = pm.Normal('alpha', mu=0, sigma=10)
                if prior == 'normal':
                    beta_def = pm.Normal('beta_def', mu=-.25, sigma=.25, shape=8)
                    beta_off = pm.Normal('beta_off', mu=.25, sigma=.25, shape=8)
                    beta_pace = pm.Normal('beta_pace', mu=.25, sigma=.25, shape=8)
                if prior == 'uniform':
                    beta_def = pm.Uniform('beta_def', upper=0, lower=-.5, shape=8)
                    beta_off = pm.Uniform('beta_off', upper=.5, lower=0, shape=8)
                    beta_pace = pm.Uniform('beta_pace', upper=.5, lower=0, shape=8)
                if prior == 'truncnormal':
                    beta_def = pm.TruncatedNormal('beta_def', mu=-.25, sigma=.25,
                                                  upper=0, shape=8)
                    beta_off = pm.TruncatedNormal('beta_off', mu=.25, sigma=.25,
                                                  lower=0, shape=8)
                    beta_pace = pm.TruncatedNormal('beta_pace', mu=.25, sigma=.25,
                                                   lower=0, shape=8)
                sigma = pm.HalfNormal('sigma', sigma=1)
                # Expected value of outcome
                mu = alpha
                for i in ['off', 'def', 'pace']:
                    if i == 'off':
                        self.off_col_list = [j * 3 for j in range(8)]
                        x = features.iloc[:, self.off_col_list].values
                        mu += pm.math.dot(x, beta_off)
                    elif i == 'def':
                        self.def_col_list = [j * 3 + 1 for j in range(8)]
                        x = features.iloc[:, self.def_col_list].values
                        mu += pm.math.dot(x, beta_def)
                    elif i == 'pace':
                        self.pace_col_list = [j * 3 + 2 for j in range(8)]
                        x = features.iloc[:, self.pace_col_list].values
                        mu += pm.math.dot(x, beta_pace)
                # Likelihood (sampling distribution) of observations
                Y_obs = pm.Normal('Y_obs', mu=mu, sigma=sigma, observed=Y)
        elif cat == 'GP':
            train_x = torch.from_numpy(x).double()
            train_y = torch.from_numpy(Y.values).double()
            self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
            if prior == 'RBF':
                kernel = gpytorch.kernels.RBFKernel()
            elif prior == 'Exponential':
                kernel = gpytorch.kernels.MaternKernel(nu=0.5)
            self.gp_model = GPModelSKI(train_x, train_y, self.likelihood, kernel)

            # Find optimal model hyperparameters
            self.gp_model.double()
            self.gp_model.train()
            self.likelihood.train()

            # Use the adam optimizer
            optimizer = torch.optim.Adam([{'params': self.gp_model.parameters()}],
                                         lr=0.1)

            # "Loss" for GPs - the marginal log likelihood
            mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood,
                                                           self.gp_model)

            print('training gp model...')

            def gp_train(training_iter):
                for i in range(training_iter):
                    optimizer.zero_grad()
                    output = self.gp_model(train_x)
                    loss = -mll(output, train_y)
                    loss.backward()
                    print('Iter %d/%d - Loss: %.3f'
                          % (i + 1, training_iter, loss.item()), end='\r')
                    optimizer.step()
                print('Iter %d/%d - Loss: %.3f'
                      % (training_iter, training_iter, loss.item()))

            with gpytorch.settings.use_toeplitz(False):
                gp_train(500)
# Head of the deterministic rate function reconstructed here, as in PyMC3's
# disaster_model_theano_op example (the original fragment began mid-function):
# a Theano op returning early_mean before the switchpoint and late_mean after.
import theano.tensor as tt
from theano.compile.ops import as_op


@as_op(itypes=[tt.lscalar, tt.dscalar, tt.dscalar], otypes=[tt.dvector])
def rate_(switchpoint, early_mean, late_mean):
    out = empty(years)
    out[:switchpoint] = early_mean
    out[switchpoint:] = late_mean
    return out


with pm.Model() as model:
    # Prior for distribution of switchpoint location
    switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=years)
    # Priors for pre- and post-switch mean number of disasters
    early_mean = pm.Exponential('early_mean', lam=1.)
    late_mean = pm.Exponential('late_mean', lam=1.)

    # Allocate appropriate Poisson rates to years before and after the current
    # switchpoint location
    idx = arange(years)
    rate = rate_(switchpoint, early_mean, late_mean)

    # Data likelihood
    disasters = pm.Poisson('disasters', rate, observed=disasters_data)

    # Use slice sampler for means
    step1 = pm.Slice([early_mean, late_mean])
    # Use Metropolis for switchpoint, since it accommodates discrete variables
    step2 = pm.Metropolis([switchpoint])

    # Initial values for stochastic nodes
    start = {'early_mean': 2., 'late_mean': 3.}

    tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], njobs=2)
    pm.traceplot(tr)
# Priors on the two switchpoint locations (the original fragment began
# mid-definition; the lower bounds here are assumptions)
first_switchpoint = pm.DiscreteUniform('first_switchpoint', lower=0,
                                       upper=n_count_data)
second_switchpoint = pm.DiscreteUniform('second_switchpoint',
                                        lower=first_switchpoint,
                                        upper=n_count_data)

# NOTE: the setting of alpha seems to make a huge difference? not a good sign
alpha = 1.0 / count_data.mean()
first_mean = pm.Exponential('first_mean', lam=alpha)
second_mean = pm.Exponential('second_mean', lam=alpha)
third_mean = pm.Exponential('third_mean', lam=alpha)

# TODO rate = pm.switch(first_switchpoint >= count_data, first_mean, second_mean)
rate = rateFunc(first_switchpoint, second_switchpoint,
                first_mean, second_mean, third_mean)
text_count = pm.Poisson('text_count', rate, observed=count_data)

# TODO step1 = pm.NUTS([first_mean, second_mean, third_mean])
step1 = pm.Slice([first_mean, second_mean, third_mean])
# Use Metropolis for the switchpoints and missing values, since it
# accommodates discrete variables
step2 = pm.Metropolis([first_switchpoint, second_switchpoint])
trace = pm.sample(10000, step=[step1, step2])

pm.summary(trace)
pm.traceplot(trace)
plt.show()

# NOTE: tentative results
#   switchpoint 1: about day 33
#   switchpoint 2: about day 61
#   rate 1: about 10.67 per day
#   rate 2: about 14.67 per day
#   rate 3: about 10 per day
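# rateFunc is called above but not defined in this fragment; a minimal sketch
# of a three-regime rate built from nested switches (name, signature, and
# behaviour are assumptions inferred from the call site):
import numpy as np

def rateFunc(sp1, sp2, mean1, mean2, mean3):
    idx = np.arange(n_count_data)
    return pm.math.switch(sp1 >= idx, mean1,
                          pm.math.switch(sp2 >= idx, mean2, mean3))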
def make_step(cls):
    args = {}
    if hasattr(cls, 'step_args'):
        args.update(cls.step_args)
    return pm.Slice(**args)
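# Hedged usage sketch: a test class supplies step_args and the factory
# forwards them to pm.Slice (the class name and w=2.0 width are illustrative):
class SliceWithWidth:
    step_args = {'w': 2.0}  # initial slice width

with pm.Model():
    pm.Normal('x', 0., 1.)
    step = make_step(SliceWithWidth)  # equivalent to pm.Slice(w=2.0)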
Hans_Model = pm.Model()
with Hans_Model:
    # Define priors
    alpha = pm.Normal('alpha_est', mu=0, sd=10)
    beta = pm.Normal('beta_est', mu=0, sd=10, shape=2)
    sigma = pm.HalfNormal('sigma_est', sd=1)
    # Model parameter
    mu = alpha + beta[0] * X1 + beta[1] * X2
    # Likelihood
    Y_rv = pm.Normal('Y_rv', mu=mu, sd=sigma, observed=Y)

# Model fitting
with Hans_Model:
    # step = pm.Metropolis(vars=[alpha, beta, sigma])
    param_MAP = pm.find_MAP(fmin=sp.optimize.fmin_powell)
    Method = pm.Slice(vars=[alpha, beta, sigma])
    trace = pm.sample(Niter, step=Method, start=param_MAP)

pm.traceplot(trace)
print(pm.summary(trace))
plt.show()

# plt.plot(trace['alpha_est'])
# print(pm.summary(trace))
# plt.show()