def test_zeroinflatedpoisson(self):
    with pm.Model():
        theta = pm.Beta("theta", alpha=1, beta=1)
        psi = pm.HalfNormal("psi", sd=1)
        pm.ZeroInflatedPoisson("suppliers", psi=psi, theta=theta, shape=20)
        gen_data = pm.sample_prior_predictive(samples=5000)
        assert gen_data["theta"].shape == (5000,)
        assert gen_data["psi"].shape == (5000,)
        assert gen_data["suppliers"].shape == (5000, 20)
def aevb_model():
    with pm.Model() as model:
        pm.HalfNormal('x', shape=(2,), total_size=5)
        pm.Normal('y', shape=(2,))
    x = model.x
    y = model.y
    mu = theano.shared(x.init_value)
    rho = theano.shared(np.zeros_like(x.init_value))
    return {'model': model, 'y': y, 'x': x, 'replace': dict(mu=mu, rho=rho)}
def Metropolis_Hastings(self):
    # True parameter values
    a, b, c = 1, 0, 2
    sigma = 0.01

    # Size of dataset
    size = 100

    # Predictor variable
    X1 = np.random.randn(size)

    # Simulate outcome variable
    Y_obs = a * X1**2 + b * X1 + c + np.random.randn(size) * sigma

    fig, axes = plt.subplots(1, 2, sharex=True, figsize=(10, 4))
    axes[0].scatter(X1, Y_obs)
    axes[0].set_ylabel('Y')
    axes[0].set_xlabel('X1')
    plt.show()

    basic_model = pm.Model()
    with basic_model:
        # Priors for unknown model parameters
        a = pm.Uniform('a', lower=self.sampler['a']['range_min'],
                       upper=self.sampler['a']['range_max'])
        b = pm.Uniform('b', lower=self.sampler['b']['range_min'],
                       upper=self.sampler['b']['range_max'])
        c = pm.Uniform('c', lower=self.sampler['c']['range_min'],
                       upper=self.sampler['c']['range_max'])
        sigma = pm.HalfNormal('sigma', sd=1)

        # Expected value of outcome
        mu = a * X1**2 + b * X1 + c

        # Likelihood (sampling distribution) of observations
        Y_posterior = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=Y_obs)

        trace = pm.sample(100000, pm.Metropolis())
        # Alternative: obtain starting values via MAP, then draw with a Slice sampler
        # start = pm.find_MAP(model=basic_model)
        # step = pm.Slice()
        # trace = pm.sample(5000, step=step, start=start)

    _ = pm.traceplot(trace)
    plt.show()
def train(self, niter=1000, random_seed=123, tune=500, cores=4):
    # model training
    with self.scallop_model:
        # hyperparameter priors
        l = pm.InverseGamma("l", 5, 5, shape=self.dim)
        sigma_f = pm.HalfNormal("sigma_f", 1)

        # covariance function and marginal GP
        K = sigma_f ** 2 * pm.gp.cov.ExpQuad(self.dim, ls=l)
        self.gp = pm.gp.Marginal(cov_func=K)

        # observation noise and marginal likelihood
        sigma_n = pm.HalfNormal("sigma_n", 1)
        tot_catch = self.gp.marginal_likelihood("tot_catch", X=self.x,
                                                y=self.y, noise=sigma_n)

        # model fitting
        self.trace = pm.sample(niter, random_seed=random_seed,
                               progressbar=True, tune=tune, cores=cores)
def model_ggl(locations, samples, centers, cc):
    basic_model = pm.Model()
    with basic_model:
        # Priors for unknown model parameters
        s1 = pm.HalfNormal('s1', sd=20)
        m1 = centers[0]
        s2 = pm.Normal('s2', sd=20)
        m2 = centers[1]
        m3 = centers[2]
        s3 = pm.HalfNormal('s3', sd=20)

        p_x = gpdf(locations[0], m1, s1)
        p_y = gpdf(locations[1], m2, s2)
        p_theta = lpdf(locations[2], m3, s3)

        sigma = pm.HalfNormal('sigma', sd=1)

        # Expected value of outcome
        mu = cc * p_x * p_y * p_theta

        # Likelihood (sampling distribution) of observations
        Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=samples)

        trace = pm.sample(5000, njobs=4)
    pm.summary(trace)

    # Posterior-mean point estimates
    S1 = np.mean(trace['s1'])
    M1 = centers[0]
    S2 = np.mean(trace['s2'])
    M2 = centers[1]
    M3 = centers[2]
    S3 = np.mean(trace['s3'])

    p_x = gpdf(locations[0], M1, S1).eval()
    p_y = gpdf(locations[1], M2, S2).eval()
    p_theta = lpdf(locations[2], M3, S3).eval()
    mu = cc * p_x * p_y * p_theta
    Err = np.sum((samples - mu)**2)
    print(Err)
def test_start(self):
    with pm.Model() as model:
        a = pm.Poisson("a", 5)
        b = pm.HalfNormal("b", 10)
        y = pm.Normal("y", a, b, observed=[1, 2, 3, 4])
        start = {
            "a": np.random.poisson(5, size=500),
            "b_log__": np.abs(np.random.normal(0, 10, size=500)),
        }
        trace = pm.sample_smc(500, start=start)
def test_init_jitter(testval, jitter_max_retries, expectation):
    with pm.Model() as m:
        pm.HalfNormal("x", transform=None, testval=testval)

    with expectation:
        # Starting value is negative (invalid) when np.random.rand returns 0 (jitter = -1)
        # and positive (valid) when it returns 1 (jitter = 1)
        with mock.patch("numpy.random.rand", side_effect=[0, 0, 0, 1, 0]):
            start = pm.sampling._init_jitter(m, chains=1,
                                             jitter_max_retries=jitter_max_retries)
            pm.util.check_start_vals(start, m)
def solve_vi(X, Y, initial=None, batch_size=100):
    X_t = th.shared(X)  # alternatively: pm.Minibatch(X, batch_size=batch_size)
    Y_t = th.shared(Y)  # alternatively: pm.Minibatch(Y, batch_size=batch_size)

    dx = np.max(X) - np.min(X)
    dy = np.max(Y) - np.min(Y)
    with pm.Model() as model:
        sigma_K = pm.HalfNormal('sigma_K', sd=dy / 3.)
        l_space = pm.HalfNormal('l_space', sd=dx / 3., testval=1.)
        cov_func = sigma_K**2 * pm.gp.cov.ExpQuad(2, active_dims=[0, 1], ls=l_space)
        gp = pm.gp.Marginal(cov_func=cov_func)
        eps = pm.Uniform('eps', 0.0, np.std(Y))
        y1 = gp.marginal_likelihood('y1', X_t, Y_t, eps)

        initial = initial or pm.find_MAP()
        approx = pm.fit(
            1000,
            start=initial,
            method='advi',
            callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-4)])

        # Posterior means of the approximation (kept for inspection)
        means = approx.bij.rmap(approx.mean.eval())

        df = approx.sample(10000)
        p = {k: pm.summary(df)['mean'][k]
             for k in pm.summary(df)['mean'].keys()}
        return p
def draws_from_StudentT(data, uncertainties):
    # pymc3 model
    with pm.Model() as model:
        sig_prior = pm.HalfNormal('sig', 50)
        vel_prior = pm.Normal('vel', 0.0, 50.0)
        lognu_prior = pm.Uniform('lognu', -2.0, np.log(20))
        nu_prior = pm.Deterministic('nu', pm.math.exp(lognu_prior))

        vel_tracers = pm.Normal('vel-tracers', mu=vel_prior,
                                sd=uncertainties, shape=len(data))
        measurements = pm.StudentT('measurements', nu=nu_prior, mu=vel_tracers,
                                   sd=sig_prior, observed=data)

        trace = pm.sample(2000, tune=10000)

    # Plot these traces
    pm.traceplot(trace)
    plt.savefig('Plots/studentT_traceplot.pdf')
    plt.savefig('Plots/studentT_traceplot.jpg')

    # Make a KDE approximation to the sigma posterior
    xx = np.linspace(0.0, 30.0, 1000)
    kde_approximation = stats.gaussian_kde(trace['sig'])

    # Plot things
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(xx, kde_approximation(xx), c='r', linewidth=3.0)
    ax.hist(trace['sig'], 100, facecolor='0.8', edgecolor='k',
            histtype='stepfilled', normed=True, linewidth=2.0)
    ax.axvline(xx[np.argmax(kde_approximation(xx))], c='k',
               linestyle='dashed', linewidth=2.0)
    ax.set_xlim([0.0, 30.0])
    ax.set_ylabel(r'PDF')
    ax.set_yticks([])
    ax.set_xlabel(r'$\sigma$ (kms$^{-1}$)')

    fig.tight_layout()
    fig.savefig('Plots/studentT_pdf.pdf')
    fig.savefig('Plots/studentT_pdf.jpg')

    return trace, kde_approximation
def fit(self, cases_past, deaths_curr,
        quantiles=[10, 20, 30, 40, 50, 60, 70, 80, 90]):
    '''Use a GP to find the relationship between cases in the past
    and deaths today.'''
    # If not enough data, return all zeros.
    if len(cases_past) < 5:
        self.quantile_gp = []
        for q in quantiles:
            self.quantile_gp.append(degenerate)
        return self.quantile_gp

    cases_past2, deaths_curr2 = self.scale_data(cases_past, deaths_curr)

    # First, we do a simple linear fit, and use this as our mean prior.
    mfit = curve_fit(linear, cases_past2, deaths_curr2)
    slope = mfit[0]

    with pm.Model() as gp_model:
        ρ = pm.HalfCauchy('ρ', 5)
        η = pm.HalfCauchy('η', 5)
        M = pm.gp.mean.Linear(coeffs=slope)
        K = (η**2) * pm.gp.cov.ExpQuad(1, ρ)
        σ = pm.HalfNormal('σ', 50)
        deaths_gp = pm.gp.Marginal(mean_func=M, cov_func=K)
        deaths_gp.marginal_likelihood('deaths', X=cases_past2.reshape(-1, 1),
                                      y=deaths_curr2, noise=σ)

    with gp_model:
        gp_trace = pm.sample(self.draws, tune=self.tune, cores=1,
                             random_seed=random.randint(30, 80))

    X_pred = np.arange(0, np.max(cases_past2) * 5)
    with gp_model:
        deaths_pred = deaths_gp.conditional("deaths_pred_noise",
                                            X_pred.reshape(-1, 1),
                                            pred_noise=True)
        gp_samples = pm.sample_posterior_predictive(gp_trace,
                                                    vars=[deaths_pred],
                                                    samples=self.samples)

    quantile_gp = [np.percentile(gp_samples['deaths_pred_noise'] * self.scale_factor,
                                 q, axis=0)
                   for q in quantiles]

    # We interpolate our predicted function
    X_pred2 = X_pred * self.scale_factor
    self.quantile_gp = []
    for i in range(len(quantiles)):
        f = interp1d(X_pred2, quantile_gp[i], bounds_error=False,
                     fill_value='extrapolate')
        self.quantile_gp.append(f)
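# --- Hedged usage sketch for fit() above. "DeathPredictor" and the attributes
# it is constructed with (draws, tune, samples) are assumptions inferred from
# how fit() uses `self`; they are not confirmed by the source.
predictor = DeathPredictor(draws=500, tune=500, samples=200)  # hypothetical class
predictor.fit(cases_past, deaths_curr)
median_curve = predictor.quantile_gp[4]   # quantiles[4] == 50, i.e. the median
print(median_curve(1250.0))               # predicted median deaths at 1250 cases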
def test_pairplot():
    with pm.Model() as model:
        a = pm.Normal('a', shape=2)
        c = pm.HalfNormal('c', shape=2)
        b = pm.Normal('b', a, c, shape=2)
        d = pm.Normal('d', 100, 1)
        trace = pm.sample(1000)
    pairplot(trace)
    pairplot(trace, hexbin=True, plot_transformed=True)
    pairplot(trace, sub_varnames=['a_0', 'c_0', 'b_1'])
def test_exec_nuts_init(method):
    with pm.Model() as model:
        pm.Normal('a', mu=0, sd=1, shape=2)
        pm.HalfNormal('b', sd=1)
    with model:
        start, _ = pm.init_nuts(init=method, n_init=10)
        assert isinstance(start, dict)
        start, _ = pm.init_nuts(init=method, n_init=10, njobs=2)
        assert isinstance(start, list)
        assert len(start) == 2
        assert isinstance(start[0], dict)
def fixture_model():
    with pm.Model() as model:
        n = 5
        dim = 4
        with pm.Model():
            cov = pm.InverseGamma("cov", alpha=1, beta=1)
            x = pm.Normal("x", mu=np.ones((dim,)), sigma=pm.math.sqrt(cov),
                          shape=(n, dim))
            eps = pm.HalfNormal("eps", np.ones((n, 1)), shape=(n, dim))
            mu = pm.Deterministic("mu", at.sum(x + eps, axis=-1))
            y = pm.Normal("y", mu=mu, sigma=1, shape=(n,))
    return model, [cov, x, eps, y]
def run(self):
    coloredlogs.install()
    logging.info('Fetching some data')
    with dask.set_options(get=dask.multiprocessing.get):
        data = dask.dataframe.read_csv(
            '/tmp/split_data/{}/train/*.csv'.format(self.rand_round))
        total_size = data.week_num.count().compute()
        nose.tools.assert_greater(total_size, 100, 'Not enough data!')
        unique_products = data['product_id'].unique().compute().astype(np.uint16)
        sample = data.head()
    logging.info('Got it!')

    product_id_var = theano.shared(
        value=sample.product_id.astype(
            'category', categories=unique_products).cat.codes.values,
        name='product_id_var')
    adjusted_demand_var = theano.shared(
        value=sample.adjusted_demand.values, name='adjusted_demand_var')

    model = pm.Model()
    with model:
        product_category = pm.Uniform('cat', 0, 1,
                                      shape=(unique_products.shape[0], 5))
        product_vecs = pm.Normal('vecs', 0, 100, shape=5)
        adjusted_demand_variance = pm.HalfNormal('demand_variance', 10)
        product_pred = T.dot(product_category[product_id_var], product_vecs)
        adjusted_demand = pm.Normal('adjusted_demand', product_pred,
                                    adjusted_demand_variance,
                                    observed=adjusted_demand_var)

        minibatches = map(self.expand_batch, self.minibatches(unique_products))
        v_params = pm.variational.advi_minibatch(
            n=100,
            minibatch_tensors=[product_id_var, adjusted_demand_var],
            minibatch_RVs=[adjusted_demand],
            minibatches=minibatches,
            total_size=total_size,
            n_mcsamples=5,
            verbose=True)
        trace = pm.variational.sample_vp(v_params, draws=500)

    print(pm.summary(trace))
    res = trace[-100:]['cat'].mean(0)
    self.output().makedirs()
    pandas.DataFrame(res, index=unique_products.values).to_msgpack(
        self.output().path)
def _build_model(self, x, y):
    """Build a Bayesian neural network with two tanh hidden layers."""
    if self.model is not None:
        raise Exception("Overwriting previous fit.")

    input_dim = x.shape[1]
    output_dim = y.shape[1]
    ann_input = theano.shared(x)
    ann_output = theano.shared(y)
    n_hidden = 3

    with pm.Model() as neural_network:
        # Weights from input to hidden layer
        weights_in_1 = pm.Normal('w_in_1', 0, sd=1, shape=(input_dim, n_hidden))
        weights_b_1 = pm.Normal('w_b_1', 0, sd=1, shape=(n_hidden,))

        # Weights from 1st to 2nd layer
        weights_1_2 = pm.Normal('w_1_2', 0, sd=1, shape=(n_hidden, n_hidden))
        weights_b_2 = pm.Normal('w_b_2', 0, sd=1, shape=(n_hidden,))

        # Weights from hidden layer to output
        weights_2_out = pm.Normal('w_2_out', 0, sd=1, shape=(n_hidden, output_dim))
        weights_b_out = pm.Normal('w_b_out', 0, sd=1, shape=(output_dim,))

        # Build neural network using tanh activation function
        act_1 = pm.math.tanh(pm.math.dot(ann_input, weights_in_1) + weights_b_1)
        act_2 = pm.math.tanh(pm.math.dot(act_1, weights_1_2) + weights_b_2)
        act_out = pm.math.dot(act_2, weights_2_out) + weights_b_out

        variance = pm.HalfNormal('uncertainty', sigma=3.0)
        out = pm.Normal('out', mu=act_out, sigma=variance, observed=ann_output)

    self.model = neural_network
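# --- Hedged prediction sketch for the network built above. It assumes the
# class also keeps references to ann_input/ann_output (the snippet does not
# store them) and that x_new/y_dummy are hypothetical arrays of the right shape.
with self.model:
    trace = pm.sample(1000, tune=1000)
ann_input.set_value(x_new)                    # swap in test inputs
ann_output.set_value(np.zeros_like(y_dummy))  # placeholder with matching shape
with self.model:
    ppc = pm.sample_posterior_predictive(trace, var_names=['out'])
y_pred = ppc['out'].mean(axis=0)              # posterior-mean prediction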
def bayesTest(mocktable, outname):
    import numpy as np
    import pymc3 as pymc
    from collections import Counter

    # Parse the expression table: header row maps sample names to columns,
    # remaining rows hold per-gene expression values.
    idx = {}
    expr_vector = {}
    for line in open(mocktable):
        if line.startswith('Gene'):
            header = line.strip().split('\t')
            for i in range(len(header)):
                if header[i] != 'Gene':
                    idx[header[i]] = i
        else:
            vals = line.strip().split('\t')
            gene = vals[0]
            for sample in idx:
                if sample not in expr_vector:
                    expr_vector[sample] = [float(vals[idx[sample]])]
                else:
                    expr_vector[sample].append(float(vals[idx[sample]]))

    for sample in expr_vector:
        if sample == 'Neurons':
            neuro = expr_vector[sample]
        if sample == 'Astrocytes':
            astro = expr_vector[sample]
        if sample == 'Oligodendrocytes':
            oligo = expr_vector[sample]
        if sample == 'Sample1':
            one = expr_vector[sample]
        if sample == 'Sample2':
            two = expr_vector[sample]
        if sample == 'Sample3':
            three = expr_vector[sample]

    samples = [one, two, three]
    for s in samples:
        with pymc.Model() as model:
            beta = pymc.Dirichlet('beta', a=np.array([1.0, 1.0, 1.0]))
            sigma = pymc.HalfNormal('sigma', sd=1)
            y_est = beta[0] * neuro + beta[1] * astro + beta[2] * oligo
            likelihood = pymc.Normal('y', mu=y_est, sd=sigma, observed=s)
            trace = pymc.sample(1000, random_seed=123, progressbar=True)
        s = pymc.summary(trace)

        # trace['beta'] is a matrix with 3 columns and 1000 rows;
        # split it by cell type and compute summary statistics
        neurons = trace['beta'][:, 0]
        astrocytes = trace['beta'][:, 1]
        oligodendrocytes = trace['beta'][:, 2]
        n_avg = np.mean(neurons)
        n_med = np.median(neurons)
        data = Counter(neurons)
        n_mode = data.most_common(1)[0][0]
        print(n_avg, n_med, n_mode)
def test_variable_type(self):
    with pm.Model() as model:
        mu = pm.HalfNormal("mu", 1)
        a = pm.Normal("a", mu=mu, sigma=2, observed=np.array([1, 2]))
        b = pm.Poisson("b", mu, observed=np.array([1, 2]))
        trace = pm.sample()
    with model:
        ppc = pm.sample_posterior_predictive(trace, samples=1)
        assert ppc["a"].dtype.kind == "f"
        assert ppc["b"].dtype.kind == "i"
def ruqaxik_pymc(ri, rnjml):
    if rnjml is None:
        return ri.tzij
    # Half-normal scale for the observation noise
    rjchnm = pm.HalfNormal(name='sg_' + str(ri), sd=10)
    # Constant offset (intercept)
    b = pm.Normal(name='junelïk_' + str(ri), mu=0, sd=100)
    return pm.Normal(name=str(ri), mu=rnjml + b, sd=rjchnm, observed=ri.tzij)
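# --- Hedged usage sketch: ruqaxik_pymc creates PyMC3 random variables, so it
# must be called inside a model context. `ri` is assumed to expose observations
# as `.tzij` and a usable str() name; `rnjml` is a predicted-mean tensor or None.
with pm.Model():
    obs = ruqaxik_pymc(ri, rnjml)
    trace = pm.sample(1000, tune=1000)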
def case_count_model_us_states(df):
    # Normalize inputs in a way that is sensible:
    # people per test, normalized to South Korea,
    # assuming S.K. testing is "saturated"
    ppt_sk = np.log10(51500000. / 250000)
    df['people_per_test_normalized'] = (
        np.log10(df['people_per_test_7_days_ago']) - ppt_sk)

    n = len(df)
    # For each state:
    # c_obs = number of observed cases
    c_obs = df['num_pos_7_days_ago'].values
    # d_obs = number of observed deaths
    # (c_star, computed below, is the number of true cases)
    d_obs = df[['death', 'num_pos_7_days_ago']].min(axis=1).values
    # people per test
    people_per_test = df['people_per_test_normalized'].values

    covid_case_count_model = pm.Model()
    with covid_case_count_model:
        # Priors:
        mu_0 = pm.Beta('mu_0', alpha=1, beta=100, testval=0.01)
        alpha = pm.Bound(pm.Normal, lower=0.0)('alpha', mu=8, sigma=3, shape=1)
        beta = pm.Bound(pm.Normal, upper=0.0)('beta', mu=-1, sigma=1, shape=1)
        sigma = pm.HalfNormal('sigma', sigma=0.5, testval=0.1)

        # Model probability of case under-reporting as logistic regression:
        mu_model_logit = alpha + beta * people_per_test
        tau_logit = pm.Normal('tau_logit', mu=mu_model_logit,
                              sigma=sigma, shape=n)
        # Inverse-logit transform; tau_logit is a symbolic tensor,
        # so use pm.math rather than np.exp
        tau = pm.math.exp(tau_logit) / (pm.math.exp(tau_logit) + 1)
        c_star = c_obs / tau

        # Binomial likelihood:
        d = pm.Binomial('d', n=c_star, p=mu_0, observed=d_obs)

    return covid_case_count_model
def estimate_student(normalized_ranks):
    """Fit a PyMC3 model for the parameters of a Student's t distribution.

    All the model does is fit the parameters of a t distribution, since it is
    clear (in the author's opinion) that the logit-transformed ranks are very
    well described by a t distribution. The logit ranks are thus the
    observations, and the model finds the ranges of parameters consistent
    with those observations."""
    with pm.Model() as model:
        nu = pm.HalfNormal('nu', 50)          # very broad prior
        mu = pm.Normal('mu', mu=0, sigma=50)  # very broad prior
        sigma = pm.HalfNormal('sig', 50)      # very broad prior
        lik = pm.StudentT('t', nu=nu, mu=mu, sigma=sigma,
                          observed=logit(normalized_ranks))
        trace = pm.sample(1000, tune=1000)
    return trace, model
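# --- Hedged usage sketch: `ranks` is hypothetical data, kept strictly inside
# (0, 1) so that logit() is finite.
ranks = np.random.uniform(0.01, 0.99, size=500)
trace, model = estimate_student(ranks)
print(pm.summary(trace, var_names=['nu', 'mu', 'sig']))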
def graded_response_model(dataset, n_categories):
    """Defines the mcmc model for the graded response model.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        n_categories: number of polytomous values (i.e. number of Likert levels)

    Returns:
        model: PyMC3 model to run
    """
    n_items, n_people = dataset.shape
    n_levels = n_categories - 1

    # Need small deviation in offset to fit into pymc framework
    mu_value = linspace(-0.1, 0.1, n_levels)

    # Run through 0, K - 1
    observed = dataset - dataset.min()

    graded_mcmc_model = pm.Model()
    with graded_mcmc_model:
        # Ability parameters
        ability = pm.Normal("Ability", mu=0, sigma=1, shape=n_people)

        # Discrimination multilevel prior
        rayleigh_scale = pm.Lognormal("Rayleigh_Scale", mu=0,
                                      sigma=1 / 4, shape=1)
        discrimination = pm.Bound(Rayleigh, lower=0.25)(
            name='Discrimination', beta=rayleigh_scale,
            offset=0.25, shape=n_items)

        # Threshold multilevel prior
        sigma_difficulty = pm.HalfNormal('Difficulty_SD', sigma=1, shape=1)
        for ndx in range(n_items):
            thresholds = pm.Normal(
                f"Thresholds{ndx}",
                mu=mu_value,
                sigma=sigma_difficulty,
                shape=n_levels,
                transform=pm.distributions.transforms.ordered)

            # Compute the log likelihood
            kernel = discrimination[ndx] * ability
            probabilities = pm.OrderedLogistic(
                f'Log_Likelihood{ndx}', cutpoints=thresholds,
                eta=kernel, observed=observed[ndx])

    return graded_mcmc_model
def setup_class(self):
    super().setup_class()
    self.data = np.sort(np.random.normal(loc=0, scale=1, size=1000))

    def normal_sim(a, b):
        return np.sort(np.random.normal(a, b, 1000))

    with pm.Model() as self.SMABC_test:
        a = pm.Normal("a", mu=0, sd=5)
        b = pm.HalfNormal("b", sd=2)
        s = pm.Simulator("s", normal_sim, observed=self.data)
def infer_with_pymc3(self, n_iteration):
    with pm.Model() as linreg:
        a = pm.Normal('a', mu=0, sd=100)
        b = pm.Normal('b', mu=0, sd=100)
        # http://docs.pymc.io/api/distributions/continuous.html#pymc3.distributions.continuous.HalfNormal
        sigma = pm.HalfNormal('sigma', sd=1)

        y_est = a * self.x + b
        likelihood = pm.Normal('y', mu=y_est, sd=sigma, observed=self.y)
        self.trace = pm.sample(n_iteration, random_seed=123)
def from_posterior(param, samples, distribution=None, half=False, freedom=10):
    if len(samples.shape) > 1:
        shape = samples.shape[1:]
    else:
        shape = None

    if distribution is None:
        # Empirical (KDE-based) prior, padded with zero-density tails
        smin, smax = np.min(samples), np.max(samples)
        width = smax - smin
        x = np.linspace(smin, smax, 1000)
        y = stats.gaussian_kde(samples)(x)
        if half:
            x = np.concatenate([x, [x[-1] + 0.1 * width]])
            y = np.concatenate([y, [0]])
        else:
            x = np.concatenate([[x[0] - 0.1 * width], x, [x[-1] + 0.1 * width]])
            y = np.concatenate([[0], y, [0]])
        return pm.distributions.Interpolated(param, x, y)
    elif distribution == 'normal':
        temp = stats.norm.fit(samples)
        if shape is None:
            return pm.Normal(param, mu=temp[0], sigma=freedom * temp[1])
        else:
            return pm.Normal(param, mu=temp[0], sigma=freedom * temp[1],
                             shape=shape)
    elif distribution == 'hnormal':
        temp = stats.halfnorm.fit(samples)
        if shape is None:
            return pm.HalfNormal(param, sigma=freedom * temp[1])
        else:
            return pm.HalfNormal(param, sigma=freedom * temp[1], shape=shape)
    elif distribution == 'hcauchy':
        temp = stats.halfcauchy.fit(samples)
        if shape is None:
            return pm.HalfCauchy(param, freedom * temp[1])
        else:
            return pm.HalfCauchy(param, freedom * temp[1], shape=shape)
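# --- Hedged sketch of sequential updating with from_posterior, following the
# PyMC3 "updating priors" pattern; `old_trace` and `new_data` are hypothetical.
with pm.Model():
    mu = from_posterior('mu', old_trace['mu'])
    sd = from_posterior('sd', old_trace['sd'], half=True)
    pm.Normal('obs', mu=mu, sd=sd, observed=new_data)
    new_trace = pm.sample(1000, tune=1000)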
def setup_class(self):
    super().setup_class()
    self.data = np.random.normal(loc=0, scale=1, size=1000)

    def normal_sim(a, b):
        return np.random.normal(a, b, 1000)

    with pm.Model() as self.SMABC_test:
        a = pm.Normal("a", mu=0, sigma=1)
        b = pm.HalfNormal("b", sigma=1)
        s = pm.Simulator(
            "s", normal_sim, params=(a, b), sum_stat="sort",
            epsilon=1, observed=self.data
        )
        self.s = s

    def quantiles(x):
        return np.quantile(x, [0.25, 0.5, 0.75])

    def abs_diff(eps, obs_data, sim_data):
        return np.mean(np.abs((obs_data - sim_data) / eps))

    with pm.Model() as self.SMABC_test2:
        a = pm.Normal("a", mu=0, sigma=1)
        b = pm.HalfNormal("b", sigma=1)
        s = pm.Simulator(
            "s",
            normal_sim,
            params=(a, b),
            distance=abs_diff,
            sum_stat=quantiles,
            epsilon=1,
            observed=self.data,
        )

    with pm.Model() as self.SMABC_potential:
        a = pm.Normal("a", mu=0, sigma=1)
        b = pm.HalfNormal("b", sigma=1)
        c = pm.Potential("c", pm.math.switch(a > 0, 0, -np.inf))
        s = pm.Simulator(
            "s", normal_sim, params=(a, b), sum_stat="sort",
            epsilon=1, observed=self.data
        )
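# --- Hedged sampling sketch: in PyMC3 3.11, Simulator models are sampled with
# SMC-ABC via pm.sample_smc(kernel="ABC"); on other versions the keyword may
# differ, so treat this exact call as an assumption.
with self.SMABC_test:
    abc_trace = pm.sample_smc(draws=1000, kernel="ABC")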
def FitMyModel(trainDM, PredDM):
    with pm.Model() as model:
        # Partition dataframes
        Ydf = trainDM[0]
        TXdf = trainDM[1]
        PXdf = PredDM

        ## Parameters for the linear predictor
        dimX = len(TXdf.columns)
        b = pm.Normal('b', mu=0, sd=1.5, shape=dimX)

        ## The latent function
        x_index = TXdf.columns.get_loc("Longitude")
        y_index = TXdf.columns.get_loc("Latitude")

        ## Building the covariance structure
        tau = pm.HalfNormal('tau', sd=10)
        sigma = pm.HalfNormal('sigma', sd=10)
        phi = pm.HalfNormal('phi', sd=6)
        Tau = pm.gp.cov.Constant(tau)
        cov = (sigma * pm.gp.cov.Matern32(2, phi,
                                          active_dims=[x_index, y_index])) + Tau

        mean_f = pm.gp.mean.Linear(coeffs=b)
        gp = pm.gp.Latent(mean_func=mean_f, cov_func=cov)
        f = gp.prior("latent_field", X=TXdf.values, reparameterize=False)
        yy = pm.Bernoulli("yy", logit_p=f, observed=Ydf.values)

        trace = pm.sample(15, init='adapt_diag')
        # Alternative: trace = pm.fit(method='advi',
        #     callbacks=[CheckParametersConvergence()], n=15000)

        # Remove any column that doesn't appear in the training data
        ValidPreds = PredDM[TXdf.columns]
        PredX = ValidPreds.values
        f_star = gp.conditional("f_star", PredX)
        pred_samples = pm.sample_ppc(trace, vars=[f_star], samples=100)

    return pred_samples, trace
def create_model(self):
    """Creates and returns the PyMC3 model.

    Note: The size of the shared variables must match the size of the
    training data. Otherwise, setting the shared variables later will
    raise an error. See http://docs.pymc.io/advanced_theano.html

    Returns
    -------
    the PyMC3 model
    """
    model_input = theano.shared(
        np.zeros([self.num_training_samples, self.num_pred]))
    model_output = theano.shared(
        np.zeros(self.num_training_samples, dtype='int'))
    model_cats = theano.shared(
        np.zeros(self.num_training_samples, dtype='int'))

    self.shared_vars = {
        'model_input': model_input,
        'model_output': model_output,
        'model_cats': model_cats
    }

    model = pm.Model()
    with model:
        mu_alpha = pm.Normal('mu_alpha', mu=0, sd=100)
        sigma_alpha = pm.HalfNormal('sigma_alpha', sd=100)
        mu_beta = pm.Normal('mu_beta', mu=0, sd=100)
        sigma_beta = pm.HalfNormal('sigma_beta', sd=100)

        alpha = pm.Normal('alpha', mu=mu_alpha, sd=sigma_alpha,
                          shape=(self.num_cats,))
        betas = pm.Normal('beta', mu=mu_beta, sd=sigma_beta,
                          shape=(self.num_cats, self.num_pred))

        c = model_cats
        temp = alpha[c] + T.sum(betas[c] * model_input, 1)
        p = pm.invlogit(temp)
        o = pm.Bernoulli('o', p, observed=model_output)

    return model
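# --- Hedged usage sketch: swap new data into the shared variables before
# sampling, as the docstring's note implies; X, y, cats are hypothetical arrays
# whose sizes match num_training_samples.
model = self.create_model()
self.shared_vars['model_input'].set_value(X)
self.shared_vars['model_output'].set_value(y)
self.shared_vars['model_cats'].set_value(cats)
with model:
    trace = pm.sample(1000, tune=1000)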
def fit_refractory_minus_duration():
    sample_data = pd.read_pickle(
        '../data/raw/refractory_prior_samples.pkl')['samples'].values

    # Fit an inverse-Gaussian (Wald) distribution to the prior samples
    with pm.Model() as model:
        a = pm.HalfNormal('a', 100 * 10)
        b = pm.HalfNormal('b', 100 * 10)
        pm.Wald('prior', mu=a, lam=b, observed=sample_data)
        trace = pm.sample(2000, njobs=1)

    summary_df = pm.summary(trace)
    a_est = summary_df.loc['a', 'mean']
    b_est = summary_df.loc['b', 'mean']

    # Draw from the fitted Wald as a check against the original samples
    n_samples = 10000
    with pm.Model() as model:
        pm.Wald('prior_check', mu=a_est, lam=b_est)
        outcome = pm.sample(n_samples, njobs=1, nchains=1)

    samples = outcome['prior_check']
    sns.distplot(samples, kde=True)
    sns.distplot(sample_data, kde=True)
    plt.show()
    print(summary_df)