def fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs):
    callbacks = vi_params.get("callbacks", [])
    for i, c in enumerate(callbacks):
        if isinstance(c, CheckParametersConvergence):
            params = c.__dict__
            params.pop("_diff")
            params.pop("prev")
            params.pop("ord")
            params["diff"] = "absolute"
            callbacks[i] = CheckParametersConvergence(**params)

    if sampler == "variational":
        with self.model:
            try:
                self.trace_ = pm.sample(chains=2, cores=8, tune=5, draws=5)
                vi_params["start"] = self.trace_[-1]
                self.trace_vi_ = pm.fit(**vi_params)
                self.trace_ = self.trace_vi_.sample(draws=draws)
            except Exception as e:
                if hasattr(e, "message"):
                    message = e.message
                else:
                    message = e
                logger.error(message)
                self.trace_vi_ = None
        if self.trace_vi_ is None and self.trace_ is None:
            with self.model:
                logger.info(
                    "Error in variational (ADVI) sampler; falling back to NUTS with draws {}"
                    .format(draws))
                self.trace_ = pm.sample(chains=1, cores=4, tune=20, draws=20, step=pm.NUTS())
    elif sampler == "metropolis":
        with self.model:
            start = pm.find_MAP()
            self.trace_ = pm.sample(
                chains=2,
                cores=8,
                tune=tune,
                draws=draws,
                **kwargs,
                step=pm.Metropolis(),
                start=start,
            )
    else:
        with self.model:
            self.trace_ = pm.sample(chains=2, cores=8, tune=tune, draws=draws,
                                    **kwargs, step=pm.NUTS())
def get_step_for_trace(trace=None, model=None, regularize=True,
                       regular_window=5, regular_variance=1e-3, **kwargs):
    """
    Define a tuning procedure that adapts off-diagonal mass matrix terms,
    adapted from a blog post by Dan Foreman-Mackey:
    https://dfm.io/posts/pymc3-mass-matrix/

    Args:
        trace (trace): PyMC3 trace object
        model (model): PyMC3 model object
        regularize (bool): flag to turn on covariance matrix regularization
        regular_window (int): size of parameter space at which regularization
            becomes important
        regular_variance (float): magnitude of the covariance floor

    Returns:
        pymc3 step_methods object
    """
    model = pm.modelcontext(model)

    # If not given, use the trivial metric
    if trace is None:
        potential = pm.step_methods.hmc.quadpotential.QuadPotentialFull(np.eye(model.ndim))
        return pm.NUTS(potential=potential, **kwargs)

    # Loop over samples and convert to the relevant parameter space
    # while removing divergent samples
    div_mask = np.invert(np.copy(trace.diverging))
    samples = np.empty((div_mask.sum() * trace.nchains, model.ndim))
    i = 0
    imask = 0
    for chain in trace._straces.values():
        for p in chain:
            if div_mask[imask]:
                samples[i] = model.bijection.map(p)
                i += 1
            imask += 1

    # Compute the sample covariance
    cov = np.cov(samples, rowvar=False)

    # Stan uses a regularized estimator for the covariance matrix to
    # be less sensitive to numerical issues for large parameter spaces.
    if regularize:
        N = len(samples)
        cov = cov * N / (N + regular_window)
        cov[np.diag_indices_from(cov)] += regular_variance * regular_window / (N + regular_window)

    # Use the sample covariance as the inverse metric
    potential = pm.step_methods.hmc.quadpotential.QuadPotentialFull(cov)
    return pm.NUTS(potential=potential, **kwargs)
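# A hedged warm-up sketch (not part of the original code) showing how `get_step_for_trace`
# above is typically used, following the iterative tuning loop from the referenced blog post:
# run geometrically growing tuning rounds, re-estimating the dense mass matrix from the
# previous round's trace each time. The window sizes and the `model` argument are assumptions.
def tune_mass_matrix(model, n_start=25, n_burn=500, n_tune=5000):
    n_window = n_start * 2 ** np.arange(np.floor(np.log2((n_tune - n_burn) / n_start)))
    n_window = np.append(n_window, n_tune - n_burn - np.sum(n_window)).astype(int)

    with model:
        start = None
        burnin_trace = None
        for steps in n_window:
            # Re-estimate the mass matrix from the previous short run.
            step = get_step_for_trace(burnin_trace)
            burnin_trace = pm.sample(
                start=start, tune=int(steps), draws=2, step=step,
                compute_convergence_checks=False, discard_tuned_samples=False)
            start = [t[-1] for t in burnin_trace._straces.values()]

    # Return a NUTS step tuned on the final warm-up trace, plus a starting point.
    return get_step_for_trace(burnin_trace, model=model), start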
def fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs):
    callbacks = vi_params.get('callbacks', [])
    for i, c in enumerate(callbacks):
        if isinstance(c, CheckParametersConvergence):
            params = c.__dict__
            params.pop('_diff')
            params.pop('prev')
            params.pop('ord')
            params['diff'] = 'absolute'
            callbacks[i] = CheckParametersConvergence(**params)

    if sampler == 'variational':
        with self.model:
            try:
                self.trace = pm.sample(chains=2, cores=8, tune=5, draws=5)
                vi_params['start'] = self.trace[-1]
                self.trace_vi = pm.fit(**vi_params)
                self.trace = self.trace_vi.sample(draws=draws)
            except Exception as e:
                if hasattr(e, 'message'):
                    message = e.message
                else:
                    message = e
                self.logger.error(message)
                self.trace_vi = None
        if self.trace_vi is None and self.trace is None:
            with self.model:
                self.logger.info(
                    "Error in variational (ADVI) sampler; falling back to NUTS with draws {}"
                    .format(draws))
                self.trace = pm.sample(chains=1, cores=4, tune=20, draws=20, step=pm.NUTS())
    elif sampler == 'metropolis':
        with self.model:
            start = pm.find_MAP()
            self.trace = pm.sample(chains=2, cores=8, tune=tune, draws=draws,
                                   **kwargs, step=pm.Metropolis(), start=start)
    else:
        with self.model:
            self.trace = pm.sample(chains=2, cores=8, tune=tune, draws=draws,
                                   **kwargs, step=pm.NUTS())
def get_step_for_trace(init_cov=None, trace=None, model=None,
                       regularize_cov=True, regular_window=5,
                       regular_variance=1e-3, **kwargs):
    """
    Construct an estimate of the mass matrix based on the sample covariance,
    which is either provided directly via `init_cov` or generated from a
    `MultiTrace` object from PyMC3. This is then used to initialize a
    `NUTS` object to use in `sample`.
    """
    model = pm.modelcontext(model)

    # If no trace or covariance is provided, just use the identity.
    if trace is None and init_cov is None:
        potential = QuadPotentialFull(np.eye(model.ndim))
        return pm.NUTS(potential=potential, **kwargs)

    # If the trace is provided, loop over samples
    # and convert to the relevant parameter space.
    if trace is not None:
        samples = np.empty((len(trace) * trace.nchains, model.ndim))
        i = 0
        for chain in trace._straces.values():
            for p in chain:
                samples[i] = model.bijection.map(p)
                i += 1

        # Compute the sample covariance.
        cov = np.cov(samples, rowvar=False)

        # Stan uses a regularized estimator for the covariance matrix to
        # be less sensitive to numerical issues for large parameter spaces.
        if regularize_cov:
            N = len(samples)
            cov = cov * N / (N + regular_window)
            diags = np.diag_indices_from(cov)
            cov[diags] += ((regular_variance * regular_window)
                           / (N + regular_window))
    else:
        # Otherwise, just copy `init_cov`.
        cov = np.array(init_cov)

    # Use the sample covariance as the inverse metric.
    potential = QuadPotentialFull(cov)
    return pm.NUTS(potential=potential, **kwargs)
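# Hedged usage sketch for the `init_cov` variant above (not part of the original code):
# initialize NUTS directly from a precomputed covariance estimate whose dimensions match
# model.ndim. The `model` and `cov_estimate` names are assumptions for illustration.
def demo_step_from_init_cov(model, cov_estimate, draws=1000, tune=1000):
    with model:
        step = get_step_for_trace(init_cov=cov_estimate)
        return pm.sample(draws=draws, tune=tune, step=step)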
def test_explicit_sample():
    with pm.Model() as model:
        a = pm.Normal('a', shape=1)
        pm.HalfNormal('b')
        step1 = pm.NUTS([a])
        step2 = pm.Metropolis([model.b_log__])

    step = pm.CompoundStep([step1, step2])

    proc = ps.ProcessAdapter(10, 10, step, chain=3, seed=1,
                             start={'a': 1., 'b_log__': 2.})
    proc.start()
    while True:
        proc.write_next()
        out = ps.ProcessAdapter.recv_draw([proc])
        view = proc.shared_point_view
        for name in view:
            view[name].copy()
        if out[1]:
            break
    proc.join()
def sample_posterior(x, y, n_samples=1000, random_seed=0):
    '''
    A general linear model.

    Parameters
    ----------
    x: A numpy array
    y: A numpy array
    n_samples: The number of samples to draw in pymc3.sample().
               Defaults to 1000.
    random_seed: An int. Used in pymc3.sample().
                 Defaults to 0.

    Returns
    -------
    A pymc3.MultiTrace object with access to sampling values.
    '''
    df = pd.DataFrame({'x': x, 'y': y})

    # Create a Bayesian linear model
    with pm.Model() as model_glm:
        family = pm.glm.families.Normal()
        pm.glm.glm('y ~ x', df, family=family)

        # Estimate model parameters
        start = pm.find_MAP()

        # Generate posterior samples
        step = pm.NUTS()
        trace = pm.sample(n_samples, step=step, start=start,
                          progressbar=True, random_seed=random_seed)

    return trace
def learn_bayesian_linear_model(self, encoded_plans, prior_weights, number_of_dimensions,
                                sd=1, sampling_count=2000, num_chains=3,
                                bias_preference=0, uninformative_prior_var=None):
    # TODO NOTE: even without the prior, weights are not currently used, and it works just as well.
    # encoded_plans is a list of [<encoding>, <rating>] pairs.
    input_dataset = np.array([x[0] for x in encoded_plans], dtype=float)
    output_dataset = np.array([x[1] for x in encoded_plans], dtype=float)
    bias_preference = tt.constant(bias_preference)  # TODO: make the bias a learnable parameter

    with pm.Model() as linear_model:
        # Intercept
        # alpha = pm.Normal('alpha', mu=0.0, sd=sd)
        alpha = pm.Deterministic('alpha', bias_preference)
        # TODO: add support for a much larger covariance on unknown features? SD = 1.0 is enough.

        # Slope
        # prior_weights = np.random.rand(number_of_dimensions)
        betas = pm.MvNormal('betas', mu=prior_weights, cov=uninformative_prior_var,
                            shape=(number_of_dimensions,))

        # Standard deviation
        sigma = pm.HalfNormal('sigma', sd=sd)
        # sigma = sd  # unfair knowledge

        # Estimate of mean
        mean = alpha + tt.dot(input_dataset, betas)

        # Observed values
        Y_obs = pm.Normal('Y_obs', mu=mean, sd=sigma, observed=output_dataset)

        # Sampler
        step = pm.NUTS()
        # step = pm.Metropolis()
        # step = pm.HamiltonianMC()

        # Posterior distribution
        linear_params_trace = pm.sample(sampling_count, step, chains=num_chains, cores=num_chains)
        # TODO NOTE: do not add tuning if deterministic; it fails spectacularly, not its intended use.
        # TODO NOTE: may consider making mu and cov parameters sampled from distributions too:
        # mu = pm.MvNormal('mu', mu=prior_weights, cov=cov, shape=(number_of_dimensions,))
    # end with

    # TODO: look into the alpha values that were sampled, because they didn't appear in the plot.
    self.full_param_trace = linear_params_trace
    # We only take the last 2000 samples, assuming sufficient mixing and good values by then.
    # TODO: this is only from one chain; trace.get_values() lets you get values from each chain and mix them.
    self.linear_params_values = linear_params_trace[-2000:]
    self.set_normal_distr_params(num_chains=num_chains, num_last_samples=None)
def test_abort():
    with pm.Model() as model:
        a = pm.Normal("a", shape=1)
        pm.HalfNormal("b")
        step1 = pm.NUTS([a])
        step2 = pm.Metropolis([model.b_log__])

    step = pm.CompoundStep([step1, step2])

    ctx = multiprocessing.get_context()
    proc = ps.ProcessAdapter(
        10,
        10,
        step,
        chain=3,
        seed=1,
        mp_ctx=ctx,
        start={"a": 1.0, "b_log__": 2.0},
        step_method_pickled=None,
        pickle_backend="pickle",
    )
    proc.start()
    proc.write_next()
    proc.abort()
    proc.join()
def sample1000():
    print('----------010')
    # Estimate the mean parameter of a simple Gaussian distribution
    with pm.Model() as model_10:
        mu = pm.Normal('mu', mu=0., sd=0.1)
        print('===001')
        print(mu)
        x = pm.Normal('x', mu=mu, sd=1., observed=x_sample_1000)
        print('===002')
        print(x)

    with model_10:
        # Use the MAP estimate as the starting point for sampling
        start = pm.find_MAP()
        print('===003')
        print(start)

        # NUTS: the No-U-Turn Sampler
        step = pm.NUTS()
        print('===004')
        print(step)

        # 100 iterations
        trace = pm.sample(100, step, start)
        print('===005')
        print(trace)
        print('===006')
        print(pm.traceplot(trace))
        print(pm.summary(trace).round(2))
        plt.savefig('result_1000')
def test_l2hmc_matches_leapfrog():
    with pm.Model():
        x = pm.Normal('x', 0, 1)
        y = pm.Normal('y', x, 1)
        step = pm.NUTS()

    q_func, p_func = default_aux_functions()
    l2hmc_integrator = L2HMCLeapfrogIntegrator(step.potential, step._logp_dlogp_func,
                                               q_func=q_func, p_func=p_func)
    hmc_integrator = pm.step_methods.hmc.integration.CpuLeapfrogIntegrator(
        step.potential, step._logp_dlogp_func)

    points = []
    p0 = step.potential.random()
    for integrator in (l2hmc_integrator, hmc_integrator):
        point = {'x': np.array([1.]), 'y': np.array([1.])}
        integrator._logp_dlogp_func.set_extra_values(point)
        q0 = integrator._logp_dlogp_func.dict_to_array(point)
        state = integrator.compute_state(q0, p0)
        points.append(integrator._step(0.1, state))

    l2hmc_state, hmc_state = points
    npt.assert_array_almost_equal(l2hmc_state.q, hmc_state.q)
    npt.assert_array_almost_equal(l2hmc_state.p, hmc_state.p)
    npt.assert_array_almost_equal(l2hmc_state.v, hmc_state.v)
    npt.assert_array_almost_equal(l2hmc_state.q_grad, hmc_state.q_grad)
    assert l2hmc_state.energy == hmc_state.energy
def _estimate_model(self):
    # If the user provides a model, use that. Otherwise, create a default Bayesian model.
    self.x_shared = theano.shared(self.x_train.values)

    if self.model_provided is not None:
        self.model = self.model_provided
    else:
        self.model = pymc3.Model()
        with self.model:
            # Priors for unknown model parameters
            alpha = pymc3.Normal('alpha', mu=0, sd=1)
            beta = pymc3.Normal('beta', mu=0, sd=1, shape=self.number_feat)
            sigma = pymc3.HalfNormal('sigma', sd=1)

            # Expected value of outcome
            # mu = alpha + x_shared[:,0]*beta[0] + x_shared[:,1]*beta[1] + x_shared[:,2]*beta[2]
            #      + x_shared[:,3]*beta[3] + x_shared[:,4]*beta[4] + x_shared[:,5]*beta[5]
            #      + x_shared[:,6]*beta[6] + x_shared[:,7]*beta[7]
            mu = alpha + theano.tensor.dot(self.x_shared, beta)

            # Likelihood (sampling distribution) of observations
            Y_obs = pymc3.Normal('Y_obs', mu=mu, sd=sigma, observed=self.y_train.values)

    with self.model:
        self.start = pymc3.find_MAP(fmin=scipy.optimize.fmin_powell)
        step = pymc3.NUTS(scaling=self.start)
        self.trace = pymc3.sample(self.niter, step)

    return self.model
def linear_posterior(X, y, n_samples=1000, random_seed=0):
    """
    A general linear model.

    Parameters
    ----------
    X: A numpy array
    y: A numpy array
    n_samples: The number of samples to draw in pymc3.sample().
               Defaults to 1000.
    random_seed: An int. Used in pymc3.sample().
                 Defaults to 0.

    Returns
    -------
    A pymc3.MultiTrace object with access to sampling values.
    """
    df = {'x': X, 'y': y}

    with pm.Model() as model_glm:
        pm.glm.glm('y ~ x', df, family=pm.glm.families.StudentT())
        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        trace = pm.sample(n_samples, start=start, step=step, model=model_glm,
                          random_seed=random_seed, progressbar=True)

    return trace
def get_trace(X, y, n_samples=1000, random_seed=0):
    '''
    A simple Bayesian linear regression model with normal priors.

    Parameters
    ----------
    X: A numpy array
    y: A numpy array
    n_samples: The number of samples to draw in pymc3.sample().
               Defaults to 1000.
    random_seed: An int. Used in pymc3.sample().
                 Defaults to 0.

    Returns
    -------
    A pymc3.MultiTrace object with access to sampling values.
    '''
    # Create a linear model with alpha, beta, and sigma defined as above
    with pm.Model() as linear_model:
        alpha = pm.Normal('alpha', mu=0, sd=1.0)
        beta = pm.Normal('beta', mu=10, sd=1.0)
        sigma = pm.Uniform('sigma', lower=0, upper=100)

        y_exp = alpha + beta * X
        likelihood = pm.Normal('y', mu=y_exp, sd=sigma, observed=y)

        # Estimate model parameters
        start = pm.find_MAP()

        # Generate posterior samples
        step = pm.NUTS(scaling=start)
        trace = pm.sample(n_samples, step=step, start=start,
                          progressbar=True, random_seed=random_seed)

    return trace
def get_trace(X, y, n_samples=1000, random_seed=0): """ A simple Bayesian linear regression model with normal priors. Paramters --------- X: A numpy array y: A numpy array n_samples: The number of samples to draw in pymc3.sample(). Defaults to 1000. random_seed: An int. Used in pymc3.sample(). Defaults to 0. Returns ------- A pymc3.MultiTrace object with access to sampling values. """ with pm.Model() as linear_model: alpha = pm.Normal('alpha', mu=0.0, sd=1.0) beta = pm.Normal('beta', mu=10.0, sd=1.0) sigma = pm.Uniform('sigma', lower=0, upper=100) mu = alpha + beta * X y = pm.Normal('y', mu=mu, sd=sigma, observed=y) start = pm.find_MAP() step = pm.NUTS(scaling=start) trace = pm.sample(n_samples, start=start, step=step, model=linear_model, random_seed=random_seed, progressbar=True) return trace
def fit(x, y, meanVec, stdVec, errors):
    aMu, bMu, cMu = meanVec
    aStd, bStd, cStd = stdVec

    model = pm.Model()

    if False:
        df = pd.DataFrame(np.transpose([x, y, errors]), columns=['x', 'y', 'error'])
        print(df)

    with model:
        # Priors for unknown model parameters
        a = pm.Normal('a', mu=aMu, sd=aStd)
        b = pm.Normal('b', mu=bMu, sd=bStd)
        c = pm.Normal('c', mu=cMu, sd=cStd)

        # Expected value of outcome
        mu = Model(x, a, b, c)

        # Likelihood (sampling distribution) of observations
        Like = pm.Normal('Like', mu=mu, sd=errors, observed=y)

        # Do sampling
        trace = pm.sample(1000, progressbar=False, init='ADVI', step=pm.NUTS(), njobs=1)

        # Give summary
        summary = pm.df_summary(trace)

    return summary
def fit(x, y, errors, signA):
    model = pm.Model()

    if False:
        df = pd.DataFrame(np.transpose([x, y, errors]), columns=['x', 'y', 'error'])
        print(df)

    with model:
        # Priors for unknown model parameters
        LowerA = 0.
        UpperA = 0.1
        if signA == -1.0:
            UpperA = 0.
            LowerA = -0.1
        a = pm.Uniform('a', lower=LowerA, upper=UpperA)
        b = pm.Uniform('b', lower=0., upper=1.0)
        c = pm.Uniform('c', lower=0., upper=1.0)

        # Expected value of outcome
        mu = Model(x, a, b, c)

        # Likelihood (sampling distribution) of observations
        Like = pm.Normal('Like', mu=mu, sd=errors, observed=y)

        # Do sampling
        trace = pm.sample(1000, progressbar=False, init='ADVI', step=pm.NUTS(), njobs=1)

        # Give summary
        summary = pm.df_summary(trace)

    return summary
def glm_mcmc_inference(df, formula, family, I):
    """
    Calculates the Markov Chain Monte Carlo trace of a Generalised Linear
    Model Bayesian linear regression model on supplied data.

    df: DataFrame containing the data
    formula: Regression equation in terms of columns of DataFrame df
    family: Type of linear model, given as a string ('normal', 'logistic',
            or 'poisson') and mapped to a pm.glm.families object below.
    I: Number of iterations for MCMC
    """
    if family.lower() == 'normal':
        family_object = pm.glm.families.Normal()
    elif family.lower() == 'logistic':
        family_object = pm.glm.families.Binomial()
    elif family.lower() == 'poisson':
        family_object = pm.glm.families.Poisson()
    else:
        print("Family {} is not a supported family".format(family))
        raise NameError("Invalid family")

    # Use PyMC3 to construct a model context
    basic_model = pm.Model()
    with basic_model:
        # Create the GLM using the Patsy model syntax
        pm.glm.GLM.from_formula(str(formula), df.dropna(), family=family_object)
        step = pm.NUTS()
        trace = pm.sample(I, step, progressbar=False, tune=50)

    return trace
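# Hypothetical usage sketch for `glm_mcmc_inference` above; the synthetic data, column
# names, and iteration count are assumptions, not part of the original code.
def demo_glm_mcmc_inference():
    rng = np.random.RandomState(42)
    demo_df = pd.DataFrame({"x": rng.normal(size=200)})
    demo_df["y"] = 1.5 * demo_df["x"] + rng.normal(scale=0.5, size=200)
    # Fit a normal-family GLM and return the MCMC trace.
    return glm_mcmc_inference(demo_df, formula="y ~ x", family="normal", I=2000)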
def samplePosterior(model, N, fit_intercept=False, fit_slope=True):
    """
    Monte Carlo sampling of the posterior, followed by sampling from the
    posterior predictive distribution.
    """
    RANDOM_SEED = 58
    with model:
        step = pm.NUTS()
        trace = pm.sample(N, step)

        if fit_intercept and not fit_slope:
            var_names = ["Intercept", "Y_obs"]
            summary_names = ["Intercept"]
        elif not fit_intercept and fit_slope:
            var_names = ["slope", "Y_obs"]
            summary_names = ["slope"]
        else:
            var_names = ["Intercept", "slope", "Y_obs"]
            summary_names = ["Intercept", "slope"]

        ppc = pm.sample_posterior_predictive(trace, var_names=var_names,
                                             random_seed=RANDOM_SEED)

    summary = az.summary(trace, var_names=summary_names, round_to=3)
    print(summary)

    params = {}
    for name in summary_names:
        params[name] = {}
        params[name]['hpd_3%'] = summary['hpd_3%'][name]
        params[name]['hpd_mean'] = summary['mean'][name]
        params[name]['hpd_97%'] = summary['hpd_97%'][name]

    return params, ppc['Y_obs']
def model_returns_normal(data, samples=500):
    """
    Run a Bayesian model assuming returns are normally distributed.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    samples : int (optional)
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc3.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """
    with pm.Model() as model:
        mu = pm.Normal('mean returns', mu=0, sd=.01, testval=data.mean())
        sigma = pm.HalfCauchy('volatility', beta=1, testval=data.std())
        returns = pm.Normal('returns', mu=mu, sd=sigma, observed=data)
        pm.Deterministic('annual volatility',
                         returns.distribution.variance**.5 * np.sqrt(252))
        pm.Deterministic(
            'sharpe',
            returns.distribution.mean / returns.distribution.variance**.5 * np.sqrt(252))

        start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        step = pm.NUTS(scaling=start)
        trace = pm.sample(samples, step, start=start)
    return model, trace
def posterior_distribution(X, y, N):
    reg = LinearRegression(fit_intercept=True).fit(X.values.reshape(-1, 1), y.values)

    # Set up the model
    with pm.Model() as model:
        # Intercept
        intercept = pm.Normal('Intercept', mu=.5, sd=1)  # sd = 25

        # Slope
        slope = pm.Normal('slope', mu=float(2 * reg.coef_), sd=1)  # sd = 1

        # Standard deviation
        sigma = pm.HalfNormal('sigma', sd=1)  # sd = 25

        # Estimate of the mean
        mean = intercept + (slope * X.values)

        # Observed values
        Y_obs = pm.Normal('Y_obs', mu=mean, sd=sigma, observed=y.values)

        # Sampler
        step = pm.NUTS()

        # Posterior distribution
        return pm.sample(N, step)
def bayes_linregress(y, x=None, nsamples=1000, showtrace=False):
    """
    Linear regression. Regress y onto x (or linspace(0, 1, len(y)) if None).
    """
    if x is None:
        x = np.linspace(0, 1, len(y))

    with pm.Model() as model:
        a = pm.Normal('a', mu=0, sd=20)
        b = pm.Normal('b', mu=0, sd=20)
        sigma = pm.Uniform('sigma', lower=0, upper=20)

        # y estimate
        y_est = a * x + b
        likelihood = pm.Normal('y', mu=y_est, sd=sigma, observed=y)

        # Inference and MCMC sampling
        start = pm.find_MAP()  # Maximum a posteriori inference
        step = pm.NUTS()       # Hamiltonian MCMC with the No-U-Turn Sampler
        trace = pm.sample(nsamples, step, start, random_seed=123, progressbar=True)

    # Show/print the results
    if showtrace:
        pm.traceplot(trace)
        plt.show()
    print('\n')

    # Report the findings
    for obj in trace.varnames:
        print('%s: %.3f +/- %.3f' % (obj, np.mean(trace[obj]), np.std(trace[obj])))

    return trace
def train_BLM_model(X, y, random_seed=None):
    if random_seed is not None:
        np.random.seed(random_seed)

    with pm.Model() as model:
        # Intercept
        alpha = pm.Normal('alpha', mu=0, sd=sd_for_priors)

        mu = np.zeros(number_of_dimensions)
        cov = np.diag(np.full(number_of_dimensions, sd_for_priors))

        # Slope
        betas = pm.MvNormal('betas', mu=mu, cov=cov, shape=(number_of_dimensions,))

        # Standard deviation
        sigma = pm.HalfNormal('sigma', sd=sd_for_priors)

        # Estimate of mean
        mean = alpha + tt.dot(X, betas)

        # Observed values
        Y_obs = pm.Normal('Y_obs', mu=mean, sd=sigma, observed=y)

        # Sampler
        step = pm.NUTS()

        # Posterior distribution
        linear_trace = pm.sample(no_of_samples, step, chains=chains)

    return linear_trace
def test_explicit_sample(): with pm.Model() as model: a = pm.Normal("a", shape=1) pm.HalfNormal("b") step1 = pm.NUTS([a]) step2 = pm.Metropolis([model.b_log__]) step = pm.CompoundStep([step1, step2]) ctx = multiprocessing.get_context() proc = ps.ProcessAdapter( 10, 10, step, chain=3, seed=1, mp_ctx=ctx, start={"a": 1.0, "b_log__": 2.0}, step_method_pickled=None, pickle_backend="pickle", ) proc.start() while True: proc.write_next() out = ps.ProcessAdapter.recv_draw([proc]) view = proc.shared_point_view for name in view: view[name].copy() if out[1]: break proc.join()
def halo_posteriors(n_halos_in_sky, galaxy_data, samples=5e5, burn_in=500):
    # Set the size of the halo's mass
    with pm.Model() as model:
        mass_large = pm.Uniform("mass_large", 40, 180)
        mass_small_1 = 20
        mass_small_2 = 20
        masses = np.array([mass_large, mass_small_1, mass_small_2], dtype=object)

        # Set the initial prior positions of the halos; it's a 2-d Uniform distribution.
        halo_positions = pm.Uniform("halo_positions", 0, 4200,
                                    shape=(n_halos_in_sky, 2))  # notice this size

        fdist_constants = np.array([240, 70, 70])

        _sum = 0
        for i in range(n_halos_in_sky):
            _sum += masses[i] / f_distance(galaxy_data[:, :2], halo_positions[i, :], fdist_constants[i]) * \
                tangential_distance(galaxy_data[:, :2], halo_positions[i, :])

        mean = pm.Deterministic("mean", _sum)
        ellpty = pm.Normal("ellipcity", mu=mean, tau=1. / 0.05, observed=galaxy_data[:, 2:])

        mu, sds, elbo = pm.variational.advi(n=50000)
        step = pm.NUTS(scaling=model.dict_to_array(sds), is_cov=True)
        trace = pm.sample(samples, step=step, start=mu)

    burned_trace = trace[burn_in:]
    return burned_trace["halo_positions"]
def test_nuts_tuning():
    model = pymc3.Model()
    with model:
        mu = pymc3.Normal("mu", mu=0, sd=1)
        step = pymc3.NUTS()
        trace = pymc3.sample(10, step=step, tune=5, progressbar=False)

    assert not step.tune
def fit(x, y, lowerVec, upperVec):
    lA, lB, lC = lowerVec
    uA, uB, uC = upperVec

    model = pm.Model()
    with model:
        # Priors for unknown model parameters
        a = pm.Uniform('a', lower=lA, upper=uA)
        b = pm.Uniform('b', lower=lB, upper=uB)
        c = pm.Uniform('c', lower=lC, upper=uC)

        # Expected value of outcome
        mu = Model(x, a, b, c)

        # Likelihood (sampling distribution) of observations
        Like = pm.Normal('Like', mu=mu, sd=0.1 * np.ones_like(y), observed=y)

        # Do sampling
        trace = pm.sample(1000, progressbar=False, init='ADVI', step=pm.NUTS(), njobs=1)

        # Give summary
        summary = pm.df_summary(trace)

    return summary
def simple_model_error_dist(self, ymincentroid):
    import pymc3 as pm
    # import seaborn as sns
    # f, ax = pyplot.subplots(figsize=(6, 6))
    # sns.distplot(ymincentroid)
    # sns.kdeplot(ymincentroid, ax=ax, shade=True, color="g")
    # sns.rugplot(ymincentroid, color="black", ax=ax)
    # ax.set(xlabel="Peak Minima Magnitude", ylabel="Density")
    # pyplot.show()

    with pm.Model() as model:
        # mu = pm.Uniform('mu', lower=-1, upper=1)
        lower = ymincentroid.min()
        upper = ymincentroid.max()
        sd = pm.Uniform('sd', lower=lower, upper=upper)
        y = pm.HalfNormal('y', sd=sd, observed=ymincentroid)

        start = pm.find_MAP()
        step = pm.NUTS()  # Hamiltonian MCMC with the No-U-Turn Sampler
        trace = pm.sample(1000, step, start, random_seed=123, progressbar=True, tune=1000)

    print(pm.summary(trace))
    return pm.summary(trace)['mean'].values[0]
def fitFlat(x, y):
    model = pm.Model()
    with model:
        # Priors for unknown model parameters
        a = pm.Flat('a')
        b = pm.Flat('b')
        c = pm.Flat('c')

        # Expected value of outcome
        mu = Model(x, a, b, c)

        # Likelihood (sampling distribution) of observations
        Like = pm.Normal('Like', mu=mu, sd=0.01 * np.ones_like(y), observed=y)

        # Do sampling
        trace = pm.sample(1000, progressbar=False, init='ADVI', step=pm.NUTS(), njobs=1)

        # Give summary
        summary = pm.df_summary(trace)

    return summary
def cgpt(y, mdl, idx):
    timesteps = range(len(y))
    exp_scale = y.mean()

    with mdl:
        # Exponential priors
        lambda_1 = lambdas[idx]      # pm.Exponential('lambda_1', lam=1 / exp_scale)
        lambda_2 = lambdas[idx + 1]  # pm.Exponential('lambda_2', lam=1 / exp_scale)
        lambda_diff = pm.Deterministic('lambda_diff', lambda_2 - lambda_1)

        # Change point
        changepoint = pm.DiscreteUniform('changepoint', lower=0, upper=max(timesteps),
                                         testval=len(y) // 2)

        # First distribution is strictly before the other
        lamda_selected = tt.switch(timesteps < changepoint, lambda_1, lambda_2)

        # Observations come from Poisson distributions with one of the priors
        obs = pm.Poisson('obs', mu=lamda_selected, observed=y)

        # Sample
        samples = 1000
        step_method = pm.NUTS(target_accept=0.90, max_treedepth=15)
        cpt_trace = pm.sample(samples, chains=None, step=step_method, tune=1000)
        cpt_smry = pm.summary(cpt_trace)
        pm.traceplot(cpt_trace)
        spp = pm.sample_posterior_predictive(cpt_trace, samples=samples * 2,
                                             progressbar=False, var_names=['changepoint'])

    return np.round(spp['changepoint'].mean(), 0)
def trial1():
    radon = pd.read_csv('data/radon.csv')[['county', 'floor', 'log_radon']]
    # print(radon.head())
    county = pd.Categorical(radon['county']).codes
    # print(county)

    niter = 1000
    with pm.Model() as hm:
        # County hyperpriors
        mu_a = pm.Normal('mu_a', mu=0, sd=10)
        sigma_a = pm.HalfCauchy('sigma_a', beta=1)
        mu_b = pm.Normal('mu_b', mu=0, sd=10)
        sigma_b = pm.HalfCauchy('sigma_b', beta=1)

        # County slopes and intercepts
        a = pm.Normal('slope', mu=mu_a, sd=sigma_a, shape=len(set(county)))
        b = pm.Normal('intercept', mu=mu_b, sd=sigma_b, shape=len(set(county)))

        # Household errors
        sigma = pm.Gamma("sigma", alpha=10, beta=1)

        # Model prediction of radon level
        mu = a[county] + b[county] * radon.floor.values

        # Data likelihood
        y = pm.Normal('y', mu=mu, sd=sigma, observed=radon.log_radon)

        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        hm_trace = pm.sample(niter, step, start=start)

    plt.figure(figsize=(8, 60))
    pm.forestplot(hm_trace, varnames=['slope', 'intercept'])