def binom_model(df):
    # todo: make sure this works ok
    with pm.Model() as disaster_model:
        switchpoint = pm.DiscreteUniform('switchpoint', lower=df['t'].min(), upper=df['t'].max())

        # Priors for pre- and post-switch probability of "yes"...is there a better prior?
        early_rate = pm.Beta('early_rate', 1, 1)
        late_rate = pm.Beta('late_rate', 1, 1)

        # Allocate appropriate probabilities to periods before and after the current
        # switchpoint location
        p = pm.math.switch(switchpoint >= df['t'].values, early_rate, late_rate)
        p = pm.Deterministic('p', p)

        successes = pm.Binomial('successes', n=df['n'].values, p=p, observed=df['category'].values)

        trace = pm.sample(10000)
        pm.traceplot(trace)
        plt.show()

def uniform_model(df):
    """
    The switchpoint is modeled using a Discrete Uniform distribution.
    The observed data are modeled using the Normal distribution (likelihood).
    The priors are each assumed to be exponentially distributed.
    """
    alpha = 1.0 / df['score'].mean()
    beta = 1.0 / df['score'].std()
    t = df['t_encoded'].values

    with pm.Model() as model:
        switchpoint = pm.DiscreteUniform("switchpoint", lower=df['t_encoded'].min(),
                                         upper=df['t_encoded'].max())
        mu_1 = pm.Exponential("mu_1", alpha)
        mu_2 = pm.Exponential("mu_2", alpha)
        sd_1 = pm.Exponential("sd_1", beta)
        sd_2 = pm.Exponential("sd_2", beta)

        mu = pm.math.switch(switchpoint >= t, mu_1, mu_2)
        sd = pm.math.switch(switchpoint >= t, sd_1, sd_2)

        X = pm.Normal('x', mu=mu, sd=sd, observed=df['score'].values)

        trace = pm.sample(20000)
        pm.traceplot(trace[1000:], varnames=['switchpoint', 'mu_1', 'mu_2', 'sd_1', 'sd_2'])
        plt.show()

def build_disaster_model(masked=False):
    # fmt: off
    disasters_data = np.array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
                               3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
                               2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0,
                               1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
                               0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
                               3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
                               0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
    # fmt: on
    if masked:
        disasters_data[[23, 68]] = -1
        disasters_data = np.ma.masked_values(disasters_data, value=-1)
    years = len(disasters_data)

    with pm.Model() as model:
        # Prior for distribution of switchpoint location
        switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=years)
        # Priors for pre- and post-switch mean number of disasters
        early_mean = pm.Exponential("early_mean", lam=1.0)
        late_mean = pm.Exponential("late_mean", lam=1.0)

        # Allocate appropriate Poisson rates to years before and after current
        # switchpoint location
        idx = np.arange(years)
        rate = tt.switch(switchpoint >= idx, early_mean, late_mean)

        # Data likelihood
        pm.Poisson("disasters", rate, observed=disasters_data)

    return model

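# A minimal usage sketch for build_disaster_model (not from the source; assumes
# PyMC3 defaults). With masked=True, PyMC3 treats the two masked counts as
# latent variables and imputes them during sampling.
disaster_model = build_disaster_model(masked=True)
with disaster_model:
    disaster_trace = pm.sample(1000, tune=500)
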
def coal_mining_desaster(modelname='pymc3_coal_mining_disaster_model'):
    """TODO: Documentation here. Why are two models returned, and what is each for?"""
    # data
    disasters = np.array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
                          3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
                          2, 2, 3, 4, 2, 1, 3, 3, 2, 1, 1, 1, 1, 3, 0, 0,
                          1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
                          0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
                          3, 3, 1, 2, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
                          0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
    years = np.arange(1851, 1962)
    data = pd.DataFrame({'years': years, 'disasters': disasters})
    years = theano.shared(years)

    with pm.Model() as disaster_model:
        switchpoint = pm.DiscreteUniform('switchpoint', lower=years.min(),
                                         upper=years.max(), testval=1900)

        # Priors for the pre- and post-switch rates (number of disasters per year)
        early_rate = pm.Exponential('early_rate', 1.0)
        late_rate = pm.Exponential('late_rate', 1.0)

        # Allocate appropriate Poisson rates to years before and after the current switchpoint
        rate = pm.math.switch(switchpoint >= years, early_rate, late_rate)

        disasters = pm.Poisson('disasters', rate, observed=data['disasters'])
        # years = pm.Normal('years', mu=data['years'], sd=0.1, observed=data['years'])

    model = ProbabilisticPymc3Model(modelname, disaster_model, shared_vars={'years': years})
    model_fitted = model.copy(name=modelname + '_fitted').fit(data)
    return model, model_fitted

def main(imagePath, hashmethod=hashmethod_example, num_rect=1):
    image = Image.open(imagePath)
    hashobj = hashmethod(image)

    @as_op(itypes=[tt.lvector, tt.lvector, tt.lvector], otypes=[tt.lvector])
    def evaluate(xpositions, ypositions, colors):
        im = Image.new('RGB', (image.width, image.height))
        draw = ImageDraw.Draw(im)
        for i in range(num_rect):
            x1, x2 = xpositions[i*2:i*2+2]
            y1, y2 = ypositions[i*2:i*2+2]
            r, g, b = colors[i*3:i*3+3]
            draw.rectangle([(x1, y1), (x2, y2)], fill=(r, g, b))
        del draw
        return hashmethod(im)

    with pm.Model() as model:
        # Priors
        xpositions = pm.DiscreteUniform('xpositions', lower=0, upper=image.width-1, shape=2*num_rect)
        ypositions = pm.DiscreteUniform('ypositions', lower=0, upper=image.height-1, shape=2*num_rect)
        colors = pm.DiscreteUniform('colors', lower=0, upper=255, shape=3*num_rect)

        hashval = evaluate(xpositions, ypositions, colors)

        # Data likelihood
        hashobj_obs = pm.Poisson('objective', hashval, observed=hashobj)

        step = pm.Metropolis([xpositions, ypositions])
        step2 = pm.Metropolis([colors])

        # Initial values for stochastic nodes
        start = {
            'xpositions': [random.randrange(image.width) for _ in range(2*num_rect)],
            'ypositions': [random.randrange(image.height) for _ in range(2*num_rect)],
            'colors': [random.randrange(256) for _ in range(3*num_rect)]
        }

        tr = pm.sample(200, tune=100, start=start, step=[step, step2], cores=2)
        pm.traceplot(tr)
        import matplotlib.pyplot as plt
        plt.show()

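# hashmethod_example is referenced as the default above but not defined in this
# snippet. A hypothetical stand-in (an assumption, not the author's function):
# downsample the image and return the flattened per-channel values as an int64
# vector, which matches otypes=[tt.lvector] and the Poisson observation.
def hashmethod_example(image):
    arr = np.asarray(image.convert('RGB').resize((8, 8)), dtype=np.int64)
    return arr.reshape(-1)  # length 8*8*3 = 192 int64 vector
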
def laser_late_trials(data, num_emissions):
    # Make the pymc3 model
    with pm.Model() as model:
        # Dirichlet prior on the emission/spiking probabilities - 4 states
        p = pm.Dirichlet('p', np.ones(num_emissions), shape=(4, num_emissions))

        # Discrete Uniform switch times
        # Switch from detection to identity firing
        t1 = pm.DiscreteUniform('t1', lower=20, upper=60)
        # Switch from identity to palatability firing
        t2 = pm.DiscreteUniform('t2', lower=t1 + 20, upper=120)
        # Switch from palatability firing to end
        t3 = pm.DiscreteUniform('t3', lower=t2 + 30, upper=150)

        # Add potentials to keep the switch times from coming too close to each other
        #t_pot1 = pm.Potential('t_pot1', tt.switch(t2 - t1 >= 20, 0, -np.inf))
        #t_pot2 = pm.Potential('t_pot2', tt.switch(t3 - t2 >= 20, 0, -np.inf))
        #t_pot3 = pm.Potential('t_pot3', tt.switch(t3 - t1 >= 40, 0, -np.inf))

        # Get the actual state numbers based on the switch times
        states1 = tt.switch(t1 >= np.arange(150), 0, 1)
        states2 = tt.switch(t2 >= np.arange(150), states1, 2)
        states = tt.switch(t3 >= np.arange(150), states2, 3)

        # Categorical observations
        obs = pm.Categorical('obs', p=p[states], observed=np.append(data[:140], data[190:]))

    # Inference button :D
    with model:
        tr = pm.sample(300000, init=None, step=pm.Metropolis(), njobs=2,
                       start={'t1': 25, 't2': 75, 't3': 125},
                       progressbar=False)

    # Return the inference!
    return model, tr[250000:]

def find_changepoint(self):
    niter_vec = [5000, 2000, 2000, 3000]
    niter = niter_vec[self.case]
    data = self.data

    # initialize defaultdict for change point priors
    tau = defaultdict(list)
    # initialize defaultdict for uniform priors
    u = defaultdict(list)
    # time array
    t = np.arange(0, self.N)

    with pm.Model() as model:  # context management
        # define uniform priors for mean values/standard deviation,
        # depending on the type of problem
        for i in range(self.ncpt + 1):
            if not self.type == "1-cpt-var" and not self.type == "normal-var":
                varname = "mu" + str(i + 1)
                u[i] = pm.Uniform(varname, 650, 1200)
            else:
                varname = "sigma" + str(i + 1)
                u[i] = pm.Uniform(varname, 5., 60.)

        # define switch function
        for i in range(self.ncpt):
            varname = "tau" + str(i + 1)
            if i == 0:
                tmin = t.min()
                switch_function = u[0]
            else:
                tmin = tau[i - 1]
            tau[i] = pm.DiscreteUniform(varname, tmin, t.max())
            switch_function = T.switch(tau[i] >= t, switch_function, u[i + 1])

        # we are finally in a position to define the mu and sigma random variables
        if not self.type == "1-cpt-var" and not self.type == "normal-var":
            mu = switch_function
            sigma = pm.Uniform("sigma", 1, 60)
        else:
            mu = pm.Uniform("mu", 600, 1500)
            sigma = switch_function

        # define log-likelihood function
        logp = - T.log(sigma * T.sqrt(2.0 * np.pi)) \
               - T.sqr(data - mu) / (2.0 * sigma * sigma)

        def logp_func(data):
            return logp.sum()

        # evaluate log-likelihood given the observed data
        L_obs = pm.DensityDist('L_obs', logp_func, observed=data)

        self.trace = pm.sample(niter, random_seed=123, progressbar=True)

def bayes_multiple_detector(t, s, n):
    scala = 1000
    with pm.Model() as abrupt_model:
        sigma = pm.Normal('sigma', mu=0.02 * scala, sigma=0.015 * scala)
        # sigma = pm.Uniform('sigma', 5, 15)
        mu = pm.Uniform("mu1", -1.5 * scala, -1.4 * scala)
        tau = pm.DiscreteUniform("tau1", t.min(), t.max())
        for i in np.arange(2, n + 2):
            _mu = pm.Uniform("mu" + str(i), -1.6 * scala, -1.4 * scala)
            mu = T.switch(tau >= t, mu, _mu)
            if i < (n + 1):
                tau = pm.DiscreteUniform("tau" + str(i), tau, t.max())
        s_obs = pm.Normal("s_obs", mu=mu, sigma=sigma, observed=s)

    with abrupt_model:
        pm.find_MAP()
        trace = pm.sample(20000, tune=5000)
        az.plot_trace(trace)
        az.to_netcdf(trace, getpath('tracepath') + 'bd9_4')
        plt.show()
        pm.summary(trace)
    return trace

def run_model(steps=10000):
    model = pymc.Model()
    with model:
        α = 1 / count_data.mean()
        λ1 = pymc.Exponential("λ1", α)
        λ2 = pymc.Exponential("λ2", α)
        τ = pymc.DiscreteUniform("τ", lower=0.0, upper=len(count_data))

        process_mean = mean(τ, λ1, λ2)
        observation = pymc.Poisson("observation", process_mean, observed=count_data)

        start = {"λ1": 10.0, "λ2": 30.0}
        step1 = pymc.Slice([λ1, λ2])
        step2 = pymc.Metropolis([τ])

        trace = pymc.sample(steps, tune=500, start=start, step=[step1, step2], cores=2)
    return pymc.trace_to_dataframe(trace)

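# The helper mean() used above is not defined in this snippet. A hypothetical
# stand-in (an assumption: the usual changepoint switch between the two rates;
# it would have to be defined before run_model is called):
def mean(τ, λ1, λ2):
    idx = np.arange(len(count_data))
    return pymc.math.switch(τ > idx, λ1, λ2)
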
def pymc3_dist(self, name, hypers):
    lower = self.lower
    upper = self.upper
    if len(hypers) == 1:
        hyper_dist = hypers[0][0]
        hyper_name = hypers[0][1]
        idx = hypers[0][2]
        if idx == 0:
            lower = hyper_dist.pymc3_dist(hyper_name, [])
        else:
            upper = hyper_dist.pymc3_dist(hyper_name, [])
    elif len(hypers) == 2:
        hyper_dist_1 = hypers[0][0]
        hyper_name_1 = hypers[0][1]
        hyper_dist_2 = hypers[1][0]
        hyper_name_2 = hypers[1][1]
        lower = hyper_dist_1.pymc3_dist(hyper_name_1, [])
        upper = hyper_dist_2.pymc3_dist(hyper_name_2, [])

    if self.num_elements == -1:
        return pm.DiscreteUniform(name, lower=lower, upper=upper)
    else:
        return pm.DiscreteUniform(name, lower=lower, upper=upper, shape=self.num_elements)

def bayes_single_detector(t, s):
    with pm.Model() as abrupt_model:
        steppoint = pm.DiscreteUniform("steppoint", lower=t[1], upper=t[-1], testval=50)
        early_mu = pm.Uniform("early_mu", -50, 50)
        late_mu = pm.Uniform("late_mu", -50, 50)
        mu = pm.math.switch(steppoint >= t, early_mu, late_mu)
        sigma = pm.Normal('sigma', mu=30, sigma=20)
        s_obs = pm.Normal("s_obs", mu=mu, sigma=sigma, observed=s)

    with abrupt_model:
        trace = pm.sample(1000)
        az.plot_trace(trace)
        plt.show()
    return trace

def infer(some_count_data):
    """
    Run Bayesian inference on any count data. Outputs distributions/trace
    plots of lambda_1, lambda_2 and tau, in addition to returning a list of
    expected values to be able to plot observed vs expected.

    source: https://nbviewer.jupyter.org/github/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/blob/master/Chapter1_Introduction/Ch1_Introduction_PyMC3.ipynb
    """
    n_count_data = len(some_count_data)
    with pm.Model() as model:
        alpha = 1.0 / some_count_data.mean()
        lambda_1 = pm.Exponential("lambda_1", alpha)
        lambda_2 = pm.Exponential("lambda_2", alpha)
        tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data - 1)

        idx = np.arange(n_count_data)
        lambda_ = pm.math.switch(tau > idx, lambda_1, lambda_2)

        observation = pm.Poisson("obs", lambda_, observed=some_count_data)

        step = pm.Metropolis()
        trace = pm.sample(10000, tune=5000, step=step)

    lambda_1_samples = trace['lambda_1']
    lambda_2_samples = trace['lambda_2']
    tau_samples = trace['tau']

    print(pm.gelman_rubin(trace))
    pm.traceplot(trace)

    N = tau_samples.shape[0]
    expected_violence = np.zeros(n_count_data)
    for day in range(0, n_count_data):
        # ix is a bool index of all tau samples for which the switchpoint
        # occurs after 'day', i.e. 'day' is still in the lambda_1 regime
        ix = day < tau_samples
        # Each posterior sample corresponds to a value for tau. For each day,
        # that value of tau indicates whether we're "before" (in the lambda_1
        # "regime") or "after" (in the lambda_2 "regime") the switchpoint.
        # By taking the posterior sample of lambda_1/lambda_2 accordingly, we
        # can average over all samples to get an expected value for lambda on
        # that day. As explained, the "count" random variable is Poisson
        # distributed, and therefore lambda (the Poisson parameter) is the
        # expected value of "count".
        expected_violence[day] = (lambda_1_samples[ix].sum()
                                  + lambda_2_samples[~ix].sum()) / N
    return expected_violence

def baysian_latency(count_data):
    import pymc3 as pm
    import theano.tensor as tt

    n_count_data = len(count_data)
    with pm.Model() as model:
        # Recall count_data is the variable that holds our txt counts
        alpha = 1.0 / count_data.mean()
        lambda_1 = pm.Exponential("lambda_1", alpha)
        lambda_2 = pm.Exponential("lambda_2", alpha)
        tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data - 1)

    with model:
        idx = np.arange(n_count_data)  # Index
        lambda_ = pm.math.switch(tau >= idx, lambda_1, lambda_2)
        observation = pm.Poisson("obs", lambda_, observed=count_data)
        step = pm.Metropolis()
        trace = pm.sample(10000, tune=5000, step=step)

    return trace

def make_switchpoint_model(counts: ndarray, prior_lambda: float):
    """
    A model that assumes counts are generated by 2 different Poisson processes:

    * counts up to switchpoint (not inclusive) ~ Poisson(early_rate)
    * counts from switchpoint on (inclusive) ~ Poisson(late_rate)

    Parameters
    ----------
    counts : 1-dimensional array of counts
    prior_lambda : rate parameter for the exponential prior;
        1 / prior_lambda is the mean of the exponential

    Returns
    -------
    pm.Model : the model instance

    Based on https://docs.pymc.io/notebooks/getting_started.html#Case-study-2:-Coal-mining-disasters
    """
    model = pm.Model()
    with model:
        idxs = np.arange(len(counts))
        lower_idx = idxs[1]
        upper_idx = idxs[-1]
        mid = (upper_idx - lower_idx) // 2
        switchpoint = pm.DiscreteUniform("switchpoint", lower=lower_idx, upper=upper_idx, testval=mid)
        early_rate = pm.Exponential("early_rate", prior_lambda)
        late_rate = pm.Exponential("late_rate", prior_lambda)
        rate = pm.math.switch(switchpoint > idxs, early_rate, late_rate)
        pm.Poisson("counted", rate, observed=counts)
    return model

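# A possible usage sketch for make_switchpoint_model (not from the source):
# fit a toy series with a known break and inspect the switchpoint posterior.
rng = np.random.RandomState(0)
toy_counts = np.concatenate([rng.poisson(2.0, 60), rng.poisson(6.0, 40)])  # break at index 60
toy_model = make_switchpoint_model(toy_counts, prior_lambda=1.0)
with toy_model:
    toy_trace = pm.sample(2000, tune=1000)
print(toy_trace['switchpoint'].mean())  # posterior mean should land near 60
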
def single_model(self, idx):
    minimum = 0.
    maximum = 8.
    sample_space = np.arange(minimum, maximum + 1, 1)
    sample_space = 1. / 10**(sample_space / 4.)

    with pm.Model() as smodel:
        # uniform prior on h
        hab_ten = pm.DiscreteUniform('h', 0., 8.)

        # convert to a tensor
        alpha = tt.as_tensor_variable([10**(hab_ten / 4.)])

        probs_a, probs_r = self.inferrer(alpha)

        # Categorical likelihoods for the observed actions and rewards
        pm.Categorical('actions', probs_a, observed=self.actions[idx])
        pm.Categorical('rewards', probs_r, observed=self.rewards[idx])

    return smodel, sample_space

def gev0_shift_1(dataset):
    locm = dataset.mean()
    locs = dataset.std() / (np.sqrt(len(dataset)))
    scalem = dataset.std()
    scales = dataset.std() / (np.sqrt(2 * (len(dataset) - 1)))
    n_count_data = len(dataset)

    with pm.Model() as model:
        # Priors for unknown model parameters.
        # c = x - 0.5: the transformation in gev_logp is required because the
        # Beta domain lies between 0 and 1.
        c1 = pm.Beta('c1', alpha=6, beta=9)
        loc1 = pm.Normal('loc1', mu=locm, sd=locs)
        scale1 = pm.Normal('scale1', mu=scalem, sd=scales)
        c2 = pm.Beta('c2', alpha=6, beta=9)
        loc2 = pm.Normal('loc2', mu=locm, sd=locs)
        scale2 = pm.Normal('scale2', mu=scalem, sd=scales)

        def gev_logp(value):
            scaled = (value - loc_) / scale_
            logp = -(tt.log(scale_)
                     + (((c_ - 0.5) + 1) / (c_ - 0.5) * tt.log1p((c_ - 0.5) * scaled)
                        + (1 + (c_ - 0.5) * scaled)**(-1 / (c_ - 0.5))))
            bound1 = loc_ - scale_ / (c_ - 0.5)
            bounds = tt.switch((c_ - 0.5) > 0, value > bound1, value < bound1)
            # `bound` comes from pymc3.distributions.dist_math
            return bound(logp, bounds, c_ != 0)

        tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data - 1)
        idx = np.arange(n_count_data)
        c_ = pm.math.switch(tau > idx, c1, c2)
        loc_ = pm.math.switch(tau > idx, loc1, loc2)
        scale_ = pm.math.switch(tau > idx, scale1, scale2)

        gev = pm.DensityDist('gev', gev_logp, observed=dataset)
        trace = pm.sample(1000, chains=1, progressbar=True)

    # geweke_plot = pm.geweke(trace, 0.05, 0.5, 20)
    # gelman_and_rubin = pm.diagnostics.gelman_rubin(trace)
    posterior = pm.trace_to_dataframe(trace)
    summary = pm.summary(trace)
    return summary, posterior

def switch_test(current, sample_num=1000):
    with pm.Model() as switch_point:
        #sps = pm.Poisson('points', 0)
        #ragnes = np.random.randint(years.min(), years.max(), sps)
        current_data = current[0]
        time = current[1]
        switchpoint = pm.DiscreteUniform('switchpoint', lower=time.min(), upper=time.max())

        # Priors for the pre- and post-switch levels
        early_rate = pm.Normal('early_rate', mu=0, sd=1000)
        late_rate = pm.Normal('late_rate', mu=0, sd=1000)

        # Allocate the appropriate level to times before and after the switchpoint
        rate = pm.math.switch(switchpoint >= time, early_rate, late_rate)

        switch_points = pm.Normal('current', mu=rate, sd=70, observed=current_data)
        trace = pm.sample(sample_num) if sample_num < 5000 else pm.sample(5000)

    return [trace['switchpoint'].mean(), trace['switchpoint'].std(),
            trace['early_rate'].mean(), trace['early_rate'].std(),
            trace['late_rate'].mean(), trace['late_rate'].std()]

def poisson_model():
    with pm.Model() as disaster_model:
        switchpoint = pm.DiscreteUniform('switchpoint', lower=year.min(),
                                         upper=year.max(), testval=1900)

        # Priors for the pre- and post-switch disaster rates
        early_rate = pm.Exponential('early_rate', 1)
        late_rate = pm.Exponential('late_rate', 1)

        # Allocate the appropriate Poisson rate to years before and after the switchpoint
        rate = pm.math.switch(switchpoint >= year, early_rate, late_rate)

        disasters = pm.Poisson('disasters', rate, observed=disaster_data)

        trace = pm.sample(10000)
        pm.traceplot(trace)
        plt.show()

def infer_lambda(self):
    """
    Ci ~ Poisson(lambda)

    Is there a day ("tau") where lambda suddenly jumps to a higher value?
    We are looking for a switchpoint tau such that
    (1) lambda = lambda_1 if t <= tau, and lambda = lambda_2 if t > tau
    (2) lambda_2 > lambda_1

    lambda_1 ~ Exponential(alpha)
    lambda_2 ~ Exponential(alpha)
    tau ~ DiscreteUniform, i.e. P(tau = k) = 1 / n_count_data for each day k
    """
    print("Infer with PyMC3...")
    with pm.Model() as model:
        # assign lambdas and tau to stochastic variables
        alpha = 1.0 / self.count_data.mean()
        lambda_1 = pm.Exponential("lambda_1", alpha)
        lambda_2 = pm.Exponential("lambda_2", alpha)
        tau = pm.DiscreteUniform("tau", lower=0, upper=self.n_count_data)

        # create a combined function for lambda (it is still a random variable)
        idx = np.arange(self.n_count_data)  # Index
        lambda_ = pm.math.switch(tau >= idx, lambda_1, lambda_2)

        # combine the data with our proposed data generation scheme
        observation = pm.Poisson("obs", lambda_, observed=self.count_data)

        # inference
        step = pm.Metropolis()
        self.trace = pm.sample(10000, tune=5000, step=step)

    # get the variables we want to plot from our trace
    self.lambda_1_samples = self.trace['lambda_1']
    self.lambda_2_samples = self.trace['lambda_2']
    self.tau_samples = self.trace['tau']

def bayesian_tipping_point(obs_data):
    """
    :param obs_data: 1-d numpy array containing the daily precipitation data
    :return: summary of sampled values and the trace itself
    """
    n_dd = obs_data.shape[0]
    idx = np.arange(n_dd)

    with pm.Model() as model:
        alpha_1 = pm.Uniform("alpha_1", lower=0, upper=10)
        alpha_2 = pm.Uniform("alpha_2", lower=0, upper=10)
        beta_1 = pm.Uniform("beta_1", lower=0, upper=10)
        beta_2 = pm.Uniform("beta_2", lower=0, upper=10)
        pi_1 = pm.Uniform("pi_1", lower=0, upper=0.9)
        pi_2 = pm.Uniform("pi_2", lower=0, upper=0.9)
        tau = pm.DiscreteUniform("tau", lower=365 * (5/4.), upper=n_dd - 365 * (5/4.))

        alpha_ = pm.math.switch(tau >= idx, alpha_1, alpha_2)
        beta_ = pm.math.switch(tau >= idx, beta_1, beta_2)
        pi_ = pm.math.switch(tau >= idx, pi_1, pi_2)

        observation = ZeroInflatedGamma("obs", alpha=alpha_, beta=beta_, pi=pi_, observed=obs_data)

        step = pm.NUTS()
        trace = pm.sample(5000, tune=20000, step=step, nuts_kwargs=dict(target_accept=.9))

    summary = pm.stats.summary(trace)
    return summary, trace

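# ZeroInflatedGamma used above is not a PyMC3 built-in. A hypothetical sketch
# of one way to realize the assumed semantics (exact zeros with probability pi,
# Gamma(alpha, beta) otherwise); a sketch under those assumptions, not the
# author's implementation:
def ZeroInflatedGamma(name, alpha, beta, pi, observed):
    def logp(value):
        # keep the Gamma branch finite at zero so tt.switch stays well-defined
        safe_value = tt.switch(tt.eq(value, 0), 1.0, value)
        gamma_logp = pm.Gamma.dist(alpha=alpha, beta=beta).logp(safe_value)
        return tt.switch(tt.eq(value, 0), tt.log(pi), tt.log1p(-pi) + gamma_logp)
    return pm.DensityDist(name, logp, observed=observed)
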
def main():
    with pm.Model() as model:
        xl1 = pm.DiscreteUniform('xl1', lower=0, upper=500)
        yl1 = pm.DiscreteUniform('yl1', lower=150, upper=500)
        θl1 = pm.DiscreteUniform('θl1', lower=-100, upper=100)
        xl2 = pm.DiscreteUniform('xl2', lower=0, upper=500)
        yl2 = pm.DiscreteUniform('yl2', lower=150, upper=500)
        θl2 = pm.DiscreteUniform('θl2', lower=-100, upper=100)
        obs = pm.Normal('obs', mu=simulation(xl1, yl1, θl1, xl2, yl2, θl2),
                        sigma=.001, observed=484)
        trace = pm.sample(10, tune=2000, cores=2)

    def print_and_visualize(t):
        print(t)
        visualize_simulation(t['xl1'], t['yl1'], t['θl1'], t['xl2'], t['yl2'], t['θl2'])

    [print_and_visualize(t) for t in trace]

cpt_smry = pm.summary(cpt_trace)
pm.traceplot(cpt_trace)
spp = pm.sample_posterior_predictive(cpt_trace, samples=1000, progressbar=False,
                                     var_names=['w', 'theta', 'obs'])

cpf = data[['ds', 'y']].set_index('ds').resample('7D').sum().reset_index()
n = len(cpf)
_, _, _, _, cpf['t'] = set_times(cpf)  # add 't'
g = np.gradient(cpf['y'].values)  # trend
cpf['w_trend'] = np.abs(g) / np.sum(np.abs(g))  # changepoint density at each point
cpf['trend'] = g
alpha = 1.0 / cpf['w_trend'].mean()
beta = 1.0 / cpf['w_trend'].std()
t = np.arange(0, n)

with pm.Model() as my_model:
    switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=cpf['t'].max(), shape=10)
    # segment priors, collected by name ('mu_0', 'sd_0', 'mu_1', 'sd_1', ...)
    mu_seg = {i: pm.Exponential('mu_' + str(i), alpha) for i in range(n)}
    sd_seg = {i: pm.Exponential('sd_' + str(i), beta) for i in range(n)}

    mu = pm.math.switch(switchpoint >= t, mu_seg[1], mu_seg[2])
    sd = pm.math.switch(switchpoint >= t, sd_seg[1], sd_seg[2])
    obs = pm.Normal('x', mu=mu, sd=sd, observed=data)

with my_model:
    step1 = pm.NUTS([mu_seg[1], mu_seg[2], sd_seg[1], sd_seg[2]])
    step2 = pm.Metropolis([switchpoint])

import pymc3 as pm
import arviz as az
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Gather and transform data
data_path = '/home/gerardo/Desktop/Projects/PGA-Analysis/data/driving-data/driving-data.csv'
raw_data = pd.read_csv(data_path)
data = raw_data['average_driving_distance'].to_numpy()

# Declare model
with pm.Model() as model:
    # Switchpoint
    tau = pm.DiscreteUniform("tau", lower=0, upper=len(data) - 1)

    # Prior when t <= tau
    mu_1 = pm.Normal("mu_1", mu=280, sd=20)
    sd_1 = pm.HalfNormal("sd_1", sigma=40)

    # Prior when t > tau
    mu_2 = pm.Normal("mu_2", mu=280, sd=20)
    sd_2 = pm.HalfNormal("sd_2", sigma=40)

    # Observations
    idx = np.arange(len(data))
    mu_t = pm.math.switch(tau > idx, mu_1, mu_2)
    sd_t = pm.math.switch(tau > idx, sd_1, sd_2)
    observations = pm.Normal("observations", mu=mu_t, sd=sd_t, observed=data)

# Perform inference
with model:
    step = pm.NUTS()
    trace = pm.sample(50000, tune=5000, step=step)

""" Comparing models using Hierarchical modelling. """ from __future__ import division import numpy as np import pymc3 as pm import matplotlib.pyplot as plt from plot_post import plot_post ## specify the Data y = np.repeat([0, 1], [3, 6]) # 3 tails 6 heads with pm.Model() as model: # Hyperhyperprior: model_index = pm.DiscreteUniform('model_index', lower=0, upper=1) # Hyperprior: kappa_theta = 12 mu_theta = pm.switch(pm.eq(model_index, 1), 0.25, 0.75) # Prior distribution: a_theta = mu_theta * kappa_theta b_theta = (1 - mu_theta) * kappa_theta theta = pm.Beta('theta', a_theta, b_theta) # theta distributed as beta density #likelihood y = pm.Bernoulli('y', theta, observed=y) start = pm.find_MAP() step = pm.Metropolis() trace = pm.sample(10000, step, start=start, progressbar=False) ## Check the results. burnin = 2000 # posterior samples to discard
])
years = len(disasters_data)


@as_op(itypes=[tt.lscalar, tt.dscalar, tt.dscalar], otypes=[tt.dvector])
def rate_(switchpoint, early_mean, late_mean):
    out = empty(years)
    out[:switchpoint] = early_mean
    out[switchpoint:] = late_mean
    return out


with pm.Model() as model:
    # Prior for distribution of switchpoint location
    switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=years)
    # Priors for pre- and post-switch mean number of disasters
    early_mean = pm.Exponential('early_mean', lam=1.)
    late_mean = pm.Exponential('late_mean', lam=1.)

    # Allocate appropriate Poisson rates to years before and after current
    # switchpoint location
    idx = arange(years)
    rate = rate_(switchpoint, early_mean, late_mean)

    # Data likelihood
    disasters = pm.Poisson('disasters', rate, observed=disasters_data)

    # Use slice sampler for means
    step1 = pm.Slice([early_mean, late_mean])

    # Use Metropolis for switchpoint, since it accommodates discrete variables

import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as stats
import pymc3 as pm
import theano.tensor as tt

count_data = np.loadtxt("data/txtdata.csv")
n_count_data = len(count_data)

with pm.Model() as model:
    # Recall count_data is the variable that holds our txt counts
    alpha = 1.0 / count_data.mean()
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)
    tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data - 1)

with model:
    idx = np.arange(n_count_data)  # Index
    lambda_ = pm.math.switch(tau > idx, lambda_1, lambda_2)

with model:
    observation = pm.Poisson("obs", lambda_, observed=count_data)

with model:
    step = pm.Metropolis()
    trace = pm.sample(10000, tune=5000, step=step)

lambda_1_samples = trace['lambda_1']
lambda_2_samples = trace['lambda_2']
tau_samples = trace['tau']

What is that point, and how skilled is the test taker?
"""
ground_truth = np.array([True, True, True, True, True, True, True, True, True, True])
obs = np.array([True, False, False, False, False, False, False, False, False, False])
correct = np.equal(ground_truth, obs)
true_inds = np.where(obs)[0]
last_true_idx = true_inds[-1]
test_len = len(ground_truth)

with pm.Model() as model:
    skill = pm.Beta('skill', 1.0, 1.0)
    n_correct_obs = np.sum(correct)
    endpoint = pm.DiscreteUniform('endpoint', last_true_idx, test_len - 1)
    for i in range(0, last_true_idx + 1):
        pm.Bernoulli('correct_%d' % i, skill, observed=correct[i])
    for i in range(last_true_idx + 1, test_len):
        after_endpoint = pm.math.gt(i, endpoint)
        prob_correct_if_done = float(ground_truth[i] == False)
        prob_correct = pm.math.where(after_endpoint, prob_correct_if_done, skill)
        pm.Bernoulli('correct%d' % i, prob_correct, observed=correct[i])
    trace = pm.sample()

pm.traceplot(trace)
print(pm.summary(trace))

]

# Time series of recorded coal mining disasters in the UK from 1851 to 1962
disasters_data = array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
                        3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
                        2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0,
                        1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
                        0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
                        3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
                        0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
year = arange(1851, 1962)

with pm.Model() as model:
    switchpoint = pm.DiscreteUniform('switchpoint', lower=year.min(), upper=year.max())
    early_mean = pm.Exponential('early_mean', lam=1.)
    late_mean = pm.Exponential('late_mean', lam=1.)

    # Allocate appropriate Poisson rates to years before and after current
    # switchpoint location
    rate = tt.switch(switchpoint >= year, early_mean, late_mean)

    disasters = pm.Poisson('disasters', rate, observed=disasters_data)

    # Initial values for stochastic nodes
    start = {'early_mean': 2., 'late_mean': 3.}

    tr = pm.sample(1000, tune=500, start=start)
    pm.traceplot(tr)

import os
import numpy as np
import matplotlib.pyplot as plt
import pymc3 as pm
from scipy.stats import poisson
import seaborn as sns

# Config
os.chdir("/home/jovyan/work")
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
plt.rcParams["figure.figsize"] = (12, 3)

# Preparation
N = 100
true_lams = [20, 50]
true_tau = 30
data = np.hstack([
    poisson(true_lams[0]).rvs(true_tau),
    poisson(true_lams[1]).rvs(N - true_tau),
])

# Modeling
with pm.Model() as model:
    lam_1 = pm.Exponential("lam_1", data.mean())
    lam_2 = pm.Exponential("lam_2", data.mean())
    tau = pm.DiscreteUniform("tau", lower=0, upper=N - 1)

    idx = np.arange(N)
    lam = pm.math.switch(tau > idx, lam_1, lam_2)

    female = pm.Poisson("target", lam, observed=data)

    step = pm.Metropolis()
    trace = pm.sample(20000, tune=5000, step=step, chains=10)

pm.traceplot(trace[1000:], grid=True)
plt.savefig("./results/3-15-a-inference.png")

])
years = len(disasters_data)


@as_op(itypes=[tt.lscalar, tt.dscalar, tt.dscalar], otypes=[tt.dvector])
def rate_(switchpoint, early_mean, late_mean):
    out = empty(years)
    out[:switchpoint] = early_mean
    out[switchpoint:] = late_mean
    return out


with pm.Model() as model:
    # Prior for distribution of switchpoint location
    switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=years)
    # Priors for pre- and post-switch mean number of disasters
    early_mean = pm.Exponential("early_mean", lam=1.0)
    late_mean = pm.Exponential("late_mean", lam=1.0)

    # Allocate appropriate Poisson rates to years before and after current
    # switchpoint location
    idx = arange(years)
    rate = rate_(switchpoint, early_mean, late_mean)

    # Data likelihood
    disasters = pm.Poisson("disasters", rate, observed=disasters_data)

    # Use slice sampler for means
    step1 = pm.Slice([early_mean, late_mean])

    # Use Metropolis for switchpoint, since it accommodates discrete variables
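    # The snippet stops mid-setup. A hedged completion (an assumption, following
    # the PyMC3 docs example this pattern matches): give the discrete
    # switchpoint a Metropolis step and sample with both steppers.
    step2 = pm.Metropolis([switchpoint])
    tr = pm.sample(10000, tune=500, step=[step1, step2])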