def MCMCPoissonPosteriorRatio(sample_number, burn, count1, count2):
    """MCMC method to calculate the ratio distribution of two posterior
    Poisson distributions.

    sample_number: number of samples to draw; it must be greater than
        `burn`, but note that this is not checked.
    burn: number of samples to discard as burn-in.
    count1: observed count under condition 1
    count2: observed count under condition 2

    return: list of log2-ratios
    """
    lam1 = pymc.Uniform('U1', 0, 10000)  # uniform prior on lambda 1
    lam2 = pymc.Uniform('U2', 0, 10000)  # uniform prior on lambda 2
    poi1 = pymc.Poisson('P1', lam1, value=count1, observed=True)  # Poisson with observed value count1
    poi2 = pymc.Poisson('P2', lam2, value=count2, observed=True)  # Poisson with observed value count2

    @pymc.deterministic
    def ratio(l1=lam1, l2=lam2):
        return log(l1, 2) - log(l2, 2)

    mcmcmodel = pymc.MCMC([ratio, lam1, poi1, lam2, poi2])
    # Only the free stochastics are sampled; deterministics and observed
    # nodes do not belong in the step method.
    mcmcmodel.use_step_method(pymc.AdaptiveMetropolis, [lam1, lam2],
                              delay=20000)
    if PROGRESS_BAR_ENABLED:
        mcmcmodel.sample(iter=sample_number, burn=burn)
    else:
        mcmcmodel.sample(iter=sample_number, burn=burn, progress_bar=False)
    return ratio.trace()
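# Usage sketch (illustrative; not from the original source). Draws posterior
# log2-ratio samples for two observed counts; assumes pymc 2.x, numpy (as np),
# and a module-level PROGRESS_BAR_ENABLED flag as referenced above.
log2_ratios = MCMCPoissonPosteriorRatio(15000, 5000, count1=30, count2=20)
print(np.median(log2_ratios))  # point estimate of the log2 fold change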
def simple_mcmc_model(p_df):
    s_mu = pm.Normal('mu', mu=np.mean(p_df), tau=0.00001)
    s_ob = pm.Poisson('observed', mu=s_mu, value=p_df, observed=True)
    s_es = pm.Poisson('estimated', mu=s_mu, observed=False)
    s_model = pm.Model([s_mu, s_ob, s_es])
    return s_mu, s_ob, s_es, s_model
def gaussian_plus_constant(bins, observed_counts_per_bin, init=None):
    """Assumes the line can be modeled using a constant and a Gaussian."""
    if init is None:
        constant = pymc.Uniform('constant', lower=0.0, upper=6.0, doc='constant')
        amplitude = pymc.Uniform('amplitude', lower=0.0, upper=10.0, doc='amplitude')
        position = pymc.Uniform('position', lower=-20.0, upper=10.0, doc='position')
        width = pymc.Uniform('width', lower=-20.0, upper=10.0, doc='width')
    else:
        raise ValueError('Not implemented yet')

    # Model for the emission line
    @pymc.deterministic(plot=False)
    def modeled_emission(c=constant, a=amplitude, p=position, w=width, bins=bins):
        # A pure and simple power law model
        out = integral_across_all_bins(bins, (c, a, p, w))
        return out

    # Placeholder for a total-emission constraint; the decorator is left
    # disabled and the potential currently returns nothing.
    # @pymc.potential
    def constrain_total_emission():
        total_observed_emission = np.sum(observed_counts_per_bin)
        total_fit_emission = np.sum(
            modeled_emission(c=constant, a=amplitude, p=position, w=width,
                             bins=bins))
        return

    spectrum = pymc.Poisson('emission', mu=modeled_emission,
                            value=observed_counts_per_bin, observed=True)

    # Need to add in the potential constraint
    predictive = pymc.Poisson('predictive', mu=modeled_emission)

    # MCMC model
    return locals()
def gamma_poisson(x, t):
    """
    x: number of failures (N vector)
    t: operation time, thousands of hours (N vector)
    """
    if x is not None:
        N = x.shape
    else:
        N = num_points

    # place an exponential prior on t, for when it is unknown
    t = pymc.Exponential('t', beta=1.0 / 50.0, value=t, size=N,
                         observed=(t is not None))

    alpha = pymc.Exponential('alpha', beta=1.0, value=1.0)
    beta = pymc.Gamma('beta', alpha=0.1, beta=1.0, value=1.0)
    theta = pymc.Gamma('theta', alpha=alpha, beta=beta, size=N)

    @pymc.deterministic
    def mu(theta=theta, t=t):
        return theta * t

    x = pymc.Poisson('x', mu=mu, value=x, observed=(x is not None))
    return locals()
def likelihood_model1(self, nObs, yObs, Slope, Norm, Sig, dx, xp=14, deg=3):
    # (1) Calculate the expected Mass -> MCR
    # (2) Calculate the slope and scatter parameter in -> MOR
    # (3) Calculate Number Count
    # (4) Write the likelihood
    mu = Slope * yObs + Norm
    # alpha = 1.0 / Slope  # First Order Approximation
    # sigma = Sig / Slope  # First Order Approximation

    # [beta_n, beta_n-1, beta_n-2, ...]
    beta = [pymc.Normal('beta_%i' % i, mu=0., tau=0.0001, value=0.0,
                        observed=False) for i in range(deg + 1)]

    @pymc.deterministic(plot=False)
    def exp_n(beta=beta, mu=mu, deg=deg, slope=Slope, sig=Sig, dx=dx):
        # It returns the normalization and the first order approximation for
        # the scatter: MF = A x exp(beta1 x mu)
        p = np.poly1d(beta)
        A = dx * np.exp(p(mu))
        c = [beta[j] * (deg - j) for j in range(deg)]
        p = np.poly1d(c)
        beta1 = p(mu)
        return A * slope * np.exp(-sig**2 * beta1)

    likelihood = pymc.Poisson('n_obs', mu=exp_n, value=nObs, observed=True)
    return locals()
def three_model_comparison(p_df):
    a_n = len(p_df)
    t_lam = pm.Uniform('d_lam', 0, 1)
    # t_lam = 1.0 / np.mean(p_df)
    t_lambda_1 = pm.Exponential("t_lambda_1", t_lam)
    # t_lambda_1 = pm.Uniform("t_lambda_1", min(p_df), max(p_df))
    t_lambda_2 = pm.Exponential("t_lambda_2", t_lam)
    # t_lambda_2 = pm.Uniform("t_lambda_2", min(p_df), max(p_df))
    t_lambda_3 = pm.Exponential("t_lambda_3", t_lam)
    # t_lambda_3 = pm.Uniform("t_lambda_3", min(p_df), max(p_df))

    # The changepoints index into the data, so their support is the index
    # range [0, a_n], not the range of observed values.
    t_tau_1 = pm.DiscreteUniform("tau_1", lower=0, upper=a_n - 1)
    t_tau_2 = pm.DiscreteUniform("tau_2", lower=t_tau_1, upper=a_n)

    @pm.deterministic
    def lambda_(tau_1=t_tau_1, tau_2=t_tau_2, lambda_1=t_lambda_1,
                lambda_2=t_lambda_2, lambda_3=t_lambda_3):
        out = np.zeros(a_n)
        out[:tau_1] = lambda_1       # lambda before tau_1 is lambda_1
        out[tau_1:tau_2] = lambda_2  # lambda_2 between tau_1 and tau_2
        out[tau_2:] = lambda_3       # lambda after (and including) tau_2 is lambda_3
        return out

    t_obs = pm.Poisson('t_observed', mu=lambda_, value=p_df, observed=True)
    t_model = pm.Model(
        [t_obs, t_lam, t_lambda_1, t_lambda_2, t_lambda_3, t_tau_1, t_tau_2])
    return t_model, t_lam, t_lambda_1, t_lambda_2, t_lambda_3, t_tau_1, t_tau_2
def likelihood_model_obs2(self, nObs, yObs, dObs, dV, expMu, slope_pri,
                          sig_pri, deg=3):
    slope = slope_pri  # pymc.Normal('slope', mu=1.0, tau=1600.0, value=1.0, observed=False)
    sig = sig_pri      # pymc.Normal('sig', mu=1.0, tau=1600.0, value=1.0, observed=False)

    # [beta_n, beta_n-1, beta_n-2, ...]
    beta = [pymc.Normal('beta_%i' % i, mu=0., tau=0.0001, value=0.0,
                        observed=False) for i in range(deg + 1)]

    @pymc.deterministic(plot=False)
    def exp_n(beta=beta, mu=expMu, deg=deg, slope=slope, sig=sig, dx=dObs, dV=dV):
        # It returns the normalization and the first order approximation for
        # the scatter: MF = A x exp(beta1 x mu)
        p = np.poly1d(beta)
        A = dx * np.exp(p(mu))
        c = [beta[j] * (deg - j) for j in range(deg)]
        p = np.poly1d(c)
        beta1 = p(mu)
        return dV * A * slope * np.exp(np.array(sig)**2 * beta1)

    likelihood = pymc.Poisson('n_obs', mu=exp_n, value=nObs, observed=True)
    return locals()
def two_model_comparison(p_df):
    a_n = len(p_df)
    d_lam = pm.Uniform('d_lam', 0, 1)
    # d_lam = 1.0 / np.mean(p_df)
    lambda_1 = pm.Exponential("lambda_1", d_lam)
    # lambda_1 = pm.Uniform("lambda_1", min(p_df), max(p_df))
    lambda_2 = pm.Exponential("lambda_2", d_lam)
    # lambda_2 = pm.Uniform("lambda_2", min(p_df), max(p_df))

    # tau indexes into the data, so its support is the index range [0, a_n],
    # not the range of observed values.
    tau = pm.DiscreteUniform("tau", lower=0, upper=a_n)

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(a_n)
        out[:tau] = lambda_1  # lambda before tau is lambda_1
        out[tau:] = lambda_2  # lambda after (and including) tau is lambda_2
        return out

    d_obs = pm.Poisson('d_observed', mu=lambda_, value=p_df, observed=True)
    d_model = pm.Model([d_obs, d_lam, lambda_1, lambda_2, tau])
    # d_model = pm.Model([d_obs, lambda_1, lambda_2, tau])
    return d_model, d_obs, d_lam, lambda_1, lambda_2, tau
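# Usage sketch (illustrative; not from the original source): synthetic counts
# with a changepoint halfway through, fit with the single-changepoint model.
# Assumes pymc 2.x (as pm) and numpy (as np).
p_df = np.concatenate([np.random.poisson(3, 30), np.random.poisson(8, 30)])
d_model, d_obs, d_lam, lambda_1, lambda_2, tau = two_model_comparison(p_df)
mcmc = pm.MCMC(d_model)
mcmc.sample(20000, burn=5000)
print(np.mean(mcmc.trace('tau')[:]))  # posterior mean changepoint index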
def compute_n_sat_prior(informative=False, poisson_mu=None,
                        uniform_lower=None, uniform_upper=None):
    """
    Compute the n_sat prior.

    Note: There are two options for modelling n_sat:
    - uninformative: discrete uniform distribution
    - informative: Poisson distribution

    Parameters
    ----------
    informative : bool, optional (default: False)
        If True, n_sat is modelled by a Poisson distribution.
        Else, n_sat is modelled by a discrete uniform distribution.
    poisson_mu : int, optional (default: None)
        Parameter mu (i.e. mean) of the Poisson distribution used to model
        n_sat. Must be specified if `informative` is True.
    uniform_lower : int, optional (default: None)
        Lower bound of the discrete uniform distribution used to model
        n_sat. Must be specified if `informative` is False.
    uniform_upper : int, optional (default: None)
        Upper bound of the discrete uniform distribution used to model
        n_sat. Must be specified if `informative` is False.

    Returns
    -------
    pymc distribution
        Prior distribution for n_sat.
    """
    if informative:
        if poisson_mu is None:
            error_msg = ("If you want to use a Poisson prior for n_sat, "
                         "please specify the parameter `poisson_mu`.")
            sys.exit(error_msg)
        return pymc.Poisson("n_sat", mu=poisson_mu)

    if uniform_lower is None or uniform_upper is None:
        error_msg = ("If you want to use a uniform prior for n_sat, "
                     "please specify the parameters `uniform_lower` "
                     "and `uniform_upper`.")
        sys.exit(error_msg)
    return pymc.DiscreteUniform("n_sat", lower=uniform_lower,
                                upper=uniform_upper)
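# Usage sketch (illustrative; not from the original source): the two ways of
# building the n_sat prior described in the docstring. The mu and bound
# values below are arbitrary placeholders.
informative_prior = compute_n_sat_prior(informative=True, poisson_mu=12)
flat_prior = compute_n_sat_prior(informative=False,
                                 uniform_lower=0, uniform_upper=100)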
def main():
    lambda_1 = pm.Exponential("lambda_1", 1)  # prior on first behaviour
    lambda_2 = pm.Exponential("lambda_2", 1)  # prior on second behaviour
    tau = pm.DiscreteUniform("tau", lower=0, upper=10)  # prior on behaviour change

    print "lambda_1.value = %.3f" % lambda_1.value
    print "lambda_2.value = %.3f" % lambda_2.value
    print "tau.value = %.3f" % tau.value

    print lambda_1.random(), lambda_2.random(), tau.random()

    print "After calling random() on the variables..."
    print "lambda_1.value = %.3f" % lambda_1.value
    print "lambda_2.value = %.3f" % lambda_2.value
    print "tau.value = %.3f" % tau.value

    samples = [lambda_1.random() for i in range(20000)]
    plt.hist(samples, bins=70, normed=True, histtype="stepfilled")
    plt.title("Prior distribution for $\lambda_1$")
    plt.xlim(0, 8)
    plt.show()

    data = np.array([10, 5])
    fixed_variable = pm.Poisson("fxd", 1, value=data, observed=True)
    print "value: ", fixed_variable.value
    print "calling .random()"
    fixed_variable.random()
    print "value: ", fixed_variable.value

    n_data_points = 5  # in CH1 we had ~70 data points

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(n_data_points)
        out[:tau] = lambda_1  # lambda before tau is lambda_1
        out[tau:] = lambda_2  # lambda after tau is lambda_2
        return out

    data = np.array([10, 25, 15, 20, 35])
    obs = pm.Poisson("obs", lambda_, value=data, observed=True)
    model = pm.Model([obs, lambda_, lambda_1, lambda_2, tau])
def test_start(self):
    with pm.Model() as model:
        a = pm.Poisson("a", 5)
        b = pm.HalfNormal("b", 10)
        y = pm.Normal("y", a, b, observed=[1, 2, 3, 4])
        start = {
            "a": np.random.poisson(5, size=500),
            "b_log__": np.abs(np.random.normal(0, 10, size=500)),
        }
        trace = pm.sample_smc(500, chains=1, start=start)
def test_variable_type(self):
    with pm.Model() as model:
        mu = pm.HalfNormal("mu", 1)
        a = pm.Normal("a", mu=mu, sigma=2, observed=np.array([1, 2]))
        b = pm.Poisson("b", mu, observed=np.array([1, 2]))
        trace = pm.sample(compute_convergence_checks=False, return_inferencedata=False)

    with model:
        ppc = pm.sample_posterior_predictive(trace, return_inferencedata=False, samples=1)
        assert ppc["a"].dtype.kind == "f"
        assert ppc["b"].dtype.kind == "i"
def model_factory():
    """Build a PyMC model and return it as a dict"""
    x = pymc.Uniform("x", value=S0[0], lower=XMIN, upper=XMAX)
    y = pymc.Uniform("y", value=S0[1], lower=YMIN, upper=YMAX)
    I = pymc.Uniform("I", value=I0, lower=IMIN, upper=IMAX)

    @pymc.deterministic(plot=False)
    def model_pred(x=x, y=y, I=I):
        return P([x, y], I)

    detector_response = pymc.Poisson(
        "d",
        data,
        value=data,
        observed=True,
        plot=False,
    )

    background = pymc.Poisson(
        "background",
        DWELL * BG,
        value=DWELL * BG,
        observed=True,
        plot=False,
    )

    observed_response = model_pred + background

    # return locals()  # the lazy way
    return {
        "x": x,
        "y": y,
        "I": I,
        "detector_response": detector_response,
        "background": background,
        "observed_response": observed_response,
    }
def make_on_off(n_off, expo_off, n_on, expo_on, mean0):
    """
    Make a PyMC model for inferring a Poisson signal rate parameter, `s`,
    for 'on-off' observations with uncertain background rate, `b`.

    Parameters
    ----------
    n_off, n_on : int
        Event counts off-source and on-source
    expo_off, expo_on : float
        Exposures off-source and on-source
    mean0 : float
        Prior mean for both background and signal rates
    """
    # PyMC's exponential dist'n uses beta = 1/scale = 1/mean.
    # Here we initialize rates to good guesses.
    b_est = float(n_off) / expo_off
    s_est = max(float(n_on) / expo_on - b_est, .1 * b_est)
    b = pymc.Exponential('b', beta=1. / mean0, value=b_est)
    s = pymc.Exponential('s', beta=1. / mean0, value=s_est)

    # The expected number of counts on and off source, as deterministic functions.
    @pymc.deterministic
    def mu_off(b=b):
        return b * expo_off

    @pymc.deterministic
    def mu_on(s=s, b=b):
        return (s + b) * expo_on

    # Poisson likelihood functions:
    off_count = pymc.Poisson('off_count', mu=mu_off, value=n_off, observed=True)
    on_count = pymc.Poisson('on_count', mu=mu_on, value=n_on, observed=True)

    return locals()
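# Usage sketch (illustrative; not from the original source): sample the on-off
# model returned as a locals() dict, which pymc.MCMC accepts directly. The
# counts and exposures below are arbitrary placeholders.
on_off = make_on_off(n_off=50, expo_off=10., n_on=80, expo_on=10., mean0=5.)
mcmc = pymc.MCMC(on_off)
mcmc.sample(iter=20000, burn=5000)
s_samples = mcmc.trace('s')[:]  # posterior draws of the signal rate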
def test_model_not_drawable_prior(self):
    data = np.random.poisson(lam=10, size=200)
    model = pm.Model()
    with model:
        mu = pm.HalfFlat("sigma")
        pm.Poisson("foo", mu=mu, observed=data)
        idata = pm.sample(tune=1000)

    with model:
        with pytest.raises(NotImplementedError) as excinfo:
            pm.sample_prior_predictive(50)
        assert "Cannot sample" in str(excinfo.value)
        samples = pm.sample_posterior_predictive(idata, 40, return_inferencedata=False)
        assert samples["foo"].shape == (40, 200)
def test_respects_shape(self):
    for shape in (2, (2,), (10, 2), (10, 10)):
        with pm.Model():
            mu = pm.Gamma("mu", 3, 1, size=1)
            goals = pm.Poisson("goals", mu, size=shape)
            trace1 = pm.sample_prior_predictive(
                10, return_inferencedata=False, var_names=["mu", "mu", "goals"]
            )
            trace2 = pm.sample_prior_predictive(
                10, return_inferencedata=False, var_names=["mu", "goals"]
            )
        if shape == 2:  # want to test shape as an int
            shape = (2,)
        assert trace1["goals"].shape == (10,) + shape
        assert trace2["goals"].shape == (10,) + shape
def likelihood_model3(self, nObs, yObs, Slope, Norm, Sig, dx, xp=14, deg=3):
    # (1) Calculate the expected Mass -> MCR
    # (2) Calculate the slope and scatter parameter in -> MOR
    # (3) Calculate Number Count
    # (4) Write the likelihood
    # alpha = 1.0 / Slope  # First Order Approximation
    # sigma = Sig / Slope  # First Order Approximation
    print np.mean(Slope)
    print Slope
    print Norm
    print Slope * yObs + Norm
    print nObs
    print (Sig[10] - Sig[4]) / (Slope[10] * yObs[10] + Norm[10]
                                - Slope[4] * yObs[4] - Norm[4])
    # exit()

    slope = pymc.Normal('slope', mu=0.7, tau=100.0, value=0.7, observed=False)
    slope_mu = pymc.Normal('slope_mu', mu=0.0, tau=100.0, value=0.0, observed=False)
    norm = pymc.Normal('norm', mu=-9.0, tau=100.0, value=-9.0, observed=False)
    sig = 0.15  # pymc.Uniform('sig', 0.001, 0.4, value=np.mean(Sig), observed=False)

    # [beta_n, beta_n-1, beta_n-2, ...]
    beta = [pymc.Normal('beta_%i' % i, mu=0., tau=0.0001, value=0.0,
                        observed=False) for i in range(deg + 1)]

    @pymc.deterministic(plot=False)
    def mu(yObs=yObs, slope=slope, norm=norm, slope_mu=slope_mu):
        return (slope + slope_mu * yObs) * yObs + norm

    @pymc.deterministic(plot=False)
    def exp_n(beta=beta, mu=mu, deg=deg, slope=slope, sig=sig, dx=dx):
        # It returns the normalization and the first order approximation for
        # the scatter: MF = A x exp(beta1 x mu)
        p = np.poly1d(beta)
        A = dx * np.exp(p(mu))
        c = [beta[j] * (deg - j) for j in range(deg)]
        p = np.poly1d(c)
        beta1 = p(mu)
        return A * slope * np.exp(-np.array(sig)**2 * beta1)

    likelihood = pymc.Poisson('n_obs', mu=exp_n, value=nObs, observed=True)
    return locals()
class likelihood_model:
    # NB: `s`, `muT`, `b`, `sigmas`, `sigmab`, and `self` must already be
    # bound in the enclosing scope when this class body executes.

    # Stochastic variables for signal, background, and total event rates
    # signal_rate = pymc.Normal('signal_rate', mu=s*muT, tau=1/sigmas**2)
    # background_rate = pymc.Normal('background_rate', mu=b, tau=1/sigmab**2)
    # Doh, need to use truncated normal to prevent negative values
    signal_rate = pymc.TruncatedNormal('signal_rate', mu=s * muT,
                                       tau=1 / sigmas**2, a=0, b=np.inf)
    background_rate = pymc.TruncatedNormal('background_rate', mu=b,
                                           tau=1 / sigmab**2, a=0, b=np.inf)

    # Deterministic variable (simply the sum of the signal and background rates)
    total_rate = pymc.LinearCombination('total_rate', [1, 1],
                                        [signal_rate, background_rate])

    # Stochastic variable for number of observed events
    observed_events = pymc.Poisson('observed_events', mu=total_rate)

    # Deterministic variable for the test statistic
    @pymc.deterministic()
    def qCLs(n=observed_events):
        q, chi2B = self.QCLs(n, s)
        return q
def make_poisson(n, intvl, mean0):
    """
    Make a PyMC model for inferring a Poisson distribution rate parameter,
    for a datum consisting of `n` counts observed in an interval of size
    `intvl`. The inference will use an exponential prior for the rate, with
    prior mean `mean0`.
    """
    # PyMC's exponential dist'n uses beta = 1/scale = 1/mean.
    # Here we initialize rate to n/intvl.
    rate = pymc.Exponential('rate', beta=1. / mean0, value=float(n) / intvl)

    # The expected number of counts, mu=rate*intvl, is a deterministic function
    # of the rate RV (and the constant intvl).
    @pymc.deterministic
    def mu(rate=rate):
        return rate * intvl

    # Poisson likelihood function:
    count = pymc.Poisson('count', mu=mu, value=n, observed=True)

    return locals()
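# Usage sketch (illustrative; not from the original source). With an
# exponential prior, i.e. Gamma(1, 1/mean0), and a Poisson likelihood, the
# posterior for the rate is Gamma(1 + n, 1/mean0 + intvl), so the trace can
# be checked against the conjugate result. The arguments are placeholders.
poisson_model = make_poisson(n=16, intvl=2.5, mean0=5.)
mcmc = pymc.MCMC(poisson_model)
mcmc.sample(iter=10000, burn=2000)
rate_samples = mcmc.trace('rate')[:]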
def Main():
    # Create observation
    observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)

    # Create model
    model = pm.Model([observation, lambda_1, lambda_2, tau])

    # Solve using MCMC (explained in Chapter 3)
    mcmc = pm.MCMC(model)
    mcmc.sample(40000, 10000, 1)

    # Get traces for parameters
    lambda_1_samples = mcmc.trace('lambda_1')[:]
    lambda_2_samples = mcmc.trace('lambda_2')[:]
    tau_samples = mcmc.trace('tau')[:]

    plot_data(lambda_1_samples, lambda_2_samples, tau_samples)
    solve_exercises(mcmc)
def run(self):
    self.validateinput()
    data = self.data
    data = self.fluctuate(data) if self.rndseed >= 0 else data

    # unpack background dictionaries
    backgroundkeys = self.backgroundsyst.keys()
    backgrounds = array([self.background[key] for key in backgroundkeys])
    backgroundnormsysts = array(
        [self.backgroundsyst[key] for key in backgroundkeys])

    # unpack object systematics dictionary
    objsystkeys = self.objsyst['signal'].keys()
    signalobjsysts = array(
        [self.objsyst['signal'][key] for key in objsystkeys])
    backgroundobjsysts = array([])
    if len(objsystkeys) > 0 and len(backgroundkeys) > 0:
        backgroundobjsysts = array([[
            self.objsyst['background'][syst][bckg] for syst in objsystkeys
        ] for bckg in backgroundkeys])

    recodim = len(data)
    resmat = self.response
    truthdim = len(resmat)

    import priors
    truth = priors.wrapper(priorname=self.prior,
                           low=self.lower,
                           up=self.upper,
                           other_args=self.priorparams)

    bckgnuisances = []
    for name, err in zip(backgroundkeys, backgroundnormsysts):
        if err < 0.:
            bckgnuisances.append(
                mc.Uniform('norm_%s' % name, value=1., lower=0., upper=3.))
        else:
            bckgnuisances.append(
                mc.TruncatedNormal(
                    'gaus_%s' % name,
                    value=0.,
                    mu=0.,
                    tau=1.0,
                    a=(-1.0 / err if err > 0.0 else -inf),
                    b=inf,
                    observed=(False if err > 0.0 else True)))
    bckgnuisances = mc.Container(bckgnuisances)

    objnuisances = [
        mc.Normal('gaus_%s' % name,
                  value=self.systfixsigma,
                  mu=0.,
                  tau=1.0,
                  observed=(True if self.systfixsigma != 0 else False))
        for name in objsystkeys
    ]
    objnuisances = mc.Container(objnuisances)

    # define potential to constrain truth spectrum
    if self.regularization:
        truthpot = self.regularization.getpotential(truth)

    # This is where the FBU method is actually implemented
    @mc.deterministic(plot=False)
    def unfold(truth=truth,
               bckgnuisances=bckgnuisances,
               objnuisances=objnuisances):
        smearbckg = 1.
        if len(backgroundobjsysts) > 0:
            smearbckg = smearbckg + dot(objnuisances, backgroundobjsysts)
        smearedbackgrounds = backgrounds * smearbckg
        bckgnormerr = array([(-1. + nuis) / nuis if berr < 0. else berr
                             for berr, nuis in zip(backgroundnormsysts,
                                                   bckgnuisances)])
        bckg = dot(1. + bckgnuisances * bckgnormerr, smearedbackgrounds)
        reco = dot(truth, resmat)
        smear = 1. + dot(objnuisances, signalobjsysts)
        out = bckg + reco * smear
        return out

    unfolded = mc.Poisson('unfolded',
                          mu=unfold,
                          value=data,
                          observed=True,
                          size=recodim)
    allnuisances = mc.Container(bckgnuisances + objnuisances)
    modelelements = [unfolded, unfold, truth, allnuisances]
    if self.regularization:
        modelelements += [truthpot]
    model = mc.Model(modelelements)

    if self.use_emcee:
        from emcee_sampler import sample_emcee
        mcmc = sample_emcee(model,
                            nwalkers=self.nwalkers,
                            samples=self.nMCMC / self.nwalkers,
                            burn=self.nBurn / self.nwalkers,
                            thin=self.nThin)
    else:
        map_ = mc.MAP(model)
        map_.fit()
        mcmc = mc.MCMC(model)
        mcmc.use_step_method(mc.AdaptiveMetropolis, truth + allnuisances)
        mcmc.sample(self.nMCMC, burn=self.nBurn, thin=self.nThin)
        # mc.Matplot.plot(mcmc)

    self.trace = [
        mcmc.trace('truth%d' % bin)[:] for bin in xrange(truthdim)
    ]
    self.nuisancestrace = {}
    for name, err in zip(backgroundkeys, backgroundnormsysts):
        if err < 0.:
            self.nuisancestrace[name] = mcmc.trace('norm_%s' % name)[:]
        if err > 0.:
            self.nuisancestrace[name] = mcmc.trace('gaus_%s' % name)[:]
    for name in objsystkeys:
        if self.systfixsigma == 0.:
            self.nuisancestrace[name] = mcmc.trace('gaus_%s' % name)[:]

    if self.monitoring:
        import monitoring
        monitoring.plot(self.name + '_monitoring', data, backgrounds,
                        resmat, self.trace, self.nuisancestrace,
                        self.lower, self.upper)
def turnover_piecewise_exponential_model():
    # hyperpriors for team-level distributions
    std_dev_att = pm.Uniform('std_dev_att', lower=0, upper=50)

    # priors on coefficients
    baseline_hazards = pm.Normal('baseline_hazards', 0, .0001,
                                 size=num_pieces,
                                 value=baseline_starting_vals.values)
    two_minute_drill = pm.Normal('two_minute_drill', 0, .0001, value=-.01)
    offense_losing_badly = pm.Normal('offense_losing_badly', 0, .0001, value=-.01)
    offense_winning_greatly = pm.Normal('offense_winning_greatly', 0, .0001, value=.01)
    home = pm.Normal('home', 0, .0001, value=-.01)

    @pm.deterministic(plot=False)
    def tau_att(std_dev_att=std_dev_att):
        return std_dev_att**-2

    # team-specific parameters
    atts_star = pm.Normal("atts_star", mu=0, tau=tau_att, size=num_teams,
                          value=np.zeros(num_teams))

    # trick to code the sum-to-zero constraint
    @pm.deterministic
    def atts(atts_star=atts_star):
        atts = atts_star.copy()
        atts = atts - np.mean(atts_star)
        return atts

    @pm.deterministic
    def lambdas(attacking_team=attacking_team,
                defending_team=defending_team,
                defending_team_is_home=defending_team_is_home,
                two_minute_drill=two_minute_drill,
                drive_is_two_minute_drill=drive_is_two_minute_drill,
                offense_losing_badly=offense_losing_badly,
                offense_is_losing_badly=offense_is_losing_badly,
                offense_winning_greatly=offense_winning_greatly,
                offense_is_winning_greatly=offense_is_winning_greatly,
                home=home,
                atts=atts,
                baseline_hazards=baseline_hazards,
                observed_exposures=observed_exposures,
                piece_i=piece_i):
        return observed_exposures * baseline_hazards[piece_i] * \
            np.exp(home * defending_team_is_home +
                   two_minute_drill * drive_is_two_minute_drill +
                   offense_losing_badly * offense_is_losing_badly +
                   offense_winning_greatly * offense_is_winning_greatly +
                   atts[attacking_team])

    drive_deaths = pm.Poisson("drive_deaths", lambdas,
                              value=observed_drive_deaths_turnover,
                              observed=True)

    @pm.potential
    def limit_sd(std_dev_att=std_dev_att):
        if std_dev_att < 0:
            return -np.inf
        return 0

    @pm.potential
    def limit_tau(tau_att=tau_att):
        if tau_att > 10000:
            return -np.inf
        return 0

    return locals()
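# Usage sketch (illustrative; not from the original source): the function
# returns locals(), so the whole graph can be handed straight to pm.MCMC.
# Assumes the module-level data arrays referenced above (num_teams,
# observed_exposures, piece_i, ...) are defined.
mcmc = pm.MCMC(turnover_piecewise_exponential_model())
mcmc.sample(40000, burn=10000, thin=5)
atts_samples = mcmc.trace('atts')[:]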
# Per-observation Gamma rates; alpha, beta, t, y, and Nobs are defined in the
# enclosing scope.
lamb = np.empty(Nobs, dtype=object)
for i in range(Nobs):
    lamb[i] = pymc.Gamma('lamb_%i' % (i + 1), alpha=alpha, beta=beta, value=0.5)

@pymc.deterministic
def poi_mu(lamb=lamb, t=t):
    return lamb * t

data = pymc.Poisson('data', mu=poi_mu, value=y, observed=True)

sampler = pymc.MCMC([lamb, beta, data, y, t])
sampler.use_step_method(pymc.Gibbs, lamb[0], beta)
sampler.sample(iter=10000, burn=3000, thin=10)

print np.mean(beta.trace())
for i in range(Nobs):
    print np.mean(lamb[i].trace())
def make_poisson_hmm(y_data, X_data, initial_params):
    r"""
    Construct a PyMC2 scalar Poisson-emissions HMM model.

    TODO: Update to match normal model design.

    The model takes the following form:

    .. math::

        y_t &\sim \operatorname{Poisson}(\exp(x_t^{(S_t)\top} \beta^{(S_t)})) \\
        \beta^{(S_t)}_i &\sim \operatorname{N}(m^{(S_t)}, C^{(S_t)}), \quad i \in \{1,\dots,M\} \\
        S_t \mid S_{t-1} &\sim \operatorname{Categorical}(\pi^{(S_{t-1})}) \\
        \pi^{(S_{t-1})} &\sim \operatorname{Dirichlet}(\alpha^{(S_{t-1})})

    where :math:`C^{(S_t)} = \lambda_i^{(S_t) 2} \tau^{(S_t) 2}` and

    .. math::

        \lambda^{(S_t)}_i &\sim \operatorname{Cauchy}^{+}(0, 1) \\
        \tau^{(S_t)} &\sim \operatorname{Cauchy}^{+}(0, 1)

    for observations :math:`y_t` in :math:`t \in \{0, \dots, T\}`, features
    :math:`x_t^{(S_t)} \in \mathbb{R}^M`, regression parameters
    :math:`\beta^{(S_t)}`, state sequences :math:`\{S_t\}^T_{t=1}` and state
    transition probabilities :math:`\pi \in [0, 1]^{K}`.
    :math:`\operatorname{Cauchy}^{+}` is the standard half-Cauchy distribution
    and :math:`\operatorname{N}` is the normal/Gaussian distribution.

    The set of random variables, :math:`\mathcal{S} = \{\{\beta^{(k)},
    \lambda^{(k)}, \tau^{(k)}, \pi^{(k)}\}_{k=1}^K, \{S_t\}^T_{t=1}\}`, are
    referred to as "stochastics" throughout the code.

    Parameters
    ==========
    y_data: pandas.DataFrame
        Usage/response observations :math:`y_t`.
    X_data: list of pandas.DataFrame
        List of design matrices for each state, i.e. :math:`x_t^{(S_t)}`.
        Each must span the entire length of observations (i.e. `y_data`).
    initial_params: NormalHMMInitialParams
        The initial parameters, which include
        :math:`\pi_0, m^{(k)}, \alpha^{(k)}, V^{(k)}`. Ignores `V` parameters.
        FIXME: using the "Normal" initial params objects is only temporary.

    Returns
    =======
    A ``pymc.Model`` object used for sampling.
    """
    N_states = len(X_data)
    N_obs = X_data[0].shape[0]

    alpha_trans = initial_params.alpha_trans
    trans_mat = TransProbMatrix("trans_mat", alpha_trans,
                                value=initial_params.trans_mat)
    states = HMMStateSeq("states", trans_mat, N_obs, p0=initial_params.p0,
                         value=initial_params.states)

    betas = []
    etas = []
    lambdas = []
    for s in range(N_states):
        initial_beta = None
        if initial_params.betas is not None:
            initial_beta = initial_params.betas[s]

        size_s = X_data[s].shape[1]
        size_s = size_s if size_s > 1 else None

        lambda_s = pymc.HalfCauchy('lambda-{}'.format(s), 0., 1., size=size_s)
        eta_s = pymc.HalfCauchy('tau-{}'.format(s), 0., 1.)
        beta_s = pymc.Normal('beta-{}'.format(s), 0., (lambda_s * eta_s)**(-2),
                             value=initial_beta, size=size_s)

        betas += [beta_s]
        etas += [eta_s]
        lambdas += [lambda_s]

    mu_reg = HMMLinearCombination('mu', X_data, betas, states, trace=False)

    @pymc.deterministic(trace=True, plot=False)
    def mu(mu_reg_=mu_reg):
        return np.exp(mu_reg_)

    if y_data is not None:
        y_data = np.ma.masked_invalid(y_data).astype(np.object)
        y_data.set_fill_value(None)

    y_rv = pymc.Poisson('y', mu, value=y_data,
                        observed=True if y_data is not None else False)

    del initial_params, s, beta_s, size_s, lambda_s, eta_s

    return pymc.Model(locals())
def model_factory():
    """Build a PyMC model and return it as a dict"""
    x = pymc.Uniform("x", value=S0[0], lower=XMIN, upper=XMAX)
    y = pymc.Uniform("y", value=S0[1], lower=YMIN, upper=YMAX)
    I = pymc.Uniform("I", value=I0, lower=IMIN, upper=IMAX)

    # Distributions for the cross sections

    # Just the interstitial material
    s_i_xs = P.interstitial_material.Sigma_T
    interstitial_xs = pymc.Uniform(
        "Sigma_inter",
        s_i_xs * (1 - XS_DELTA),
        s_i_xs * (1 + XS_DELTA),
        value=s_i_xs,
        observed=False,
    )

    # All the rest
    mu_xs = np.array([M.Sigma_T for M in P.materials])
    building_xs = pymc.Uniform(
        "Sigma",
        mu_xs * (1 - XS_DELTA),
        mu_xs * (1 + XS_DELTA),
        value=mu_xs,
        observed=False,
    )

    # Predictions
    @pymc.deterministic(plot=False)
    def model_pred(x=x, y=y, I=I, interstitial_xs_p=interstitial_xs,
                   building_xs_p=building_xs):
        # The _p annotation is so that I can access the actual stochastics
        # in the enclosing scope, see down a couple lines where I resample
        inter_mat = gefry3.Material(1.0, interstitial_xs_p)
        building_mats = [gefry3.Material(1.0, s) for s in building_xs_p]

        # Force the cross sections to be resampled
        interstitial_xs.set_value(interstitial_xs.random(), force=True)
        building_xs.set_value(building_xs.random(), force=True)

        return P(
            [x, y],
            I,
            inter_mat,
            building_mats,
        )

    background = pymc.Poisson(
        "b",
        DWELL * BG,
        value=DWELL * BG,
        observed=False,
        plot=False,
    )

    @pymc.stochastic(plot=False, observed=True)
    def observed_response(value=[], model_pred=model_pred, background=background):
        resp = model_pred + background
        return multivariate_normal.logpdf(data, mean=resp, cov=np.diag(resp))

    return {
        "x": x,
        "y": y,
        "I": I,
        "interstitial_xs": interstitial_xs,
        "building_xs": building_xs,
        "model_pred": model_pred,
        "background": background,
        "observed_response": observed_response,
    }
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 17 15:34:47 2014

@author: Koangel
"""

import pymc as pm

a = [1, 2, 3, 4, 5]

# Observed Poisson variable
val = pm.Poisson("obs", 0.1, value=a, observed=True)
print val.value
val.random()
print val.value

# Unobserved Poisson variable
val1 = pm.Poisson("obs1", 0.1)
print val1.value
val1.random()
print val1.value
import pymc as pm
import numpy as np
import matplotlib.pyplot as plt

parameter = pm.Exponential("poisson_param", 1)
data_generator = pm.Poisson("data_generator", parameter)
data_plus_one = data_generator + 1

# 'parents' influence another variable
# 'children' are subject to parent vars
parameter.children
data_generator.parents
data_generator.children

# 'value' attribute
parameter.value
data_generator.value
data_plus_one.value

# 'stochastic' vars - still random even if parents are known
# 'deterministic' vars - not random if parents are known

# Initializing variables
# * name argument - retrieves posterior dist
# * class-specific arguments
# * size - array of independent stochastic vars
some_var = pm.DiscreteUniform("discrete_uni_var", 0, 4)

betas = pm.Uniform("betas", 0, 1, size=10)
betas.value
lambda_1 = pm.Exponential("lambda_1", alpha) lambda_2 = pm.Exponential("lambda_2", alpha) tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data) @pm.deterministic def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2): out = np.zeros(n_count_data) out[:tau] = lambda_1 # lambda before tau is lambda1 out[tau:] = lambda_2 # lambda after (and including) tau is lambda2 return out observation = pm.Poisson("obs", lambda_, value=count_data, observed=True) model = pm.Model([observation, lambda_1, lambda_2, tau]) mcmc = pm.MCMC(model) mcmc.sample(40000, 10000, 1) lambda_1_samples = mcmc.trace('lambda_1')[:] lambda_2_samples = mcmc.trace('lambda_2')[:] tau_samples = mcmc.trace('tau')[:] # histogram of the samples: ax = plt.subplot(311) ax.set_autoscaley_on(False)
switchpoint = pm.DiscreteUniform('switchpoint', lower=0,
                                 upper=len(stormsNumbers) - 1,
                                 doc='Switchpoint[year]')

avg = np.mean(stormsNumbers)
early_mean = pm.Exponential('early_mean', beta=1. / avg)
late_mean = pm.Exponential('late_mean', beta=1. / avg)

@pm.deterministic(plot=False)
def rate(s=switchpoint, e=early_mean, l=late_mean):
    # Concatenate Poisson means
    out = np.zeros(len(stormsNumbers))
    out[:s] = e
    out[s:] = l
    return out

storms = pm.Poisson('storms', mu=rate, value=stormsNumbers, observed=True)

storms_model = pm.Model([storms, early_mean, late_mean, rate])
strmsM = pm.MCMC(storms_model)
strmsM.sample(iter=40000, burn=1000, thin=20)

plt.hist(strmsM.trace('late_mean')[:], edgecolor="k")
general.set_grid_to_plot()
plt.savefig(general.folderPath2 + "exp2_late_mean.png")
plt.clf()

plt.hist(strmsM.trace('early_mean')[:], edgecolor="k")
def compare_groups(list1, list2):
    data = list1 + list2
    count_data = np.array(data)
    n_count_data = len(count_data)
    plt.bar(np.arange(n_count_data), count_data, color="#348ABD")
    plt.xlabel("Time (days)")
    plt.ylabel("count of text-msgs received")
    plt.title("Did the viewers' ad viewing increase with the number of ads shown?")
    plt.xlim(0, n_count_data)
    # plt.show()

    # Recall count_data is the variable that holds our txt counts
    alpha = 1.0 / count_data.mean()
    print alpha

    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)
    tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(n_count_data)
        out[:tau] = lambda_1  # lambda before tau is lambda_1
        out[tau:] = lambda_2  # lambda after (and including) tau is lambda_2
        return out

    observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)
    model = pm.Model([observation, lambda_1, lambda_2, tau])
    mcmc = pm.MCMC(model)
    mcmc.sample(40000, 10000, 1)

    lambda_1_samples = mcmc.trace('lambda_1')[:]
    lambda_2_samples = mcmc.trace('lambda_2')[:]
    tau_samples = mcmc.trace('tau')[:]
    print tau_samples

    # histogram of the samples:
    ax = plt.subplot(311)
    ax.set_autoscaley_on(False)
    plt.hist(lambda_1_samples, histtype='stepfilled', bins=30, alpha=0.85,
             label="posterior of $\lambda_1$", color="#A60628", normed=True)
    plt.legend(loc="upper left")
    plt.title(r"""Posterior distributions of the variables
        $\lambda_1,\;\lambda_2,\;\tau$""")
    plt.xlim([0, 6])
    plt.ylim([0, 7])
    plt.xlabel("$\lambda_1$ value")

    ax = plt.subplot(312)
    ax.set_autoscaley_on(False)
    plt.hist(lambda_2_samples, histtype='stepfilled', bins=30, alpha=0.85,
             label="posterior of $\lambda_2$", color="#7A68A6", normed=True)
    plt.legend(loc="upper left")
    plt.xlim([0, 6])
    plt.ylim([0, 7])
    plt.xlabel("$\lambda_2$ value")

    plt.subplot(313)
    w = 1.0 / tau_samples.shape[0] * np.ones_like(tau_samples)
    plt.hist(tau_samples, bins=n_count_data, alpha=1,
             label=r"posterior of $\tau$", color="#467821",
             weights=w, rwidth=2.)
    plt.xticks(np.arange(n_count_data))
    plt.legend(loc="upper left")
    plt.ylim([0, .75])
    plt.xlim([0, len(count_data)])
    plt.xlabel(r"$\tau$ (iterations)")
    plt.ylabel("probability")
    plt.show()