def __init__(self, name, role=None, groups=None, lam=None, env=None):
    """Create a student whose waiting times are exponentially distributed.

    Parameters
    ----------
    name: str
        The name of the student.
    role, groups, env:
        Forwarded unchanged to the parent initializer. When ``groups`` is
        omitted a new empty list is created for this instance.
    lam: float or pymc.Distribution, optional
        Rate handed to the ``Exponential`` waiting-time variable. When
        omitted, a ``Uniform`` prior on [0, 1] named ``lam_<name>`` is
        created.
    """
    # A literal ``[]`` default is evaluated once and would be the same list
    # object for every instance built without ``groups``.
    if groups is None:
        groups = []
    super(PoissonStudent, self).__init__(name, role, groups, env)

    if lam is not None:
        self.lam = lam
    else:
        self.lam = Uniform('lam_%s' % self.name, lower=0, upper=1)

    # Unobserved waiting-time variable driven by ``lam``.
    self.expT = Exponential('tau_%s' % self.name, self.lam)

    self.dt = []          # recorded waiting times
    self.timestamps = []  # recorded timestamps
    self.t = 0            # running clock
    self.params = [self.lam, self.expT]
def make_model(self, data):
    """Build a two-group comparison model with a shared ``nu`` parameter.

    Parameters
    ----------
    data: dict
        Exactly two entries. The keys, taken in sorted order, name the two
        observed nodes; each value must convert to a one-dimensional array.

    Returns
    -------
    Model
        Built from the two observed nodes plus the mean and standard
        deviation priors of each group.

    Raises
    ------
    AssertionError
        If ``data`` does not hold exactly two entries or an entry is not
        one-dimensional.
    """
    assert len(data) == 2, 'There must be exactly two data arrays'
    name1, name2 = sorted(data.keys())
    y1 = np.array(data[name1])
    y2 = np.array(data[name2])
    assert y1.ndim == 1
    assert y2.ndim == 1

    # Prior hyper-parameters are derived from the pooled sample.
    y = np.concatenate((y1, y2))
    pooled_std = np.std(y)  # computed once, reused for every bound below

    mu_m = np.mean(y)
    mu_p = 0.000001 * 1 / pooled_std**2
    sigma_low = pooled_std / 1000
    sigma_high = pooled_std * 1000

    # the five prior distributions for the parameters in our model
    group1_mean = Normal('group1_mean', mu_m, mu_p)
    group2_mean = Normal('group2_mean', mu_m, mu_p)
    group1_std = Uniform('group1_std', sigma_low, sigma_high)
    group2_std = Uniform('group2_std', sigma_low, sigma_high)
    # Float literal on purpose: ``1 / 29`` is 0 under Python 2 integer
    # division, which would hand the Exponential a zero rate.
    nu_minus_one = Exponential('nu_minus_one', 1 / 29.0)

    # nu is the exponential variable shifted up by one.
    @deterministic(plot=False)
    def nu(n=nu_minus_one):
        out = n + 1
        return out

    # lam1 / lam2 are the inverse squared standard deviations.
    @deterministic(plot=False)
    def lam1(s=group1_std):
        out = 1 / s**2
        return out

    @deterministic(plot=False)
    def lam2(s=group2_std):
        out = 1 / s**2
        return out

    group1 = NoncentralT(name1, group1_mean, lam1, nu, value=y1,
                         observed=True)
    group2 = NoncentralT(name2, group2_mean, lam2, nu, value=y2,
                         observed=True)

    return Model({
        'group1': group1,
        'group2': group2,
        'group1_mean': group1_mean,
        'group2_mean': group2_mean,
        'group1_std': group1_std,
        'group2_std': group2_std,
    })
def set_models(self):
    """Attach an observed Exponential node to each of the two groups.

    For ``'control'`` and ``'variant'`` the node is stored in
    ``self.stochastics`` under the group name. Its parameter is the entry
    ``'<group>_lambda'`` of ``self.stochastics`` and its observed value is
    the attribute of ``self`` with the group's name.

    :return: None
    """
    for label in ('control', 'variant'):
        rate = self.stochastics[label + '_lambda']
        observations = getattr(self, label)
        self.stochastics[label] = Exponential(
            label, rate, value=observations, observed=True)
def update(self, statements):
    """
    Rebuild the observed waiting-time node from xAPI statements.

    Parameters
    ----------
    statements: list[Statements]
        A list of xAPI statements

    Return
    ------
    self: PoissonStudent
        The current instance of a PoissonStudent
    """
    stamps = self._get_timestamps(statements)
    self.timestamps = stamps

    # Pair every timestamp with its predecessor; the first one is measured
    # from time 0.
    previous = [0] + stamps[:-1]
    gaps = [later - earlier for earlier, later in zip(previous, stamps)]
    self.dt = np.array(gaps, dtype=float)

    # Same node name as before, now carrying the intervals as observed data.
    observed_tau = Exponential('tau_%s' % self.name, self.lam,
                               value=self.dt, observed=True)
    self.expT = observed_tau
    self.params.append(observed_tau)
    return self
def set_priors(self):
    """Set the prior distributions of the model parameters.

    The hyper-parameters are hardcoded and derived from the pooled
    ``control`` and ``variant`` observations. For each group three entries
    are stored in ``self.stochastics``: ``'<group>_mean'`` (Normal),
    ``'<group>_sigma'`` (Uniform) and ``'<group>_nu_minus_one'``
    (Exponential).

    :return: None
    """
    obs = np.concatenate((self.control, self.variant))
    obs_mean, obs_sigma = np.mean(obs), np.std(obs)

    # Loop-invariant hyper-parameters, computed once.
    mean_precision = 0.000001 / obs_sigma**2
    sigma_low, sigma_high = obs_sigma / 1000, obs_sigma * 1000
    # Float literal on purpose: ``1 / 29`` is 0 under Python 2 integer
    # division, which would hand the Exponential a zero rate.
    nu_rate = 1 / 29.0

    for group in ['control', 'variant']:
        self.stochastics[group + '_mean'] = Normal(
            group + '_mean', obs_mean, mean_precision)
        self.stochastics[group + '_sigma'] = Uniform(
            group + '_sigma', sigma_low, sigma_high)
        self.stochastics[group + '_nu_minus_one'] = Exponential(
            group + '_nu_minus_one', nu_rate)
def _model(data, robust=False):
    """Create the nodes of a two-variable model and return them by name.

    Parameters
    ----------
    data:
        Only ``data.T`` is used, as the observed value of the likelihood
        node. Presumably a 2 x N array of paired observations - TODO
        confirm against the caller.
    robust: bool
        When it compares equal to ``True`` the likelihood is the custom
        multivariate-t node ``mult_t``; otherwise it is ``MvNormal``.

    Returns
    -------
    dict
        ``locals()`` of this function: every local name defined below
        (including ``data`` and ``robust``) mapped to its object. Renaming
        a local therefore changes the returned keys.
    """
    # priors might be adapted here to be less flat
    mu = Normal('mu', 0, 0.000001, size=2)
    sigma = Uniform('sigma', 0, 1000, size=2)
    rho = Uniform('r', -1, 1)

    # we have a further parameter (prior) for the robust case
    if robust == True:
        # NOTE(review): the trailing positional ``1`` is presumably the
        # node's initial value - confirm against the Exponential signature.
        nu = Exponential('nu', 1 / 29., 1)

        # we model nu as an Exponential plus one
        @pymc.deterministic
        def nuplus(nu=nu):
            return nu + 1

    # Inverse of the 2x2 matrix [[s0^2, rho*s0*s1], [rho*s0*s1, s1^2]].
    @pymc.deterministic
    def precision(sigma=sigma, rho=rho):
        ss1 = float(sigma[0] * sigma[0])
        ss2 = float(sigma[1] * sigma[1])
        rss = float(rho * sigma[0] * sigma[1])
        return inv(np.mat([[ss1, rss], [rss, ss2]]))

    if robust == True:
        # log-likelihood of multivariate t-distribution
        @pymc.stochastic(observed=True)
        def mult_t(value=data.T, mu=mu, tau=precision, nu=nuplus):
            k = float(tau.shape[0])
            res = 0
            # Accumulate the per-row log-density over the rows of ``value``.
            for r in value:
                delta = r - mu
                enum1 = gammaln((nu + k) / 2.) + 0.5 * log(det(tau))
                denom = (k / 2.) * log(nu * pi) + gammaln(nu / 2.)
                enum2 = (-(nu + k) / 2.) * log(1 + (1 / nu) * delta.dot(tau).dot(delta.T))
                result = enum1 + enum2 - denom
                res += result[0]
            # ``res`` is indexed as a 2-D object here, so at least one row
            # must have been accumulated.
            return res[0, 0]
    else:
        mult_n = MvNormal('mult_n', mu=mu, tau=precision, value=data.T, observed=True)

    return locals()
from numpy.ma import masked_array
import numpy as np

# Yearly counts; the two -999 entries are placeholders for missing values.
disasters_array = np.array([
    4, 5, 4, 0, 1, 4, 3, 4, 0, 6,
    3, 3, 4, 0, 2, 6, 3, 3, 5, 4,
    5, 3, 1, 4, 4, 1, 5, 5, 3, 4,
    2, 5, 2, 2, 3, 4, 2, 1, 3, -999,
    2, 1, 1, 1, 1, 3, 0, 0, 1, 0,
    1, 1, 0, 0, 3, 1, 0, 3, 2, 2,
    0, 1, 1, 1, 0, 1, 0, 1, 0, 0,
    0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
    3, 3, 1, -999, 2, 1, 1, 1, 1, 2,
    4, 2, 0, 0, 1, 4, 0, 0, 0, 1,
    0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
    1,
])

# Index at which the rate changes.
switch = DiscreteUniform('switch', lower=0, upper=110)

# Rate before the switch.
early_mean = Exponential('early_mean', beta=1)

# Rate from the switch onwards.
late_mean = Exponential('late_mean', beta=1)


@deterministic(plot=False)
def rates(s=switch, e=early_mean, l=late_mean):
    """Allocate appropriate mean to time series"""
    # One slot per observation: ``e`` before index ``s``, ``l`` from ``s`` on.
    series = np.empty(len(disasters_array))
    series[:s], series[s:] = e, l
    return series
def test_mixture_of_mixture(self):
    """Check a two-level mixture's logp against a NumPy/SciPy reference.

    A Normal mixture and a LogNormal mixture (``nbr`` components each) are
    combined in an outer two-component mixture observed at
    ``np.exp(self.norm_x)``. The element-wise mixture logp and the total
    model logp are compared with ``mixmixlogp`` at the initial point and
    again after overwriting two entries of that point.
    """
    # Tolerance depends on the configured float width.
    if aesara.config.floatX == "float32":
        rtol = 1e-4
    else:
        rtol = 1e-7
    nbr = 4
    with Model() as model:
        # component distributions, with Exponential priors on their ``mu``
        g_comp = Normal.dist(
            mu=Exponential("mu_g", lam=1.0, shape=nbr, transform=None), sigma=1, shape=nbr
        )
        l_comp = LogNormal.dist(
            mu=Exponential("mu_l", lam=1.0, shape=nbr, transform=None), sigma=1, shape=nbr
        )
        # weight vector for the mixtures
        g_w = Dirichlet("g_w", a=floatX(np.ones(nbr) * 0.0000001), transform=None, shape=(nbr,))
        l_w = Dirichlet("l_w", a=floatX(np.ones(nbr) * 0.0000001), transform=None, shape=(nbr,))
        # the two inner mixtures
        g_mix = Mixture.dist(w=g_w, comp_dists=g_comp)
        l_mix = Mixture.dist(w=l_w, comp_dists=l_comp)
        # mixture of mixtures
        mix_w = Dirichlet("mix_w", a=floatX(np.ones(2)), transform=None, shape=(2,))
        mix = Mixture("mix", w=mix_w, comp_dists=[g_mix, l_mix], observed=np.exp(self.norm_x))

    test_point = model.recompute_initial_point()

    def mixmixlogp(value, point):
        # Reference computation; returns (prior logp, per-element mixture logp).
        # Note: this local ``floatX`` is the dtype name and shadows the
        # ``floatX`` callable used in the model block above.
        floatX = aesara.config.floatX
        priorlogp = (
            st.dirichlet.logpdf(
                x=point["g_w"],
                alpha=np.ones(nbr) * 0.0000001,
            ).astype(floatX)
            + st.expon.logpdf(x=point["mu_g"]).sum(dtype=floatX)
            + st.dirichlet.logpdf(
                x=point["l_w"],
                alpha=np.ones(nbr) * 0.0000001,
            ).astype(floatX)
            + st.expon.logpdf(x=point["mu_l"]).sum(dtype=floatX)
            + st.dirichlet.logpdf(
                x=point["mix_w"],
                alpha=np.ones(2),
            ).astype(floatX)
        )
        # Inner Normal mixture: log-sum-exp over its components.
        complogp1 = st.norm.logpdf(x=value, loc=point["mu_g"]).astype(floatX)
        mixlogp1 = logsumexp(
            np.log(point["g_w"]).astype(floatX) + complogp1, axis=-1, keepdims=True
        )
        # Inner LogNormal mixture: log-sum-exp over its components.
        complogp2 = st.lognorm.logpdf(value, 1.0, 0.0, np.exp(point["mu_l"])).astype(floatX)
        mixlogp2 = logsumexp(
            np.log(point["l_w"]).astype(floatX) + complogp2, axis=-1, keepdims=True
        )
        # Outer mixture over the two inner mixtures.
        complogp_mix = np.concatenate((mixlogp1, mixlogp2), axis=1)
        mixmixlogpg = logsumexp(
            np.log(point["mix_w"]).astype(floatX) + complogp_mix, axis=-1, keepdims=False
        )
        return priorlogp, mixmixlogpg

    # Column vector so the per-component arrays broadcast along the last axis.
    value = np.exp(self.norm_x)[:, None]
    priorlogp, mixmixlogpg = mixmixlogp(value, test_point)

    # check logp of mixture
    assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol)

    # check model logp
    assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point), rtol=rtol)

    # change the input point and check logp again
    test_point["g_w"] = np.asarray([0.1, 0.1, 0.2, 0.6])
    test_point["mu_g"] = np.exp(np.random.randn(nbr))
    priorlogp, mixmixlogpg = mixmixlogp(value, test_point)
    assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol)
    assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point), rtol=rtol)
class PoissonStudent(Student):
    """
    Student model that generates activity according to a Poisson
    distribution.

    Parameters
    ----------
    name: str
        The name of the student
    role, groups, env:
        Forwarded unchanged to the parent initializer. When ``groups`` is
        omitted a new empty list is created for this instance.
    lam: float or pymc.Distribution
        `lambda` parameter for the Poisson distribution. When omitted, a
        ``Uniform`` prior on [0, 1] named ``lam_<name>`` is created.
    """

    def __init__(self, name, role=None, groups=None, lam=None, env=None):
        # A literal ``[]`` default is evaluated once and would be the same
        # list object for every instance built without ``groups``.
        if groups is None:
            groups = []
        super(PoissonStudent, self).__init__(name, role, groups, env)

        if lam is not None:
            self.lam = lam
        else:
            self.lam = Uniform('lam_%s' % self.name, lower=0, upper=1)

        # Unobserved waiting-time variable driven by ``lam``.
        self.expT = Exponential('tau_%s' % self.name, self.lam)

        self.dt = []          # recorded waiting times
        self.timestamps = []  # recorded timestamps
        self.t = 0            # running clock
        self.params = [self.lam, self.expT]

    def study(self):
        """
        Draw one waiting time, advance the clock and emit a statement.

        Return
        ------
        s: dict
            Statement with keys ``actor``, ``verb``, ``object`` and
            ``timestamp``. It is also appended to
            ``env._statements[name]`` when an environment is set.
        """
        # NOTE(review): ``update`` replaces ``self.dt`` with a NumPy array,
        # which has no ``append``; calling ``study`` after ``update`` looks
        # unsupported - confirm the intended call order.
        tau = self.expT.random()
        self.dt.append(tau)
        self.t += tau
        s = {
            'actor': self.name,
            'verb': 'studied',
            'object': 'resource',
            'timestamp': self.t
        }
        if self.env is not None:
            self.env._statements[self.name].append(s)
        return s

    def _get_timestamps(self, statements):
        """
        Extract timestamps from xAPI statement

        Parameters
        ----------
        statements: list[Statement]
            The xAPI statements used to compute time intervals.

        Return
        ------
        timestamps: list
            A sorted list of timestamps

        Raises
        ------
        WrongAssignment
            If a statement's ``actor`` differs from this student's name.
        """
        timestamps = []
        for statement in statements:
            actor, name = statement['actor'], self.name
            if actor != name:
                raise WrongAssignment('Statement with actor %s assigned to'
                                      ' %s' % (actor, name))
            timestamps.append(statement['timestamp'])
        return sorted(timestamps)

    def update(self, statements):
        """
        Updates the sample graph using the history of xAPI statements

        Parameters
        ----------
        statements: list[Statements]
            A list of xAPI statements

        Return
        ------
        self: PoissonStudent
            The current instance of a PoissonStudent
        """
        self.timestamps = self._get_timestamps(statements)

        # Pair every timestamp with its predecessor; the first one is
        # measured from time 0.
        tt = zip([0] + self.timestamps[:-1], self.timestamps)
        dt = [y - x for x, y in tt]
        self.dt = np.array(dt, dtype=float)

        # Same node name as in ``__init__``, now carrying the intervals as
        # observed data.
        self.expT = Exponential('tau_%s' % self.name, self.lam,
                                value=self.dt, observed=True)
        # NOTE(review): the earlier ``expT`` stays in ``params``, so each
        # call adds another node with the same name - confirm this is
        # intended.
        self.params.append(self.expT)
        return self
from pymc.distributions import Impute
import numpy as np

# Yearly counts; the two None entries are placeholders for missing values.
disasters_array = np.array([
    4, 5, 4, 0, 1, 4, 3, 4, 0, 6,
    3, 3, 4, 0, 2, 6, 3, 3, 5, 4,
    5, 3, 1, 4, 4, 1, 5, 5, 3, 4,
    2, 5, 2, 2, 3, 4, 2, 1, 3, None,
    2, 1, 1, 1, 1, 3, 0, 0, 1, 0,
    1, 1, 0, 0, 3, 1, 0, 3, 2, 2,
    0, 1, 1, 1, 0, 1, 0, 1, 0, 0,
    0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
    3, 3, 1, None, 2, 1, 1, 1, 1, 2,
    4, 2, 0, 0, 1, 4, 0, 0, 0, 1,
    0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
    1,
])

# Index at which the rate changes.
s = DiscreteUniform('s', lower=0, upper=110)

# Rate before the switch.
e = Exponential('e', beta=1)

# Rate from the switch onwards.
l = Exponential('l', beta=1)


@deterministic(plot=False)
def r(s=s, e=e, l=l):
    """Allocate appropriate mean to time series"""
    # One slot per observation: ``e`` before index ``s``, ``l`` from ``s`` on.
    series = np.empty(len(disasters_array))
    series[:s], series[s:] = e, l
    return series
print("Noise = {} Jy/Beam".format(sigma))
print("Base Flux = {} Jy/Beam".format(fluxscale))

# Sampler variables: one x/y position pair and one flux term per atom.
natoms = 1
xpos, ypos, flux = [], [], []
for i in range(natoms):
    xpos += [Uniform('xpos{}'.format(i), lower=0, upper=511)]
    ypos += [Uniform('ypos{}'.format(i), lower=0, upper=511)]
    flux += [Exponential('flux{}'.format(i), beta=sigma)]


@deterministic
def chisq(x=xpos, y=ypos, f=flux):
    # Each atom is (x, y, scaled flux); the flux term enters through exp().
    atoms = [(x[k], y[k], fluxscale * np.exp(f[k])) for k in range(natoms)]
    residual = sk.deconvolve(atoms)
    return base + (1. / sigmasq) * residual


@potential
def logfitness(c=chisq):
    # Log-probability contribution is minus one half of ``chisq``.
    return -0.5 * c
def __init__(self, fname, playedto=None):
    """Build the per-team goal model with a home advantage per team.

    Parameters
    ----------
    fname, playedto:
        Passed straight to ``League(fname, playedto)``.

    Attributes created
    ------------------
    goal_rate, def_rate, home_adv:
        Object arrays indexed by ``team_id`` holding one prior per team.
    match_rate:
        Two observed Poisson nodes per played game (home at ``2 * game``,
        away at ``2 * game + 1``).
    match_goals_future:
        Two unobserved Poisson nodes per future game, same layout.
    outcome_future:
        One node per future game: 1 when the home goals exceed the away
        goals, -1 when they are fewer, 0 when equal.
    league:
        The loaded ``League`` object.
    """
    super(LeagueMultiHomeModel, self).__init__()
    league = League(fname, playedto)
    N = len(league.teams)

    def outcome_eval(home=None, away=None):
        # Sign of the goal difference; falls through to None when no
        # comparison holds.
        if home > away:
            return 1
        if home < away:
            return -1
        if home == away:
            return 0

    def clip_rate(val):
        # Floor the rate at 0.2 before it is used as a Poisson mean.
        if val > 0.2:
            return val
        else:
            return 0.2

    def clipped_poisson(name, raw_rate, clip_name, **poisson_kwargs):
        # Poisson node whose mean is ``raw_rate`` passed through clip_rate.
        clipped = Deterministic(eval=clip_rate,
                                parents={'val': raw_rate},
                                doc='clipped goal rate',
                                name=clip_name)
        return Poisson(name, mu=clipped, **poisson_kwargs)

    n_played = len(league.games)
    n_future = len(league.future_games)

    self.goal_rate = np.empty(N, dtype=object)
    self.home_adv = np.empty(N, dtype=object)
    self.def_rate = np.empty(N, dtype=object)
    self.match_rate = np.empty(n_played * 2, dtype=object)
    # One slot per FUTURE game. Sizing this by the played games raised
    # IndexError below whenever future games outnumbered played ones.
    self.outcome_future = np.empty(n_future, dtype=object)
    self.match_goals_future = np.empty(n_future * 2, dtype=object)
    self.league = league

    for t in league.teams.values():
        self.goal_rate[t.team_id] = Exponential(
            'goal_rate_%i' % t.team_id, beta=1.)
        self.def_rate[t.team_id] = Normal(
            'def_rate_%i' % t.team_id, tau=1., mu=0.)
        self.home_adv[t.team_id] = Normal(
            'home_adv_%i' % t.team_id, tau=1., mu=0.)

    # Played games: scores are attached as observed values.
    for game in range(n_played):
        match = league.games[game]
        home_id = match.hometeam.team_id
        away_id = match.awayteam.team_id
        self.match_rate[2 * game] = clipped_poisson(
            'match_rate_%i' % (2 * game),
            self.goal_rate[home_id] - self.def_rate[away_id]
            + self.home_adv[home_id],
            'clipped_h_%i' % game,
            value=match.homescore, observed=True)
        self.match_rate[2 * game + 1] = clipped_poisson(
            'match_rate_%i' % (2 * game + 1),
            self.goal_rate[away_id] - self.def_rate[home_id],
            'clipped_a_%i' % game,
            value=match.awayscore, observed=True)

    # Future games: goals are unobserved; entry [0] is the home team and
    # entry [1] the away team of each fixture.
    for game in range(n_future):
        fixture = league.future_games[game]
        home_id = fixture[0].team_id
        away_id = fixture[1].team_id
        self.match_goals_future[2 * game] = clipped_poisson(
            'match_goals_future_%i_home' % game,
            self.goal_rate[home_id] - self.def_rate[away_id]
            + self.home_adv[home_id],
            'clipped_fut_h_%i' % game)
        self.match_goals_future[2 * game + 1] = clipped_poisson(
            'match_goals_future_%i_away' % game,
            self.goal_rate[away_id] - self.def_rate[home_id],
            'clipped_fut_a_%i' % game)
        self.outcome_future[game] = Deterministic(
            eval=outcome_eval,
            parents={
                'home': self.match_goals_future[2 * game],
                'away': self.match_goals_future[2 * game + 1]
            },
            name='match_outcome_future_%i' % game,
            dtype=int,
            doc='The outcome of the match')
def __init__(self, fname, playedto=None):
    """Build the goal model with per-team Gaussian-process submodels.

    Parameters
    ----------
    fname, playedto:
        Passed straight to ``League(fname, playedto)``.

    Attributes created
    ------------------
    goal_rate, def_rate:
        Object arrays indexed by ``team_id`` holding one Exponential prior
        per team; each is the constant of that team's GP mean function.
    goal_var, def_var:
        Object arrays indexed by ``team_id`` holding one ``GPSubmodel`` per
        team, evaluated on a mesh of ``league.n_days + 2`` points.
    home_adv:
        A single Uniform prior on [0, 2] shared by all teams.
    match_rate:
        Two observed Poisson nodes per played game (home at ``2 * game``,
        away at ``2 * game + 1``).
    match_goals_future:
        Two unobserved Poisson nodes per future game, same layout.
    outcome_future:
        One node per future game: 1 when the home goals exceed the away
        goals, -1 when they are fewer, 0 when equal.
    league:
        The loaded ``League`` object.
    """
    super(LeagueFullModel, self).__init__()
    league = League(fname, playedto)
    N = len(league.teams)

    def outcome_eval(home=None, away=None):
        # Sign of the goal difference; falls through to None when no
        # comparison holds.
        if home > away:
            return 1
        if home < away:
            return -1
        if home == away:
            return 0

    def clip_rate(val):
        # Floor the rate at 0.2 before it is used as a Poisson mean.
        if val > 0.2:
            return val
        else:
            return 0.2

    def linfun(x, c):
        # Constant function of x with value c, shaped like x.
        return 0. * x + c

    def clipped_poisson(name, raw_rate, clip_name, **poisson_kwargs):
        # Poisson node whose mean is ``raw_rate`` passed through clip_rate.
        clipped = Deterministic(eval=clip_rate,
                                parents={'val': raw_rate},
                                doc='clipped goal rate',
                                name=clip_name)
        return Poisson(name, mu=clipped, **poisson_kwargs)

    n_played = len(league.games)
    n_future = len(league.future_games)

    self.goal_rate = np.empty(N, dtype=object)
    self.def_rate = np.empty(N, dtype=object)
    self.goal_var = np.empty(N, dtype=object)
    self.def_var = np.empty(N, dtype=object)
    self.match_rate = np.empty(n_played * 2, dtype=object)
    # One slot per FUTURE game. Sizing this by the played games raised
    # IndexError below whenever future games outnumbered played ones.
    self.outcome_future = np.empty(n_future, dtype=object)
    self.match_goals_future = np.empty(n_future * 2, dtype=object)
    self.home_adv = Uniform(name='home_adv', lower=0., upper=2.0)
    self.league = league

    # Mesh on which every GP submodel is evaluated.
    fmesh = np.arange(0., league.n_days + 2.)

    for t in league.teams.values():
        # Prior parameters of the covariances (goal then defence).
        diff_degree_g = pm.Uniform('diff_degree_g_%i' % t.team_id, 1., 3)
        amp_g = pm.Uniform('amp_g_%i' % t.team_id, .01, 2.)
        scale_g = pm.Uniform('scale_g_%i' % t.team_id, 1., 10.)
        diff_degree_d = pm.Uniform('diff_degree_d_%i' % t.team_id, 1., 3)
        amp_d = pm.Uniform('amp_d_%i' % t.team_id, .01, 2.)
        scale_d = pm.Uniform('scale_d_%i' % t.team_id, 1., 10.)

        # Each covariance node is valued as a Covariance object.
        @pm.deterministic(name='C_d%i' % t.team_id)
        def C_d(eval_fun=gp.matern.euclidean, diff_degree=diff_degree_d,
                amp=amp_d, scale=scale_d):
            return gp.NearlyFullRankCovariance(
                eval_fun, diff_degree=diff_degree, amp=amp, scale=scale)

        @pm.deterministic(name='C_g%i' % t.team_id)
        def C_g(eval_fun=gp.matern.euclidean, diff_degree=diff_degree_g,
                amp=amp_g, scale=scale_g):
            return gp.NearlyFullRankCovariance(
                eval_fun, diff_degree=diff_degree, amp=amp, scale=scale)

        self.goal_rate[t.team_id] = Exponential(
            'goal_rate_%i' % t.team_id, beta=1)
        self.def_rate[t.team_id] = Exponential(
            'def_rate_%i' % t.team_id, beta=1)

        # Mean functions are constant at the team's rate prior.
        @pm.deterministic(name='M_d%i' % t.team_id)
        def M_d(eval_fun=linfun, c=self.def_rate[t.team_id]):
            return gp.Mean(eval_fun, c=c)

        @pm.deterministic(name='M_g%i' % t.team_id)
        def M_g(eval_fun=linfun, c=self.goal_rate[t.team_id]):
            return gp.Mean(eval_fun, c=c)

        self.def_var[t.team_id] = gp.GPSubmodel(
            'smd_%i' % t.team_id, M_d, C_d, fmesh)
        self.goal_var[t.team_id] = gp.GPSubmodel(
            'smg_%i' % t.team_id, M_g, C_g, fmesh)

    # Played games: scores are attached as observed values.
    for game in range(n_played):
        # Mesh index for this game, derived from its position in the list.
        gd = int(game / (league.n_teams / 2))
        assert (gd < league.n_days)
        match = league.games[game]
        home_id = match.hometeam.team_id
        away_id = match.awayteam.team_id
        self.match_rate[2 * game] = clipped_poisson(
            'match_rate_%i' % (2 * game),
            self.goal_var[home_id].f_eval[gd]
            - self.def_var[away_id].f_eval[gd] + self.home_adv,
            'clipped_h_%i' % game,
            value=match.homescore, observed=True)
        self.match_rate[2 * game + 1] = clipped_poisson(
            'match_rate_%i' % (2 * game + 1),
            self.goal_var[away_id].f_eval[gd]
            - self.def_var[home_id].f_eval[gd],
            'clipped_a_%i' % game,
            value=match.awayscore, observed=True)

    # Future games: goals are unobserved and all use mesh index n_days;
    # entry [0] is the home team and entry [1] the away team.
    for game in range(n_future):
        gd = league.n_days
        fixture = league.future_games[game]
        home_id = fixture[0].team_id
        away_id = fixture[1].team_id
        self.match_goals_future[2 * game] = clipped_poisson(
            'match_goals_future_%i_home' % game,
            self.goal_var[home_id].f_eval[gd]
            - self.def_var[away_id].f_eval[gd] + self.home_adv,
            'clipped_fut_h_%i' % game)
        self.match_goals_future[2 * game + 1] = clipped_poisson(
            'match_goals_future_%i_away' % game,
            self.goal_var[away_id].f_eval[gd]
            - self.def_var[home_id].f_eval[gd],
            'clipped_fut_a_%i' % game)
        self.outcome_future[game] = Deterministic(
            eval=outcome_eval,
            parents={
                'home': self.match_goals_future[2 * game],
                'away': self.match_goals_future[2 * game + 1]
            },
            name='match_outcome_future_%i' % game,
            dtype=int,
            doc='The outcome of the match')
Claim: advertising on the tube increased the rate of conversions. (Conversions_t | start_t, before_rate, after_rate) ~ Poisson(rate_t) start_t ~ DiscreteUniform(first_period, last_period) //can be any period with equal probability rate_t = ( before_rate ... //start_t // .. after_rate) ''' conversions_data = np.array( [1, 2, 1, 2, 3, 4, 1, 2, 3, 5, 7, 3, 8, 7, 4, 5, 7]) start_t = DiscreteUniform("start_t", lower=0, upper=len(conversions_data)) before_mean = Exponential('before_mean', beta=1.) after_mean = Exponential('after_mean', beta=1.) @deterministic(plot=False) def rate(start_period=start_t, before_period_mean=before_mean, after_period_mean=after_mean): output = np.empty(len(conversions_data)) output[:start_period] = before_period_mean output[start_period:] = after_period_mean return output conversions = Poisson("conversions", mu=rate,