def test_common_errors(self):
    with pytest.raises(ValueError) as e:
        with pm.Model() as m:
            Normal("n", observed=[[1]], total_size=[2, Ellipsis, 2, 2])
            m.logp()
    assert "Length of" in str(e.value)
    with pytest.raises(ValueError) as e:
        with pm.Model() as m:
            Normal("n", observed=[[1]], total_size=[2, 2, 2])
            m.logp()
    assert "Length of" in str(e.value)
    with pytest.raises(TypeError) as e:
        with pm.Model() as m:
            Normal("n", observed=[[1]], total_size="foo")
            m.logp()
    assert "Unrecognized" in str(e.value)
    with pytest.raises(TypeError) as e:
        with pm.Model() as m:
            Normal("n", observed=[[1]], total_size=["foo"])
            m.logp()
    assert "Unrecognized" in str(e.value)
    with pytest.raises(ValueError) as e:
        with pm.Model() as m:
            Normal("n", observed=[[1]], total_size=[Ellipsis, Ellipsis])
            m.logp()
    assert "Double Ellipsis" in str(e.value)
def test_simple(self):
    # Priors
    mu = Normal('mu', mu=0, tau=0.0001)
    s = Uniform('s', lower=0, upper=100, value=10)
    tau = s**-2

    # Likelihood with missing data (m is the masked data array from the test
    # module: 100 points, 2 of them masked as missing)
    x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

    # Instantiate sampler
    M = MCMC([mu, s, tau, x])

    # Run sampler
    M.sample(10000, 5000, progress_bar=0)

    # Check length of value
    assert_equal(len(x.value), 100)

    # Check size of trace: 5000 retained samples of the 2 imputed values
    tr = M.trace('x')()
    assert_equal(shape(tr), (5000, 2))

    # Check for standard normal output: roughly 95% of imputed values
    # should fall within two standard deviations
    sd2 = [-2 < i < 2 for i in ravel(tr)]
    assert_almost_equal(sum(sd2) / 10000., 0.95, decimal=1)
def test_density_scaling_with_generator(self):
    # We have different size generators
    def true_dens():
        g = gen1()
        for i, point in enumerate(g):
            yield stats.norm.logpdf(point).sum() * 10

    t = true_dens()
    # We have same size models
    with pm.Model() as model1:
        Normal("n", observed=gen1(), total_size=100)
        p1 = aesara.function([], model1.logp())
    with pm.Model() as model2:
        gen_var = generator(gen2())
        Normal("n", observed=gen_var, total_size=100)
        p2 = aesara.function([], model2.logp())

    for i in range(10):
        _1, _2, _t = p1(), p2(), next(t)
        decimals = select_by_precision(float64=7, float32=1)
        np.testing.assert_almost_equal(_1, _t, decimal=decimals)  # Value O(-50,000)
        np.testing.assert_almost_equal(_1, _2)
def test_allinmodel():
    model1 = Model()
    model2 = Model()
    with model1:
        x1 = Normal("x1", mu=0, sigma=1)
        y1 = Normal("y1", mu=0, sigma=1)
    with model2:
        x2 = Normal("x2", mu=0, sigma=1)
        y2 = Normal("y2", mu=0, sigma=1)

    x1 = model1.rvs_to_values[x1]
    y1 = model1.rvs_to_values[y1]
    x2 = model2.rvs_to_values[x2]
    y2 = model2.rvs_to_values[y2]

    starting.allinmodel([x1, y1], model1)
    starting.allinmodel([x1], model1)
    with pytest.raises(ValueError, match=r"Some variables not in the model: \['x2', 'y2'\]"):
        starting.allinmodel([x2, y2], model1)
    with pytest.raises(ValueError, match=r"Some variables not in the model: \['x2'\]"):
        starting.allinmodel([x2, y1], model1)
    with pytest.raises(ValueError, match=r"Some variables not in the model: \['x2'\]"):
        starting.allinmodel([x2], model1)
def test_container_parents(self):
    A = Normal('A', 0, 1)
    B = Normal('B', 0, 1)
    C = Normal('C', [A, B], 1)
    assert_equal(Container([A, B]).value, [A.value, B.value])
    assert_equal(C.parents.value['mu'], [A.value, B.value])
def test_density_scaling(self):
    with pm.Model() as model1:
        Normal("n", observed=[[1]], total_size=1)
        p1 = aesara.function([], model1.logp())
    with pm.Model() as model2:
        Normal("n", observed=[[1]], total_size=2)
        p2 = aesara.function([], model2.logp())
    assert p1() * 2 == p2()
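# A minimal sketch of the scaling rule the test above exercises, assuming the
# same pm/aesara/np/stats imports used throughout this suite: with n observed
# rows and total_size=N, model.logp() equals the unscaled likelihood times N/n.
def demo_total_size_scaling():
    with pm.Model() as model:
        Normal("n", observed=[[1.0]], total_size=10)
    scaled = aesara.function([], model.logp())()
    # one standard-normal point, scaled up to a "population" of 10
    np.testing.assert_allclose(scaled, 10 * stats.norm.logpdf(1.0), rtol=1e-5)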
def make_model(self, data):
    assert len(data) == 2, 'There must be exactly two data arrays'
    name1, name2 = sorted(data.keys())
    y1 = np.array(data[name1])
    y2 = np.array(data[name2])
    assert y1.ndim == 1
    assert y2.ndim == 1
    y = np.concatenate((y1, y2))

    mu_m = np.mean(y)
    mu_p = 0.000001 * 1 / np.std(y)**2
    sigma_low = np.std(y) / 1000
    sigma_high = np.std(y) * 1000

    # the five prior distributions for the parameters in our model
    group1_mean = Normal('group1_mean', mu_m, mu_p)
    group2_mean = Normal('group2_mean', mu_m, mu_p)
    group1_std = Uniform('group1_std', sigma_low, sigma_high)
    group2_std = Uniform('group2_std', sigma_low, sigma_high)
    nu_minus_one = Exponential('nu_minus_one', 1 / 29)

    @deterministic(plot=False)
    def nu(n=nu_minus_one):
        out = n + 1
        return out

    @deterministic(plot=False)
    def lam1(s=group1_std):
        out = 1 / s**2
        return out

    @deterministic(plot=False)
    def lam2(s=group2_std):
        out = 1 / s**2
        return out

    group1 = NoncentralT(name1, group1_mean, lam1, nu, value=y1, observed=True)
    group2 = NoncentralT(name2, group2_mean, lam2, nu, value=y2, observed=True)
    return Model({
        'group1': group1,
        'group2': group2,
        'group1_mean': group1_mean,
        'group2_mean': group2_mean,
        'group1_std': group1_std,
        'group2_std': group2_std,
    })
def test_free_rv(self):
    with pm.Model() as model4:
        Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2])
        p4 = aesara.function([], model4.logp())

    with pm.Model() as model5:
        n = Normal("n", total_size=[2, Ellipsis, 2], size=(2, 2))
        p5 = aesara.function([n.tag.value_var], model5.logp())

    assert p4() == p5(pm.floatX([[1]]))
    assert p4() == p5(pm.floatX([[1, 1], [1, 1]]))
def simple_arbitrary_det():
    scalar_type = at.dscalar if aesara.config.floatX == "float64" else at.fscalar

    @as_op(itypes=[scalar_type], otypes=[scalar_type])
    def arbitrary_det(value):
        return value

    with Model() as model:
        a = Normal("a")
        b = arbitrary_det(a)
        Normal("obs", mu=b.astype("float64"), observed=floatX_array([1, 3, 5]))

    return model.compute_initial_point(), model
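# Hedged usage note for simple_arbitrary_det: an Op wrapped with @as_op has no
# gradient implementation, so gradient-based samplers such as NUTS cannot step
# through it and a gradient-free step method has to be assigned explicitly.
# A sketch, assuming the same pm import used elsewhere in this suite:
def demo_arbitrary_det_sampling():
    _, model = simple_arbitrary_det()
    with model:
        # Slice sampling needs only logp evaluations, not gradients
        trace = pm.sample(100, step=pm.Slice(), chains=1, progressbar=False)
    return trace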
def toy_model(tau=10000, prior='Beta0.5'):
    b_obs = 200
    f_AB = 400
    f_CB = 1000
    f_CA = 600
    A = np.array([0, f_AB, f_CA, 0, f_CB, 0])

    if prior == 'Normal':
        ABp = Normal('ABp', mu=0.5, tau=100, trace=True)
        CBp = Normal('CBp', mu=0.5, tau=100, trace=True)
        CAp = Normal('CAp', mu=0.5, tau=100, trace=True)
    elif prior == 'Uniform':
        ABp = Uniform('ABp', lower=0.0, upper=1.0, trace=True)
        CBp = Uniform('CBp', lower=0.0, upper=1.0, trace=True)
        CAp = Uniform('CAp', lower=0.0, upper=1.0, trace=True)
    elif prior == 'Beta0.25':
        ABp = Beta('ABp', alpha=0.25, beta=0.25, trace=True)
        CBp = Beta('CBp', alpha=0.25, beta=0.25, trace=True)
        CAp = Beta('CAp', alpha=0.25, beta=0.25, trace=True)
    elif prior == 'Beta0.5':
        ABp = Beta('ABp', alpha=0.5, beta=0.5, trace=True)
        CBp = Beta('CBp', alpha=0.5, beta=0.5, trace=True)
        CAp = Beta('CAp', alpha=0.5, beta=0.5, trace=True)
    elif prior == 'Beta2':
        ABp = Beta('ABp', alpha=2, beta=2, trace=True)
        CBp = Beta('CBp', alpha=2, beta=2, trace=True)
        CAp = Beta('CAp', alpha=2, beta=2, trace=True)
    elif prior == 'Gamma':
        ABp = Gamma('ABp', alpha=1, beta=0.5, trace=True)
        CBp = Gamma('CBp', alpha=1, beta=0.5, trace=True)
        CAp = Gamma('CAp', alpha=1, beta=0.5, trace=True)

    AB1 = ABp
    AB3 = 1 - ABp
    CB4 = CBp
    CB5 = 1 - CBp
    CA42 = CAp
    CA52 = 1 - CAp

    b = Normal('b', mu=f_AB * AB3 + f_CB * CB4 + f_CA * CA42, tau=tau,
               value=b_obs, observed=True, trace=True)
    return locals()
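# Hypothetical usage sketch for toy_model (PyMC2-era API): pass the returned
# locals() dict to MCMC, which picks out the stochastic nodes and ignores the
# rest, then sample. The iteration and burn-in counts here are illustrative
# assumptions, not values from the original code.
def demo_toy_model():
    from pymc import MCMC
    M = MCMC(toy_model(tau=10000, prior='Beta0.5'))
    M.sample(20000, 10000, progress_bar=0)
    # posterior mean of the AB route-split parameter
    return M.trace('ABp')[:].mean()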
def multidimensional_model():
    mu = -2.1
    tau = 1.3
    with Model() as model:
        x = Normal('x', mu, tau, shape=(3, 2), testval=.1 * np.ones((3, 2)))
    return model.test_point, model, (mu, tau**-1)
def grid_model(fname, tau=10000, sparse=True):
    data = loadmat('data/%s.mat' % fname)
    A = data['phi']
    b_obs = data['f']
    x_true = data['real_a']
    block_sizes = data['block_sizes']

    alpha = 0.3 if sparse else 1

    # construct graphical model
    # -------------------------------------------------------------------------
    # construct sparse prior on all routes
    x_blocks = [Dirichlet('x%d' % i, np.array([alpha] * (x[0])), trace=True)
                for (i, x) in enumerate(block_sizes)]
    x_blocks_expanded = [[x[xi] for xi in range(i - 1)]
                         for (i, x) in zip(block_sizes, x_blocks)]
    [x.append(1 - sum(x)) for x in x_blocks_expanded]
    x_pri = list(chain(*x_blocks_expanded))

    # construct skinny normal distributions with observations
    mus = [dot(a, x_pri) for a in A]
    b = [Normal('b%s' % i, mu=mu, tau=tau, value=b_obsi[0], observed=True)
         for (i, (mu, b_obsi)) in enumerate(zip(mus, b_obs))]
    return locals()
def model(prob):
    # observations
    obs = prob.get_observation_values()

    # priors
    variables = []
    sig = Uniform('sig', 0.0, 100.0, value=1.)
    variables.append(sig)
    a1 = Uniform('a1', 0.0, 5.0)
    variables.append(a1)
    k1 = Uniform('k1', 0.01, 2.0)
    variables.append(k1)
    a2 = Uniform('a2', 0.0, 5.0)
    variables.append(a2)
    k2 = Uniform('k2', 0.01, 2.0)
    variables.append(k2)

    # model
    @deterministic()
    def response(pars=variables, prob=prob):
        values = []
        for par in pars:
            values.append(par)
        values = array(values)
        prob.set_parameters(values)
        prob.forward()
        return prob.get_simvalues()

    # likelihood
    y = Normal('y', mu=response, tau=1.0 / sig**2, value=obs, observed=True)
    variables.append(y)
    return variables
def test_nested_tuple_container(self):
    A = Normal('A', 0, 1)
    try:
        Container(([A],))
        raise AssertionError('A NotImplementedError should have resulted.')
    except NotImplementedError:
        pass
def create_model(AA, bb_obs, EQ, x_true, sparse=False):
    output = {}  # change variable names
    alpha = 0.3 if sparse else 1

    # construct graphical model
    # -------------------------------------------------------------------------
    import time
    with pm.Model() as model:
        ivar = 10000
        START = time.time()

        # construct sparse prior on all routes.
        # The Dirichlet distribution doesn't give values that sum to 1
        # (continuous distribution), so instead we normalize draws from a
        # Gamma distribution.
        # CAUTION: x_pri is route splits
        x_pri = array(generate_route_flows_from_incidence_matrix(EQ, alpha=alpha))

        # construct skinny normal distributions with observations
        # FIXME: sparse dot product (i.e. Mscale.dot(x_pri)) gives error:
        # TypeError: no supported conversion for types: (dtype('float64'), dtype('O'))
        mus_bb = array(AA.todense().dot(x_pri))
        bb = [Normal('b%s' % i, mu=mu, tau=ivar, observed=obsi)
              for (i, (mu, obsi)) in enumerate(zip(mus_bb, bb_obs))]
    print('Time to build model: %ds' % (time.time() - START))
    return model, alpha, x_pri
def simple_model():
    mu = -2.1
    tau = 1.3
    with Model() as model:
        x = Normal('x', mu, tau, shape=2, testval=[.1] * 2)
    return model.test_point, model, (mu, tau**-1)
def test_mixture_list_of_normals(self):
    with Model() as model:
        w = Dirichlet("w", floatX(np.ones_like(self.norm_w)), shape=self.norm_w.size)
        mu = Normal("mu", 0.0, 10.0, shape=self.norm_w.size)
        tau = Gamma("tau", 1.0, 1.0, shape=self.norm_w.size)
        Mixture(
            "x_obs",
            w,
            [Normal.dist(mu[0], tau=tau[0]), Normal.dist(mu[1], tau=tau[1])],
            observed=self.norm_x,
        )
        step = Metropolis()
        trace = sample(5000, step, random_seed=self.random_seed,
                       progressbar=False, chains=1)

    assert_allclose(np.sort(trace["w"].mean(axis=0)), np.sort(self.norm_w),
                    rtol=0.1, atol=0.1)
    assert_allclose(np.sort(trace["mu"].mean(axis=0)), np.sort(self.norm_mu),
                    rtol=0.1, atol=0.1)
def simple_model():
    mu = -2.1
    tau = 1.3
    with Model() as model:
        Normal("x", mu, tau=tau, size=2, initval=floatX_array([0.1, 0.1]))
    return model.compute_initial_point(), model, (mu, tau**-0.5)
def multidimensional_model():
    mu = -2.1
    tau = 1.3
    with Model() as model:
        Normal("x", mu, tau=tau, size=(3, 2), initval=0.1 * np.ones((3, 2)))
    return model.compute_initial_point(), model, (mu, tau**-0.5)
def _create_proposal_random_variables(self):
    centers = self.proposal_center
    scales = self.proposal_scales
    random_variables = dict()
    for key in self._mcmc.params.keys():
        variance = (centers[key] * scales[key])**2
        # PyMC's Normal is parameterized by precision tau = 1 / variance
        random_variables[key] = Normal(key, centers[key], 1 / variance)
    return random_variables
def test_gradient_with_scaling(self):
    with pm.Model() as model1:
        genvar = generator(gen1())
        m = Normal("m")
        Normal("n", observed=genvar, total_size=1000)
        grad1 = aesara.function([m.tag.value_var],
                                at.grad(model1.logpt(), m.tag.value_var))
    with pm.Model() as model2:
        m = Normal("m")
        shavar = aesara.shared(np.ones((1000, 100)))
        Normal("n", observed=shavar)
        grad2 = aesara.function([m.tag.value_var],
                                at.grad(model2.logpt(), m.tag.value_var))

    for i in range(10):
        shavar.set_value(np.ones((100, 100)) * i)
        g1 = grad1(1)
        g2 = grad2(1)
        np.testing.assert_almost_equal(g1, g2)
def createHistogramFitModel(data_bins, simulation_bins, scaleGuess):
    scale = Normal('scale', mu=scaleGuess, tau=sigToTau(.10 * scaleGuess))
    scaled_sim = scale * simulation_bins
    baseline_observed = Poisson("baseline_observed", mu=scaled_sim,
                                value=data_bins, observed=True)
    return locals()
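# Hypothetical usage sketch for createHistogramFitModel (PyMC2-era API), with
# made-up bin counts; sigToTau is assumed to be the tau = sigma**-2 helper used
# by the other models in this file.
def demo_histogram_fit():
    from pymc import MCMC
    data_bins = np.array([12, 30, 55, 28, 10])
    simulation_bins = np.array([10, 28, 52, 30, 11])
    M = MCMC(createHistogramFitModel(data_bins, simulation_bins, scaleGuess=1.0))
    M.sample(10000, 5000, progress_bar=0)
    # posterior mean of the histogram scale factor
    return M.trace('scale')[:].mean()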
def createSignalModelExponential(data):
    """
    Toy model that treats the first ~10% of the waveform as an exponential.
    Does a good job of finding the start time (t_0).
    Since I made this as a toy, it's super brittle. Waveform must be normalized.
    """
    print("Creating model")
    switchpoint = DiscreteUniform('switchpoint', lower=0, upper=len(data))
    noise_sigma = HalfNormal('noise_sigma', tau=sigToTau(.01))
    exp_sigma = HalfNormal('exp_sigma', tau=sigToTau(.05))

    # Modeling these parameters this way is why wf needs to be normalized
    exp_rate = Uniform('exp_rate', lower=0, upper=.1)
    exp_scale = Uniform('exp_scale', lower=0, upper=.1)

    timestamp = np.arange(0, len(data), dtype=float)

    @deterministic(plot=False, name="test")
    def uncertainty_model(s=switchpoint, n=noise_sigma, e=exp_sigma):
        '''Concatenate noise sigmas around the switchpoint'''
        out = np.empty(len(data))
        out[:s] = n
        out[s:] = e
        return out

    @deterministic
    def tau(eps=uncertainty_model):
        return np.power(eps, -2)

    @deterministic(plot=False)
    def baseline_model(s=switchpoint, r=exp_rate, scale=exp_scale):
        out = np.zeros(len(data))
        out[s:] = scale * (np.exp(r * (timestamp[s:] - s)) - 1.)
        return out

    baseline_observed = Normal("baseline_observed", mu=baseline_model, tau=tau,
                               value=data, observed=True)
    return locals()
def createSignalModel(data):
    # set up your model parameters
    switchpoint = DiscreteUniform('switchpoint', lower=0, upper=len(data))
    early_sigma = HalfNormal('early_sigma', tau=sigToTau(1))
    late_sigma = HalfNormal('late_sigma', tau=sigToTau(1))
    early_mu = Normal('early_mu', mu=.5, tau=sigToTau(1))
    late_mu = Normal('late_mu', mu=.5, tau=sigToTau(1))

    # set up the model for uncertainty (i.e., the noise) and the signal
    # (i.e., the step function)

    @deterministic(plot=False, name="test")
    def uncertainty_model(s=switchpoint, n=early_sigma, e=late_sigma):
        # Concatenate uncertainty sigmas (or taus, or whatever) around t0;
        # cast to int so the rounded switchpoint can be used as an index
        s = int(np.around(s))
        out = np.empty(len(data))
        out[:s] = n
        out[s:] = e
        return out

    @deterministic
    def tau(eps=uncertainty_model):
        # pymc uses this tau parameter instead of sigma to model a Gaussian
        return np.power(eps, -2)

    @deterministic(plot=False, name="siggenmodel")
    def signal_model(s=switchpoint, e=early_mu, l=late_mu):
        # makes the step function using the means
        out = np.zeros(len(data))
        out[:s] = e
        out[s:] = l
        return out

    # Full model: normally distributed noise around a step function
    baseline_observed = Normal("baseline_observed", mu=signal_model, tau=tau,
                               value=data, observed=True)
    return locals()
def set_models(self):
    """Define models for each group.

    :return: None
    """
    for group in ['control', 'variant']:
        self.stochastics[group] = Normal(
            group,
            self.stochastics[group + '_mean'],
            self.stochastics[group + '_sigma'],
            value=getattr(self, group),
            observed=True,
        )
def set_priors(self):
    """Set the parameters' prior distributions.

    Hardcoded behavior for now, with non-committing prior knowledge.

    :return: None
    """
    obs = np.concatenate((self.control, self.variant))
    obs_mean, obs_sigma = np.mean(obs), np.std(obs)
    for group in ['control', 'variant']:
        self.stochastics[group + '_mean'] = Normal(group + '_mean', obs_mean,
                                                   0.000001 / obs_sigma ** 2)
        self.stochastics[group + '_sigma'] = Uniform(group + '_sigma',
                                                     obs_sigma / 1000,
                                                     obs_sigma * 1000)
def test_multidim_scaling(self):
    with pm.Model() as model0:
        Normal("n", observed=[[1, 1], [1, 1]], total_size=[])
        p0 = aesara.function([], model0.logp())
    with pm.Model() as model1:
        Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2])
        p1 = aesara.function([], model1.logp())
    with pm.Model() as model2:
        Normal("n", observed=[[1], [1]], total_size=[2, 2])
        p2 = aesara.function([], model2.logp())
    with pm.Model() as model3:
        Normal("n", observed=[[1, 1]], total_size=[2, 2])
        p3 = aesara.function([], model3.logp())
    with pm.Model() as model4:
        Normal("n", observed=[[1]], total_size=[2, 2])
        p4 = aesara.function([], model4.logp())
    with pm.Model() as model5:
        Normal("n", observed=[[1]], total_size=[2, Ellipsis, 2])
        p5 = aesara.function([], model5.logp())

    _p0 = p0()
    assert (
        np.allclose(_p0, p1())
        and np.allclose(_p0, p2())
        and np.allclose(_p0, p3())
        and np.allclose(_p0, p4())
        and np.allclose(_p0, p5())
    )
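# Hedged note on the Ellipsis form checked above: in a total_size list, entries
# before the Ellipsis scale the leading axes and entries after it scale the
# trailing axes. For a (1, 1) observation, total_size=[2, Ellipsis, 2] therefore
# multiplies the logp by (2/1) * (2/1) = 4, the same factor as total_size=[2, 2].
# A minimal sketch using the same pm/aesara/np imports as the test:
def demo_ellipsis_scaling():
    with pm.Model() as m_plain:
        Normal("n", observed=[[1.0]])
    base = aesara.function([], m_plain.logp())()
    with pm.Model() as m_scaled:
        Normal("n", observed=[[1.0]], total_size=[2, Ellipsis, 2])
    scaled = aesara.function([], m_scaled.logp())()
    np.testing.assert_allclose(scaled, 4 * base, rtol=1e-5)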
def _create_proposal_random_variables(self):
    centers = self.proposal_center
    scales = self.proposal_scales
    random_variables = dict()
    # materialize the key view so 'std_dev' can be appended under Python 3
    params = list(self._mcmc.params.keys())
    if 'std_dev' in self._mcmc.pymc_mod_order:
        params.append('std_dev')
    for key in params:
        variance = (scales[key])**2
        random_variables[key] = Normal(key, centers[key], 1 / variance)
    return random_variables
def set_priors(self):
    """Set the parameters' prior distributions.

    Hardcoded behavior for now, with non-committing prior knowledge.

    :return: None
    """
    obs = np.concatenate((self.control, self.variant))
    mean, sigma, med = np.mean(obs), np.std(obs), np.median(obs)
    location = np.log(med)
    scale = np.sqrt(2 * np.log(mean / med))
    for group in ['control', 'variant']:
        self.stochastics[group + '_location'] = Normal(group + '_location',
                                                       location,
                                                       0.000001 / sigma ** 2)
        self.stochastics[group + '_scale'] = Uniform(group + '_scale',
                                                     scale / 1000,
                                                     scale * 1000)
def test_non_missing(self):
    """
    Test to ensure that masks without any missing values are not imputed.
    """
    fake_data = rnormal(0, 1, size=10)
    m = ma.masked_array(fake_data, fake_data == -999)

    # Priors
    mu = Normal('mu', mu=0, tau=0.0001)
    s = Uniform('s', lower=0, upper=100, value=10)
    tau = s**-2

    # Likelihood with missing data
    x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

    # Instantiate sampler
    M = MCMC([mu, s, tau, x])

    # Run sampler
    M.sample(20000, 19000, progress_bar=0)

    # Ensure likelihood does not have a trace
    assert_raises(AttributeError, x.__getattribute__, 'trace')