def group_model(self):
    with pm.Model() as gmodel:
        # uniform priors on h
        m = pm.DiscreteUniform('h', 0., 20.)
        std = pm.InverseGamma('s', 3., 0.5)
        mean = 2 * m + 1
        alphas = np.arange(1., 101., 5.)
        p = self.discreteNormal(alphas, mean, std)

        for i in range(self.nruns):
            hab_ten = pm.Categorical('h_{}'.format(i), p)
            alpha = tt.as_tensor_variable([hab_ten])

            probs_a, probs_r = self.inferrer(alpha)

            # use a DensityDist
            pm.Categorical('actions_{}'.format(i), probs_a, observed=self.actions[i])
            pm.Categorical('rewards_{}'.format(i), probs_r, observed=self.rewards[i])

    return gmodel
def model_static(mcmc_in, alpha_prior=(1, 1), beta_prior=(0, 1), asymmetric_accuracy=True,
                 hashtag_treatment="strong", draws=500, tune=500):
    '''
    alpha prior = (mu, sd)
    beta prior = (mu, sd)
    if z_obs is present then oracle
    '''
    model = pm.Model()
    with model:
        if hashtag_treatment == "strong":
            rho_prior = np.ones((2, 2))
            rho_prior[1, 1] = 49
            rho = pm.Dirichlet('rho', a=rho_prior, shape=(mcmc_in.K, 2, 2))
            z = pm.Categorical('z', p=rho[mcmc_in.kk_lkup, mcmc_in.flag_hashtag],
                               observed=np.ma.masked_values(mcmc_in.z_obs, value=-999),
                               testval=mcmc_in.z_init, shape=mcmc_in.N)
        elif hashtag_treatment == "weak":
            rho_prior = np.ones((2, 2))
            rho = pm.Dirichlet('rho', a=rho_prior, shape=(mcmc_in.K, 2, 2))
            z = pm.Categorical('z', p=rho[mcmc_in.kk_lkup, mcmc_in.flag_hashtag],
                               observed=np.ma.masked_values(mcmc_in.z_obs, value=-999),
                               testval=mcmc_in.z_init, shape=mcmc_in.N)
        elif hashtag_treatment == "oracle" or hashtag_treatment == "none":
            rho_prior = np.ones((1, 2))
            rho = pm.Dirichlet('rho', a=rho_prior, shape=(mcmc_in.K, 2))
            z = pm.Categorical('z', p=rho[mcmc_in.kk_lkup],
                               observed=np.ma.masked_values(mcmc_in.z_obs, value=-999),
                               testval=mcmc_in.z_init, shape=mcmc_in.N)

        beta_prime = pm.Normal('beta_prime', mu=beta_prior[0], sd=beta_prior[1], shape=mcmc_in.K)

        if asymmetric_accuracy:
            alpha = pm.Normal('alpha', mu=alpha_prior[0], sd=alpha_prior[1], shape=(mcmc_in.J, 2))

            def logp(r, z=z, alpha=alpha, beta_prime=beta_prime):
                out = T.switch(
                    T.eq(z[mcmc_in.ii], r),
                    -1 * T.log(1 + T.exp(-1 * alpha[mcmc_in.jj, z[mcmc_in.ii]] * T.exp(beta_prime[mcmc_in.kk]))),
                    -1 * alpha[mcmc_in.jj, z[mcmc_in.ii]] * T.exp(beta_prime[mcmc_in.kk])
                    - 1 * T.log(1 + T.exp(-1 * alpha[mcmc_in.jj, z[mcmc_in.ii]] * T.exp(beta_prime[mcmc_in.kk])))
                )
                return T.sum(out)
        else:
            alpha = pm.Normal('alpha', mu=alpha_prior[0], sd=alpha_prior[1], shape=mcmc_in.J)

            def logp(r, z=z, alpha=alpha, beta_prime=beta_prime):
                out = T.switch(
                    T.eq(z[mcmc_in.ii], r),
                    -1 * T.log(1 + T.exp(-1 * alpha[mcmc_in.jj] * T.exp(beta_prime[mcmc_in.kk]))),
                    -1 * alpha[mcmc_in.jj] * T.exp(beta_prime[mcmc_in.kk])
                    - 1 * T.log(1 + T.exp(-1 * alpha[mcmc_in.jj] * T.exp(beta_prime[mcmc_in.kk])))
                )
                return T.sum(out)

        r = pm.DensityDist('r', logp, observed=mcmc_in.r_obs, shape=len(mcmc_in.r_obs))

    with model:
        trace = pm.sample(draws=draws, tune=tune, chains=1)

    return trace
def model_dawidskene(mcmc_in, alpha_prior, asymmetric_accuracy=True,
                     hashtag_treatment="strong", draws=500, tune=500):
    '''
    alpha prior = (K, J, 2) matrix of pseudocounts for dirichlet
    if z_obs is present then oracle
    '''
    model = pm.Model()
    with model:
        if hashtag_treatment == "strong":
            rho_prior = np.ones((2, 2))
            rho_prior[1, 1] = 49
            rho = pm.Dirichlet('rho', a=rho_prior, shape=(mcmc_in.K, 2, 2))
            z = pm.Categorical('z', p=rho[mcmc_in.kk_lkup, mcmc_in.flag_hashtag],
                               observed=np.ma.masked_values(mcmc_in.z_obs, value=-999),
                               testval=mcmc_in.z_init, shape=mcmc_in.N)
        elif hashtag_treatment == "weak":
            rho_prior = np.ones((2, 2))
            rho = pm.Dirichlet('rho', a=rho_prior, shape=(mcmc_in.K, 2, 2))
            z = pm.Categorical('z', p=rho[mcmc_in.kk_lkup, mcmc_in.flag_hashtag],
                               observed=np.ma.masked_values(mcmc_in.z_obs, value=-999),
                               testval=mcmc_in.z_init, shape=mcmc_in.N)
        elif hashtag_treatment == "oracle" or hashtag_treatment == "none":
            rho_prior = np.ones((1, 2))
            rho = pm.Dirichlet('rho', a=rho_prior, shape=(mcmc_in.K, 2))
            z = pm.Categorical('z', p=rho[mcmc_in.kk_lkup],
                               observed=np.ma.masked_values(mcmc_in.z_obs, value=-999),
                               testval=mcmc_in.z_init, shape=mcmc_in.N)

        if asymmetric_accuracy:
            alpha = pm.Dirichlet("alpha", a=alpha_prior, shape=(2, mcmc_in.K, mcmc_in.J, 2))

            def logp(r, z=z, alpha=alpha):
                out = T.switch(
                    T.eq(z[mcmc_in.ii], r),
                    T.log(alpha[z[mcmc_in.ii], mcmc_in.kk, mcmc_in.jj, 1]),
                    T.log(1 - alpha[z[mcmc_in.ii], mcmc_in.kk, mcmc_in.jj, 1])
                )
                return T.sum(out)
        else:
            alpha = pm.Dirichlet("alpha", a=alpha_prior, shape=(mcmc_in.K, mcmc_in.J, 2))

            def logp(r, z=z, alpha=alpha):
                out = T.switch(
                    T.eq(z[mcmc_in.ii], r),
                    T.log(alpha[mcmc_in.kk, mcmc_in.jj, 1]),
                    T.log(1 - alpha[mcmc_in.kk, mcmc_in.jj, 1])
                )
                return T.sum(out)

        r = pm.DensityDist('r', logp, observed=mcmc_in.r_obs, shape=len(mcmc_in.r_obs))

    with model:
        step1 = pm.NUTS(vars=[rho, alpha])
        step2 = pm.CategoricalGibbsMetropolis(vars=[z.missing_values])
        trace = pm.sample(draws=draws, tune=tune, step=[step1, step2], chains=1)

    return trace
def pymc3_dist(self, name, hypers):
    p = self.p
    if len(hypers) == 1:
        hyper_dist = hypers[0][0]
        hyper_name = hypers[0][1]
        p = hyper_dist.pymc3_dist(hyper_name, [])

    if self.num_elements == -1:
        return pm.Categorical(name, p=p)
    else:
        return pm.Categorical(name, p=p, shape=self.num_elements)
def test_2d_w(self):
    nd = self.nd
    npop = self.npop
    mus = self.mus
    size = 100
    with pm.Model() as model:
        m = pm.NormalMixture(
            "m",
            w=np.ones((nd, npop)) / npop,
            mu=mus,
            sigma=1e-5,
            comp_shape=(nd, npop),
            shape=nd,
        )
        z = pm.Categorical("z", p=np.ones(npop) / npop, shape=nd)
        mu = tt.as_tensor_variable([mus[i, z[i]] for i in range(nd)])
        latent_m = pm.Normal("latent_m", mu=mu, sigma=1e-5, shape=nd)

    m_val = m.random(size=size)
    latent_m_val = latent_m.random(size=size)
    assert m_val.shape == latent_m_val.shape
    # Test that each element in axis = -1 can come from independent
    # components
    assert not all(np.all(np.diff(m_val) < 1e-3, axis=-1))
    assert not all(np.all(np.diff(latent_m_val) < 1e-3, axis=-1))

    self.samples_from_same_distribution(m_val, latent_m_val)
    self.logp_matches(m, latent_m, z, npop, model=model)
def build_ann_conv(init):
    network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)

    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.tanh,
        W=init)

    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.tanh,
        W=init)

    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    network = lasagne.layers.DenseLayer(
        network, num_units=256,
        nonlinearity=lasagne.nonlinearities.tanh,
        b=init, W=init)

    network = lasagne.layers.DenseLayer(
        network, num_units=10,
        nonlinearity=lasagne.nonlinearities.softmax,
        b=init, W=init)

    prediction = lasagne.layers.get_output(network)

    return pm.Categorical('out', prediction, observed=target_var)
def exercise4():
    with pm.Model() as basic_model:
        probabilities = [0.3, 0.7, 0.95]
        likelihood_params = np.array(
            [np.divide(1, 3) * (1 + 2 * prob) for prob in probabilities])

        group = pm.Categorical('group', p=np.array([1, 1, 1]))
        p = pm.Deterministic('p', theano.shared(likelihood_params)[group])
        positive_answers = pm.Binomial('positive_answers', n=num_questions, p=p,
                                       observed=[7])

        trace = pm.sample(4000, progressbar=True)

    az.plot_trace(trace)
    plt.show()
    az.plot_posterior(trace)
    plt.show()
    az.summary(trace)
    return trace
def sample_sticky_only(model_matrix, sample_kwargs=None):
    # load the data
    x_sc = model_matrix['x_sc']
    subj_idx = model_matrix['subj_idx']
    y = model_matrix['y']
    n_subj = model_matrix['n_subj']
    n, d = model_matrix['x_mu_kal'].shape

    if sample_kwargs is None:
        sample_kwargs = dict(draws=2000, njobs=2, tune=2000, init='advi+adapt_diag')

    with pm.Model() as hier_sticky:
        mu_1 = pm.Normal('mu_beta_stick', mu=0., sd=100.)
        sigma_1 = pm.HalfCauchy('sigma_stick', beta=100)

        b_1 = pm.Normal('beta_sticky', mu=mu_1, sd=sigma_1, shape=n_subj)

        rho = tt.tile(tt.reshape(b_1[subj_idx], (n, 1)), d) * x_sc
        p_hat = softmax(rho)

        # Data likelihood
        yl = pm.Categorical('yl', p=p_hat, observed=y)

        # inference!
        trace_kal_scram = pm.sample(**sample_kwargs)

    return hier_sticky, trace_kal_scram
def build_softmax_linear(X, y, force_softmax=False):
    """
    Sample from Bayesian Softmax Linear Regression
    """
    num_features = X.shape[1]
    num_classes = len(np.unique(y))
    logistic_regression = num_classes == 2
    Xt = theano.shared(X)

    if logistic_regression and not force_softmax:
        print('running logistic regression')
        with pm.Model() as model:
            W = pm.Normal('W', 0, sd=1e6, shape=num_features)
            b = pm.Flat('b')
            logit = Xt.dot(W) + b
            p = tt.nnet.sigmoid(logit)
            observed = pm.Bernoulli('obs', p=p, observed=y)
    else:
        with pm.Model() as model:
            W = pm.Normal('W', 0, sd=1e6, shape=(num_features, num_classes))
            b = pm.Flat('b', shape=num_classes)
            logit = Xt.dot(W) + b
            p = tt.nnet.softmax(logit)
            observed = pm.Categorical('obs', p=p, observed=y)

    return model
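# Minimal usage sketch for build_softmax_linear above (illustrative only; `X_toy` and
# `y_toy` are hypothetical toy arrays, not part of the original code). With three
# classes the softmax branch is taken, so `pm.Categorical` is the likelihood.
def _example_softmax_usage():
    X_toy = np.random.randn(100, 3)
    y_toy = np.random.randint(0, 3, size=100)
    model = build_softmax_linear(X_toy, y_toy)
    with model:
        # default NUTS sampling over the returned model
        trace = pm.sample(500, tune=500, chains=1)
    return trace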
def get_dirichlet_multinomial_dpmixture(X, params):
    n_doc, n_feat = X.shape
    n_comp = params['n_trunc']

    with pm.Model() as model:
        # sample P ~ DP(G0)
        beta = pm.Beta('beta', 1., params['dp_alpha'], shape=n_comp)
        p_comp = pm.Deterministic(
            'p_comp',
            beta * tt.concatenate([[1], tt.extra_ops.cumprod(1 - beta)[:-1]]))
        pkw = pm.Dirichlet('pkw',
                           a=params['pkw_dirichlet_dist_alpha'] * np.ones(n_feat),
                           shape=(n_comp, n_feat))

        # sample X ~ P
        z = pm.Categorical('z', p=p_comp, shape=n_doc)
        x = pm.Multinomial('x', n=X.sum(axis=1), p=pkw[z], observed=X)

    return model
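# Usage sketch for the truncated DP mixture builder above (illustrative; `X_counts`
# is a hypothetical document-term count matrix). The `params` keys mirror the ones
# the function reads: 'n_trunc', 'dp_alpha', and 'pkw_dirichlet_dist_alpha'.
def _example_dp_multinomial_usage():
    X_counts = np.random.randint(0, 5, size=(50, 20))
    params = {'n_trunc': 10, 'dp_alpha': 1.0, 'pkw_dirichlet_dist_alpha': 0.5}
    model = get_dirichlet_multinomial_dpmixture(X_counts, params)
    with model:
        # PyMC3 assigns a compound step (NUTS + categorical Gibbs for z) automatically
        trace = pm.sample(500, tune=500, chains=1)
    return trace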
def add_observations():
    with hierarchical_model.pymc_model:
        for i in range(hierarchical_model.n_groups):
            observations.append(
                pm.Categorical(f'y_{i}', levelProbs[:, i],
                               observed=hierarchical_model.y[i]))
def _define_model(self):
    self.model = pm.Model()
    with self.model:
        p = pm.Dirichlet('p', a=np.array([1., 1., 1.]),
                         shape=self.number_of_hidden_states)
        p_min_potential = pm.Potential('p_min_potential',
                                       tt.switch(tt.min(p) < .1, -np.inf, 0))

        means = pm.Normal('means', mu=[0, 0, 0], sd=2.0,
                          shape=self.number_of_hidden_states)

        # break symmetry
        order_means_potential = pm.Potential(
            'order_means_potential',
            tt.switch(means[1] - means[0] < 0, -np.inf, 0)
            + tt.switch(means[2] - means[1] < 0, -np.inf, 0))

        sd = pm.HalfCauchy('sd', beta=2, shape=self.number_of_hidden_states)

        category = pm.Categorical('category', p=p, shape=self.number_of_data)

        points = pm.Normal('obs', mu=means[category], sd=sd[category],
                           observed=self.data)
def group_static_ucb_mes_model(X, explore_param_alpha=.01, explore_param_beta=.01,
                               temperature_alpha=1., temperature_beta=10.,
                               method_alpha=.001, maxk=10, samples=200):
    X = X.copy().transpose((2, 1, 3, 0))
    nparticipants = X.shape[3]
    nchoices = X.shape[2]
    ntrials = X.shape[1]
    actions = theano.shared(X[-1])
    mean = theano.shared(X[0])
    var = theano.shared(X[1])
    mes = theano.shared(X[3])
    #mes = theano.shared(X[4])
    random_likelihood = theano.shared(
        (1. / nchoices) * np.ones(shape=(ntrials, maxk, nchoices, nparticipants)))

    with pm.Model() as model:
        explore_param = pm.Gamma('var_param', explore_param_alpha,
                                 explore_param_beta, shape=maxk)
        temperature = pm.Gamma('temperature', temperature_alpha,
                               temperature_beta, shape=maxk)
        method = pm.Dirichlet('method', np.ones(3) * method_alpha, shape=(maxk, 3))
        alpha = pm.Gamma('alpha', 10**-10., 10**-10.)
        beta = pm.Beta('beta', 1., alpha, shape=maxk)
        weights = pm.Deterministic('w', stick_breaking(beta))
        assignments = pm.Categorical('assignments', weights, shape=nparticipants)
        obs = pm.Potential('obs',
                           sparse_group_static_ucb_mes_likelihood(
                               actions, mean, var, mes, random_likelihood, method,
                               explore_param, temperature, assignments, maxk))
        trace = pm.sample(samples, njobs=4)

    return trace
def test_logistic_regression():
    data_X = np.array([[0, 0], [1, 0], [0, 1], [1, 1]])
    data_y = np.array([0, 1, 0, 1])

    with pymc3.Model() as model:
        X = pymc3.Normal(name='X', mu=1, sd=2, observed=data_X)

        alpha1_precision = pymc3.Uniform(name='alpha1_precision')
        alpha1 = pymc3.Normal(name='alpha1', mu=0, sd=1.0 / alpha1_precision)
        alpha2_precision = pymc3.Uniform(name='alpha2_precision')
        alpha2 = pymc3.Normal(name='alpha2', mu=0, sd=1.0 / alpha2_precision)

        beta1_precision = pymc3.Uniform(name='beta1_precision')
        beta2_precision = pymc3.Uniform(name='beta2_precision')
        beta1 = pymc3.Normal(name='beta1', mu=0.0, sd=1.0 / beta1_precision, shape=2)
        beta2 = pymc3.Normal(name='beta2', mu=0.0, sd=1.0 / beta2_precision, shape=2)

        v1 = alpha1 + beta1.dot(X.T)
        v2 = alpha2 + beta2.dot(X.T)
        denom = T.exp(v1) + T.exp(v2)
        sm1 = T.exp(v1) / denom
        sm2 = T.exp(v2) / denom
        # stack the two class probabilities so the last axis indexes classes
        v = T.stack([sm1, sm2], axis=1)

        y = pymc3.Categorical(p=v, name='y', observed=data_y, shape=len(data_y))

        step = pymc3.Metropolis()
        trace = pymc3.sample(1000, step)
def run():
    data = sim()
    n_rooms = len(data)
    prior = [0.5, 0.25, 0.125, 0.0625, 0.03125]
    prior = [0.25, 0., 0.25, 0.25, 0.5]
    #p_find=np.array([0.25,0.25,0.25,0.25,0.25])
    #p_find=tt.cast([0.25,0.25,0.25,0.25,0.25],'int64')
    p_find = theano.shared(np.array([0.25, 0.25, 0.25, 0.25, 0.25]))
    tmp_find = theano.shared(np.array([0.25, 0.25, 0.25, 0.25, 0.25]))
    datas = theano.shared(data)
    print(data)

    with pm.Model() as model:
        target_loc = pm.Categorical('target_loc', p=prior)

        # Likelihood
        #room_search=pm.DiscreteUniform('room_search',1,n_rooms)
        #p = pm.math.switch(theano.tensor.eq(room_search,target_loc),p_find[target_loc], 0)
        tmp_find = tmp_find * 0
        tmp_find = tt.set_subtensor(tmp_find[target_loc], p_find[target_loc])
        #theano.printing.Print('tmp_find')(tmp_find)
        y = pm.Binomial('y', p=tmp_find, n=[5, 1., 1., 10., 10.],
                        observed=[0, 0, 0, 0, 0])

        trace = pm.sample(5000, cores=4)

    pm.plots.traceplot(trace, combined=True)
    #pm.traceplot(trace,
    #             combined=True,
    #             prior=[target_loc.distribution]);
    plt.show()
    print(pm.summary(trace))
def test_1d_w(self):
    nd = self.nd
    npop = self.npop
    mus = self.mus
    size = 100
    with pm.Model() as model:
        m = pm.NormalMixture("m", w=np.ones(npop) / npop, mu=mus, sigma=1e-5,
                             comp_shape=(nd, npop), shape=nd)
        z = pm.Categorical("z", p=np.ones(npop) / npop)
        latent_m = pm.Normal("latent_m", mu=mus[..., z], sigma=1e-5, shape=nd)

    m_val = m.random(size=size)
    latent_m_val = latent_m.random(size=size)
    assert m_val.shape == latent_m_val.shape
    # Test that each element in axis = -1 comes from the same mixture
    # component
    assert all(np.all(np.diff(m_val) < 1e-3, axis=-1))
    assert all(np.all(np.diff(latent_m_val) < 1e-3, axis=-1))

    self.samples_from_same_distribution(m_val, latent_m_val)
    self.logp_matches(m, latent_m, z, npop, model=model)
def get_beta_bernoulli_dpmixture(X, params):
    n_doc, n_feat = X.shape
    n_comp = params['n_trunc']

    with pm.Model() as model:
        # sample P ~ DP(G0)
        beta = pm.Beta('beta', 1., params['dp_alpha'], shape=n_comp)
        p_comp = pm.Deterministic(
            'p_comp',
            beta * tt.concatenate([[1], tt.extra_ops.cumprod(1 - beta)[:-1]]))
        pkw = pm.Beta('pkw', alpha=params['pkw_beta_dist_alpha'],
                      beta=params['pkw_beta_dist_beta'], shape=(n_comp, n_feat))

        # sample X ~ P
        z = pm.Categorical('z', p=p_comp, shape=n_doc)
        x = pm.Bernoulli('x', p=pkw[z], shape=(n_doc, n_feat), observed=X)

    return model
def Generate_theta_p(x, s, I, N, K, prod_f):
    #print(s, N, len(x))
    T = len(x)
    D = len(x[0])
    s = [one_hot(ss, N) for ss in s]
    # x = [[xt for _ in range(N)] for xt in x]
    x = np.array(x)
    s = np.array(s)

    model = pm.Model()
    with model:
        # Priors for unknown model parameters
        theta = pm.Normal("theta", mu=0, sigma=1, shape=(D, K)) / np.sqrt(K * D)

        p_list = []
        for t in range(T):
            #print(prod_f)
            #print(np.transpose(theta))
            wt = dot(dot(prod_f, transpose(theta)), x[t])
            swt = s[t] * wt
            sum_sw = sum(swt)
            p = exp(swt) / (1 + sum_sw)
            p0 = 1 / (1 + sum_sw)
            p_list.append(concatenate(([p0], p)))

        I_obs = pm.Categorical("I_obs", p=stack(p_list, axis=0), observed=I)

    with model:
        step = pm.Metropolis()
        trace1 = pm.sample(tune=2000, chains=1, step=step)

    return trace1["theta"][-1]
def fit(self, x, y, niters=500, **kwargs):
    """
    Train model
    """
    self.classes = np.sort(np.unique(y))
    F = x.shape[1]
    K = np.unique(y).shape[0]

    with pm.Model() as linear_model:
        # Creating the model
        beta = pm.Normal('beta', mu=0, sd=10, shape=(F, K))
        alpha = pm.Normal('alpha', mu=0, sd=10, shape=K)
        mu = tt.dot(x, beta) + alpha
        p = pm.Deterministic('p', tt.nnet.softmax(mu))
        yl = pm.Categorical('yl', p=p, observed=y)

    with linear_model:
        # trace = pm.sample(niters, njobs=1, chains=1, verbose=False)  # No U-turn sampler
        trace = pm.sample(step=pm.Metropolis(), draws=50000, njobs=1, tune=50)

    # storing the last `niters` samples from the sampler
    self.params['beta'] = trace['beta'][-niters:].copy()
    self.params['alpha'] = trace['alpha'][-niters:].copy()
def build_fc_nn(X, y, output='regression', hidden_dims=[NUM_HIDDEN]):
    """
    Build basic fully connected Bayesian neural network

    Args:
        X: data matrix
        y: targets
        output: one of SUPPORTED_OUTPUTS to specify the kind of outputs
        hidden_dims: integer list indicating the size of the hidden layers

    Returns:
        PyMC3 Bayesian neural network model
    """
    if output not in SUPPORTED_OUTPUTS:
        raise ValueError(
            'Unsupported neural network output: {}\nSupported outputs: {}'.format(
                output, SUPPORTED_OUTPUTS))

    if 'regression' == output:
        num_output_units = 1
    elif 'classification' == output:
        num_output_units = len(np.unique(y))

    floatX = theano.config.floatX
    Xt = theano.shared(X)
    num_features = X.shape[1]
    layer_dims = [num_features] + hidden_dims

    # initialize weights (switch to Xavier initialization?)
    Ws = []
    for i in range(len(layer_dims) - 1):
        in_dim = layer_dims[i]
        out_dim = layer_dims[i + 1]
        Ws.append(np.random.randn(in_dim, out_dim).astype(floatX))

    with pm.Model() as model_nn:
        for i in range(len(Ws)):
            # priors
            W_i = pm.Normal('W' + str(i), 0, sd=WEIGHT_SD, shape=Ws[i].shape,
                            testval=Ws[i])
            b_i = pm.Flat('b' + str(i), shape=Ws[i].shape[1])

            # deterministic transformations
            in_layer = a_i if i > 0 else Xt
            z_i = in_layer.dot(W_i) + b_i
            a_i = pm.math.tanh(z_i)

        # format output and plug in data
        # uses pre-activation of last layer
        if 'regression' == output:
            observed = pm.Normal('obs', mu=z_i, sd=LIKELIHOOD_SD, observed=y)
        elif 'classification' == output:
            p = tt.nnet.softmax(z_i)
            observed = pm.Categorical('obs', p=p, observed=y)

    return model_nn
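# Usage sketch for build_fc_nn above (illustrative; `X_toy`/`y_toy` are hypothetical,
# and the module-level constants NUM_HIDDEN, WEIGHT_SD, LIKELIHOOD_SD, and
# SUPPORTED_OUTPUTS are assumed to be defined elsewhere in the original module).
# ADVI is one reasonable way to fit a BNN of this size in PyMC3.
def _example_fc_nn_usage():
    X_toy = np.random.randn(200, 5).astype(theano.config.floatX)
    y_toy = np.random.randint(0, 3, size=200)
    model = build_fc_nn(X_toy, y_toy, output='classification')
    with model:
        approx = pm.fit(n=10000, method='advi')  # variational fit
        trace = approx.sample(500)               # draw posterior samples
    return trace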
def bms(L, hdi_prob=0.95, **sample_kwargs):
    """This function computes the exceedance probabilities (xp)
    and expected relative frequencies (r) from an array of log-evidences.

    Args:
        L (numpy.ndarray): Array of model log-evidences (higher is better fit).
            Array shape should be (K models; N subjects)
        **sample_kwargs: Additional arguments to the pymc.sample function.
            Currently `cores=1` seems to be necessary.

    Returns:
        dict: Dictionary with values xp and r.

    Reference:
        Stephan, K. E., Penny, W. D., Daunizeau, J., Moran, R. J., & Friston, K. J.
        (2009). Bayesian model selection for group studies. NeuroImage, 46(4),
        1004-1017.
    """
    K, N = L.shape

    with pm.Model() as bms:

        def lookup_L(L, N):
            """This function looks up the log-evidences for all N subjects,
            given the current model labels m.
            """
            return L[tt.cast(m, dtype="int32"),
                     tt.cast(tt.arange(N), dtype="int32")]

        # Priors
        alpha = pm.Uniform("alpha", 0, N, shape=K, testval=np.ones(K))

        # Model
        r = pm.Dirichlet("r", a=alpha, testval=np.ones(K) / K)
        m = pm.Categorical("m", p=r, shape=N, testval=0)

        # Look up log evidence
        ll = pm.DensityDist("ll", logp=lookup_L, observed=dict(L=L, N=N))

        # Sample
        inferencedata = pm.sample(return_inferencedata=True, **sample_kwargs)

    # Build results
    result = {}
    result["summary"] = az.summary(inferencedata, hdi_prob=hdi_prob,
                                   var_names=["alpha", "r"])
    result["xp"] = np.array([
        np.mean(inferencedata.posterior["r"].data[:, :, k]
                == inferencedata.posterior["r"].data.max(axis=-1))
        for k in range(K)
    ])
    r_unscaled = np.array([
        np.mean(inferencedata.posterior["r"].data[:, :, k]) for k in range(K)
    ])
    result["r"] = r_unscaled / r_unscaled.sum()

    return result
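# Usage sketch for bms above (illustrative; `L_toy` is a hypothetical (K models x
# N subjects) log-evidence array). Per the docstring, `cores=1` is passed through
# to pm.sample via **sample_kwargs.
def _example_bms_usage():
    L_toy = np.random.randn(3, 20)  # 3 candidate models, 20 subjects
    result = bms(L_toy, cores=1, draws=1000, tune=1000)
    print(result["xp"], result["r"])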
def test_FFBSStep():

    with pm.Model(), pytest.raises(ValueError):
        P_rv = np.eye(2)[None, ...]
        S_rv = DiscreteMarkovChain("S_t", P_rv, np.r_[1.0, 0.0], shape=10)
        S_2_rv = DiscreteMarkovChain("S_2_t", P_rv, np.r_[0.0, 1.0], shape=10)
        PoissonZeroProcess("Y_t", 9.0, S_rv + S_2_rv,
                           observed=np.random.poisson(9.0, size=10))
        # Only one variable can be sampled by this step method
        ffbs = FFBSStep([S_rv, S_2_rv])

    with pm.Model(), pytest.raises(TypeError):
        S_rv = pm.Categorical("S_t", np.r_[1.0, 0.0], shape=10)
        PoissonZeroProcess("Y_t", 9.0, S_rv,
                           observed=np.random.poisson(9.0, size=10))
        # Only `DiscreteMarkovChain`s can be sampled with this step method
        ffbs = FFBSStep([S_rv])

    with pm.Model(), pytest.raises(TypeError):
        P_rv = np.eye(2)[None, ...]
        S_rv = DiscreteMarkovChain("S_t", P_rv, np.r_[1.0, 0.0], shape=10)
        pm.Poisson("Y_t", S_rv, observed=np.random.poisson(9.0, size=10))
        # Only `SwitchingProcess`es can be used as dependent variables
        ffbs = FFBSStep([S_rv])

    np.random.seed(2032)

    poiszero_sim, _ = simulate_poiszero_hmm(30, 150)
    y_test = poiszero_sim["Y_t"]

    with pm.Model() as test_model:
        p_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        p_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        P_tt = at.stack([p_0_rv, p_1_rv])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))

        pi_0_tt = compute_steady_state(P_rv)

        S_rv = DiscreteMarkovChain("S_t", P_rv, pi_0_tt, shape=y_test.shape[0])

        PoissonZeroProcess("Y_t", 9.0, S_rv, observed=y_test)

    with test_model:
        ffbs = FFBSStep([S_rv])

    test_point = test_model.test_point.copy()
    test_point["p_0_stickbreaking__"] = poiszero_sim["p_0_stickbreaking__"]
    test_point["p_1_stickbreaking__"] = poiszero_sim["p_1_stickbreaking__"]

    res = ffbs.step(test_point)

    assert np.array_equal(res["S_t"], poiszero_sim["S_t"])
def test_pymc3_convert_dists():
    """Just a basic check that all PyMC3 RVs will convert to and from Theano RVs."""
    tt.config.compute_test_value = "ignore"
    theano.config.cxx = ""

    with pm.Model() as model:
        norm_rv = pm.Normal("norm_rv", 0.0, 1.0, observed=1.0)
        mvnorm_rv = pm.MvNormal("mvnorm_rv", np.r_[0.0], np.c_[1.0], shape=1,
                                observed=np.r_[1.0])
        cauchy_rv = pm.Cauchy("cauchy_rv", 0.0, 1.0, observed=1.0)
        halfcauchy_rv = pm.HalfCauchy("halfcauchy_rv", 1.0, observed=1.0)
        uniform_rv = pm.Uniform("uniform_rv", observed=1.0)
        gamma_rv = pm.Gamma("gamma_rv", 1.0, 1.0, observed=1.0)
        invgamma_rv = pm.InverseGamma("invgamma_rv", 1.0, 1.0, observed=1.0)
        exp_rv = pm.Exponential("exp_rv", 1.0, observed=1.0)
        halfnormal_rv = pm.HalfNormal("halfnormal_rv", 1.0, observed=1.0)
        beta_rv = pm.Beta("beta_rv", 2.0, 2.0, observed=1.0)
        binomial_rv = pm.Binomial("binomial_rv", 10, 0.5, observed=5)
        dirichlet_rv = pm.Dirichlet("dirichlet_rv", np.r_[0.1, 0.1],
                                    observed=np.r_[0.1, 0.1])
        poisson_rv = pm.Poisson("poisson_rv", 10, observed=5)
        bernoulli_rv = pm.Bernoulli("bernoulli_rv", 0.5, observed=0)
        betabinomial_rv = pm.BetaBinomial("betabinomial_rv", 0.1, 0.1, 10, observed=5)
        categorical_rv = pm.Categorical("categorical_rv", np.r_[0.5, 0.5], observed=1)
        multinomial_rv = pm.Multinomial("multinomial_rv", 5, np.r_[0.5, 0.5],
                                        observed=np.r_[2])

    # Convert to a Theano `FunctionGraph`
    fgraph = model_graph(model)

    rvs_by_name = {
        n.owner.inputs[1].name: n.owner.inputs[1] for n in fgraph.outputs
    }

    pymc_rv_names = {n.name for n in model.observed_RVs}
    assert all(
        isinstance(rvs_by_name[n].owner.op, RandomVariable) for n in pymc_rv_names)

    # Now, convert back to a PyMC3 model
    pymc_model = graph_model(fgraph)

    new_pymc_rv_names = {n.name for n in pymc_model.observed_RVs}
    assert pymc_rv_names == new_pymc_rv_names
def _create_model(self):
    with pm.Model() as self.model:

        # getting the location primers
        for layer_index in range(self.num_layers):
            setattr(self, 'w%d' % layer_index,
                    self.__get_weights(layer_index, self.weight_shapes[layer_index]))
            setattr(self, 'b%d' % layer_index,
                    self.__get_biases(layer_index, self.bias_shapes[layer_index]))

            if layer_index == 0:
                fc = pm.Deterministic(
                    'fc%d' % layer_index,
                    pm.math.tanh(pm.math.dot(self.network_input,
                                             self.weight(layer_index))
                                 + self.bias(layer_index)))
                setattr(self, 'fc%d' % layer_index, fc)
            elif 0 < layer_index < self.num_layers - 1:
                fc = pm.Deterministic(
                    'fc%d' % layer_index,
                    pm.math.tanh(pm.math.dot(getattr(self, 'fc%d' % (layer_index - 1)),
                                             self.weight(layer_index))
                                 + self.bias(layer_index)))
                setattr(self, 'fc%d' % layer_index, fc)
            else:
                self._loc = pm.Deterministic(
                    'bnn_out',
                    pm.math.sigmoid(pm.math.dot(getattr(self, 'fc%d' % (layer_index - 1)),
                                                self.weight(layer_index))
                                    + self.bias(layer_index)))

        # getting the precision / standard deviation / variance
        self.tau_rescaling = np.zeros((self.num_obs, self.network_input.shape[1]))
        for obs_index in range(self.num_obs):
            self.tau_rescaling[obs_index] += self.var_e_ranges
        self.tau_rescaling = self.tau_rescaling**2

        tau = pm.Gamma('tau', self.num_obs**2, 1.,
                       shape=(self.num_obs, self.network_input.shape[1]))
        self.tau = tau / self.tau_rescaling
        self.scale = pm.Deterministic('scale', 1. / pm.math.sqrt(self.tau))

        # learn the floats
        self.loc = pm.Deterministic(
            'loc',
            (self.upper_rescalings - self.lower_rescalings) * self._loc
            + self.lower_rescalings)
        self.out_floats = pm.Normal('out_floats', self.loc[:, self.floats],
                                    tau=self.tau[:, self.floats],
                                    observed=self.network_output[:, self._floats])

        # learn the integers
        self.int_scale = pm.Deterministic('int_scale', 1. * self.scale)
        self.out_ints = DiscreteLaplace('out_ints', loc=self.loc[:, self.ints],
                                        scale=self.int_scale[:, self.ints],
                                        observed=self.network_output[:, self._ints])

        # learn the categories
        dist_counter, cat_var_index = 0, 0

        self.alpha = pm.Deterministic('alpha', (self.loc + 1.) * self.scale)
        self.num_cats = 0
        for var_e_index, var_e_type in enumerate(self.var_e_types):
            if var_e_type == 'categorical' and self.var_e_begin[var_e_index] == var_e_index:
                begin, end = self.var_e_begin[var_e_index], self.var_e_end[var_e_index]
                var_e_name = self.var_e_names[var_e_index]
                param_index = np.argwhere(self.var_p_names == var_e_name)[0, 0]
                self.param_index = param_index

                out_dirichlet = pm.Dirichlet('dirich_%d' % dist_counter,
                                             a=self.alpha[:, begin:end],
                                             shape=(self.num_obs, int(end - begin)))
                out_cats = pm.Categorical('out_cats_%d' % dist_counter,
                                          p=out_dirichlet,
                                          observed=self.network_output[:, param_index])
                self.num_cats += 1
                dist_counter += 1
def sample_heir_rbf_kal(model_matrix, sample_kwargs=None):
    # load the data
    x_mu_rbf = model_matrix['x_mu_rbf']
    x_sd_rbf = model_matrix['x_sd_rbf']
    x_mu_kal = model_matrix['x_mu_kal']
    x_sd_kal = model_matrix['x_sd_kal']
    x_sc = model_matrix['x_sc']
    subj_idx = model_matrix['subj_idx']
    y = model_matrix['y']
    n_subj = model_matrix['n_subj']
    n, d = x_mu_rbf.shape

    if sample_kwargs is None:
        sample_kwargs = dict(draws=2000, njobs=2, tune=2000, init='advi+adapt_diag')

    with pm.Model() as hier_rbf_kal:
        mu_1 = pm.Normal('mu_beta_rbf_mean', mu=0., sd=100.)
        mu_2 = pm.Normal('mu_beta_rbf_stdv', mu=0., sd=100.)
        mu_3 = pm.Normal('mu_beta_kal_mean', mu=0., sd=100.)
        mu_4 = pm.Normal('mu_beta_kal_stdv', mu=0., sd=100.)
        mu_5 = pm.Normal('mu_beta_stick', mu=0., sd=100.)

        sigma_1 = pm.HalfCauchy('sigma_rbf_means', beta=100)
        sigma_2 = pm.HalfCauchy('sigma_rbf_stdev', beta=100)
        sigma_3 = pm.HalfCauchy('sigma_kal_means', beta=100)
        sigma_4 = pm.HalfCauchy('sigma_kal_stdev', beta=100)
        sigma_5 = pm.HalfCauchy('sigma_stick', beta=100)

        b_1 = pm.Normal('beta_rbf_mu', mu=mu_1, sd=sigma_1, shape=n_subj)
        b_2 = pm.Normal('beta_rbf_std', mu=mu_2, sd=sigma_2, shape=n_subj)
        b_3 = pm.Normal('beta_kal_mu', mu=mu_3, sd=sigma_3, shape=n_subj)
        b_4 = pm.Normal('beta_kal_std', mu=mu_4, sd=sigma_4, shape=n_subj)
        b_5 = pm.Normal('beta_sc', mu=mu_5, sd=sigma_5, shape=n_subj)

        rho = \
            tt.tile(tt.reshape(b_1[subj_idx], (n, 1)), d) * x_mu_rbf + \
            tt.tile(tt.reshape(b_2[subj_idx], (n, 1)), d) * x_sd_rbf + \
            tt.tile(tt.reshape(b_3[subj_idx], (n, 1)), d) * x_mu_kal + \
            tt.tile(tt.reshape(b_4[subj_idx], (n, 1)), d) * x_sd_kal + \
            tt.tile(tt.reshape(b_5[subj_idx], (n, 1)), d) * x_sc

        p_hat = softmax(rho)

        # Data likelihood
        yl = pm.Categorical('yl', p=p_hat, observed=y)

        # inference!
        trace_gprbf_kal = pm.sample(**sample_kwargs)

    return hier_rbf_kal, trace_gprbf_kal
def test_discrete_not_allowed():
    mu_true = np.array([-2, 0, 2])
    z_true = np.random.randint(len(mu_true), size=100)
    y = np.random.normal(mu_true[z_true], np.ones_like(z_true))

    with pm.Model():
        mu = pm.Normal('mu', mu=0, sigma=10, shape=3)
        z = pm.Categorical('z', p=tt.ones(3) / 3, shape=len(y))
        pm.Normal('y_obs', mu=mu[z], sigma=1., observed=y)
        with pytest.raises(opvi.ParametrizationError):
            pm.fit(n=1)  # fails
def build_model(self, n=None, name='archimedian_model'):
    with pm.Model(name=name) as self.model:
        if n is None:
            # one n per galaxy, or per arm?
            self.n_choice = pm.Categorical('n_choice', [1, 1, 0, 1, 1],
                                           testval=1, shape=len(self.galaxies))
            self.n = pm.Deterministic('n', self.n_choice - 2)
            self.chirality_correction = tt.switch(self.n < 0, -1, 1)
        else:
            msg = 'Parameter $n$ must be a nonzero float'
            try:
                n = float(n)
            except ValueError:
                pass
            finally:
                assert isinstance(n, float) and n != 0, msg
            self.n_choice = None
            self.n = pm.Deterministic('n', np.repeat(n, len(self.galaxies)))
            self.chirality_correction = tt.switch(self.n < 0, -1, 1)

        self.a = pm.HalfCauchy('a', beta=1, testval=1, shape=self.n_arms)
        self.psi = pm.Normal(
            'psi',
            mu=0,
            sigma=1,
            testval=0.1,
            shape=self.n_arms,
        )
        self.sigma_r = pm.InverseGamma('sigma_r', alpha=2, beta=0.5)

        # Unfortunately, as we need to reverse the theta points for arms
        # with n < 1, and rotate all arms to start at theta = 0,
        # we need to do some model-mangling
        self.t_mins = Series({
            i: self.data.query('arm_index == @i')['theta'].min()
            for i in np.unique(self.data['arm_index'])
        })
        r_stack = [
            self.a[i] * tt.power(
                (self.data.query('arm_index == @i')['theta'].values
                 - self.t_mins[i] + self.psi[i]),
                1 / self.n[int(self.gal_arm_map[i])]
            )[::self.chirality_correction[int(self.gal_arm_map[i])]]
            for i in np.unique(self.data['arm_index'])
        ]
        r = pm.Deterministic('r', tt.concatenate(r_stack))

        self.likelihood = pm.StudentT(
            'Likelihood',
            mu=r,
            sigma=self.sigma_r,
            observed=self.data['r'].values,
        )
def single_model(self, idx):
    minimum = 0.
    maximum = 8.
    sample_space = np.arange(minimum, maximum + 1, 1)
    sample_space = 1. / 10**(sample_space / 4.)

    with pm.Model() as smodel:
        # uniform priors on h
        hab_ten = pm.DiscreteUniform('h', 0., 8.)

        # convert to a tensor
        alpha = tt.as_tensor_variable([10**(hab_ten / 4.)])

        probs_a, probs_r = self.inferrer(alpha)

        # use a DensityDist
        pm.Categorical('actions', probs_a, observed=self.actions[idx])
        pm.Categorical('rewards', probs_r, observed=self.rewards[idx])

    return smodel, sample_space
def test_bernoulli_process(self):
    """Testing the Bridge Sampler with a Beta-Bernoulli-Process model"""

    # prior parameters
    alpha = np.random.gamma(1.0, 2.0)
    beta = np.random.gamma(1.0, 2.0)
    n = 100
    draws = 10000
    tune = 1000

    print("Testing with alpha = ", alpha, "and beta = ", beta)

    # random data
    p0 = np.random.random()
    expected_error = np.sqrt(p0 * (1 - p0) / n)  # reasonable approximation
    observations = (np.random.random(n) <= p0).astype("int")

    with pm.Model() as BernoulliBeta:
        theta = pm.Beta('pspike', alpha=alpha, beta=beta)
        obs = pm.Categorical('obs', p=pm.math.stack([theta, 1.0 - theta]),
                             observed=observations)
        trace = pm.sample(draws=draws, tune=tune)

    # calculate exact marginal likelihood
    n = len(observations)
    k = sum(observations)
    print(n, k)
    exact_log_marg_ll = spf.betaln(alpha + k, beta + (n - k)) - spf.betaln(alpha, beta)

    # estimate with bridge sampling
    logml_dict = marginal_llk(trace, model=BernoulliBeta, maxiter=10000)

    expected_p = 1.0 - trace["pspike"].mean()

    # should be true in 95% of the runs
    self.assertTrue(
        np.abs(expected_p - p0) < 2 * expected_error,
        msg="Estimated probability is {0:5.3f}, exact is {1:5.3f}, "
            "estimated standard deviation is {2:5.3f}. Is this OK?".format(
                expected_p, p0, expected_error))

    estimated_log_marg_ll = logml_dict["logml"]

    # 3.2 corresponds to a bayes factor of 'Not worth more than a bare mention'
    self.assertTrue(
        np.abs(estimated_log_marg_ll - exact_log_marg_ll) < np.log(3.2),
        msg="Estimated marginal log likelihood {0:2.5f}, exact marginal "
            "log likelihood {1:2.5f}. Is this OK?".format(
                estimated_log_marg_ll, exact_log_marg_ll))
def _set_clustering_model(self):
    with pm.Model() as model:
        logger.info("Using {} cluster centers".format(self.n_states))

        p = pm.Dirichlet("p", a=np.repeat(1, self.n_states), shape=self.n_states)
        pm.Potential("p_pot", var=tt.switch(tt.min(p) < 0.05, -np.inf, 0.))
        z = pm.Categorical("z", p=p, shape=self.n_genes)

        tau_g, mean_g, gamma = self._gamma_mix(model, z)
        param_hlm = self._hlm(model, gamma)

        self._set_steps(model, z, p, tau_g, mean_g, gamma, *param_hlm)

    return self