def test_density_scaling_with_generator(self):
    # We have different size generators
    def true_dens():
        g = gen1()
        for i, point in enumerate(g):
            yield stats.norm.logpdf(point).sum() * 10

    t = true_dens()
    # We have same size models
    with pm.Model() as model1:
        Normal("n", observed=gen1(), total_size=100)
        p1 = aesara.function([], model1.logp())
    with pm.Model() as model2:
        gen_var = generator(gen2())
        Normal("n", observed=gen_var, total_size=100)
        p2 = aesara.function([], model2.logp())

    for i in range(10):
        _1, _2, _t = p1(), p2(), next(t)
        decimals = select_by_precision(float64=7, float32=1)
        np.testing.assert_almost_equal(_1, _t, decimal=decimals)  # Value O(-50,000)
        np.testing.assert_almost_equal(_1, _2)
def test_simple(self):
    # Priors
    mu = Normal('mu', mu=0, tau=0.0001)
    s = Uniform('s', lower=0, upper=100, value=10)
    tau = s**-2

    # Likelihood with missing data
    x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

    # Instantiate sampler
    M = MCMC([mu, s, tau, x])

    # Run sampler
    M.sample(10000, 5000, progress_bar=0)

    # Check length of value
    assert_equal(len(x.value), 100)

    # Check size of trace
    tr = M.trace('x')()
    assert_equal(shape(tr), (5000, 2))

    sd2 = [-2 < i < 2 for i in ravel(tr)]

    # Check for standard normal output
    assert_almost_equal(sum(sd2) / 10000., 0.95, decimal=1)
def test_mixture_list_of_normals(self):
    with Model() as model:
        w = Dirichlet("w", floatX(np.ones_like(self.norm_w)), shape=self.norm_w.size)
        mu = Normal("mu", 0.0, 10.0, shape=self.norm_w.size)
        tau = Gamma("tau", 1.0, 1.0, shape=self.norm_w.size)
        Mixture(
            "x_obs",
            w,
            [Normal.dist(mu[0], tau=tau[0]), Normal.dist(mu[1], tau=tau[1])],
            observed=self.norm_x,
        )
        step = Metropolis()
        trace = sample(5000, step, random_seed=self.random_seed, progressbar=False, chains=1)

    assert_allclose(np.sort(trace["w"].mean(axis=0)), np.sort(self.norm_w), rtol=0.1, atol=0.1)
    assert_allclose(np.sort(trace["mu"].mean(axis=0)), np.sort(self.norm_mu), rtol=0.1, atol=0.1)
def test_allinmodel():
    model1 = Model()
    model2 = Model()
    with model1:
        x1 = Normal("x1", mu=0, sigma=1)
        y1 = Normal("y1", mu=0, sigma=1)
    with model2:
        x2 = Normal("x2", mu=0, sigma=1)
        y2 = Normal("y2", mu=0, sigma=1)

    x1 = model1.rvs_to_values[x1]
    y1 = model1.rvs_to_values[y1]
    x2 = model2.rvs_to_values[x2]
    y2 = model2.rvs_to_values[y2]

    starting.allinmodel([x1, y1], model1)
    starting.allinmodel([x1], model1)
    with pytest.raises(ValueError, match=r"Some variables not in the model: \['x2', 'y2'\]"):
        starting.allinmodel([x2, y2], model1)
    with pytest.raises(ValueError, match=r"Some variables not in the model: \['x2'\]"):
        starting.allinmodel([x2, y1], model1)
    with pytest.raises(ValueError, match=r"Some variables not in the model: \['x2'\]"):
        starting.allinmodel([x2], model1)
def test_common_errors(self):
    with pytest.raises(ValueError) as e:
        with pm.Model() as m:
            Normal("n", observed=[[1]], total_size=[2, Ellipsis, 2, 2])
            m.logp()
    assert "Length of" in str(e.value)
    with pytest.raises(ValueError) as e:
        with pm.Model() as m:
            Normal("n", observed=[[1]], total_size=[2, 2, 2])
            m.logp()
    assert "Length of" in str(e.value)
    with pytest.raises(TypeError) as e:
        with pm.Model() as m:
            Normal("n", observed=[[1]], total_size="foo")
            m.logp()
    assert "Unrecognized" in str(e.value)
    with pytest.raises(TypeError) as e:
        with pm.Model() as m:
            Normal("n", observed=[[1]], total_size=["foo"])
            m.logp()
    assert "Unrecognized" in str(e.value)
    with pytest.raises(ValueError) as e:
        with pm.Model() as m:
            Normal("n", observed=[[1]], total_size=[Ellipsis, Ellipsis])
            m.logp()
    assert "Double Ellipsis" in str(e.value)
def test_container_parents(self):
    A = Normal('A', 0, 1)
    B = Normal('B', 0, 1)
    C = Normal('C', [A, B], 1)
    assert_equal(Container([A, B]).value, [A.value, B.value])
    assert_equal(C.parents.value['mu'], [A.value, B.value])
def test_density_scaling(self):
    with pm.Model() as model1:
        Normal("n", observed=[[1]], total_size=1)
        p1 = aesara.function([], model1.logp())

    with pm.Model() as model2:
        Normal("n", observed=[[1]], total_size=2)
        p2 = aesara.function([], model2.logp())

    assert p1() * 2 == p2()
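# Hedged sketch (not part of the original test suite): the check above relies on
# total_size rescaling the observed log-density by total_size / n_observed, so a
# model declaring total_size=4 for a single data point should report four times
# the log-probability of the unscaled model. Assumes the same pm/aesara/np
# imports used by the surrounding tests.
def density_scaling_sketch():
    with pm.Model() as unscaled:
        Normal("n", observed=[[1.0]], total_size=1)
    with pm.Model() as scaled:
        Normal("n", observed=[[1.0]], total_size=4)
    p_unscaled = aesara.function([], unscaled.logp())
    p_scaled = aesara.function([], scaled.logp())
    np.testing.assert_allclose(p_scaled(), 4 * p_unscaled())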
def make_model(self, data):
    assert len(data) == 2, 'There must be exactly two data arrays'
    name1, name2 = sorted(data.keys())
    y1 = np.array(data[name1])
    y2 = np.array(data[name2])
    assert y1.ndim == 1
    assert y2.ndim == 1
    y = np.concatenate((y1, y2))

    mu_m = np.mean(y)
    mu_p = 0.000001 * 1 / np.std(y)**2
    sigma_low = np.std(y) / 1000
    sigma_high = np.std(y) * 1000

    # the five prior distributions for the parameters in our model
    group1_mean = Normal('group1_mean', mu_m, mu_p)
    group2_mean = Normal('group2_mean', mu_m, mu_p)
    group1_std = Uniform('group1_std', sigma_low, sigma_high)
    group2_std = Uniform('group2_std', sigma_low, sigma_high)
    nu_minus_one = Exponential('nu_minus_one', 1 / 29)

    @deterministic(plot=False)
    def nu(n=nu_minus_one):
        out = n + 1
        return out

    @deterministic(plot=False)
    def lam1(s=group1_std):
        out = 1 / s**2
        return out

    @deterministic(plot=False)
    def lam2(s=group2_std):
        out = 1 / s**2
        return out

    group1 = NoncentralT(name1, group1_mean, lam1, nu, value=y1, observed=True)
    group2 = NoncentralT(name2, group2_mean, lam2, nu, value=y2, observed=True)
    return Model({
        'group1': group1,
        'group2': group2,
        'group1_mean': group1_mean,
        'group2_mean': group2_mean,
        'group1_std': group1_std,
        'group2_std': group2_std,
    })
def test_dimensions(self):
    a1 = Normal.dist(mu=0, sigma=1)
    a2 = Normal.dist(mu=10, sigma=1)
    mix = Mixture.dist(w=np.r_[0.5, 0.5], comp_dists=[a1, a2])

    assert mix.mode.ndim == 0
    assert mix.logp(0.0).ndim == 0

    value = np.r_[0.0, 1.0, 2.0]
    assert mix.logp(value).ndim == 1
def test_free_rv(self):
    with pm.Model() as model4:
        Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2])
        p4 = aesara.function([], model4.logp())

    with pm.Model() as model5:
        n = Normal("n", total_size=[2, Ellipsis, 2], size=(2, 2))
        p5 = aesara.function([n.tag.value_var], model5.logp())

    assert p4() == p5(pm.floatX([[1]]))
    assert p4() == p5(pm.floatX([[1, 1], [1, 1]]))
def oneway_banova(y, X):
    # X is a design matrix with a 1 where the factor is present
    with Model() as banova:
        sigma = Uniform('SD lowest', lower=0, upper=10)
        sd_prior = pm.Gamma('SD Prior', 1.01005, 0.1005)
        offset = Normal('offset', mu=0, tau=0.001)
        alphas = Normal('alphas', mu=0.0, sd=sd_prior, shape=X.shape[0])
        betas = alphas - alphas.mean()
        betas = Deterministic('betas', betas)
        data = Normal('data', mu=offset + Tns.dot(X.T, betas), sd=sigma, observed=y)
    return banova
def simple_arbitrary_det():
    scalar_type = at.dscalar if aesara.config.floatX == "float64" else at.fscalar

    @as_op(itypes=[scalar_type], otypes=[scalar_type])
    def arbitrary_det(value):
        return value

    with Model() as model:
        a = Normal("a")
        b = arbitrary_det(a)
        Normal("obs", mu=b.astype("float64"), observed=floatX_array([1, 3, 5]))

    return model.compute_initial_point(), model
def toy_model(tau=10000, prior='Beta0.5'):
    b_obs = 200
    f_AB = 400
    f_CB = 1000
    f_CA = 600
    A = np.array([0, f_AB, f_CA, 0, f_CB, 0])

    if prior == 'Normal':
        ABp = Normal('ABp', mu=0.5, tau=100, trace=True)
        CBp = Normal('CBp', mu=0.5, tau=100, trace=True)
        CAp = Normal('CAp', mu=0.5, tau=100, trace=True)
    elif prior == 'Uniform':
        ABp = Uniform('ABp', lower=0.0, upper=1.0, trace=True)
        CBp = Uniform('CBp', lower=0.0, upper=1.0, trace=True)
        CAp = Uniform('CAp', lower=0.0, upper=1.0, trace=True)
    elif prior == 'Beta0.25':
        ABp = Beta('ABp', alpha=0.25, beta=0.25, trace=True)
        CBp = Beta('CBp', alpha=0.25, beta=0.25, trace=True)
        CAp = Beta('CAp', alpha=0.25, beta=0.25, trace=True)
    elif prior == 'Beta0.5':
        ABp = Beta('ABp', alpha=0.5, beta=0.5, trace=True)
        CBp = Beta('CBp', alpha=0.5, beta=0.5, trace=True)
        CAp = Beta('CAp', alpha=0.5, beta=0.5, trace=True)
    elif prior == 'Beta2':
        ABp = Beta('ABp', alpha=2, beta=2, trace=True)
        CBp = Beta('CBp', alpha=2, beta=2, trace=True)
        CAp = Beta('CAp', alpha=2, beta=2, trace=True)
    elif prior == 'Gamma':
        ABp = Gamma('ABp', alpha=1, beta=0.5, trace=True)
        CBp = Gamma('CBp', alpha=1, beta=0.5, trace=True)
        CAp = Gamma('CAp', alpha=1, beta=0.5, trace=True)

    AB1 = ABp
    AB3 = 1 - ABp
    CB4 = CBp
    CB5 = 1 - CBp
    CA42 = CAp
    CA52 = 1 - CAp

    b = Normal('b', mu=f_AB * AB3 + f_CB * CB4 + f_CA * CA42, tau=tau,
               value=b_obs, observed=True, trace=True)
    # print [x.value for x in [ABp, CBp, CAp]]
    # print b.logp
    return locals()
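# Hedged usage sketch (not from the original source): because toy_model returns
# locals(), the resulting dict can be handed directly to pymc 2.x's MCMC; the
# iteration and burn-in counts below are illustrative only.
def sample_toy_model_sketch():
    import pymc
    M = pymc.MCMC(toy_model(tau=10000, prior='Uniform'))
    M.sample(iter=20000, burn=5000)
    return M.trace('ABp')[:]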
def multidimensional_model():
    mu = -2.1
    tau = 1.3
    with Model() as model:
        x = Normal('x', mu, tau, shape=(3, 2), testval=.1 * np.ones((3, 2)))

    return model.test_point, model, (mu, tau**-1)
def grid_model(fname, tau=10000, sparse=True):
    data = loadmat('data/%s.mat' % fname)
    A = data['phi']
    b_obs = data['f']
    x_true = data['real_a']
    block_sizes = data['block_sizes']

    if sparse:
        alpha = 0.3
    else:
        alpha = 1

    # construct graphical model
    # ------------------------------------------------------------------------
    # construct sparse prior on all routes
    x_blocks = [Dirichlet('x%d' % i, np.array([alpha] * (x[0])), trace=True)
                for (i, x) in enumerate(block_sizes)]
    x_blocks_expanded = [[x[xi] for xi in range(i - 1)]
                         for (i, x) in zip(block_sizes, x_blocks)]
    [x.append(1 - sum(x)) for x in x_blocks_expanded]
    x_pri = list(chain(*x_blocks_expanded))

    # construct skinny normal distributions with observations
    mus = [dot(a, x_pri) for a in A]
    b = [Normal('b%s' % i, mu=mu, tau=tau, value=b_obsi[0], observed=True)
         for (i, (mu, b_obsi)) in enumerate(zip(mus, b_obs))]
    return locals()
def model(prob):
    # observations
    obs = prob.get_observation_values()

    # priors
    variables = []
    sig = Uniform('sig', 0.0, 100.0, value=1.)
    variables.append(sig)
    a1 = Uniform('a1', 0.0, 5.0)
    variables.append(a1)
    k1 = Uniform('k1', 0.01, 2.0)
    variables.append(k1)
    a2 = Uniform('a2', 0.0, 5.0)
    variables.append(a2)
    k2 = Uniform('k2', 0.01, 2.0)
    variables.append(k2)

    # model
    @deterministic()
    def response(pars=variables, prob=prob):
        values = []
        for par in pars:
            values.append(par)
        values = array(values)
        prob.set_parameters(values)
        prob.forward()
        return prob.get_simvalues()

    # likelihood
    y = Normal('y', mu=response, tau=1.0 / sig**2, value=obs, observed=True)
    variables.append(y)
    return variables
def test_nested_tuple_container(self):
    A = Normal('A', 0, 1)
    try:
        Container(([A],))
        raise AssertionError('A NotImplementedError should have resulted.')
    except NotImplementedError:
        pass
def create_model(AA, bb_obs, EQ, x_true, sparse=False):
    output = {}  # change variable names
    # sparse = True
    alpha = 0.3 if sparse else 1

    # construct graphical model
    # ------------------------------------------------------------------------
    import time
    with pm.Model() as model:
        ivar = 10000

        START = time.time()
        # construct sparse prior on all routes
        # Dirichlet distribution doesn't give values that sum to 1 (continuous
        # distribution), so instead we normalize draws from a Gamma distribution
        # CAUTION x_pri is route splits
        x_pri = array(generate_route_flows_from_incidence_matrix(EQ, alpha=alpha))

        # construct skinny normal distributions with observations
        # FIXME sparse dot product (i.e. Mscale.dot(x_pri)) gives error:
        # TypeError: no supported conversion for types: (dtype('float64'), dtype('O'))
        mus_bb = array(AA.todense().dot(x_pri))
        bb = [Normal('b%s' % i, mu=mu, tau=ivar, observed=obsi)
              for (i, (mu, obsi)) in enumerate(zip(mus_bb, bb_obs))]
        print('Time to build model: %ds' % (time.time() - START))
    return model, alpha, x_pri
def simple_model():
    mu = -2.1
    tau = 1.3
    with Model() as model:
        x = Normal('x', mu, tau, shape=2, testval=[.1] * 2)

    return model.test_point, model, (mu, tau**-1)
def multidimensional_model():
    mu = -2.1
    tau = 1.3
    with Model() as model:
        Normal("x", mu, tau=tau, size=(3, 2), initval=0.1 * np.ones((3, 2)))

    return model.compute_initial_point(), model, (mu, tau**-0.5)
def simple_model():
    mu = -2.1
    tau = 1.3
    with Model() as model:
        Normal("x", mu, tau=tau, size=2, initval=floatX_array([0.1, 0.1]))

    return model.compute_initial_point(), model, (mu, tau**-0.5)
def appc_gamma_model(y, observer, ambiguity_regressor, context, observed=True):
    '''Hierarchical Gamma model to predict APPC data.'''
    num_obs_ctxt = len(np.unique(observer[context == 1]))
    num_obs_noctxt = len(np.unique(observer[context == 0]))
    obs = [num_obs_noctxt, num_obs_ctxt]
    with Model() as pl:
        # Population level:
        obs_ambiguity = Normal('DNP Mean_Ambiguity', mu=0, sd=500.0, shape=2)
        # Define variable for sd of data distribution:
        data_sd = pm.Gamma('Data_SD', *gamma_params(mode=y.std(), sd=y.std()))

        for ctxt, label in zip([1, 0], ['context', 'nocontext']):
            obs_offset = Normal('DNP Mean_Offset_' + label, mu=y.mean(), sd=y.std() * 1.0)
            obs_sd_offset = Gamma('Mean_Offset_SD' + label, *gamma_params(mode=y.std(), sd=y.std()))

            # Observer level:
            offset = Normal('DNP Offset_' + label, mu=obs_offset, sd=obs_sd_offset, shape=(obs[ctxt],))

            # Compute predicted mode for each fixation:
            data = y[context == ctxt]
            obs_c = observer[context == ctxt]
            ambig_reg_c = ambiguity_regressor[context == ctxt]

            b0 = obs_ambiguity.mean()
            obs_ambiguity_transformed = Deterministic('DNS Population Ambiguity' + label, obs_ambiguity - b0)
            offset_transformed = Deterministic('DNS Subject Offsets ' + label, offset + b0)
            obs_offset_transformed = Deterministic('DNS Population Offsets ' + label, obs_offset + b0)
            oat = obs_ambiguity - b0  # Dummy coding

            mode = (offset_transformed[obs_c]
                    + obs_ambiguity_transformed[ambig_reg_c == 1])

            # Convert to shape/rate parameterization
            shape, rate = gamma_params(mode, data_sd)
            data_dist = Gamma('Data_' + label, shape, rate, observed=data)
    return pl
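# Editorial note: gamma_params is called above (and in later snippets) but is not
# defined in this excerpt. A plausible implementation, ASSUMED here and following
# the common mode/sd -> (shape, rate) conversion for a Gamma distribution, is:
def gamma_params(mode, sd):
    # For Gamma(shape, rate): mode = (shape - 1) / rate (shape > 1) and sd = sqrt(shape) / rate,
    # so solving for rate and shape from a desired mode and sd gives:
    rate = (mode + (mode**2 + 4 * sd**2) ** 0.5) / (2 * sd**2)
    shape = 1 + mode * rate
    return shape, rate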
def _create_proposal_random_variables(self):
    centers = self.proposal_center
    scales = self.proposal_scales
    random_variables = dict()
    for key in self._mcmc.params.keys():
        variance = (centers[key] * scales[key]) ** 2
        random_variables[key] = Normal(key, centers[key], 1 / variance)
    return random_variables
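# Editorial note (assumption about intent): in pymc 2.x, Normal(name, mu, tau) takes a
# precision as its third argument, so passing 1 / variance above yields a proposal
# whose standard deviation is centers[key] * scales[key], i.e. equivalently:
#     tau = (centers[key] * scales[key]) ** -2
#     random_variables[key] = Normal(key, centers[key], tau)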
def test_gradient_with_scaling(self):
    with pm.Model() as model1:
        genvar = generator(gen1())
        m = Normal("m")
        Normal("n", observed=genvar, total_size=1000)
        grad1 = aesara.function([m.tag.value_var], at.grad(model1.logpt(), m.tag.value_var))
    with pm.Model() as model2:
        m = Normal("m")
        shavar = aesara.shared(np.ones((1000, 100)))
        Normal("n", observed=shavar)
        grad2 = aesara.function([m.tag.value_var], at.grad(model2.logpt(), m.tag.value_var))

    for i in range(10):
        shavar.set_value(np.ones((100, 100)) * i)
        g1 = grad1(1)
        g2 = grad2(1)
        np.testing.assert_almost_equal(g1, g2)
def createHistogramFitModel(data_bins, simulation_bins, scaleGuess):
    scale = Normal('scale', mu=scaleGuess, tau=sigToTau(.10 * scaleGuess))
    scaled_sim = scale * simulation_bins
    baseline_observed = Poisson("baseline_observed", mu=scaled_sim, value=data_bins, observed=True)
    return locals()
def createSignalModelExponential(data):
    """
    Toy model that treats the first ~10% of the waveform as an exponential.
    Does a good job of finding the start time (t_0).
    Since I made this as a toy, it's super brittle.  Waveform must be normalized.
    """
    print("Creating model")
    switchpoint = DiscreteUniform('switchpoint', lower=0, upper=len(data))
    noise_sigma = HalfNormal('noise_sigma', tau=sigToTau(.01))
    exp_sigma = HalfNormal('exp_sigma', tau=sigToTau(.05))

    # Modeling these parameters this way is why the waveform needs to be normalized
    exp_rate = Uniform('exp_rate', lower=0, upper=.1)
    exp_scale = Uniform('exp_scale', lower=0, upper=.1)

    timestamp = np.arange(0, len(data), dtype=np.float)

    @deterministic(plot=False, name="test")
    def uncertainty_model(s=switchpoint, n=noise_sigma, e=exp_sigma):
        '''Concatenate Poisson means'''
        out = np.empty(len(data))
        out[:s] = n
        out[s:] = e
        return out

    @deterministic
    def tau(eps=uncertainty_model):
        return np.power(eps, -2)

    ## @deterministic(plot=False, name="test2")
    ## def adjusted_scale(s=switchpoint, s1=exp_scale):
    ##     out = np.empty(len(data))
    ##     out[:s] = s1
    ##     out[s:] = s1
    ##     return out
    #
    # scale_param = adjusted_scale(switchpoint, exp_scale)

    @deterministic(plot=False)
    def baseline_model(s=switchpoint, r=exp_rate, scale=exp_scale):
        out = np.zeros(len(data))
        out[s:] = scale * (np.exp(r * (timestamp[s:] - s)) - 1.)

        # plt.figure(fig.number)
        # plt.clf()
        # plt.plot(out, color="blue")
        # plt.plot(data, color="red")
        # value = raw_input(' --> Press q to quit, any other key to continue\n')
        return out

    baseline_observed = Normal("baseline_observed", mu=baseline_model, tau=tau, value=data, observed=True)
    return locals()
def createSignalModel(data):
    # set up your model parameters
    switchpoint = DiscreteUniform('switchpoint', lower=0, upper=len(data))
    early_sigma = HalfNormal('early_sigma', tau=sigToTau(1))
    late_sigma = HalfNormal('late_sigma', tau=sigToTau(1))
    early_mu = Normal('early_mu', mu=.5, tau=sigToTau(1))
    late_mu = Normal('late_mu', mu=.5, tau=sigToTau(1))

    # set up the model for uncertainty (ie, the noise) and the signal (ie, the step function)

    ############################
    @deterministic(plot=False, name="test")
    def uncertainty_model(s=switchpoint, n=early_sigma, e=late_sigma):
        # Concatenate uncertainty sigmas (or taus or whatever) around t0
        s = np.around(s)
        out = np.empty(len(data))
        out[:s] = n
        out[s:] = e
        return out

    ############################
    @deterministic
    def tau(eps=uncertainty_model):
        # pymc uses this tau parameter instead of sigma to model a gaussian. its annoying.
        return np.power(eps, -2)

    ############################
    @deterministic(plot=False, name="siggenmodel")
    def signal_model(s=switchpoint, e=early_mu, l=late_mu):
        # makes the step function using the means
        out = np.zeros(len(data))
        out[:s] = e
        out[s:] = l
        return out

    ############################
    # Full model: normally distributed noise around a step function
    baseline_observed = Normal("baseline_observed", mu=signal_model, tau=tau, value=data, observed=True)
    return locals()
def set_models(self):
    """Define models for each group.

    :return: None
    """
    for group in ['control', 'variant']:
        self.stochastics[group] = Normal(
            group,
            self.stochastics[group + '_mean'],
            self.stochastics[group + '_sigma'],
            value=getattr(self, group),
            observed=True)
def set_priors(self):
    """Set the parameters' prior distributions.

    Hardcoded behavior for now, with non-committal prior knowledge.

    :return: None
    """
    obs = np.concatenate((self.control, self.variant))
    obs_mean, obs_sigma = np.mean(obs), np.std(obs)
    for group in ['control', 'variant']:
        self.stochastics[group + '_mean'] = Normal(group + '_mean', obs_mean, 0.000001 / obs_sigma ** 2)
        self.stochastics[group + '_sigma'] = Uniform(group + '_sigma', obs_sigma / 1000, obs_sigma * 1000)
def test_multidim_scaling(self):
    with pm.Model() as model0:
        Normal("n", observed=[[1, 1], [1, 1]], total_size=[])
        p0 = aesara.function([], model0.logp())

    with pm.Model() as model1:
        Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2])
        p1 = aesara.function([], model1.logp())

    with pm.Model() as model2:
        Normal("n", observed=[[1], [1]], total_size=[2, 2])
        p2 = aesara.function([], model2.logp())

    with pm.Model() as model3:
        Normal("n", observed=[[1, 1]], total_size=[2, 2])
        p3 = aesara.function([], model3.logp())

    with pm.Model() as model4:
        Normal("n", observed=[[1]], total_size=[2, 2])
        p4 = aesara.function([], model4.logp())

    with pm.Model() as model5:
        Normal("n", observed=[[1]], total_size=[2, Ellipsis, 2])
        p5 = aesara.function([], model5.logp())

    _p0 = p0()
    assert (np.allclose(_p0, p1())
            and np.allclose(_p0, p2())
            and np.allclose(_p0, p3())
            and np.allclose(_p0, p4())
            and np.allclose(_p0, p5()))
def _create_proposal_random_variables(self):
    centers = self.proposal_center
    scales = self.proposal_scales
    random_variables = dict()
    params = list(self._mcmc.params.keys())
    if 'std_dev' in self._mcmc.pymc_mod_order:
        params.append('std_dev')
    for key in params:
        variance = (scales[key]) ** 2
        random_variables[key] = Normal(key, centers[key], 1 / variance)
    return random_variables
def set_priors(self):
    """Set the parameters' prior distributions.

    Hardcoded behavior for now, with non-committal prior knowledge.

    :return: None
    """
    obs = np.concatenate((self.control, self.variant))
    mean, sigma, med = np.mean(obs), np.std(obs), np.median(obs)
    location = np.log(med)
    scale = np.sqrt(2 * np.log(mean / med))
    for group in ['control', 'variant']:
        self.stochastics[group + '_location'] = Normal(group + '_location', location, 0.000001 / sigma ** 2)
        self.stochastics[group + '_scale'] = Uniform(group + '_scale', scale / 1000, scale * 1000)
def gamma_model_2conditions(y, x, observer, condition, observed=True):
    '''Hierarchical Gamma model to predict fixation durations.'''
    # Different slopes for different observers.
    num_observer = len(np.unique(observer))
    print('\n Num Observers: %d \n' % num_observer)
    with Model() as pl:
        obs_splits = TruncatedNormal('Mean_Split', mu=90, sd=500, lower=5, upper=175)
        obs_offset = Normal('Mean_Offset', mu=y.mean(), sd=y.std() * 10.0)
        obs_slopes1 = Normal('Mean_Slope1', mu=0.0, sd=1.0)
        obs_slopes2 = Normal('Mean_Slope2', mu=0, sd=1.0)

        obs_sd_split = pm.Gamma('Obs_SD_Split', *gamma_params(mode=1.0, sd=100.0))
        obs_sd_intercept = pm.Gamma('Obs_SD_Offset', *gamma_params(mode=1, sd=100.))
        obs_sd_slopes1 = pm.Gamma('Obs_SD_slope1', *gamma_params(mode=.01, sd=2.01))
        obs_sd_slopes2 = pm.Gamma('Obs_SD_slope2', *gamma_params(mode=.01, sd=2.01))
        data_sd = pm.Gamma('Data_SD', *gamma_params(mode=y.std(), sd=y.std()))

        split = TruncatedNormal('Split', mu=obs_splits, sd=obs_sd_split,
                                lower=5, upper=175, shape=(num_observer,))
        intercept = Normal('Offset', mu=obs_offset, sd=obs_sd_intercept, shape=(num_observer,))
        slopes1 = Normal('Slope1', mu=obs_slopes1, sd=obs_sd_slopes1, shape=(num_observer,))
        slopes2 = Normal('Slope2', mu=obs_slopes2, sd=obs_sd_slopes2, shape=(num_observer,))

        # Now add the condition part
        split_cond = Normal('Cond_Split', mu=0, sd=10, shape=(2,))
        intercept_cond = Normal('Cond_Offset', mu=0, sd=20, shape=(2,))
        slopes1_cond = Normal('Cond_Slope1', mu=0, sd=1, shape=(2,))
        slopes2_cond = Normal('Cond_Slope2', mu=0, sd=1, shape=(2,))

        intercept_cond = intercept_cond - intercept_cond.mean()
        split_cond = split_cond - split_cond.mean()
        slopes1_cond = slopes1_cond - slopes1_cond.mean()
        slopes2_cond = slopes2_cond - slopes2_cond.mean()

        mu_sub = piecewise_predictor(
            x,
            split[observer] + split_cond[condition],
            intercept[observer] + intercept_cond[condition],
            slopes1[observer] + slopes1_cond[condition],
            slopes2[observer] + slopes2_cond[condition])
        shape, rate = gamma_params(mu_sub, data_sd)
        data = Gamma('Data', shape, rate, observed=y)
    return pl
def create_model():
    all_vars = []

    mu_sc = Normal('mu_sc', 0.5, 1 / (0.25**2))
    mu_sc.value = 0.5
    # pot = TruncPotential('mu_sc_potential', 0, 1, mu_sc)
    # all_vars.append(pot)

    # target = 0.1**2
    # a = 2
    # b = target*(a+1)
    target = 0.02**2
    a = 0.7
    b = target * (a + 1)
    tau_qd = InverseGamma('tau_qd', a, b)
    tau_sc = InverseGamma('tau_sc', a, b)
    tau_bias = InverseGamma('tau_bias', a, b)
    # plot_dist(tau_qd, transform=lambda x: np.sqrt(x), high=0.5)
    print(np.mean([np.sqrt(tau_qd.random()) for i in xrange(10000)]))
    print(np.std([np.sqrt(tau_qd.random()) for i in xrange(10000)]))

    target = 0.02**2
    a = 0.9
    b = target * (a + 1)
    tau_student_question_capabilities = InverseGamma('tau_student_question_capabilities', a, b)
    tau_student_handin_capabilities = InverseGamma('tau_student_handin_capabilities', a, b)
    # plot_dist(tau_student_handin_capabilities, transform=lambda x: np.sqrt(x), high=0.5)

    all_vars.append(mu_sc)
    all_vars.append(tau_sc)
    all_vars.append(tau_bias)
    all_vars.append(tau_student_handin_capabilities)
    all_vars.append(tau_student_question_capabilities)
    all_vars.append(tau_qd)

    for i in xrange(num_assignments):
        questions = []
        for j in xrange(num_questions_pr_handin):
            # tau = pymc.Lambda('tau_%i_%i' % (i, j), lambda a=tau_qd: 1 / (tau_qd.value * tau_qd.value))
            tau = tau_qd
            difficulty = Normal('difficulty_q_%i_%i' % (i, j), 0, 1 / tau)
            q = Question(difficulty)
            questions.append(q)
            all_vars.append(difficulty)
        assignment = Assignment(questions)
        assignments.append(assignment)

    for i in xrange(num_students):
        tau = tau_sc
        student_capabilities = Normal('student_capabilities_s_%i' % i, mu_sc, 1 / tau)
        all_vars.append(student_capabilities)

        tau = tau_bias
        grading_bias = Normal('grading_bias_s_%i' % i, 0, 1 / tau)
        all_vars.append(grading_bias)

        s = Student(student_capabilities, grading_bias)
        students.append(s)

        for j, assignment in enumerate(assignments):
            tau = tau_student_handin_capabilities
            student_handin_capabilities = Normal('student_handin_capabilities_sh_%i_%i' % (i, j), 0, 1 / tau)
            all_vars.append(student_handin_capabilities)

            question_capabilities = []
            for k, q in enumerate(assignment.questions):
                tau = tau_student_question_capabilities
                student_question_capabilities = Normal('student_question_capabilities_shq_%i_%i_%i' % (i, j, k), 0, 1 / tau)
                all_vars.append(student_question_capabilities)
                question_capabilities.append(student_question_capabilities)
            handins.append(Handin(s, assignment, student_handin_capabilities, question_capabilities))

    # assign graders
    all_grades = []
    num_grades_given = np.zeros((len(students), 1))
    for handin in handins:
        potential_graders = range(0, len(students))
        potential_graders.remove(students.index(handin.student))
        idx = np.random.randint(0, len(potential_graders) + 1, num_graders_pr_handin)
        num_grades_given[idx] += 1
        graders = [students[i] for i in idx]
        grades = handin.grade(graders)
        all_grades.append(grades)
    # print num_grades_given

    grade_list = sum(sum(all_grades, []), [])
    tau = 1 / 0.02**2

    print("Creating grade list")
    grade_list_real = [g.value for g in grade_list]
    print("Number of illegal grades: %i (out of %i)" % (
        len([g for g in grade_list_real if g > 1 or g < 0]), len(grade_list)))
    grade_list_real = [min(max(g, 0), 1) for g in grade_list_real]
    grade_list_new = []
    for i, (g_real, g) in enumerate(zip(grade_list_real, grade_list)):
        grade_list_new.append(Normal('grade_%i' % i, g, tau, value=g_real, observed=True))
        if i % 10000 == 0:
            print(i)
    grade_list = grade_list_new
    print("Grade list created")

    all_vars += grade_list
    all_vars = list(set(all_vars))
    print(len(all_vars))
    return locals(), grade_list_real, all_vars
def create_model():
    all_vars = []

    # b = 0.02
    # target_mean = 1/0.1**2
    # a = b*target_mean
    b = 0.001
    target_mean = 10 / 0.1**2
    a = b * target_mean
    # print a/b
    # print np.sqrt(a/(b**2))

    # tau_qd = Normal('tau_qd', 0.05, 1/(0.15**2))
    # tau_qd = Exponential('tau_qd', 10)
    tau_qd = Gamma('tau_qd', a, b)
    plot_dist(tau_qd, transform=lambda x: 1 / np.sqrt(x))
    print(np.mean([1 / np.sqrt(tau_qd.random()) for i in xrange(1000)]))
    print(np.std([1 / np.sqrt(tau_qd.random()) for i in xrange(1000)]))
    # pot = TruncPotential('tau_qd_potential', 0, 0.2, tau_qd)
    # all_vars.append(pot)
    # tau_qd.value = 1/0.05**2
    all_vars.append(tau_qd)
    # plot_dist(tau_qd, 0, 0.2)

    mu_sc = Normal('mu_sc', 0.5, 1 / (0.25**2))
    mu_sc.value = 0.5
    pot = TruncPotential('mu_sc_potential', 0, 1, mu_sc)
    all_vars.append(pot)

    # tau_sc = Normal('tau_sc', 0.2, 1/(0.25**2))
    # tau_sc = Exponential('tau_sc', 10)
    tau_sc = Gamma('tau_sc', a, b)
    # plot_dist(tau_sc)
    # tau_sc.value = 1/0.1**2
    # pot = TruncPotential('tau_sc_potential', 0, 0.3, tau_sc)
    # all_vars.append(pot)

    # tau_bias = Normal('tau_bias', 0.05, 1/(0.25**2))
    # tau_bias = Exponential('tau_bias', 10)
    tau_bias = Gamma('tau_bias', a, b)
    # pot = TruncPotential('tau_bias_potential', 0, 0.2, tau_bias)
    # all_vars.append(pot)
    # tau_bias.value = 1/0.01**2

    # tau_student_handin_capabilities = Normal('tau_student_handin_capabilities', 0.05, 1/(0.15**2))
    # tau_student_handin_capabilities = Exponential('tau_student_handin_capabilities', 10)
    tau_student_handin_capabilities = Gamma('tau_student_handin_capabilities', a, b)
    # tau_student_handin_capabilities.value = 1/0.05**2
    # pot = TruncPotential('tau_student_handin_capabilities_potential', 0, 0.3, tau_student_handin_capabilities)
    # all_vars.append(pot)

    # tau_student_question_capabilities = Normal('tau_student_question_capabilities', 0.05, 1/(0.15**2))
    # tau_student_question_capabilities = Exponential('tau_student_question_capabilities', 10)
    tau_student_question_capabilities = Gamma('tau_student_question_capabilities', a, b)
    # plot_dist(tau_student_question_capabilities)
    # tau_student_question_capabilities.value = 1/0.05**2
    # pot = TruncPotential('tau_student_question_capabilities_potential', 0, 0.15, tau_student_question_capabilities)
    # all_vars.append(pot)
    # plot_dist(tau_student_question_capabilities, 0, 0.2)

    all_vars.append(mu_sc)
    all_vars.append(tau_sc)
    all_vars.append(tau_bias)
    all_vars.append(tau_student_handin_capabilities)
    all_vars.append(tau_student_question_capabilities)

    for i in xrange(num_assignments):
        questions = []
        for j in xrange(num_questions_pr_handin):
            # tau = pymc.Lambda('tau_%i_%i' % (i, j), lambda a=tau_qd: 1 / (tau_qd.value * tau_qd.value))
            tau = tau_qd
            difficulty = Normal('difficulty_q_%i_%i' % (i, j), 0, tau)
            q = Question(difficulty)
            questions.append(q)
            all_vars.append(difficulty)
        assignment = Assignment(questions)
        assignments.append(assignment)

    for i in xrange(num_students):
        # tau = pymc.Lambda('tau1_%i' % i, lambda a=tau_sc: 1/(tau_sc.value * tau_sc.value))
        tau = tau_sc
        student_capabilities = Normal('student_capabilities_s_%i' % i, mu_sc, tau)
        # pot = TruncPotential('student_capabilities_potential_s_%i' % i, 0, 1, student_capabilities)
        all_vars.append(student_capabilities)
        # all_vars.append(pot)

        # grading_bias = Normal('grading_bias_s_%i' % i, 0, 1/tau_bias)
        # tau = pymc.Lambda('tau2_%i' % i, lambda a=tau_bias: 1 / (tau_bias.value * tau_bias.value))
        tau = tau_bias
        grading_bias = Normal('grading_bias_s_%i' % i, 0, tau)
        all_vars.append(grading_bias)

        s = Student(student_capabilities, grading_bias)
        students.append(s)

        for j, assignment in enumerate(assignments):
            # student_handin_capabilities = Normal('student_handin_capabilities_sh_%i_%i' % (i, j), 0, 1/tau_student_handin_capabilities)
            tau = tau_student_handin_capabilities
            student_handin_capabilities = Normal('student_handin_capabilities_sh_%i_%i' % (i, j), 0, tau)
            all_vars.append(student_handin_capabilities)

            question_capabilities = []
            for k, q in enumerate(assignment.questions):
                tau = tau_student_question_capabilities
                student_question_capabilities = Normal('student_question_capabilities_shq_%i_%i_%i' % (i, j, k), 0, tau)
                all_vars.append(student_question_capabilities)
                question_capabilities.append(student_question_capabilities)
            handins.append(Handin(s, assignment, student_handin_capabilities, question_capabilities))

    # assign graders
    all_grades = []
    for handin in handins:
        potential_graders = range(0, len(students))
        potential_graders.remove(students.index(handin.student))
        idx = np.random.randint(0, len(potential_graders), num_graders_pr_handin)
        graders = [students[i] for i in idx]
        grades = handin.grade(graders)
        all_grades.append(grades)

    grade_list = sum(sum(all_grades, []), [])

    b = 1.0
    target_mean = 1 / np.sqrt(0.01)
    a = b * target_mean
    tau_exo_grade = Gamma('tau_exo_grade', a, b)
    # plt.hist([1/tau_exo_grade.random()**2 for i in xrange(50000)])
    # tau_exo_grade = Exponential('mu_exo_grade', 20)
    # tau_exo_grade = Normal('mu_exo_grade', 0.05, 1/(0.1**2))
    # pot = TruncPotential('tau_exo_grade', 0, 0.2, tau_exo_grade)
    # all_vars.append(pot)
    tau = tau_exo_grade
    all_vars.append(tau_exo_grade)

    print("Creating grade list")
    # grade_list_real = [g.value for g in grade_list]
    # print 1
    # grade_list_real = [min(max((g), 0), 1) for g in grade_list_real]
    # print 2
    # grade_list = [Normal('grade_%i' % i, g, tau, value=g_real, observed=True)
    #               for i, (g_real, g) in enumerate(zip(grade_list_real, grade_list))]
    # print 3
    # grade_list_potentials = [TruncPotential('grade_potential_%i' % i, 0, 1, g) for i, g in enumerate(grade_list)]
    # print 4

    # take one MCMC step in order to make it more probable that all variables are in the allowed range
    # var_dict = {str(v): v for v in all_vars}
    # sampler = pymc.MCMC(var_dict)
    # sampler.sample(iter=1)

    grade_list_real = [g.value for g in grade_list]
    # plt.hist(grade_list_real)
    # plt.show()
    print("Number of illegal grades: %i (out of %i)" % (
        len([g for g in grade_list_real if g > 1 or g < 0]), len(grade_list)))
    grade_list_real = [min(max(g, 0), 1) for g in grade_list_real]
    # grade_list = Normal('grade_%i' % i, np.array(grade_list), np.array([tau] * len(grade_list)), value=grade_list_real, observed=True)
    # grade_list = [Normal('grade_%i' % i, g, tau, value=g_real, observed=True)
    #               for i, (g_real, g) in enumerate(zip(grade_list_real, grade_list))]
    grade_list_new = []
    for i, (g_real, g) in enumerate(zip(grade_list_real, grade_list)):
        grade_list_new.append(Normal('grade_%i' % i, g, tau, value=g_real, observed=True))
        if i % 100 == 0:
            print(i)
    # grade_list_potentials = [TruncPotential('grade_potential_%i' % i, 0, 1, g) for i, g in enumerate(grade_list)]
    grade_list = grade_list_new
    print("Grade list created")

    all_vars += grade_list
    # all_vars += grade_list_potentials
    all_vars = list(set(all_vars))
    print(len(all_vars))
    # print [str(v) for v in all_vars]
    return locals(), grade_list_real, all_vars