def _offsetModel(kd1, kd2, maxShift, rotOrigin, precision):
    """
    Factory function to return MCMC model for coordinate offset calculation
    :param kd1:
    :param kd2:
    :param maxShift:
    :param rotOrigin:
    :param precision:
    :return:
    """
    rotOrigin = np.array(rotOrigin)
    maxShift = np.array(maxShift)

    shift = Uniform('shift', lower=-maxShift, upper=maxShift)
    xyError = Uniform('xyerror', lower=precision / 1000, upper=precision * 1000)

    @deterministic(plot=False)
    def coordResiduals(kd1=kd1, kd2=kd2, shift=shift):
        c1Xform, c2Xform = _transformCoords(kd1.data, kd2.data, shift, rotOrigin)
        d12, ind1near2 = kd1.query(c2Xform)
        d21, ind2near1 = kd2.query(c1Xform)
        msk1 = ind1near2[ind2near1] == np.arange(ind2near1.size)
        resid = c1Xform - kd2.data[ind2near1]
        resid[~msk1] = 0
        return resid

    opt = Cauchy('data', value=np.zeros_like(kd1.data), alpha=coordResiduals,
                 beta=xyError, observed=True)

    return [shift, xyError, coordResiduals, opt]
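A minimal usage sketch (not from the original source) of sampling the node list returned by _offsetModel with PyMC 2; the cKDTree inputs, bounds, and iteration counts below are illustrative assumptions, and _transformCoords is assumed to come from the surrounding module.

import numpy as np
from scipy.spatial import cKDTree
from pymc import MCMC

# Hypothetical inputs: two 2-D point clouds wrapped in KD-trees (assumption).
kd1 = cKDTree(np.random.rand(100, 2) * 10)
kd2 = cKDTree(np.random.rand(100, 2) * 10)

nodes = _offsetModel(kd1, kd2, maxShift=(5.0, 5.0), rotOrigin=(0.0, 0.0), precision=0.1)
M = MCMC(nodes)
M.sample(iter=20000, burn=10000, thin=10)
shift_samples = M.trace('shift')[:]  # posterior draws of the x/y offset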
def model(prob):
    # observations
    obs = prob.get_observation_values()

    # priors
    variables = []
    sig = Uniform('sig', 0.0, 100.0, value=1.)
    variables.append(sig)
    a1 = Uniform('a1', 0.0, 5.0)
    variables.append(a1)
    k1 = Uniform('k1', 0.01, 2.0)
    variables.append(k1)
    a2 = Uniform('a2', 0.0, 5.0)
    variables.append(a2)
    k2 = Uniform('k2', 0.01, 2.0)
    variables.append(k2)

    # model
    @deterministic()
    def response(pars=variables, prob=prob):
        values = []
        for par in pars:
            values.append(par)
        values = array(values)
        prob.set_parameters(values)
        prob.forward()
        return prob.get_simvalues()

    # likelihood
    y = Normal('y', mu=response, tau=1.0 / sig**2, value=obs, observed=True)
    variables.append(y)
    return variables
def Model_twostage_fit_v2(n_TF, n_gene, p_gene_array, p_TF_gene_array, num_iter,
                          num_burn, num_thin, prior_T, prior_T_method, r_TF_gene,
                          a_TF_gene_h1, a_TF_gene_h0, a_gene):
    """
    Assumptions: We allow learning of parameters
    """
    a_gp = float(a_gene)
    if a_TF_gene_h0 == 'None':
        a_tg0 = Uniform('a_tg0', lower=0.5, upper=1)
    else:
        a_tg0 = float(a_TF_gene_h0)
    if a_TF_gene_h1 == 'None':
        a_tg1 = Uniform('a_tg1', lower=0, upper=0.5)
    else:
        a_tg1 = float(a_TF_gene_h1)
    p_T = float(prior_T)
    if r_TF_gene == 'None':
        r_tg = Uniform('r_tg', lower=0, upper=1)
    else:
        r_tg = float(r_TF_gene)

    p_gene = np.zeros(n_gene, dtype=object)              # the observed variables
    T = np.zeros((n_TF, n_gene), dtype=object)           # variables showing TF-gene-pheno relationship
    T_sum = np.zeros(n_gene, dtype=object)
    p_TF_gene = np.zeros((n_TF, n_gene), dtype=object)   # p-value of correlation of gene and TF

    for j in range(n_gene):
        for i in range(n_TF):
            T[i, j] = Bernoulli('T_%i_%i' % (i, j), p=p_T)

            # If T[i, j] = 0: then p_TF_gene is coming from a mixture of beta and uniform (r is the mixture param)
            @pymc.stochastic(name='p_TF_gene_%i_%i' % (i, j), dtype=float, observed=True)
            def temp_p_TF_gene(value=p_TF_gene_array[i, j], TF_gene_ind=T[i, j],
                               a0=a_tg0, a1=a_tg1, r=r_tg):
                if TF_gene_ind:
                    out = pymc.distributions.beta_like(value, alpha=a1, beta=1)
                else:
                    out = np.log(r * np.exp(pymc.distributions.beta_like(value, alpha=a1, beta=1)) +
                                 (1 - r) * np.exp(pymc.distributions.beta_like(value, alpha=a0, beta=1)))
                return out

            p_TF_gene[i, j] = temp_p_TF_gene

        # we define a deterministic function to find values of T
        @pymc.deterministic(name='T_sum_%i' % j, plot=False)
        def temp_T_sum(ind_vec=T[:, j]):
            return (np.sum(ind_vec) > 0)

        T_sum[j] = temp_T_sum

        # If T_sum[j] == 0: then p_gene is coming from a uniform; else, a beta
        @pymc.stochastic(name='p_gene_%i' % j, dtype=float, observed=True)
        def temp_p_gene(value=p_gene_array[j], ind=T_sum[j], a=a_gp):
            if ind:
                out = pymc.distributions.beta_like(value, alpha=a, beta=1)
            else:
                out = pymc.distributions.uniform_like(value, 0, 1)
            return out

        p_gene[j] = temp_p_gene

    if a_gene == None and a_TF_gene_h0 == None and a_TF_gene_h1 == None:
        M5 = pymc.MCMC([T, T_sum, a_gp, a_tg0, a_tg1])
    else:
        M5 = pymc.MCMC([T, T_sum])
    M5.sample(iter=int(num_iter), burn=int(num_burn), thin=int(num_thin))
    return M5
def createSignalModelExponential(data):
    """
    Toy model that treats the first ~10% of the waveform as an exponential.
    Does a good job of finding the start time (t_0).
    Since I made this as a toy, it's super brittle.  Waveform must be normalized.
    """
    print "Creating model"
    switchpoint = DiscreteUniform('switchpoint', lower=0, upper=len(data))
    noise_sigma = HalfNormal('noise_sigma', tau=sigToTau(.01))
    exp_sigma = HalfNormal('exp_sigma', tau=sigToTau(.05))

    # Modeling these parameters this way is why wf needs to be normalized
    exp_rate = Uniform('exp_rate', lower=0, upper=.1)
    exp_scale = Uniform('exp_scale', lower=0, upper=.1)

    timestamp = np.arange(0, len(data), dtype=np.float)

    @deterministic(plot=False, name="test")
    def uncertainty_model(s=switchpoint, n=noise_sigma, e=exp_sigma):
        ''' Concatenate Poisson means '''
        out = np.empty(len(data))
        out[:s] = n
        out[s:] = e
        return out

    @deterministic
    def tau(eps=uncertainty_model):
        return np.power(eps, -2)

    ## @deterministic(plot=False, name="test2")
    ## def adjusted_scale(s=switchpoint, s1=exp_scale):
    ##     out = np.empty(len(data))
    ##     out[:s] = s1
    ##     out[s:] = s1
    ##     return out
    ##
    ## scale_param = adjusted_scale(switchpoint, exp_scale)

    @deterministic(plot=False)
    def baseline_model(s=switchpoint, r=exp_rate, scale=exp_scale):
        out = np.zeros(len(data))
        out[s:] = scale * (np.exp(r * (timestamp[s:] - s)) - 1.)

        # plt.figure(fig.number)
        # plt.clf()
        # plt.plot(out, color="blue")
        # plt.plot(data, color="red")
        # value = raw_input(' --> Press q to quit, any other key to continue\n')

        return out

    baseline_observed = Normal("baseline_observed", mu=baseline_model, tau=tau,
                               value=data, observed=True)
    return locals()
def make_model(self, data):
    assert len(data) == 2, 'There must be exactly two data arrays'
    name1, name2 = sorted(data.keys())
    y1 = np.array(data[name1])
    y2 = np.array(data[name2])
    assert y1.ndim == 1
    assert y2.ndim == 1
    y = np.concatenate((y1, y2))

    mu_m = np.mean(y)
    mu_p = 0.000001 * 1 / np.std(y)**2
    sigma_low = np.std(y) / 1000
    sigma_high = np.std(y) * 1000

    # the five prior distributions for the parameters in our model
    group1_mean = Normal('group1_mean', mu_m, mu_p)
    group2_mean = Normal('group2_mean', mu_m, mu_p)
    group1_std = Uniform('group1_std', sigma_low, sigma_high)
    group2_std = Uniform('group2_std', sigma_low, sigma_high)
    nu_minus_one = Exponential('nu_minus_one', 1 / 29.)

    @deterministic(plot=False)
    def nu(n=nu_minus_one):
        out = n + 1
        return out

    @deterministic(plot=False)
    def lam1(s=group1_std):
        out = 1 / s**2
        return out

    @deterministic(plot=False)
    def lam2(s=group2_std):
        out = 1 / s**2
        return out

    group1 = NoncentralT(name1, group1_mean, lam1, nu, value=y1, observed=True)
    group2 = NoncentralT(name2, group2_mean, lam2, nu, value=y2, observed=True)
    return Model({
        'group1': group1,
        'group2': group2,
        'group1_mean': group1_mean,
        'group2_mean': group2_mean,
        'group1_std': group1_std,
        'group2_std': group2_std,
    })
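A brief sketch (assumed usage, not part of the original class) of sampling the pymc.Model returned by make_model and comparing the two group means; `estimator`, `control_scores`, and `variant_scores` are hypothetical names.

from pymc import MCMC

model = estimator.make_model({'control': control_scores, 'variant': variant_scores})
M = MCMC(model)
M.sample(iter=40000, burn=10000)
# posterior of the difference between the two group means
mean_diff = M.trace('group1_mean')[:] - M.trace('group2_mean')[:]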
def compute(var_LB, var_UB, num_samples=10):
    from pymc import Uniform, MCMC

    X = Uniform('X', var_LB, var_UB)
    mc = MCMC([X])
    mc.sample(num_samples)

    # import matplotlib.pyplot as plt
    # plt.plot(X.trace()[:, 0], X.trace()[:, 1], ',')
    # plt.show()
    return X.trace()
def toy_model(tau=10000, prior='Beta0.5'):
    b_obs = 200
    f_AB = 400
    f_CB = 1000
    f_CA = 600
    A = np.array([0, f_AB, f_CA, 0, f_CB, 0])

    if prior == 'Normal':
        ABp = Normal('ABp', mu=0.5, tau=100, trace=True)
        CBp = Normal('CBp', mu=0.5, tau=100, trace=True)
        CAp = Normal('CAp', mu=0.5, tau=100, trace=True)
    elif prior == 'Uniform':
        ABp = Uniform('ABp', lower=0.0, upper=1.0, trace=True)
        CBp = Uniform('CBp', lower=0.0, upper=1.0, trace=True)
        CAp = Uniform('CAp', lower=0.0, upper=1.0, trace=True)
    elif prior == 'Beta0.25':
        ABp = Beta('ABp', alpha=0.25, beta=0.25, trace=True)
        CBp = Beta('CBp', alpha=0.25, beta=0.25, trace=True)
        CAp = Beta('CAp', alpha=0.25, beta=0.25, trace=True)
    elif prior == 'Beta0.5':
        ABp = Beta('ABp', alpha=0.5, beta=0.5, trace=True)
        CBp = Beta('CBp', alpha=0.5, beta=0.5, trace=True)
        CAp = Beta('CAp', alpha=0.5, beta=0.5, trace=True)
    elif prior == 'Beta2':
        ABp = Beta('ABp', alpha=2, beta=2, trace=True)
        CBp = Beta('CBp', alpha=2, beta=2, trace=True)
        CAp = Beta('CAp', alpha=2, beta=2, trace=True)
    elif prior == 'Gamma':
        ABp = Gamma('ABp', alpha=1, beta=0.5, trace=True)
        CBp = Gamma('CBp', alpha=1, beta=0.5, trace=True)
        CAp = Gamma('CAp', alpha=1, beta=0.5, trace=True)

    AB1 = ABp
    AB3 = 1 - ABp
    CB4 = CBp
    CB5 = 1 - CBp
    CA42 = CAp
    CA52 = 1 - CAp

    b = Normal('b', mu=f_AB * AB3 + f_CB * CB4 + f_CA * CA42, tau=tau,
               value=b_obs, observed=True, trace=True)

    # print [x.value for x in [ABp, CBp, CAp]]
    # print b.logp
    return locals()
def test_simple(self):

    # Priors
    mu = Normal('mu', mu=0, tau=0.0001)
    s = Uniform('s', lower=0, upper=100, value=10)
    tau = s**-2

    # Likelihood with missing data
    x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

    # Instantiate sampler
    M = MCMC([mu, s, tau, x])

    # Run sampler
    M.sample(10000, 5000, progress_bar=0)

    # Check length of value
    assert_equal(len(x.value), 100)

    # Check size of trace
    tr = M.trace('x')()
    assert_equal(shape(tr), (5000, 2))

    sd2 = [-2 < i < 2 for i in ravel(tr)]

    # Check for standard normal output
    assert_almost_equal(sum(sd2) / 10000., 0.95, decimal=1)
def _model(data, robust=False):
    # priors might be adapted here to be less flat
    mu = Normal('mu', 0, 0.000001, size=2)
    sigma = Uniform('sigma', 0, 1000, size=2)
    rho = Uniform('r', -1, 1)

    # we have a further parameter (prior) for the robust case
    if robust == True:
        nu = Exponential('nu', 1 / 29., 1)

        # we model nu as an Exponential plus one
        @pymc.deterministic
        def nuplus(nu=nu):
            return nu + 1

    @pymc.deterministic
    def precision(sigma=sigma, rho=rho):
        ss1 = float(sigma[0] * sigma[0])
        ss2 = float(sigma[1] * sigma[1])
        rss = float(rho * sigma[0] * sigma[1])
        return inv(np.mat([[ss1, rss], [rss, ss2]]))

    if robust == True:
        # log-likelihood of multivariate t-distribution
        @pymc.stochastic(observed=True)
        def mult_t(value=data.T, mu=mu, tau=precision, nu=nuplus):
            k = float(tau.shape[0])
            res = 0
            for r in value:
                delta = r - mu
                enum1 = gammaln((nu + k) / 2.) + 0.5 * log(det(tau))
                denom = (k / 2.) * log(nu * pi) + gammaln(nu / 2.)
                enum2 = (-(nu + k) / 2.) * log(1 + (1 / nu) * delta.dot(tau).dot(delta.T))
                result = enum1 + enum2 - denom
                res += result[0]
            return res[0, 0]
    else:
        mult_n = MvNormal('mult_n', mu=mu, tau=precision, value=data.T, observed=True)

    return locals()
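A hedged usage sketch (assumption, not in the original module) showing how the dictionary returned by _model might be passed to an MCMC sampler to estimate the correlation; `x` and `y` are hypothetical paired measurement arrays.

import numpy as np
from pymc import MCMC

data = np.vstack([x, y])        # _model expects the two variables in rows
M = MCMC(_model(data, robust=True))
M.sample(iter=20000, burn=5000)
rho_samples = M.trace('r')[:]   # the correlation prior was named 'r' above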
def set_priors(self):
    """Set the parameters' prior distributions.

    Hardcoded behavior for now, with non-committing prior knowledge.

    :return: None
    """
    for group in ['control', 'variant']:
        self.stochastics[group + '_p'] = Uniform(group + '_p', 0, 1)
def CreateBGModel(data, ene, flat, tritSpec, ge68Peak, fe55Peak, zn65Peak, flatSpec):

    # flat = Container([])
    # flatSpec = Container([flat.append(1.0) for i in xrange(len(ene))])
    # print type(flatSpec)
    # flat = []
    # for i in xrange(len(ene)):
    #     flat.append(1.0)
    # flatSpec = np.asarray(flat)

    # stochastic variables (not completely determined by parent values; they have a prob. distribution)
    # need to float resolution, energy, and offset for each peak ...
    tritScale = Uniform('tritScale', lower=0, upper=10)
    flatScale = Uniform('flatScale', lower=0, upper=10)
    fe55Scale = Uniform('fe55Scale', lower=0, upper=10)
    zn65Scale = Uniform('zn65Scale', lower=0, upper=10)
    ge68Scale = Uniform('ge68Scale', lower=0, upper=10)

    # deterministic variables: given by values of parents
    # set up the model for uncertainty (ie, the noise) and the signal (ie, the spectrum)
    @deterministic(plot=False, name="uncertainty")
    def uncertainty_model(s=tritScale):
        out = s * np.ones(len(data))
        return out

    @deterministic
    def tau(eps=uncertainty_model):
        # pymc uses this tau parameter instead of sigma to model a gaussian.  It's annoying.
        return np.power(eps, -2)

    @deterministic(plot=False, name="BGModel")
    def signal_model(t=tritScale, f=flatScale, fe=fe55Scale, zn=zn65Scale, ge=ge68Scale):
        theModel = t * tritSpec + fe * fe55Peak + zn * zn65Peak + ge * ge68Peak
        return theModel

    # full model
    baseline_observed = Normal("baseline_observed", mu=signal_model, tau=tau,
                               value=data, observed=True)
    return locals()
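A minimal sampling sketch (an assumption, not part of the original file) for the background-model factory above; the data histogram and component spectra are assumed to be arrays of equal length that are already built elsewhere.

from pymc import MCMC

model = CreateBGModel(data, ene, flat, tritSpec, ge68Peak, fe55Peak, zn65Peak, flatSpec)
M = MCMC(model)
M.sample(iter=10000, burn=2000)
print(M.trace('tritScale')[:].mean())  # posterior mean of the tritium amplitude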
def __init__(self, name, role=None, groups=[], lam=None, env=None):
    super(PoissonStudent, self).__init__(name, role, groups, env)
    if lam is not None:
        self.lam = lam
    else:
        self.lam = Uniform('lam_%s' % self.name, lower=0, upper=1)
    self.expT = Exponential('tau_%s' % self.name, self.lam)
    self.dt = []
    self.timestamps = []
    self.t = 0
    self.params = [self.lam, self.expT]
def set_priors(self):
    """Set the parameters' prior distributions.

    Hardcoded behavior for now, with non-committing prior knowledge.

    :return: None
    """
    obs = np.concatenate((self.control, self.variant))
    obs_mean = np.mean(obs)
    for group in ['control', 'variant']:
        self.stochastics[group + '_mu'] = Uniform(group + '_mu', 0, 100 * obs_mean)
def set_priors(self):
    """Set the parameters' prior distributions.

    Hardcoded behavior for now, with non-committing prior knowledge.

    :return: None
    """
    obs = np.concatenate((self.control, self.variant))
    obs_mean, obs_sigma = np.mean(obs), np.std(obs)
    for group in ['control', 'variant']:
        self.stochastics[group + '_mean'] = Normal(group + '_mean', obs_mean,
                                                   0.000001 / obs_sigma ** 2)
        self.stochastics[group + '_sigma'] = Uniform(group + '_sigma',
                                                     obs_sigma / 1000, obs_sigma * 1000)
def test_find_MAP():
    tol = 2.0**-11  # 16 bit machine epsilon, a low bar
    data = np.random.randn(100)
    # data should be roughly mean 0, std 1, but let's
    # normalize anyway to get it really close
    data = (data - np.mean(data)) / np.std(data)

    with Model():
        mu = Uniform("mu", -1, 1)
        sigma = Uniform("sigma", 0.5, 1.5)
        Normal("y", mu=mu, tau=sigma**-2, observed=data)

        # Test gradient minimization
        map_est1 = starting.find_MAP(progressbar=False)
        # Test non-gradient minimization
        map_est2 = starting.find_MAP(progressbar=False, method="Powell")

    close_to(map_est1["mu"], 0, tol)
    close_to(map_est1["sigma"], 1, tol)

    close_to(map_est2["mu"], 0, tol)
    close_to(map_est2["sigma"], 1, tol)
def test_find_MAP():
    tol = 2.0**-11  # 16 bit machine epsilon, a low bar
    data = np.random.randn(100)
    # data should be roughly mean 0, std 1, but let's
    # normalize anyway to get it really close
    data = (data - np.mean(data)) / np.std(data)

    with Model() as model:
        mu = Uniform('mu', -1, 1)
        sigma = Uniform('sigma', .5, 1.5)
        y = Normal('y', mu=mu, tau=sigma**-2, observed=data)

        # Test gradient minimization
        map_est1 = starting.find_MAP()
        # Test non-gradient minimization
        map_est2 = starting.find_MAP(fmin=starting.optimize.fmin_powell)

    close_to(map_est1['mu'], 0, tol)
    close_to(map_est1['sigma'], 1, tol)

    close_to(map_est2['mu'], 0, tol)
    close_to(map_est2['sigma'], 1, tol)
def __model__(self, RAA_x, RAA_y, RAA_xerr, RAA_yerr):
    '''RAA model for pymc'''
    a = Uniform('a', lower=0, upper=10)
    b = Uniform('b', lower=0, upper=10)
    c = Uniform('c', lower=0, upper=1)

    @deterministic(plot=False)
    def mu(a=a, b=b, c=c):
        '''compute the RAA from convolution:
        int_d(Delta_pt) P(Delta_pT) * sigma(pt+Delta_pt) / sigma(pt)
        '''
        intg_res = np.zeros_like(RAA_x)
        for i, x in enumerate(gala30x):
            # integral DeltaPt from 0 to infinity
            scale_fct = RAA_x / gala30x[-1]
            x = x * scale_fct
            shifted_pt = RAA_x + x
            mean_dpt = self.mean_ptloss(shifted_pt, b, c)
            alpha = a
            beta = a / mean_dpt
            pdpt = gamma_dist(x, alpha, beta)
            if self.with_ppdata:
                intg_res += scale_fct * gala30w[i] * pdpt * self.pp_fit(shifted_pt)
            else:
                intg_res += scale_fct * gala30w[i] * pdpt

        if self.with_ppdata:
            return intg_res / self.pp_fit(RAA_x)
        else:
            return intg_res

    likelihood_y = Normal('likelihood_y', mu=mu, tau=1 / (RAA_yerr)**2,
                          observed=True, value=RAA_y)
    return locals()
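A hypothetical way (an assumption, not in the original class) to sample the model returned by __model__, assuming `raa_fitter` is an instance of the enclosing class and the RAA data arrays are already loaded.

from pymc import MCMC

M = MCMC(raa_fitter.__model__(RAA_x, RAA_y, RAA_xerr, RAA_yerr))
M.sample(iter=20000, burn=5000, thin=5)
a_post = M.trace('a')[:]  # posterior draws of the gamma shape parameter
b_post = M.trace('b')[:]
c_post = M.trace('c')[:]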
def set_priors(self):
    """Set the parameters' prior distributions.

    Hardcoded behavior for now, with non-committing prior knowledge.

    :return: None
    """
    obs = np.concatenate((self.control, self.variant))
    mean, sigma, med = np.mean(obs), np.std(obs), np.median(obs)
    location = np.log(med)
    scale = np.sqrt(2 * np.log(mean / med))
    for group in ['control', 'variant']:
        self.stochastics[group + '_location'] = Normal(group + '_location', location,
                                                       0.000001 / sigma ** 2)
        self.stochastics[group + '_scale'] = Uniform(group + '_scale',
                                                     scale / 1000, scale * 1000)
def test_fit(self):
    statement = {
        'actor': u'123456789-1234-1234-1234-12345678901234',
        'verb': u'http://adlnet.gov/expapi/verbs/completed',
        'timestamp': 1519862425.0
    }
    user_name = '123456789-1234-1234-1234-12345678901234'
    lam = Uniform('lam', lower=0, upper=1)
    s1 = PoissonStudent(user_name, lam=lam)
    env = Environment([s1], [statement])
    env.add_agent(s1)
    res = env.fit([lam], method='mcmc')
    self.assertIsInstance(res, MCMC,
                          msg="The output of fit is not an MCMC instance")
def main2():
    data = load_file("statements-brneac3-20180301-20180531.json")
    statements = load_statements(data)

    user_name = "2890ebd9-1147-4f16-8a65-b7239bd54bd0"
    lam = Uniform('lam', lower=0, upper=1)
    s1 = PoissonStudent(user_name, lam=lam)

    ## Creating environment and fitting data
    env = Environment([s1], statements)
    env.add_student(s1)
    res = env.fit([lam], method='mcmc')
    print(res)

    ## Plotting; can be integrated in env
    hist(res.trace('lambda_{}'.format(user_name))[:])
    show()
def main():
    s1 = PoissonStudent("arnaud", 1)
    s2 = PoissonStudent("francois", 1)
    s3 = PoissonStudent("david", 0.5)
    students = [s1, s2, s3]

    env = Environment(students)
    statements = env.simulate(1000, verbose=True)

    student_names = set(s['actor'] for s in statements)
    lam = Uniform('lam', lower=0, upper=1)
    students = [PoissonStudent(name=name, lam=lam) for name in student_names]
    env = Environment(students, statements)
    params = [lam]
    for s in students:
        params.extend(s.params)
    m = MCMC(params)
    m.sample(iter=10000, burn=1000, thin=10)
    hist(m.trace('lambda_david')[:])
    show()
def test_non_missing(self):
    """
    Test to ensure that masks without any missing values are not imputed.
    """
    fake_data = rnormal(0, 1, size=10)
    m = ma.masked_array(fake_data, fake_data == -999)

    # Priors
    mu = Normal('mu', mu=0, tau=0.0001)
    s = Uniform('s', lower=0, upper=100, value=10)
    tau = s**-2

    # Likelihood with missing data
    x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

    # Instantiate sampler
    M = MCMC([mu, s, tau, x])

    # Run sampler
    M.sample(20000, 19000, progress_bar=0)

    # Ensure likelihood does not have a trace
    assert_raises(AttributeError, x.__getattribute__, 'trace')
def generate_model(subset=CONCURRENT):
    """Generate a model for a given subset of data"""

    data = get_data(subset)
    obs_indiv = data["obs_indiv"]
    obs_summ = data["obs_summ"]

    # =========================
    # = Individual-level data =
    # =========================

    if subset is HISTORICAL:
        # Impute Phe for individuals with only range given
        phe_isnan = np.isnan(obs_indiv['phe'])
        phe_low_missing = obs_indiv['phe_low'][phe_isnan]
        phe_high_missing = obs_indiv['phe_high'][phe_isnan]
        phe_missing = Uniform('phe_missing', lower=phe_low_missing,
                              upper=phe_high_missing, plot=False)
    else:
        phe_missing = None

    # Study unit random effect for intercept
    # Mean intercept
    mu_int = Normal('mu_int', mu=100, tau=0.01, value=100)
    # SD of intercepts
    sigma_int = Uniform('sigma_int', lower=0, upper=100, value=10.)
    tau_int = Lambda('tau_int', lambda s=sigma_int: s**-2)
    # Intercepts by study
    beta0 = Normal('beta0', mu=mu_int, tau=tau_int,
                   value=np.zeros(len(data["unique_papers"])))

    # Study unit random effect for slope
    # Mean slope
    mu_slope = Normal('mu_slope', mu=0, tau=0.1, value=-0.01)
    # SD of slopes
    sigma_slope = Uniform('sigma_slope', lower=0, upper=10, value=1.)
    tau_slope = Lambda('tau_slope', lambda s=sigma_slope: s**-2)
    # Random slopes by study
    alpha0 = Normal('alpha0', mu=mu_slope, tau=tau_slope,
                    value=np.zeros(len(data["unique_papers"])))

    alpha1 = Normal('alpha1', mu=0, tau=0.01, value=0.0)

    @deterministic
    def beta1_indiv(a0=alpha0, a1=alpha1, crit=obs_indiv['critical_period']):
        """Calculate Phe effect (slope) for each individual"""
        return a0[data["paper_id_indiv"]] + a1 * crit

    @deterministic
    def mu_iq(b0=beta0, b1=beta1_indiv, phe=obs_indiv['phe'], phe_m=phe_missing):
        """Expected IQ"""
        if subset is HISTORICAL:
            # Insert values for missing phe
            phe[phe_isnan] = phe_m
        return b0[data["paper_id_indiv"]] + b1 * phe

    # Process noise (variance of observations about predicted mean)
    sigma_iq = Uniform('sigma_iq', lower=0, upper=100, value=1)
    tau_iq = Lambda('tau_iq', lambda s=sigma_iq: s**-2)

    # Data likelihood
    iq_like = Normal('iq_like', mu=mu_iq, tau=tau_iq, value=obs_indiv['iq'],
                     observed=True)

    @deterministic
    def iq_pred(mu=mu_iq, tau=tau_iq):
        """Simulated data for posterior predictive checks"""
        return rnormal(mu, tau, size=len(obs_indiv['iq']))

    # ====================================================
    # = Infer slope from correlations of summarized data =
    # ====================================================

    # Means and stdevs of phe and IQ
    stdev_phe = obs_summ['phe_sd']
    stdev_iq = obs_summ['iq_sd']
    mean_phe = obs_summ['phe']
    mean_iq = obs_summ['iq']

    @deterministic(cache_depth=0)
    def beta1_summ(a0=alpha0, a1=alpha1, crit=obs_summ['critical_period']):
        """Calculate mean of slope for summarized data"""
        return a0[data["paper_id_summ"]] + a1 * crit

    @potential
    def r_like(b1=beta1_summ, n=obs_summ['n']):
        """Likelihood for correlation coefficients of summarized data"""
        # Convert slope to r
        rho = b1 * stdev_phe / stdev_iq
        # Fisher transformation to allow for normality assumption
        eps = np.arctan(rho) - np.arctan(obs_summ['correlation'])
        # Difference should be mean-zero
        return normal_like(eps, mu=np.zeros(len(n)), tau=n - 3)

    # Calculate probabilities of IQ<85
    # Generate combinations of predictors
    crit_pred, phe_pred = np.transpose([[crit, phe] for crit in [0, 1]
                                        for phe in np.arange(200, 3200, 200)])

    @deterministic(cache_depth=0)
    def pred(a1=alpha1, mu_int=mu_int, tau_int=tau_int, mu_slope=mu_slope,
             tau_slope=tau_slope, tau_iq=tau_iq, values=(70, 75, 80, 85)):
        """Estimate the probability of IQ<85 for different covariate values"""
        b0 = rnormal(mu_int, tau_int, size=len(phe_pred))
        a0 = rnormal(mu_slope, tau_slope, size=len(phe_pred))
        b1 = a0 + a1 * crit_pred
        iq = rnormal(b0 + b1 * phe_pred, tau_iq)
        return [iq < v for v in values]

    return locals()
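A hedged sketch (an assumption, not from the original script) of running this hierarchical model; the data-subset constants and loaders are assumed to come from the surrounding module.

from pymc import MCMC

M = MCMC(generate_model())
M.sample(iter=50000, burn=25000, thin=5)
slope_interaction = M.trace('alpha1')[:]  # posterior for the critical-period slope term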
@stochastic
def theta(value=array([2., 5.])):
    """Slope and intercept parameters for a straight line.
    The likelihood corresponds to the prior probability of the parameters."""
    slope, intercept = value
    prob_intercept = uniform_like(intercept, -10, 10)
    prob_slope = log(1. / cos(slope)**2)
    return prob_intercept + prob_slope


init_x = data_x.clip(min=0, max=50)

# Inferred true inputs.
x = Uniform('x', lower=0, upper=50, value=init_x)


@deterministic
def modelled_y(x=x, theta=theta):
    """Return y computed from the straight line model, given the
    inferred true inputs and the model parameters."""
    slope, intercept = theta
    return slope * x + intercept


"""
Input error model.

Define the probability of measuring x knowing the true value.
"""
"""
Zero-inflated Poisson example using simulated data.
"""
import numpy as np

from pymc import Uniform, Beta, observed, rpoisson, poisson_like

# True parameter values
mu_true = 5
psi_true = 0.75
n = 100

# Simulate some data
data = np.array(
    [rpoisson(mu_true) * (np.random.random() < psi_true) for i in range(n)])

# Uniform prior on Poisson mean
mu = Uniform('mu', 0, 20)

# Beta prior on psi
psi = Beta('psi', alpha=1, beta=1)


@observed(dtype=int, plot=False)
def zip(value=data, mu=mu, psi=psi):
    """ Zero-inflated Poisson likelihood """

    # Initialize likelihood
    like = 0.0

    # Loop over data
    for x in value:

        if not x:
            # Zero values
            like += np.log((1. - psi) + psi * np.exp(-mu))

        else:
            # Non-zero values
            like += np.log(psi) + poisson_like(x, mu)

    return like
# coin_model.py
# Cauchy model for lighthouse flash positions

import numpy as np
from pymc import Uniform, Cauchy, deterministic
from bayes import gen_lighthouse_data

# set true values of parameters
alpha_true = 1.0
beta_true = 1.5

# set dataset size
nflashes = 10000

# gather observed data from true parameters
x_obs = gen_lighthouse_data(nflashes, alpha_true, beta_true)

# parameter prior: distance along shore
alpha = Uniform("alpha", lower=0.0, upper=2.0)
#alpha = Normal("alpha", mu=1.5, tau=(1 / (0.3)**2))
#alpha = Normal("alpha", mu=1.5, tau=(1 / (0.1)**2))

# parameter prior: distance from shore
beta = Uniform("beta", lower=0.0, upper=2.0)
#beta = Normal("beta", mu=1.5, tau=(1 / (0.3)**2))
#beta = Normal("beta", mu=1.5, tau=(1 / (0.1)**2))

# model: flash arrival points
x = Cauchy("x", alpha=alpha, beta=beta, value=x_obs, observed=True)
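A short sketch (assumed usage, not part of the original script) of sampling the lighthouse posterior defined above with PyMC 2.

from pymc import MCMC

M = MCMC([alpha, beta, x])
M.sample(iter=20000, burn=5000)
# compare posterior means with alpha_true and beta_true
print(M.trace('alpha')[:].mean(), M.trace('beta')[:].mean())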
ysim = odesolve(model, tspan)

# Add error to the underlying data
seed = 2
random = np.random.RandomState(seed)
sigma = 0.1
ydata = ysim['A_obs'] * (random.randn(len(ysim['A_obs'])) * sigma + 1)

solver = Solver(model, tspan)

# The prior distribution for our rate parameter, a stochastic variable
# with either:
# - a uniform distribution between 0 and 1, or
# - a lognormal distribution centered at 0.1, with a variance of 3
#   log10 units on either side
k = Uniform('k', lower=0, upper=1)
#k = Lognormal('k', mu=np.log(1e-1), tau=(1/(np.log(10)*np.log(1e3))))

# Our "model" is a deterministic random variable that is determined by the
# value of k that is passed in as an argument
@deterministic(plot=False)
def decay_model(k=k):
    # The solver object needs all of the parameters, including the initial
    # condition parameter A_0 that is not being fit; the final parameter
    # value of 1.0 is the __source_0 parameter which is required because
    # we are using a degradation rule.
    # NOTE: Make sure the parameter values passed to the solver are in
    # the right order!
    solver.run(np.array([A_0.value, k, 1.0]))
    y = solver.yobs['A_obs']
def random(theta2, y, rho):
    mean = y[0] + rho * (theta2 - y[1])
    var = 1. - rho ** 2
    return rnormal(mean, 1. / var)


@stoch
def theta2(value, theta1, y, rho):
    """Conditional probability p(theta2|theta1, y, rho)"""

    def logp(value, theta2, y, rho):
        mean = y[1] + rho * (theta1 - y[0])
        var = 1. - rho ** 2
        return normal_like(value, mean, 1. / var)

    def random(theta2, y, rho):
        mean = y[0] + rho * (theta2 - y[1])
        var = 1. - rho ** 2
        return rnormal(mean, 1. / var)


rho = Uniform('rho', rseed=True, lower=0, upper=1)


@data
def y(value=(3, 6)):
    return 0


G = GibbsSampler([theta1, theta2, rho])
import numpy as np

xs = np.array([2, 3, 4, 5, 6, 7])
ys = np.array([1.5, 1.8, 1.9, 2.3, 2.5, 2.8])

data = load('data/bran_body_weight.txt', 33)
converted_data = [(float(x[0]), float(x[1]), float(x[2])) for x in data]
xs = [x[1] for x in converted_data]
ys = [x[2] for x in converted_data]

b0 = Normal("b0", 0, 0.0003)
b1 = Normal("b1", 0, 0.0003)
err = Uniform("err", 0, 500)

x_weight = Normal("weight", 0, 1, value=xs, observed=True)


@deterministic(plot=False)
def pred(b0=b0, b1=b1, x=x_weight):
    return b0 + b1 * x


y = Normal("y", mu=pred, tau=err, value=ys, observed=True)

model = Model([pred, b0, b1, y, err, x_weight])
m = MCMC(model)
m.sample(burn=2000, iter=10000)

bb0 = sum(m.trace('b0')[:]) / len(m.trace('b0')[:])
""" from pymc import rweibull, Uniform, Weibull """ First, we will create a fake data set using some fixed parameters. In real life, of course, you already have the data ! """ alpha = 3 beta = 5 N = 100 dataset = rweibull(alpha, beta, N) """ Now we create a pymc model that defines the likelihood of the data set and prior assumptions about the value of the parameters. """ a = Uniform('a', lower=0, upper=10, value=5, doc='Weibull alpha parameter') b = Uniform('b', lower=0, upper=10, value=5, doc='Weibull beta parameter') like = Weibull('like', alpha=a, beta=b, value=dataset, observed=True) pred = Weibull('like', alpha=a, beta=b, value=dataset) if __name__ == '__main__': from pymc import MCMC, Matplot # Sample the parameters a and b and analyze the results M = MCMC([a, b, like]) M.sample(10000, 5000) Matplot.plot(M)
def create_model():
    all_vars = []

    sigma_qd = Uniform('sigma_qd', 0, 0.15)
    sigma_qd.value = 0.05
    mu_sc = Uniform('mu_sc', 0, 1)
    mu_sc.value = 0.5
    sigma_sc = Uniform('sigma_sc', 0, 0.2)
    sigma_sc.value = 0.1
    sigma_bias = Uniform('sigma_bias', 0, 0.1)
    # sigma_bias.value = 0.01
    sigma_student_handin_capabilities = Uniform('sigma_student_handin_capabilities', 0, 0.2)
    sigma_student_handin_capabilities.value = 0.05
    sigma_student_question_capabilities = Uniform('sigma_student_question_capabilities', 0, 0.2)
    sigma_student_question_capabilities.value = 0.05

    all_vars.append(sigma_qd)
    all_vars.append(mu_sc)
    all_vars.append(sigma_sc)
    all_vars.append(sigma_bias)
    all_vars.append(sigma_student_handin_capabilities)
    all_vars.append(sigma_student_question_capabilities)

    for i in xrange(num_assignments):
        questions = []
        for j in xrange(num_questions_pr_handin):
            difficulty = Normal('difficulty_q_%i_%i' % (i, j), 0,
                                pymc.Lambda('tau_%i_%i' % (i, j),
                                            lambda a=sigma_qd: 1 / (sigma_qd.value * sigma_qd.value)))
            q = Question(difficulty)
            questions.append(q)
            all_vars.append(difficulty)
        assignment = Assignment(questions)
        assignments.append(assignment)

    for i in xrange(num_students):
        # student_capabilities = Normal('student_capabilities_s_%i' % i, mu_sc, 1/sigma_sc)
        # student_capabilities = TruncNormal('student_capabilities_s_%i' % i, 0, 1, mu_sc, 1/sigma_sc)
        tau = pymc.Lambda('tau1_%i' % i, lambda a=sigma_sc: 1 / (sigma_sc.value * sigma_sc.value))
        student_capabilities = TruncNormal('student_capabilities_s_%i' % i, 0, 1, mu_sc, tau)

        # grading_bias = Normal('grading_bias_s_%i' % i, 0, 1/sigma_bias)
        tau = pymc.Lambda('tau2_%i' % i, lambda a=sigma_bias: 1 / (sigma_bias.value * sigma_bias.value))
        grading_bias = Normal('grading_bias_s_%i' % i, 0, tau)

        all_vars.append(student_capabilities)
        all_vars.append(grading_bias)
        s = Student(student_capabilities, grading_bias)
        students.append(s)

        for j, assignment in enumerate(assignments):
            # student_handin_capabilities = Normal('student_handin_capabilities_sh_%i_%i' % (i, j), 0, 1/sigma_student_handin_capabilities)
            tau = pymc.Lambda('tau2_%i_i%i' % (i, j),
                              lambda a=sigma_student_handin_capabilities: 1 / (sigma_student_handin_capabilities.value * sigma_student_handin_capabilities.value))
            student_handin_capabilities = Normal('student_handin_capabilities_sh_%i_%i' % (i, j), 0, tau)
            all_vars.append(student_handin_capabilities)

            question_capabilities = []
            for k, q in enumerate(assignment.questions):
                tau = pymc.Lambda('tau2_%i_i%i_%i' % (i, j, k),
                                  lambda a=sigma_student_question_capabilities: 1 / (sigma_student_question_capabilities.value * sigma_student_question_capabilities.value))
                student_question_capabilities = Normal('student_question_capabilities_shq_%i_%i_%i' % (i, j, k), 0, tau)
                all_vars.append(student_question_capabilities)
                question_capabilities.append(student_question_capabilities)

            handins.append(Handin(s, assignment, student_handin_capabilities, question_capabilities))

    # assign graders
    all_grades = []
    for handin in handins:
        potential_graders = range(0, len(students))
        potential_graders.remove(students.index(handin.student))
        idx = np.random.randint(0, len(potential_graders), num_graders_pr_handin)
        graders = [students[i] for i in idx]
        grades = handin.grade(graders)
        all_grades.append(grades)

    grade_list = sum(sum(all_grades, []), [])
    grade_list_real = [g.value for g in grade_list]
    # zip([str(g) for g in grade_list[0].extended_parents], [g.value for g in grade_list[0].extended_parents])
    print len([f for f in grade_list_real if f > 1 or f < 0])
    grade_list_real = [min(max((g), 0), 1) for g in grade_list_real]

    sigma_exo_grade = Uniform('mu_exo_grade', 0, 0.2)
    tau = pymc.Lambda('tau2_%i_i%i_%i' % (i, j, k),
                      lambda a=sigma_exo_grade: 1 / (sigma_exo_grade.value * sigma_exo_grade.value))
    all_vars.append(sigma_exo_grade)

    print "Creating grade list"
    # grade_list = [Normal('grade_%i' % i, g, 1/mu_exo_grade, value=g_real, observed=True) for i, (g_real, g) in enumerate(zip(grade_list_real, grade_list))]
    grade_list = [TruncNormal('grade_%i' % i, 0, 1, g, tau, value=g_real, observed=True)
                  for i, (g_real, g) in enumerate(zip(grade_list_real, grade_list))]
    print "Grade list created"
    # grade_list = []
    # grade_list = [pymc.ScipyDistributions.cdists.truncnorm(a=a_truncnorm, b=b_truncnorm, loc=g, scale=1/mu_exo_grade, value=g_real, observed=True) for ]
    # for g_real, g in zip(grade_list_real, grade_list):
    #     stok = pymc.stochastic_from_dist('grade', lambda x: pymc.ScipyDistributions.truncated_normal_like(x, a=a_truncnorm, b=b_truncnorm, mu=g, tau=1/mu_exo_grade))

    all_vars += grade_list
    all_vars = list(set(all_vars))
    print len(all_vars)
    # print [str(v) for v in all_vars]

    return locals(), grade_list_real, all_vars