def sample_bandits(self, n=1):
    bb_score = np.zeros(n)
    choices = np.zeros(n)

    P0_samples = [np.random.rand()]
    P1_samples = [np.random.rand()]
    P2_samples = [np.random.rand()]

    for k in range(n):
        # sample from the bandits' priors, and select the largest sample
        choice = np.argmax([np.random.choice(P0_samples),
                            np.random.choice(P1_samples),
                            np.random.choice(P2_samples)])
        print()
        print()
        print(k, choice)

        # sample the chosen bandit
        result = self.bandits.pull(choice)

        # update priors and score
        self.wins[choice] += result
        self.trials[choice] += 1
        bb_score[k] = result
        self.N += 1
        choices[k] = choice

        if choice == 0:
            P0 = pm.Uniform('P0', 0, 1)
            X0 = pm.Binomial('X0', value=self.wins[0], n=self.trials[0], p=P0, observed=True)
            mcmc0 = pm.MCMC([P0, X0])
            mcmc0.sample(15000, 5000)
            P0_samples = mcmc0.trace('P0')[:]
        elif choice == 1:
            P1 = pm.Uniform('P1', 0, 1)
            X1 = pm.Binomial('X1', value=self.wins[1], n=self.trials[1], p=P1, observed=True)
            mcmc1 = pm.MCMC([P1, X1])
            mcmc1.sample(15000, 5000)
            P1_samples = mcmc1.trace('P1')[:]
        else:
            P2 = pm.Uniform('P2', 0, 1)
            X2 = pm.Binomial('X2', value=self.wins[2], n=self.trials[2], p=P2, observed=True)
            mcmc2 = pm.MCMC([P2, X2])
            mcmc2.sample(15000, 5000)
            P2_samples = mcmc2.trace('P2')[:]

    self.bb_score = np.r_[self.bb_score, bb_score]
    self.choices = np.r_[self.choices, choices]
    return
def binomial_model():
    n_samples = 100
    xs = intX(np.random.binomial(n=1, p=0.2, size=n_samples))
    with pm.Model() as model:
        p = pm.Beta("p", alpha=1, beta=1)
        pm.Binomial("xs", n=1, p=p, observed=xs)
    return model
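# A minimal usage sketch (an assumption, not part of the original test code): build
# the model above and draw posterior samples for p with a recent PyMC version, where
# pm.sample returns an ArviZ InferenceData object by default.
model = binomial_model()
with model:
    idata = pm.sample(1000, tune=1000, chains=2)
# the posterior mean of p should land near the true generating value of 0.2
print(idata.posterior["p"].mean().item())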
def test_model_02(x):
    # https://github.com/pymc-devs/pymc
    # Import relevant modules
    import pymc
    import numpy as np

    # Some data
    n = 5 * np.ones(4, dtype=int)
    #x = np.array([-.86, -.3, -.05, .73])

    # Priors on unknown parameters
    alpha = pymc.Normal('alpha', mu=0, tau=.01)
    beta = pymc.Normal('beta', mu=0, tau=.01)

    # Arbitrary deterministic function of parameters
    @pymc.deterministic
    def theta(a=alpha, b=beta):
        """theta = logit^{-1}(a+b)"""
        return pymc.invlogit(a + b * x)

    # Binomial likelihood for data
    d = pymc.Binomial('d', n=n, p=theta,
                      value=np.array([0., 1., 3., 5.]), observed=True)

    return locals()
def posterior_upvote_ratio(upvotes, downvotes, samples=20000):
    p = pm.Uniform("p", 0, 1, value=0.5)
    n = upvotes + downvotes
    obs_upvotes = pm.Binomial("obs", n, p, value=upvotes, observed=True)
    model = pm.Model([p, obs_upvotes])
    mcmc = pm.MCMC(model)
    mcmc.sample(samples)
    return mcmc.trace("p")[:]
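# A hypothetical usage sketch (vote counts are illustrative, not from the original):
# compare the posterior upvote ratios of two comments with the same raw ratio but
# different numbers of votes; the comment with fewer votes should show a visibly
# wider posterior.
samples_a = posterior_upvote_ratio(30, 10)   # 40 votes, 75% raw ratio
samples_b = posterior_upvote_ratio(3, 1)     # 4 votes, 75% raw ratio
print("comment A posterior mean: %.3f" % samples_a.mean())
print("comment B posterior mean: %.3f" % samples_b.mean())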
def test_layers(self):
    with pm.Model(rng_seeder=232093) as model:
        a = pm.Uniform("a", lower=0, upper=1, size=10)
        b = pm.Binomial("b", n=1, p=a, size=10)

    b_sampler = compile_pymc([], b, mode="FAST_RUN")
    avg = np.stack([b_sampler() for i in range(10000)]).mean(0)
    npt.assert_array_almost_equal(avg, 0.5 * np.ones((10,)), decimal=2)
def test_single_observation(self):
    with pm.Model():
        p = pm.Uniform("p", 0, 1)
        pm.Binomial("w", p=p, n=2, observed=1)
        inference_data = pm.sample(500, chains=2, return_inferencedata=True)

    assert inference_data
    assert inference_data.log_likelihood["w"].shape == (2, 500, 1)
def indep_samples(x1, n1, x2, n2):
    class experimental_data(object):
        def __init__(self, x1, n1, x2, n2):
            self.data1 = np.hstack((np.ones((x1,)), np.zeros((n1 - x1,))))
            self.data2 = np.hstack((np.ones((x2,)), np.zeros((n2 - x2,))))

    ### for testing purposes
    #example = True
    data = experimental_data(x1, n1, x2, n2)
    #if example:
    #    example_data1 = np.hstack((np.ones((15,)), np.zeros((20,))))
    #    example_data2 = np.hstack((np.ones((16,)), np.zeros((23,))))
    #    sim_data_size = 14
    #    data1 = example_data1
    #    data2 = example_data2

    p1_val = np.mean(data.data1)
    p2_val = np.mean(data.data2)
    ind_val = p1_val + p2_val - p1_val * p2_val
    print("P1 = " + str(p1_val))
    print("P2 = " + str(p2_val))
    print("Independence = " + str(ind_val))

    p1 = pymc.Beta('p1', alpha=0.5, beta=0.5)
    p2 = pymc.Beta('p2', alpha=0.5, beta=0.5)
    # observed binomial counts; nodes given distinct names so the traces do not clash
    x1 = pymc.Binomial('x1', n=len(data.data1), p=p1, value=np.sum(data.data1), observed=True)
    x2 = pymc.Binomial('x2', n=len(data.data2), p=p2, value=np.sum(data.data2), observed=True)

    @pymc.deterministic
    def ind_assump(p1=p1, p2=p2):
        return p1 + p2 - p1 * p2

    return locals()

#@pymc.deterministic
#def sim():
#    sim_data = pymc.Binomial('sim', n=sim_data_size, p=ind_assump)
#    return sim_data
def get_Models():
    # Full model (Beta & Binomial)
    nN, nA, nB = 3, 5, 1
    aD = [0, 3, 1]
    Beta = pm.Beta('Beta', alpha=nA, beta=nB)  # @UndefinedVariable
    BinomD = [pm.Binomial('BinomD_' + str(i), n=nN, p=Beta, observed=True, value=aD[i])
              for i in range(len(aD))]  # @UndefinedVariable @UnusedVariable
    BinomQ = pm.Binomial('BinomQ', n=nN, p=Beta)  # @UndefinedVariable

    # Collapsed model (Beta-Binomial)
    nA2 = nA + sum(aD)
    nB2 = nB + nN * len(aD) - sum(aD)
    BetaBinQ = pm.Betabin('BetaBinQ', n=nN, alpha=nA2, beta=nB2)  # @UndefinedVariable

    return np.concatenate([[Beta, BinomQ, BetaBinQ], BinomD])
def posterior_upvote_ratio(upvotes, downvotes, samples=20000):
    """
    This function accepts the number of upvotes and downvotes a particular comment
    received, and the number of posterior samples to return to the user. Assumes
    a uniform prior.
    """
    N = upvotes + downvotes
    upvote_ratio = pm.Uniform("upvote_ratio", 0, 1)
    observations = pm.Binomial("obs", N, upvote_ratio, value=upvotes, observed=True)

    # do the fitting; first do a MAP as it is cheap and useful.
    map_ = pm.MAP([upvote_ratio, observations]).fit()
    mcmc = pm.MCMC([upvote_ratio, observations])
    mcmc.sample(samples, samples / 4)

    return mcmc.trace("upvote_ratio")[:]
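# A hypothetical usage sketch (vote counts are illustrative, not from the original
# source): rank comments by a conservative estimate of their upvote ratio, here the
# 5th percentile of each posterior sample set; assumes numpy is imported as np.
vote_pairs = [(30, 10), (3, 1), (120, 40)]
posteriors = [posterior_upvote_ratio(u, d) for u, d in vote_pairs]
lower_bounds = [np.percentile(s, 5) for s in posteriors]
print(lower_bounds)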
def main():
    N = 100
    p = pm.Uniform("freq_cheating", 0, 1)
    true_answers = pm.Bernoulli("truths", p, size=N)
    first_coin_flips = pm.Bernoulli("first_flips", 0.5, size=N)
    second_coin_flips = pm.Bernoulli("second_flips", 0.5, size=N)

    @pm.deterministic
    def observed_proportion(t_a=true_answers, fc=first_coin_flips, sc=second_coin_flips):
        result = t_a & fc | ~fc & sc
        return float(sum(result)) / len(result)

    X = 35
    observations = pm.Binomial("obs", N, observed_proportion, value=X, observed=True)

    model = pm.Model([p, true_answers, first_coin_flips,
                      second_coin_flips, observed_proportion, observations])

    # To be explained in Chapter 3!
    mcmc = pm.MCMC(model)
    mcmc.sample(40000, 15000)

    figsize(12.5, 3)
    p_trace = mcmc.trace("freq_cheating")[:]
    plt.hist(p_trace, histtype="stepfilled", normed=True, alpha=0.85, bins=30,
             label="posterior distribution", color="#348ABD")
    plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.3)
    plt.xlim(0, 1)
    plt.legend()
    plt.show()
def test_transformed(self):
    n = 18
    at_bats = 45 * np.ones(n, dtype=int)
    hits = np.random.randint(1, 40, size=n, dtype=int)
    draws = 50

    with pm.Model() as model:
        phi = pm.Beta("phi", alpha=1.0, beta=1.0)
        kappa_log = pm.Exponential("logkappa", lam=5.0)
        kappa = pm.Deterministic("kappa", at.exp(kappa_log))
        thetas = pm.Beta("thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, size=n)
        y = pm.Binomial("y", n=at_bats, p=thetas, observed=hits)
        gen = pm.sample_prior_predictive(draws)

    assert gen.prior["phi"].shape == (1, draws)
    assert gen.prior_predictive["y"].shape == (1, draws, n)
    assert "thetas" in gen.prior.data_vars
def mymodel():
    # Some data
    n = 5 * np.ones(4, dtype=int)
    x = np.array([-.86, -.3, -.05, .73])

    # Priors on unknown parameters
    alpha = pymc.Normal('alpha', mu=0, tau=.01)
    beta = pymc.Normal('beta', mu=0, tau=.01)

    # Arbitrary deterministic function of parameters
    @pymc.deterministic
    def theta(a=alpha, b=beta):
        """theta = logit^{-1}(a+b)"""
        return pymc.invlogit(a + b * x)

    # Binomial likelihood for data
    d = pymc.Binomial('d', n=n, p=theta,
                      value=np.array([0., 1., 3., 5.]), observed=True)
def test_duplicate_vars():
    with pytest.raises(ValueError) as err:
        with pm.Model():
            pm.Normal("a")
            pm.Normal("a")
    err.match("already exists")

    with pytest.raises(ValueError) as err:
        with pm.Model():
            pm.Normal("a")
            pm.Normal("a", transform=transforms.log)
    err.match("already exists")

    with pytest.raises(ValueError) as err:
        with pm.Model():
            a = pm.Normal("a")
            pm.Potential("a", a**2)
    err.match("already exists")

    with pytest.raises(ValueError) as err:
        with pm.Model():
            pm.Binomial("a", 10, 0.5)
            pm.Normal("a", transform=transforms.log)
    err.match("already exists")
# Initialize constants
N = 100                                  # number of students
p = pm.Uniform("freq_cheating", 0, 1)    # the cheating frequency we want to infer

#-----------------------------------
# Modeling
true_answer = pm.Bernoulli("truths", p, size=N)
first_coin_flips = pm.Bernoulli("first_flips", 0.5, size=N)
second_coin_flips = pm.Bernoulli("second_flips", 0.5, size=N)

@pm.deterministic
def p_skewed(p=p):
    return 0.5 * p + 0.25

yes_responses = pm.Binomial("number_cheaters", 100, p_skewed, value=35, observed=True)
print("{0} : {1}".format(yes_responses, yes_responses.value))

model = pm.Model([yes_responses, p_skewed, p])

# to be explained in Chapter 3
mcmc = pm.MCMC(model)
mcmc.sample(25000, 2500)

#-----------------------------------
# plot the answer
figsize(12.5, 3)
p_trace = mcmc.trace("freq_cheating")[:]
plt.hist(p_trace, histtype="stepfilled", normed=True, alpha=0.85,
def make_model(cls):
    with pm.Model() as model:
        p = pm.Beta("p", [0.5, 0.5, 1.0], [0.5, 0.5, 1.0], size=3)
        pm.Binomial("y", p=p, n=[4, 12, 9], observed=[1, 2, 9])
    return model
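# A minimal usage sketch, assuming a recent PyMC version; the cls argument is unused
# by the model itself, so None is passed here purely for illustration.
model = make_model(None)
with model:
    idata = pm.sample(1000, tune=1000, chains=2)
# one posterior-mean estimate of p per component
print(idata.posterior["p"].mean(dim=("chain", "draw")).values)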
# Import relevant modules
import pymc
import numpy as np

# Some data
n = 5 * np.ones(4, dtype=int)
x = np.array([-.86, -.3, -.05, .73])

# Priors on unknown parameters
alpha = pymc.Normal('alpha', mu=0, tau=.01)
beta = pymc.Normal('beta', mu=0, tau=.01)

# Arbitrary deterministic function of parameters
@pymc.deterministic
def theta(a=alpha, b=beta):
    """theta = logit^{-1}(a+b)"""
    return pymc.invlogit(a + b * x)

# Binomial likelihood for data
d = pymc.Binomial('d', n=n, p=theta, value=np.array([0., 1., 3., 5.]),
                  observed=True)
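# A minimal sampling sketch for the model above (an assumed continuation, not part
# of the original snippet): wrap the nodes in a PyMC2 MCMC sampler and draw
# posterior samples for alpha and beta.
M = pymc.MCMC([alpha, beta, theta, d])
M.sample(iter=10000, burn=5000)
print('posterior means: alpha=%.3f, beta=%.3f'
      % (M.trace('alpha')[:].mean(), M.trace('beta')[:].mean()))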
# Simple dose-response model
n = [5] * 4
dose = [-.86, -.3, -.05, .73]
x = [0, 1, 3, 5]

alpha = pm.Normal('alpha', mu=0.0, tau=0.01)
beta = pm.Normal('beta', mu=0.0, tau=0.01)

@pm.deterministic
def theta(a=alpha, b=beta, d=dose):
    """theta = inv_logit(a + b*d)"""
    return pm.invlogit(a + b * d)

# deaths ~ Binomial(n, theta)
deaths = pm.Binomial('deaths', n=n, p=theta, value=x, observed=True)

my_model = [alpha, beta, theta, deaths]

# Instantiate and run sampler
S = pm.MCMC(my_model)
S.sample(10000, burn=5000)

# Calculate and plot Geweke scores
scores = pm.geweke(S, intervals=20)
pm.Matplot.geweke_plot(scores)

# Geweke plot for a single parameter
trace = S.trace('alpha')[:]
alpha_scores = pm.geweke(trace, intervals=20)
pm.Matplot.geweke_plot(alpha_scores, 'alpha')
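# A hedged follow-up sketch (an assumption, not in the original script): in PyMC2,
# geweke() on a single trace returns (interval start, z-score) pairs, and most
# z-scores should fall within roughly +/- 2 if the chain has converged; assumes
# numpy is imported as np.
alpha_z = np.asarray(alpha_scores)[:, 1]
print('fraction of |z| > 2 for alpha: %.2f' % np.mean(np.abs(alpha_z) > 2))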
def make_model(N, k, X, backend, manifold):
    """
    A standard spatial logistic regression.
    - N: Number sampled at each location
    - k: Number positive at each location
    - X: x,y,z coords of each location
    - backend: The linear algebra backend. So far, this has to be 'cholmod'.
    - manifold: The manifold to work on. So far, this has to be 'spherical'.
    """
    # Make the Delaunay triangulation.
    neighbors, triangles, trimap, b = manifold.triangulate_sphere(X)

    # Uncomment to visualize the triangulation.
    # manifold.plot_triangulation(X, neighbors)

    # Generate the C, Ctilde and G matrices in SciPy 'lil' format.
    triangle_areas = [manifold.triangle_area(X, t) for t in triangles]
    Ctilde = manifold.Ctilde(X, triangles, triangle_areas)
    C = manifold.C(X, triangles, triangle_areas)
    G = manifold.G(X, triangles, triangle_areas)

    # Convert to SciPy 'csc' format for efficient use by the CHOLMOD backend.
    C = backend.into_matrix_type(C)
    Ctilde = backend.into_matrix_type(Ctilde)
    G = backend.into_matrix_type(G)

    # Kappa is the scale parameter. It's a free variable.
    kappa = pm.Exponential('kappa', 1, value=3)

    # Fix the value of alpha.
    alpha = 2.

    # amp is the overall amplitude. It's a free variable that will probably be
    # highly confounded with kappa.
    amp = pm.Exponential('amp', .0001, value=100)

    # A constant mean.
    m = pm.Uninformative('m', value=0)

    @pm.deterministic(trace=False)
    def M(m=m, n=len(X)):
        """The mean vector"""
        return np.ones(n) * m

    @pm.deterministic(trace=False)
    def Q(kappa=kappa, alpha=alpha, amp=amp, Ctilde=Ctilde, G=G, backend=backend):
        "The precision matrix."
        out = operators.mod_frac_laplacian_precision(Ctilde, G, kappa, alpha, backend) / np.asscalar(amp)**2
        return out

    # Do all the precomputation you can based on the sparsity pattern alone.
    # Note that if alpha is made free, this needs to be free also, as the sparsity
    # pattern will be changeable.
    pattern_products = backend.pattern_to_products(Q.value)

    @pm.deterministic(trace=False)
    def precision_products(Q=Q, p=pattern_products):
        "All the analysis of the precision matrix that the backend needs to do MVN computations."
        try:
            return backend.precision_to_products(Q, **p)
        except backend.NonPositiveDefiniteError:
            return None

    # The random field.
    empirical_S = pm.logit((k + 1) / (N + 2.))
    S = pymc_objects.SparseMVN('S', M, precision_products, backend, value=empirical_S)

    @pm.deterministic(trace=False)
    def p(S=S):
        """The success probability."""
        return pm.invlogit(S)

    # The data.
    data = pm.Binomial('data', n=N, p=p, value=k, observed=True)

    # A Fortran representation of the likelihood, to allow for fast Metropolis
    # steps without querying data.logp.
    likelihood_variables = np.vstack((np.resize(N, k.shape), k)).T
    likelihood_string = """
    lkp = dexp({X})/(1.0D0+dexp({X}))
    lkp = lv(i,2)*dlog(lkp) + (lv(i,1)-lv(i,2))*dlog(1.0D0-lkp)
    """

    return locals()
import pymc as mc

# We define a simple model of a survey with one data point. We use a Beta
# distribution for the p parameter in a binomial. We would like to know both
# the posterior distribution for p, as well as the predictive posterior
# distribution over the survey parameter.
alpha = 4
beta = 4
n = 20
yes = 15

with mc.Model() as model:
    p = mc.Beta('p', alpha, beta)
    surv_sim = mc.Binomial('surv_sim', n=n, p=p)
    surv = mc.Binomial('surv', n=n, p=p, observed=yes)

# First let's try to use `find_MAP`.
with model:
    print(mc.find_MAP())

# `find_MAP` defaults to finding the MAP for only the continuous variables; we have
# to specify if we would like to use the discrete variables as well.
with model:
    print(mc.find_MAP(vars=model.vars, disp=True))

# We set the `disp` variable to display a warning that we are using a
# non-gradient minimization technique, as discrete variables do not give much
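# A hedged continuation sketch (assumed, not part of the original snippet): draw
# posterior samples; because surv_sim is an unobserved Binomial, its trace serves
# as a posterior predictive distribution over a replicated survey.
with model:
    trace = mc.sample(3000)
# In PyMC3-style code the traces could then be inspected with e.g. mc.traceplot(trace).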
def cluster(sup, dep, cn_states, Nvar, sparams, cparams, phi_limit, norm, recluster=False):
    '''
    clustering model using Dirichlet Process
    '''
    Ndp = cparams['clus_limit'] if not recluster else 1
    n_iter = cparams['n_iter'] if not recluster else cparams['merge_iter']
    burn = cparams['burn'] if not recluster else cparams['merge_burn']
    thin, use_map = cparams['thin'], cparams['use_map']
    use_map = False if recluster else use_map
    nclus_init = cparams['nclus_init']

    purity, ploidy = sparams['pi'], sparams['ploidy']
    fixed_alpha, gamma_a, gamma_b = cparams['fixed_alpha'], cparams['alpha'], cparams['beta']

    sens = 1.0 / ((purity / float(ploidy)) * np.mean(dep))
    pval_cutoff = cparams['clonal_cnv_pval']
    print('phi lower limit: %f; phi upper limit: %f' % (sens, phi_limit))

    if fixed_alpha.lower() in ("yes", "true", "t"):
        fixed = True
        fixed_alpha = 0.75 / math.log10(Nvar) if Nvar > 10 else 1
    else:
        try:
            fixed_alpha = float(fixed_alpha)
            fixed = True
        except ValueError:
            fixed = False

    if fixed:
        print('Dirichlet concentration fixed at %f' % fixed_alpha)
        h = pm.Beta('h', alpha=1, beta=fixed_alpha, size=Ndp)
    else:
        beta_init = float(gamma_a) / gamma_b
        print("Dirichlet concentration gamma prior values: alpha = %f; beta = %f; init = %f" %
              (gamma_a, gamma_b, beta_init))
        alpha = pm.Gamma('alpha', gamma_a, gamma_b, value=beta_init)
        h = pm.Beta('h', alpha=1, beta=alpha, size=Ndp)

    @pm.deterministic
    def p(h=h):
        value = [u * np.prod(1.0 - h[:i]) for i, u in enumerate(h)]
        #value /= np.sum(value)
        value[-1] = 1 - sum(value[:-1])
        return value

    z_init = np.zeros(Nvar, dtype=np.int)
    phi_init = np.random.rand(Ndp) * phi_limit

    # use smart initialisation if nclus_init specified
    if not nclus_init.lower() in ("no", "false", "f"):
        try:
            nclus_init = nclus_init if not recluster else 1
            nclus_init = int(nclus_init)
            nclus_init = Ndp if nclus_init > Ndp else nclus_init
            if nclus_init == 1:
                phi_init[0] = 1.
            else:
                z_init, phi_init = get_initialisation(nclus_init, Ndp, sparams, sup, dep,
                                                      norm, cn_states, sens, phi_limit, pval_cutoff)
        except ValueError:
            pass

    z = pm.Categorical('z', p=p, size=Nvar, value=z_init)

    phi_init = np.array([sens if x < sens else x for x in phi_init])
    phi_k = pm.Uniform('phi_k', lower=sens, upper=phi_limit, size=Ndp, value=phi_init)

    @pm.deterministic
    def p_var(z=z, phi_k=phi_k, z_init=z_init):
        # if np.any(np.isnan(phi_k)):
        #     phi_k = phi_init
        if np.any(z < 0):
            z = z_init
        # ^ some fmin optimization methods initialise this array with -ve numbers
        most_lik_cn_states, pvs = \
            get_most_likely_cn_states(cn_states, sup, dep, phi_k[z], purity, pval_cutoff, norm)
        return np.array(pvs) - 0.00000001

    cbinom = pm.Binomial('cbinom', dep, p_var, observed=True, value=sup)

    if fixed:
        model = pm.Model([h, p, phi_k, z, p_var, cbinom])
    else:
        model = pm.Model([alpha, h, p, phi_k, z, p_var, cbinom])

    mcmc, map_ = fit_and_sample(model, n_iter, burn, thin, use_map)
    return mcmc, map_
    if a <= 0 or b <= 0:
        return -np.inf
    else:
        return np.log(np.power((a + b), -2.5))

a = beta_priors[0]
b = beta_priors[1]

# hidden true rate for each website
true_rates = pymc.Beta("true_rates", a, b, size=5)

# observed values
trials = np.array([1055, 1057, 1065, 1039, 1046])
successes = np.array([28, 45, 69, 58, 60])
observed_values = pymc.Binomial("observed_values", trials, true_rates,
                                observed=True, value=successes)

model = pymc.Model([a, b, true_rates, observed_values])
mcmc = pymc.MCMC(model)

# Generate 1,000,000 samples and throw out first 500,000
mcmc.sample(1000000, 500000)

diff_CA = mcmc.trace("true_rates")[:][:, 2] - mcmc.trace("true_rates")[:][:, 0]
sns.kdeplot(diff_CA, shade=True, label="Difference site C - site A")
plt.axvline(0.0, color="black")

print("Probability that website A gets MORE sign-ups than website C: %0.3f" % (diff_CA < 0).mean())
print("Probability that website A gets LESS sign-ups than website C: %0.3f" %
@pm.deterministic
def observed_proportion(t_a=true_answers, fc=first_coin, sc=second_coin):
    observed = fc * t_a + (1 - fc) * sc
    return observed.sum() / float(N)

observed_proportion.value

# data generation
X = 35
observations = pm.Binomial("obs", N, observed_proportion, observed=True, value=X)

model = pm.Model([p, true_answers, first_coin, second_coin,
                  observed_proportion, observations])
mcmc = pm.MCMC(model)
mcmc.sample(40000, 15000)

p_trace = mcmc.trace("freq_cheating")[:]
plt.hist(p_trace, histtype="stepfilled", normed=True, alpha=0.85,
         bins=30, color="#348ABD")
    32, 48, 36, 29, 37, 53, 55, 50, 47, 46, 44, 50,
    56, 58, 42, 58, 54, 57, 54, 51, 49, 52, 51, 49,
    51, 46, 46, 42, 49, 46, 56, 42, 53, 55, 51, 55,
    49, 53, 55, 40, 46, 56, 47, 54, 54, 42, 34, 35,
    41, 48, 46, 39, 55, 30, 49, 27, 51, 41, 36, 45,
    41, 53, 32, 43, 33
])
condition = np.repeat([0, 1, 2, 3], nSubj)

# Specify the model in PyMC
with pm.Model() as model:
    kappa = pm.Gamma('kappa', 1, 0.1, shape=ncond)
    mu = pm.Beta('mu', 1, 1, shape=ncond)
    theta = pm.Beta('theta', mu[condition] * kappa[condition],
                    (1 - mu[condition]) * kappa[condition], shape=len(z))
    y = pm.Binomial('y', p=theta, n=N, observed=z)

    start = pm.find_MAP()
    step1 = pm.Metropolis([mu])
    step2 = pm.Metropolis([theta])
    step3 = pm.NUTS([kappa])
    # samplers = [pm.Metropolis([rv]) for rv in model.unobserved_RVs]
    trace = pm.sample(10000, [step1, step2, step3], start=start, progressbar=False)

## Check the results.
burnin = 5000   # posterior samples to discard
thin = 10       # keep every 10th posterior sample

## Print summary for each trace
#pm.summary(trace[burnin::thin])
mu0 = pm.Beta('mu0', 1, 1)
a_Beta0 = mu0 * kappa[cond_of_subj]
b_Beta0 = (1 - mu0) * kappa[cond_of_subj]

mu1 = pm.Beta('mu1', 1, 1, shape=n_cond)
a_Beta1 = mu1[cond_of_subj] * kappa[cond_of_subj]
b_Beta1 = (1 - mu1[cond_of_subj]) * kappa[cond_of_subj]

# Prior on theta
theta0 = pm.Beta('theta0', a_Beta0, b_Beta0, shape=n_subj)
theta1 = pm.Beta('theta1', a_Beta1, b_Beta1, shape=n_subj)
# if model_index == 0 then sample from theta1, else sample from theta0
theta = pm.switch(pm.eq(model_index, 0), theta1, theta0)

# Likelihood:
y = pm.Binomial('y', p=theta, n=n_trl_of_subj, observed=n_corr_of_subj)

# Sampling
start = pm.find_MAP()
steps = [pm.Metropolis([i]) for i in model.unobserved_RVs[1:]]
steps.append(pm.ElemwiseCategoricalStep(var=model_index, values=[0, 1]))
trace = pm.sample(20000, steps, start=start, progressbar=False)

# EXAMINE THE RESULTS.
burnin = 10000
thin = 10

## Print summary for each trace
#pm.summary(trace[burnin::thin])
#pm.summary(trace)
""" A model for an MCMC model for batting average """ import pymc import numpy as np import pandas as pd #load in the data april_df = pd.read_table('./hw_11_data/laa_2011_april.txt',sep='\t') at_bats = april_df['AB'] num_players = len(april_df.index) num_hits = april_df['H'] #prior dist mean_ba = .255 var_ba = .0011 a = ((1-mean_ba)/var_ba - 1/mean_ba)*mean_ba**2 b = a*(1/mean_ba -1) ba = pymc.Beta('ba',alpha=a,beta=b,size=num_players) #model @pymc.deterministic(plot=False) def modeled_ba(ba=ba): return ba #likelihood hits = pymc.Binomial('hits',n=at_bats,p=modeled_ba,value=num_hits,observed =True) #hits_i = pymc.Binomial('hits_i',n=1000,p=modeled_ba,value=800,observed =True)
# Import relevant modules
import pymc
import numpy as np
import pandas as pd

data = pd.read_csv('hw_11_data/laa_2011_april.csv', sep='\t')

# Priors on unknown parameters
alpha = pymc.Normal('alpha', mu=0.255, tau=1 / float(0.0011))
beta = pymc.Normal('beta', mu=1 - 0.255, tau=1 / float(0.0011))
avg = pymc.Beta('avg', alpha=alpha, beta=beta, size=len(data))

#def playeravg(a=alpha, b=beta):
#    return pymc.Beta('avg', a, b, size=len(data))
#mus['mu' + str(i)] = playeravg

xi = pymc.Binomial('xi', n=data.AB, p=avg, value=data.H)
author : ykita
date   : Thu Feb 11 02:38:03 JST 2016
memo   :
'''
import pymc as pm
import matplotlib.pyplot as plt
import numpy as np

observed = [1, 0, 1, 1, 0, 1, 0, 1, 0, 0]
h = sum(observed)
n = len(observed)
alpha, beta = 1, 1
niter = 10**6

with pm.Model() as model:
    # define priors
    p = pm.Beta('p', alpha=alpha, beta=beta)

    # define likelihood
    y = pm.Binomial('y', n=n, p=p, observed=h)

    # inference
    start = {'p': 0.5}
    step = pm.Metropolis()
    trace = pm.sample(niter, step, start)

pm.traceplot(trace)
plt.show()

N = 10000
p, bins = np.histogram(trace["p"], bins=N, density=True)
theta = np.linspace(np.min(bins), np.max(bins), N)
print("ML:   " + str(h / float(n)))
print("MCMC: " + str(np.dot(p, theta) / N))
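# A hedged cross-check sketch (assumed, not in the original script): with a
# Beta(alpha, beta) prior and h successes out of n trials, the posterior is the
# conjugate Beta(alpha + h, beta + n - h), so the MCMC estimate above can be
# compared against the exact posterior mean.
analytic_mean = (alpha + h) / float(alpha + beta + n)
print("analytic posterior mean: " + str(analytic_mean))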
         name='s',
         parents={'t_l': 1851, 't_h': 1962},
         random=s_rand,
         trace=True,
         value=1900,
         dtype=int,
         rseed=1.,
         observed=False,
         cache_depth=2,
         plot=True,
         verbose=0)

x = pm.Binomial('x', value=7, n=10, p=.8, observed=True)

x = pm.MvNormalCov('x', numpy.ones(3), numpy.eye(3))
y = pm.MvNormalCov('y', numpy.ones(3), numpy.eye(3))

print(x + y)
#<pymc.PyMCObjects.Deterministic '(x_add_y)' at 0x105c3bd10>
print(x[0])
#<pymc.CommonDeterministics.Index 'x[0]' at 0x105c52390>
print(x[1] + y[2])
#<pymc.PyMCObjects.Deterministic '(x[1]_add_y[2])' at 0x105c52410>

@pm.deterministic
def r(switchpoint=s, early_rate=e, late_rate=l):
pos_score = y_score[y_test == 1]
neg_score = y_score[y_test == 0]
# ranksums(pos_score, neg_score)

alldata = np.concatenate((pos_score, neg_score))
ranked = rankdata(alldata)
m1 = len(pos_score)
m2 = len(neg_score)
pos_rank = ranked[:m1]
neg_rank = ranked[m1:]
s = np.sum(pos_rank, axis=0)
count = s - m1 * (m1 + 1) / 2.0

# Binomial-Beta conjugate
n_sample = 20000
p = pm.Beta("p", alpha=1, beta=1)
n = pm.Binomial("Bino", n=m1 * m2, p=p, value=count, observed=True)
mcmc = pm.MCMC([n, p])
trace = mcmc.sample(n_sample)
auc_trace = mcmc.trace("p")[:]
auc_mean = auc_trace.mean()

# 95% credible region
n_sample = auc_trace.shape[0]
lower_limits = np.sort(auc_trace)[int(0.025 * n_sample)]
upper_limits = np.sort(auc_trace)[int(0.975 * n_sample)]

# plot posterior predictive distribution of the AUC measure
ax = plt.subplot(l, 2, i)
i += 1
# from pymc.Matplot import plot as mcplot
# mcplot(mcmc.trace("p"), common_scale=False)
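# A hedged sanity-check sketch (not in the original): the rank-based count above is
# the Mann-Whitney U statistic, so count / (m1 * m2) is the empirical AUC and should
# sit close to the posterior mean of p.
empirical_auc = count / float(m1 * m2)
print("empirical AUC: %.3f, posterior mean AUC: %.3f" % (empirical_auc, auc_mean))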
with pymc.Model() as model:
    ### equivalent to the BUGS code on p. 228
    Y = df['y'].values
    N = len(Y)

    ### hyperpriors
    s = pymc.Uniform(name="s", lower=1.0e-2, upper=1.0e+2, testval=0.01)
    b = pymc.Normal(name='b', mu=0.01, tau=1.0e+2)

    ### priors
    r = [pymc.Normal(name="r_{0}".format(i), mu=0., tau=s**-2) for i in range(N)]
    p = tinvlogit(b + r)

    obs = pymc.Binomial(name="obs", n=8, p=p, observed=Y)

#H = model.fastd2logp()

with model:
    start = pymc.find_MAP(vars=[s], fmin=optimize.fmin_l_bfgs_b)

# with model:
#     step = pymc.NUTS(model.vars, scaling=start)

# This takes quite a long time, so be careful when running it!
def run(n=3000):
    if n == "short":
        n = 50
    with model: