예제 #1
    def sample_bandits(self, n=1):
        """Pull the bandits ``n`` times, picking each arm by posterior sampling.

        On every round one value is drawn from each arm's current pool of
        win-probability samples and the arm with the largest draw is pulled.
        The outcome updates ``self.wins``, ``self.trials`` and ``self.N``.
        The pulled arm's pool is then replaced by MCMC samples from a
        Uniform(0, 1) prior / Binomial likelihood model fitted to that arm's
        running win and trial counts.  Per-round outcomes and choices are
        appended to ``self.bb_score`` and ``self.choices``.

        Parameters
        ----------
        n : int, optional
            Number of pulls to perform (default 1).
        """
        n_arms = 3  # this routine is written for exactly three arms

        bb_score = np.zeros(n)
        choices = np.zeros(n)

        # The pools are rebuilt on every call: each arm starts from a single
        # Uniform(0, 1) draw until it has been pulled during this call.
        pools = [[np.random.rand()] for _ in range(n_arms)]

        for k in range(n):
            # draw one value per arm from its pool and select the largest
            choice = np.argmax([np.random.choice(pool) for pool in pools])

            print(k, choice)

            # sample the chosen bandit
            result = self.bandits.pull(choice)

            # update counts and score
            self.wins[choice] += result
            self.trials[choice] += 1
            bb_score[k] = result
            self.N += 1
            choices[k] = choice

            # Refit only the arm that was just pulled.  Previously the
            # branch for arm 1 also refitted arm 2 and there was no branch
            # for arm 2 at all, so arm 2's pool was never refreshed after its
            # own pulls.
            p_name = 'P%d' % choice
            prior = pm.Uniform(p_name, 0, 1)
            likelihood = pm.Binomial('X%d' % choice,
                                     value=self.wins[choice],
                                     n=self.trials[choice],
                                     p=prior,
                                     observed=True)
            mcmc = pm.MCMC([prior, likelihood])
            mcmc.sample(15000, 5000)  # 15000 iterations, first 5000 as burn-in
            pools[choice] = mcmc.trace(p_name)[:]

        self.bb_score = np.r_[self.bb_score, bb_score]
        self.choices = np.r_[self.choices, choices]
예제 #2
def binomial_model():
    """Return a model with a Beta(1, 1) prior on ``p`` and binary observations.

    The observations are 100 draws from ``np.random.binomial(n=1, p=0.2)``,
    passed through ``intX`` before being attached to the ``"xs"`` likelihood.
    No sampling is performed here.
    """
    simulated = np.random.binomial(n=1, p=0.2, size=100)
    observations = intX(simulated)
    with pm.Model() as beta_binomial:
        success_prob = pm.Beta("p", alpha=1, beta=1)
        pm.Binomial("xs", n=1, p=success_prob, observed=observations)
    return beta_binomial
예제 #3
def test_model_02(x):
    """Build the nodes of a dose-response model and return them by name.

    Parameters
    ----------
    x : covariate used inside ``theta`` as ``b * x``; presumably array-like
        with one entry per observation (verify against caller).

    Returns
    -------
    dict
        ``locals()`` of this function, i.e. every name bound below
        (including the locally imported ``pymc`` and ``np``).
    """
    # https://github.com/pymc-devs/pymc
    # Import relevant modules
    import pymc
    import numpy as np

    # Some data
    n = 5 * np.ones(4, dtype=int)
    #x = np.array([-.86, -.3, -.05, .73])

    # Priors on unknown parameters
    alpha = pymc.Normal('alpha', mu=0, tau=.01)
    beta = pymc.Normal('beta', mu=0, tau=.01)

    # Arbitrary deterministic function of parameters
    # NOTE(review): theta is a plain function here; there is no PyMC
    # deterministic decorator in this copy - confirm against the upstream
    # example.
    def theta(a=alpha, b=beta):
        """theta = logit^{-1}(a + b*x)"""
        return pymc.invlogit(a + b * x)

    # Binomial likelihood for data
    # NOTE(review): the call below is cut off in this copy - its remaining
    # arguments and closing parenthesis are missing, so the function does not
    # parse as shown.
    d = pymc.Binomial('d',
                      value=np.array([0., 1., 3., 5.]),
    return locals()
예제 #4
def posterior_upvote_ratio(upvotes, downvotes, samples=20000):
    """Return posterior samples of a comment's true upvote ratio.

    A Uniform(0, 1) prior is placed on the ratio and the upvote count is
    modelled as Binomial(upvotes + downvotes, ratio).

    Parameters
    ----------
    upvotes, downvotes : int
        Observed vote counts.
    samples : int, optional
        Number of MCMC iterations to run; the first quarter is discarded as
        burn-in, so three quarters of this many values are returned.

    Returns
    -------
    The trace of ``"p"`` after burn-in.
    """
    p = pm.Uniform("p", 0, 1, value=0.5)
    n = upvotes + downvotes
    obs_upvotes = pm.Binomial("obs", n, p, value=upvotes, observed=True)
    model = pm.Model([p, obs_upvotes])
    mcmc = pm.MCMC(model)
    # The sampler was never run before the trace was read, and `samples`
    # was unused.  Integer division keeps the burn-in an int on Python 3.
    mcmc.sample(samples, samples // 4)
    return mcmc.trace("p")[:]
예제 #5
    def test_layers(self):
        """Check that repeated draws of ``b`` average to 0.5 per element.

        ``b`` is Binomial(n=1, p=a) with ``a`` ~ Uniform(0, 1); the compiled
        sampler is called 10000 times and the element-wise mean is compared
        with 0.5 to two decimals.
        """
        n_vars = 10
        n_draws = 10000

        with pm.Model(rng_seeder=232093) as model:
            prob = pm.Uniform("a", lower=0, upper=1, size=n_vars)
            outcome = pm.Binomial("b", n=1, p=prob, size=n_vars)

        draw = compile_pymc([], outcome, mode="FAST_RUN")
        draws = np.stack([draw() for _ in range(n_draws)])
        expected = np.full((n_vars,), 0.5)
        npt.assert_array_almost_equal(draws.mean(0), expected, decimal=2)
예제 #6
    def test_single_observation(self):
        """A scalar observation should yield a (chains, draws, 1) log-likelihood."""
        n_chains = 2
        n_draws = 500

        with pm.Model():
            prob = pm.Uniform("p", 0, 1)
            pm.Binomial("w", p=prob, n=2, observed=1)
            idata = pm.sample(n_draws, chains=n_chains, return_inferencedata=True)

        assert idata
        expected_shape = (n_chains, n_draws, 1)
        assert idata.log_likelihood["w"].shape == expected_shape
예제 #7
def indep_samples(x1,n1,x2,n2):
    """Build PyMC nodes for two binomial samples and return them by name.

    Each sample is expanded into a 0/1 array (``x`` ones followed by
    ``n - x`` zeros), its observed success rate is printed, and a Beta(0.5,
    0.5) prior plus an observed Binomial node is created for it.

    Parameters
    ----------
    x1, n1 : int
        Number of successes and number of trials in the first sample.
    x2, n2 : int
        Number of successes and number of trials in the second sample.

    Returns
    -------
    dict
        ``locals()`` of this function.  Local names are therefore part of
        the result and must not be renamed.  By the time the dict is taken,
        ``x1`` and ``x2`` have been rebound from the input counts to the
        observed Binomial nodes.
    """
    class experimental_data(object):
        def __init__(self,x1,n1,x2,n2):
            self.data1 = np.hstack( (np.ones((x1,)) , np.zeros((n1-x1,))) )
            self.data2 = np.hstack( (np.ones((x2,)) , np.zeros((n2-x2,))) )

    ### for testing purposes
    #example = True

    data = experimental_data(x1,n1,x2,n2)

    #if example:
    #    example_data1 = np.hstack( (np.ones((15,)) , np.zeros((20,))) )
    #    example_data2 = np.hstack( (np.ones((16,)) , np.zeros((23,))) )
    #    sim_data_size = 14

    #    data1 = example_data1
    #    data2 = example_data2

    p1_val = np.mean(data.data1)
    p2_val = np.mean(data.data2)
    ind_val = p1_val+p2_val-p1_val*p2_val
    # Single-argument print(...) produces the same output on Python 2 and 3;
    # the former print statements were syntax errors on Python 3.
    print("P1 = " + str(p1_val))

    print("P2 = " + str(p2_val))

    print("Independence = " + str(ind_val))

    p1 = pymc.Beta('p1',alpha=0.5,beta=0.5)
    p2 = pymc.Beta('p2',alpha=0.5,beta=0.5)

    # NOTE(review): both observed nodes share the PyMC name 'x'; confirm
    # whether distinct names were intended.
    x1 = pymc.Binomial('x',n=len(data.data1),p=p1,value=np.sum(data.data1),observed=True)
    x2 = pymc.Binomial('x',n=len(data.data2),p=p2,value=np.sum(data.data2),observed=True)

    # NOTE(review): ind_assump is a plain function in this copy, not a PyMC
    # deterministic node - confirm against the original source.
    def ind_assump(p1=p1,p2=p2):
        return p1+p2-p1*p2

    return locals()
    #def sim():
    #    sim_data = pymc.Binomial('sim',n=sim_data_size, p=ind_assump)
    #    return sim_data
예제 #8
def get_Models():
    """Build a Beta/Binomial model and its collapsed Beta-binomial counterpart.

    Returns the nodes ``Beta``, ``BinomQ``, ``BetaBinQ`` followed by the
    entries of ``BinomD``, concatenated into one array.
    """
    #Full Model (Beta & Binomial)
    nN, nA, nB = 3, 5, 1
    aD = [0, 3, 1]
    Beta = pm.Beta('Beta', alpha=nA, beta=nB)
    # @UndefinedVariable
    # NOTE(review): the comprehension below is cut off in this copy - the
    # closing bracket is missing and the Binomial call carries no n/p
    # arguments - so the function does not parse as shown.
    BinomD = [
        pm.Binomial('BinomD_' + str(i),
                    value=aD[i]) for i in range(len(aD))
    # @UndefinedVariable @UnusedVariable
    BinomQ = pm.Binomial('BinomQ', n=nN, p=Beta)
    # @UndefinedVariable
    #Collapsed Model (Binomial)
    # Prior counts plus the successes / failures summed over aD.
    nA2 = nA + sum(aD)
    nB2 = nB + nN * len(aD) - sum(aD)
    BetaBinQ = pm.Betabin('BetaBinQ', n=nN, alpha=nA2, beta=nB2)
    # @UndefinedVariable
    return np.concatenate([[Beta, BinomQ, BetaBinQ], BinomD])
def posterior_upvote_ratio(upvotes, downvotes, samples=20000):
    """Return posterior samples of a comment's true upvote ratio.

    This function accepts the number of upvotes and downvotes a particular
    comment received, and the number of MCMC iterations to run. Assumes a
    uniform prior.

    Parameters
    ----------
    upvotes, downvotes : int
        Observed vote counts.
    samples : int, optional
        Number of MCMC iterations; the first quarter is discarded as burn-in.

    Returns
    -------
    The trace of ``"upvote_ratio"`` after burn-in.
    """
    N = upvotes + downvotes
    upvote_ratio = pm.Uniform("upvote_ratio", 0, 1)
    # The upvote count is modelled as Binomial(N, upvote_ratio) and marked
    # as observed data.
    observations = pm.Binomial("obs", N, upvote_ratio,
                               value=upvotes, observed=True)
    # do the fitting; first do a MAP as it is cheap and useful.
    pm.MAP([upvote_ratio, observations]).fit()
    mcmc = pm.MCMC([upvote_ratio, observations])
    # Integer division keeps the burn-in an int on Python 3.
    mcmc.sample(samples, samples // 4)
    return mcmc.trace("upvote_ratio")[:]
예제 #10
def main():
    # Builds a model with one Uniform prior ("freq_cheating") and three
    # Bernoulli vectors of size N, runs MCMC on it and plots the trace.
    # NOTE(review): several lines of this function are missing in this copy
    # (see the notes below), so it does not parse as shown.
    N = 100
    p = pm.Uniform("freq_cheating", 0, 1)
    true_answers = pm.Bernoulli("truths", p, size=N)
    first_coin_flips = pm.Bernoulli("first_flips", 0.5, size=N)
    second_coin_flips = pm.Bernoulli("second_flips", 0.5, size=N)

    # NOTE(review): the signature is cut off after the first parameter; the
    # body also uses `fc` and `sc`, presumably the two coin-flip vectors.
    def observed_proportion(t_a=true_answers,
        # answer is t_a where fc is set, otherwise sc
        result = t_a & fc | ~fc & sc
        return float(sum(result)) / len(result)

    X = 35
    # NOTE(review): the Binomial arguments after the name are missing.
    observations = pm.Binomial("obs",

    # NOTE(review): the closing "])" of this call is missing.
    model = pm.Model([
        p, true_answers, first_coin_flips, second_coin_flips,
        observed_proportion, observations

    # To be explained in Chapter 3!
    mcmc = pm.MCMC(model)
    mcmc.sample(40000, 15000)

    figsize(12.5, 3)
    p_trace = mcmc.trace("freq_cheating")[:]
    # NOTE(review): the next line is an orphaned keyword argument; the call
    # it belonged to is missing.
             label="posterior distribution",
    plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.3)
    plt.xlim(0, 1)
예제 #11
    def test_transformed(self):
        """Prior-predictive draws from a hierarchical Beta-Binomial model.

        Checks the shapes of the ``phi`` prior draws and the ``y`` prior
        predictive draws, and that ``thetas`` is present in the prior group.
        """
        n_players = 18
        n_draws = 50
        at_bats = np.full(n_players, 45, dtype=int)
        hits = np.random.randint(1, 40, size=n_players, dtype=int)

        with pm.Model() as model:
            phi = pm.Beta("phi", alpha=1.0, beta=1.0)

            log_kappa = pm.Exponential("logkappa", lam=5.0)
            kappa = pm.Deterministic("kappa", at.exp(log_kappa))

            concentration_a = phi * kappa
            concentration_b = (1.0 - phi) * kappa
            thetas = pm.Beta("thetas", alpha=concentration_a, beta=concentration_b, size=n_players)

            pm.Binomial("y", n=at_bats, p=thetas, observed=hits)
            prior_draws = pm.sample_prior_predictive(n_draws)

        assert prior_draws.prior["phi"].shape == (1, n_draws)
        assert prior_draws.prior_predictive["y"].shape == (1, n_draws, n_players)
        assert "thetas" in prior_draws.prior.data_vars
예제 #12
def mymodel():
    # Sets up the data, priors and theta function of a dose-response model.
    # NOTE(review): the function is cut off inside the final Binomial call in
    # this copy, so its return value cannot be determined from here.
    # Some data
    n = 5 * np.ones(4, dtype=int)
    x = np.array([-.86, -.3, -.05, .73])

    # Priors on unknown parameters
    alpha = pymc.Normal('alpha', mu=0, tau=.01)
    beta = pymc.Normal('beta', mu=0, tau=.01)

    # Arbitrary deterministic function of parameters
    # NOTE(review): theta is a plain function here; there is no PyMC
    # deterministic decorator in this copy.
    def theta(a=alpha, b=beta):
        """theta = logit^{-1}(a + b*x)"""
        return pymc.invlogit(a + b * x)

    # Binomial likelihood for data
    # NOTE(review): remaining arguments and the closing parenthesis are
    # missing.
    d = pymc.Binomial('d',
                      value=np.array([0., 1., 3., 5.]),
예제 #13
def test_duplicate_vars():
    """Registering a name twice in one model must raise ``ValueError``.

    Each case expects the error message to match "already exists".
    """
    # NOTE(review): the body of this first model block is missing in this
    # copy, so the function does not parse as shown.
    with pytest.raises(ValueError) as err:
        with pm.Model():
    err.match("already exists")

    # NOTE(review): only one variable is declared here; presumably the
    # transformed variable's derived name is what collides - confirm.
    with pytest.raises(ValueError) as err:
        with pm.Model():
            pm.Normal("a", transform=transforms.log)
    err.match("already exists")

    # A Potential reusing the name of an existing variable.
    with pytest.raises(ValueError) as err:
        with pm.Model():
            a = pm.Normal("a")
            pm.Potential("a", a**2)
    err.match("already exists")

    # Two distributions of different kinds sharing the name "a".
    with pytest.raises(ValueError) as err:
        with pm.Model():
            pm.Binomial("a", 10, 0.5)
            pm.Normal("a", transform=transforms.log)
    err.match("already exists")
    # Initialize constants
    N = 100  # 100 of students
    p = pm.Uniform("freq_cheating", 0, 1)  # The freq I want
    # Modeling
    true_answer = pm.Bernoulli("truths", p, size=N)
    first_coin_flips = pm.Bernoulli("firtst_flips", 0.5, size=N)
    second_coin_flips = pm.Bernoulli("second_flips", 0.5, size=N)

    def p_skewed(p=p):
        return 0.5 * p + 0.25

    yes_responses = pm.Binomial("number_cheaters",
    print("{0} : {1}".format(yes_responses, yes_responses.value))
    model = pm.Model([yes_responses, p_skewed, p])
    # to be explain in Chapter 3
    mcmc = pm.MCMC(model)
    mcmc.sample(25000, 2500)
    # plot the answer
    figsize(12.5, 3)
    p_trace = mcmc.trace("freq_cheating")[:]
예제 #15
 def make_model(cls):
     """Return a model with three Beta-distributed rates and Binomial data.

     The Binomial likelihood ``"y"`` uses trial counts ``[4, 12, 9]`` and
     observed successes ``[1, 2, 9]``.
     """
     prior_alpha = [0.5, 0.5, 1.0]
     prior_beta = [0.5, 0.5, 1.0]
     trials = [4, 12, 9]
     successes = [1, 2, 9]
     with pm.Model() as model:
         rates = pm.Beta("p", prior_alpha, prior_beta, size=3)
         pm.Binomial("y", p=rates, n=trials, observed=successes)
     return model
예제 #16
# Import relevant modules
import pymc
import numpy as np

# Some data
n = 5 * np.ones(4, dtype=int)
x = np.array([-.86, -.3, -.05, .73])

# Priors on unknown parameters
alpha = pymc.Normal('alpha', mu=0, tau=.01)
beta = pymc.Normal('beta', mu=0, tau=.01)

# Arbitrary deterministic function of parameters
# NOTE(review): theta is a plain function in this copy; no PyMC
# deterministic decorator is present.
def theta(a=alpha, b=beta):
    """theta = logit^{-1}(a + b*x)"""
    return pymc.invlogit(a + b * x)

# Binomial likelihood for data
# NOTE(review): the statement below ends in a line continuation and the
# continued line is missing in this copy.
d = pymc.Binomial('d', n=n, p=theta, value=np.array([0.,1.,3.,5.]),\
예제 #17
# Simple dose-response model
n = [5]*4
dose = [-.86,-.3,-.05,.73]
x = [0,1,3,5]

alpha = pm.Normal('alpha', mu=0.0, tau=0.01)
beta = pm.Normal('beta', mu=0.0, tau=0.01)

# theta must be a PyMC Deterministic so that `p=theta` below receives the
# computed probabilities; without the decorator the Binomial would be handed
# the bare function object.
@pm.deterministic
def theta(a=alpha, b=beta, d=dose):
    """theta = inv_logit(a + b*d)"""
    return pm.invlogit(a+b*d)

# deaths ~ binomial(n, p)
deaths = pm.Binomial('deaths', n=n, p=theta, value=x, observed=True)

my_model = [alpha, beta, theta, deaths]

# Instantiate and run sampler
S = pm.MCMC(my_model)
S.sample(10000, burn=5000)

# Calculate and plot Geweke scores
scores = pm.geweke(S, intervals=20)

# Geweke plot for a single parameter
trace = S.trace('alpha')[:]
alpha_scores = pm.geweke(trace, intervals=20)
pm.Matplot.geweke_plot(alpha_scores, 'alpha')
예제 #18
def make_model(N,k,X,backend,manifold):
    # NOTE(review): the description below appears to have lost its docstring
    # quotes in this copy; further down a `try:` line and the end of a string
    # literal are also missing, so the function does not parse as shown.
    A standard spatial logistic regression.
    - N: Number sampled at each location
    - k: Number positive at each location
    - X: x,y,z coords of each location
    - Backend: The linear algebra backend. So far, this has to be 'cholmod'. 
    - manifold: The manifold to work on. So far, this has to be 'spherical'.
    # Make the Delaunay triangulation.
    neighbors, triangles, trimap, b = manifold.triangulate_sphere(X)

    # Uncomment to visualize the triangulation.
    # manifold.plot_triangulation(X,neighbors)

    # Generate the C, Ctilde and G matrix in SciPy 'lil' format.
    triangle_areas = [manifold.triangle_area(X, t) for t in triangles]
    Ctilde = manifold.Ctilde(X, triangles, triangle_areas)
    C = manifold.C(X, triangles, triangle_areas)
    G = manifold.G(X, triangles, triangle_areas)

    # Convert to SciPy 'csc' format for efficient use by the CHOLMOD backend.
    C = backend.into_matrix_type(C)
    Ctilde = backend.into_matrix_type(Ctilde)
    G = backend.into_matrix_type(G)

    # Kappa is the scale parameter. It's a free variable.
    kappa = pm.Exponential('kappa',1,value=3)

    # Fix the value of alpha.
    alpha = 2.

    # amp is the overall amplitude. It's a free variable that will probably be highly confounded with kappa.
    amp = pm.Exponential('amp', .0001, value=100)

    # A constant mean.
    m = pm.Uninformative('m',value=0)
    # NOTE(review): M, Q, precision_products and p are plain functions in
    # this copy, yet `Q.value` is read below; presumably each carried a PyMC
    # deterministic decorator in the original - confirm.
    def M(m=m,n=len(X)):
        """The mean vector"""
        return np.ones(n)*m
    def Q(kappa=kappa, alpha=alpha, amp=amp, Ctilde=Ctilde, G=G, backend=backend):
        "The precision matrix."
        # The operator result is divided by the squared amplitude.
        out = operators.mod_frac_laplacian_precision(Ctilde, G, kappa, alpha, backend)/np.asscalar(amp)**2
        return out

    # Do all the precomputation you can based on the sparsity pattern alone.
    # Note that if alpha is made free, this needs to be free also, as the sparsity
    # pattern will be changeable.
    pattern_products = backend.pattern_to_products(Q.value)

    def precision_products(Q=Q, p=pattern_products):
        "All the analysis of the precision matrix that the backend needs to do MVN computations."
        # NOTE(review): the `try:` matching the `except` below is missing.
            return backend.precision_to_products(Q, **p)
        except backend.NonPositiveDefiniteError:
            # A non-positive-definite precision matrix yields None.
            return None

    # The random field.
    # Initial value for S: logit of the smoothed proportion (k+1)/(N+2).
    empirical_S = pm.logit((k+1)/(N+2.))
    S=pymc_objects.SparseMVN('S',M, precision_products, backend, value=empirical_S)
    def p(S=S):
        """The success probability."""
        return pm.invlogit(S)

    # The data.
    data = pm.Binomial('data', n=N, p=p, value=k, observed=True)
    # A Fortran representation of the likelihood, to allow for fast Metropolis steps without querying data.logp.
    # One row per observation: column 1 is N (resized to k's shape), column 2 is k.
    likelihood_variables = np.vstack((np.resize(N,k.shape),k)).T
    # NOTE(review): the string literal below is not terminated in this copy.
    likelihood_string = """
    lkp = dexp({X})/(1.0D0+dexp({X}))
    lkp = lv(i,2)*dlog(lkp) + (lv(i,1)-lv(i,2))*dlog(1.0D0-lkp)
    return locals()
예제 #19
import pymc as mc

# We define a simple model of a survey with one data point. We use a $Beta$
# distribution for the $p$ parameter in a binomial. We would like to know both
# the posterior distribution for p, as well as the predictive posterior
# distribution over the survey parameter.

# Beta prior parameters, survey size and number of "yes" answers.
alpha = 4
beta = 4
n = 20
yes = 15

with mc.Model() as model:
    p = mc.Beta('p', alpha, beta)
    # surv_sim is an unobserved Binomial sharing p with the observed survey.
    surv_sim = mc.Binomial('surv_sim', n=n, p=p)
    surv = mc.Binomial('surv', n=n, p=p, observed=yes)

# First let's try and use `find_MAP`.

# NOTE(review): the body of the following `with` block is missing in this
# copy, so the script does not parse as shown.
with model:

# By default `find_MAP` finds the MAP for the continuous variables only; we
# have to specify the variables explicitly if we would like the discrete
# ones to be included.

with model:
    print(mc.find_MAP(vars=model.vars, disp=True))

# We set the `disp` variable to display a warning that we are using a
# non-gradient minimization technique, as discrete variables do not give much
예제 #20
파일: cluster.py 프로젝트: xtmgah/SVclone
def cluster(sup,dep,cn_states,Nvar,sparams,cparams,phi_limit,norm,recluster=False):
    # NOTE(review): the description line below appears to have lost its
    # docstring quotes, and several `try:` / `else:` lines are missing in
    # this copy (see notes further down), so the function does not parse as
    # shown.
    clustering model using Dirichlet Process
    # When reclustering, a single cluster and the merge_* iteration settings
    # are used and MAP fitting is disabled.
    Ndp = cparams['clus_limit'] if not recluster else 1
    n_iter = cparams['n_iter'] if not recluster else cparams['merge_iter']
    burn = cparams['burn'] if not recluster else cparams['merge_burn']
    thin, use_map = cparams['thin'], cparams['use_map']
    use_map = False if recluster else use_map
    nclus_init = cparams['nclus_init']

    purity, ploidy = sparams['pi'], sparams['ploidy']
    fixed_alpha, gamma_a, gamma_b = cparams['fixed_alpha'], cparams['alpha'], cparams['beta']
    # sens is used below as the lower bound for phi.
    sens = 1.0 / ((purity/ float(ploidy)) * np.mean(dep))
    pval_cutoff = cparams['clonal_cnv_pval']
    print('phi lower limit: %f; phi upper limit: %f' % (sens, phi_limit))

    # fixed_alpha is read as a string: "yes"/"true"/"t" selects a default
    # concentration derived from Nvar.
    if fixed_alpha.lower() in ("yes", "true", "t"):
        fixed = True
        fixed_alpha = 0.75 / math.log10(Nvar) if Nvar > 10 else 1
        # NOTE(review): an `else:` / `try:` pair appears to be missing before
        # the indented lines below.
            fixed_alpha = float(fixed_alpha)
            fixed = True
        except ValueError:
            fixed = False

    if fixed:
        print('Dirichlet concentration fixed at %f' % fixed_alpha)
        h = pm.Beta('h', alpha=1, beta=fixed_alpha, size=Ndp)
        # NOTE(review): an `else:` appears to be missing here; as written,
        # the Gamma-prior lines below would overwrite the fixed `h`.
        beta_init = float(gamma_a) / gamma_b
        print("Dirichlet concentration gamma prior values: alpha = %f; beta= %f; init = %f" % (gamma_a, gamma_b, beta_init))
        alpha = pm.Gamma('alpha', gamma_a, gamma_b, value = beta_init)
        h = pm.Beta('h', alpha=1, beta=alpha, size=Ndp)

    # Stick-breaking weights: entry i is h[i] times the product of (1 - h[j])
    # for j < i; the last entry is set so that the weights sum to one.
    # NOTE(review): p and p_var are plain functions in this copy; presumably
    # they carried a PyMC deterministic decorator - confirm.
    def p(h=h):
        value = [u*np.prod(1.0-h[:i]) for i,u in enumerate(h)]
        #value /= np.sum(value)
        value[-1] = 1-sum(value[:-1])
        return value

    # NOTE(review): the `np.int` alias was removed in NumPy 1.24; confirm the
    # NumPy version this runs against.
    z_init = np.zeros(Nvar, dtype=np.int)
    phi_init = np.random.rand(Ndp) * phi_limit

    # use smart initialisation if nclus_init specified
    if not nclus_init.lower() in ("no", "false", "f"):
        # NOTE(review): a `try:` appears to be missing here, as well as an
        # `else:` before the get_initialisation call and the body of the
        # `except` clause.
            nclus_init = nclus_init if not recluster else 1
            nclus_init = int(nclus_init)
            nclus_init = Ndp if nclus_init > Ndp else nclus_init
            if nclus_init == 1:
                phi_init[0] = 1.
                z_init, phi_init = get_initialisation(nclus_init, Ndp, sparams, sup, dep, norm,
                                                      cn_states, sens, phi_limit, pval_cutoff)
        except ValueError:

    z = pm.Categorical('z', p = p, size = Nvar, value = z_init)
    # Raise any initial phi below the lower bound up to the bound.
    phi_init = np.array([sens if x < sens else x for x in phi_init])
    phi_k = pm.Uniform('phi_k', lower = sens, upper = phi_limit, size = Ndp, value=phi_init)

    def p_var(z=z, phi_k=phi_k, z_init=z_init):
#        if np.any(np.isnan(phi_k)):
#            phi_k = phi_init
        if np.any(z < 0):
            z = z_init
            # ^ some fmin optimization methods initialise this array with -ve numbers
        most_lik_cn_states, pvs = \
                get_most_likely_cn_states(cn_states, sup, dep, phi_k[z], purity, pval_cutoff, norm)
        # The returned values are shifted down by 1e-8.
        return np.array(pvs)-0.00000001

    cbinom = pm.Binomial('cbinom', dep, p_var, observed=True, value=sup)
    if fixed:
        model = pm.Model([h, p, phi_k, z, p_var, cbinom])
        # NOTE(review): an `else:` appears to be missing; as written the
        # next line references `alpha` even on the fixed path.
        model = pm.Model([alpha, h, p, phi_k, z, p_var, cbinom])

    mcmc, map_ = fit_and_sample(model, n_iter, burn, thin, use_map)

    return mcmc, map_
예제 #21
    if a <= 0 or b <= 0:
        return -np.inf
        return np.log(np.power((a+b), -2.5))

a = beta_priors[0]
b = beta_priors[1]

#hidden true rate for each website
true_rates = pymc.Beta("true_rates", a, b, size=5)

#observed values
trials = np.array([1055, 1057, 1065, 1039, 1046])
successes = np.array([28, 45, 69, 58, 60])
observed_values = pymc.Binomial("observed_values", trials, true_rates, observed=True,

model = pymc.Model([a, b, true_rates, observed_values])
mcmc = pymc.MCMC(model)

#Generate 1,000,000 samples and throw out first 500,000
mcmc.sample(1000000, 500000)

diff_CA = mcmc.trace("true_rates")[:][:,2] - mcmc.trace("true_rates")[:][:,0]
sns.kdeplot(diff_CA, shade=True, label="Difference site C - site A")
plt.axvline(0.0, color="black")

print ("Probability that website A gets MORE sign-ups than website C: %0.3f" %
       (diff_CA < 0).mean())
print ("Probability that website A gets LESS sign-ups than website C: %0.3f" %
예제 #22

def observed_proportion(t_a=true_answers, fc=first_coin, sc=second_coin):
    observed = fc * t_a + (1 - fc) * sc
    return observed.sum() / float(N)


# data generation

X = 35
observations = pm.Binomial("obs",
model = pm.Model([
    p, true_answers, first_coin, second_coin, observed_proportion, observations
mcmc = pm.MCMC(model)
mcmc.sample(40000, 15000)

p_trace = mcmc.trace("freq_cheating")[:]
    32, 48, 36, 29, 37, 53, 55, 50, 47, 46, 44, 50, 56, 58, 42, 58, 54, 57, 54,
    51, 49, 52, 51, 49, 51, 46, 46, 42, 49, 46, 56, 42, 53, 55, 51, 55, 49, 53,
    55, 40, 46, 56, 47, 54, 54, 42, 34, 35, 41, 48, 46, 39, 55, 30, 49, 27, 51,
    41, 36, 45, 41, 53, 32, 43, 33
condition = np.repeat([0, 1, 2, 3], nSubj)

# Specify the model in PyMC
with pm.Model() as model:
    kappa = pm.Gamma('kappa', 1, 0.1, shape=ncond)
    mu = pm.Beta('mu', 1, 1, shape=ncond)
    theta = pm.Beta('theta',
                    mu[condition] * kappa[condition],
                    (1 - mu[condition]) * kappa[condition],
    y = pm.Binomial('y', p=theta, n=N, observed=z)
    start = pm.find_MAP()
    step1 = pm.Metropolis([mu])
    step2 = pm.Metropolis([theta])
    step3 = pm.NUTS([kappa])
    #    samplers = [pm.Metropolis([rv]) for rv in model.unobserved_RVs]
    trace = pm.sample(10000, [step1, step2, step3],

## Check the results.
burnin = 5000  # posterior samples to discard
thin = 10  # posterior samples to discard

## Print summary for each trace
    mu0 = pm.Beta('mu0', 1, 1)
    a_Beta0 = mu0 * kappa[cond_of_subj]
    b_Beta0 = (1 - mu0) * kappa[cond_of_subj]

    mu1 = pm.Beta('mu1', 1, 1, shape=n_cond)
    a_Beta1 = mu1[cond_of_subj] * kappa[cond_of_subj]
    b_Beta1 = (1 - mu1[cond_of_subj]) * kappa[cond_of_subj]

    #Prior on theta
    theta0 = pm.Beta('theta0', a_Beta0, b_Beta0, shape=n_subj)
    theta1 = pm.Beta('theta1', a_Beta1, b_Beta1, shape=n_subj)
    # if model_index == 0 then sample from theta1 else sample from theta0
    theta = pm.switch(pm.eq(model_index, 0), theta1, theta0)

    # Likelihood:
    y = pm.Binomial('y', p=theta, n=n_trl_of_subj, observed=n_corr_of_subj)

    # Sampling
    start = pm.find_MAP()
    steps = [pm.Metropolis([i]) for i in model.unobserved_RVs[1:]]
    trace = pm.sample(20000, steps, start=start, progressbar=False)

burnin = 10000
thin = 10

## Print summary for each trace
예제 #25
"""A model for MCMC estimation of batting averages."""

import pymc
import numpy as np
import pandas as pd

# Load the April batting data: at-bats (AB) and hits (H), one row per player.
april_df = pd.read_table('./hw_11_data/laa_2011_april.txt',sep='\t')
at_bats = april_df['AB']
num_players = len(april_df.index)
num_hits = april_df['H']

# Prior distribution: mean and variance of the batting average.
mean_ba = .255
var_ba = .0011

# Method-of-moments Beta parameters for the given mean and variance.
a = ((1-mean_ba)/var_ba - 1/mean_ba)*mean_ba**2
b = a*(1/mean_ba -1)
ba = pymc.Beta('ba',alpha=a,beta=b,size=num_players)

# modeled_ba must be a PyMC Deterministic so that `p=modeled_ba` below
# receives the batting averages rather than the bare function object.
@pymc.deterministic
def modeled_ba(ba=ba):
    """Per-player batting average, passed through unchanged."""
    return ba

hits = pymc.Binomial('hits',n=at_bats,p=modeled_ba,value=num_hits,observed =True)
#hits_i = pymc.Binomial('hits_i',n=1000,p=modeled_ba,value=800,observed =True)
예제 #26
# Import relevant modules
import pymc
import numpy as np
import pandas as pd
# Tab-separated batting data; the AB and H columns are used below.
data = pd.read_csv('hw_11_data/laa_2011_april.csv',sep='\t')

# Priors on unknown parameters
# NOTE(review): Normal priors allow non-positive draws, which are invalid as
# Beta shape parameters - confirm whether a positive-support prior was
# intended.
alpha = pymc.Normal('alpha',mu=0.255,tau=1/float(0.0011))
beta = pymc.Normal('beta',mu=1-0.255,tau=1/float(0.0011))

avg = pymc.Beta('avg', alpha=alpha, beta=beta, size=len(data))
#def playeravg(a=alpha, b=beta):
#    return pymc.Beta('avg',a,b, size=len(data))

#mus['mu'+ str(i)] = playeravg
# The hit counts are data, so the node must be flagged as observed;
# otherwise the sampler treats `xi` as a free variable that merely starts at
# data.H, and the data never constrain `avg`.
xi = pymc.Binomial('xi',n=data.AB, p=avg, value=data.H, observed=True)
예제 #27
"""
author : ykita
date   : Thu Feb 11 02:38:03 JST 2016
memo   :
"""
import pymc as pm
import matplotlib.pyplot as plt
import numpy as np

# Ten binary outcomes; h is the number of successes out of n trials.
observed = [1, 0, 1, 1, 0, 1, 0, 1, 0, 0]
h = sum(observed)
n = len(observed)
alpha, beta = 1, 1
niter = 10**6
with pm.Model() as model:
    # define priors
    p = pm.Beta('p', alpha=alpha, beta=beta)
    # define likelihood
    y = pm.Binomial('y', n=n, p=p, observed=h)
    # inference
    start = {'p': 0.5}
    step = pm.Metropolis()
    # Keywords keep `step` and `start` bound to the right parameters even
    # where the positional order of pm.sample differs between releases.
    trace = pm.sample(niter, step=step, start=start)

# Posterior mean of p from a normalised histogram of the trace.
N = 10000
p, bins = np.histogram(trace["p"], bins=N, density=True)
# With density=True each bin's probability mass is density * width, so the
# mean is the mass-weighted sum of the bin centres.  Dividing by N instead
# is only correct when the sampled range happens to be exactly 1.
theta = 0.5 * (bins[:-1] + bins[1:])
print("ML:" + str(h / float(n)))
print("MCMC:" + str(np.dot(p * np.diff(bins), theta)))
예제 #28
                      't_l': 1851,
                      't_h': 1962

x = pm.Binomial('x', value=7, n=10, p=.8, observed=True)

x = pm.MvNormalCov('x', numpy.ones(3), numpy.eye(3))
y = pm.MvNormalCov('y', numpy.ones(3), numpy.eye(3))
print x + y
#<pymc.PyMCObjects.Deterministic '(x_add_y)' at 0x105c3bd10>

print x[0]
#<pymc.CommonDeterministics.Index 'x[0]' at 0x105c52390>

print x[1] + y[2]
#<pymc.PyMCObjects.Deterministic '(x[1]_add_y[2])' at 0x105c52410>

def r(switchpoint=s, early_rate=e, late_rate=l):
예제 #29
    pos_score = y_score[y_test == 1]
    neg_score = y_score[y_test == 0]
    # ranksums(pos_score, neg_score)
    alldata = np.concatenate((pos_score, neg_score))
    ranked = rankdata(alldata)
    m1 = len(pos_score)
    m2 = len(neg_score)
    pos_rank = ranked[:m1]
    neg_rank = ranked[m1:]
    s = np.sum(pos_rank, axis=0)
    count = s - m1 * (m1 + 1) / 2.0

    # Binomal-Beta Conjugate
    n_sample = 20000
    p = pm.Beta("p", alpha=1, beta=1)
    n = pm.Binomial("Bino", n=m1 * m2, p=p, value=count, observed=True)
    mcmc = pm.MCMC([n, p])
    trace = mcmc.sample(n_sample)
    auc_trace = mcmc.trace("p")[:]
    auc_mean = auc_trace.mean()

    # 95% credible region
    n_sample = auc_trace.shape[0]
    lower_limits = np.sort(auc_trace)[int(0.025 * n_sample)]
    upper_limits = np.sort(auc_trace)[int(0.975 * n_sample)]

    # plot Posterior predictive distribution of auc measure
    ax = plt.subplot(l, 2, i)
    i += 1
    # from pymc.Matplot import plot as mcplot
    # mcplot(mcmc.trace("p"),common_scale=False)
예제 #30
with pymc.Model() as model:
    ### pp.228のBUGSコード相当
    Y = df['y'].values
    N = len(Y)

    ### hyperpriors
    s = pymc.Uniform(name="s", lower=1.0e-2, upper=1.0e+2, testval=0.01)
    b = pymc.Normal(name='b', mu=0.01, tau=1.0e+2)

    ### priors
    r = [
        pymc.Normal(name="r_{0}".format(i), mu=0., tau=s**-2) for i in range(N)
    p = tinvlogit(b + r)

    obs = pymc.Binomial(name="obs", n=8, p=p, observed=Y)

#H = model.fastd2logp()

with model:
    start = pymc.find_MAP(vars=[s], fmin=optimize.fmin_l_bfgs_b)

# with model:
#     step = pymc.NUTS(model.vars, scaling=start)

# かなり時間がかかるので実行時には注意すること!
def run(n=3000):
    if n == "short":
        n = 50
    with model: