Example #1
import numpy as np
import pymc


def make_model():
    # Construct the prior term: a uniform prior over the unit square.
    location = pymc.Uniform('location', lower=[0, 0], upper=[1, 1])
    # The locations of the sensors
    X = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
    # The forward model (Solver is assumed to be defined elsewhere in the project)
    solver = Solver(X=X)

    @pymc.deterministic(plot=False)
    def model_output(loc=location):
        return solver(loc)

    # The hyper-parameters of the noise
    alpha = pymc.Exponential('alpha', beta=1.)
    beta = pymc.Exponential('beta', beta=1.)
    tau = pymc.Gamma('tau', alpha=alpha, beta=beta)
    # Load the observed data
    data = np.loadtxt('observed_data')
    # The observations at the sensor locations
    @pymc.stochastic(dtype=float, observed=True)
    def sensors(value=data, mu=model_output, tau=tau, gamma=1.):
        """The value of the response at the sensors."""
        return gamma * pymc.normal_like(value, mu=mu, tau=tau)

    return locals()
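A minimal usage sketch (hypothetical, not part of the original example): the returned locals() dict feeds straight into pymc.MCMC. The sampler settings are illustrative, and Solver plus the 'observed_data' file must exist.

# Hypothetical usage of the model above.
model = make_model()
mcmc = pymc.MCMC(model)
mcmc.sample(iter=10000, burn=2000, thin=2)  # illustrative settings
location_trace = mcmc.trace('location')[:]  # posterior samples of the source location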
Example #2
def gamma_poisson(x, t):
    """ x: number of failures (N vector)
        t: operation time, thousands of hours (N vector) """

    if x is not None:
        N = x.shape[0]
    else:
        N = num_points  # assumes a module-level default sample size

    # place an exponential prior on t, for when it is unknown
    t = pymc.Exponential('t',
                         beta=1.0 / 50.0,
                         value=t,
                         size=N,
                         observed=(t is not None))

    alpha = pymc.Exponential('alpha', beta=1.0, value=1.0)
    beta = pymc.Gamma('beta', alpha=0.1, beta=1.0, value=1.0)

    theta = pymc.Gamma('theta', alpha=alpha, beta=beta, size=N)

    @pymc.deterministic
    def mu(theta=theta, t=t):
        return theta * t

    x = pymc.Poisson('x', mu=mu, value=x, observed=(x is not None))

    return locals()
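A hedged usage sketch: the failure counts and operation times below are invented, and num_points must be defined at module level when x is None.

# Hypothetical usage with invented reliability data.
x_obs = np.array([5, 1, 5, 14, 3])                 # failures per unit
t_obs = np.array([94.3, 15.7, 62.9, 126.0, 5.24])  # thousands of hours
mcmc = pymc.MCMC(gamma_poisson(x_obs, t_obs))
mcmc.sample(iter=10000, burn=2000)
theta_trace = mcmc.trace('theta')[:]               # per-unit failure rates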
Example #3
def three_model_comparison(p_df):

    a_n = len(p_df)
    t_lam = pm.Uniform('t_lam', 0, 1)
    #t_lam = 1.0 / np.mean(p_df)
    t_lambda_1 = pm.Exponential("t_lambda_1", t_lam)
    #t_lambda_1 = pm.Uniform("t_lambda_1", min(p_df), max(p_df))
    t_lambda_2 = pm.Exponential("t_lambda_2", t_lam)
    #t_lambda_2 = pm.Uniform("t_lambda_2", min(p_df), max(p_df))
    t_lambda_3 = pm.Exponential("t_lambda_3", t_lam)
    #t_lambda_3 = pm.Uniform("t_lambda_3", min(p_df), max(p_df))

    # The switchpoints index positions in the series, so they range over
    # [0, a_n] rather than over the observed values.
    t_tau_1 = pm.DiscreteUniform("tau_1", lower=0, upper=a_n - 1)
    t_tau_2 = pm.DiscreteUniform("tau_2", lower=t_tau_1, upper=a_n)

    @pm.deterministic
    def lambda_(tau_1=t_tau_1,
                tau_2=t_tau_2,
                lambda_1=t_lambda_1,
                lambda_2=t_lambda_2,
                lambda_3=t_lambda_3):
        out = np.zeros(a_n)
        out[:tau_1] = lambda_1  # rate before tau_1
        out[tau_1:tau_2] = lambda_2  # rate between tau_1 and tau_2
        out[tau_2:] = lambda_3  # rate after (and including) tau_2
        return out

    t_obs = pm.Poisson('t_observed', mu=lambda_, value=p_df, observed=True)

    t_model = pm.Model(
        [t_obs, t_lam, t_lambda_1, t_lambda_2, t_lambda_3, t_tau_1, t_tau_2])
    #d_model = pm.Model([d_obs,  t_lambda_1, t_lambda_2, tau])

    return t_model, t_lam, t_lambda_1, t_lambda_2, t_lambda_3, t_tau_1, t_tau_2
Example #4
def two_model_comparison(p_df):

    a_n = len(p_df)
    d_lam = pm.Uniform('d_lam', 0, 1)
    #d_lam = 1.0 / np.mean(p_df)
    lambda_1 = pm.Exponential("lambda_1", d_lam)
    #lambda_1 = pm.Uniform("lambda_1", min(p_df), max(p_df))
    lambda_2 = pm.Exponential("lambda_2", d_lam)
    #lambda_2 = pm.Uniform("lambda_2",min(p_df), max(p_df))

    #tau = pm.DiscreteUniform("tau", lower=min(p_df), upper=max(p_df) )
    # The switchpoint indexes a position in the series, so it ranges over
    # [0, a_n] rather than over the observed values.
    tau = pm.DiscreteUniform("tau", lower=0, upper=a_n)

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(a_n)
        out[:tau] = lambda_1  # lambda before tau is lambda1
        out[tau:] = lambda_2  # lambda after (and including) tau is lambda2
        return out

    d_obs = pm.Poisson('d_observed', mu=lambda_, value=p_df, observed=True)

    d_model = pm.Model([d_obs, d_lam, lambda_1, lambda_2, tau])
    #d_model = pm.Model([d_obs,  lambda_1, lambda_2, tau])

    return d_model, d_obs, d_lam, lambda_1, lambda_2, tau
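A usage sketch for the single-switchpoint model; the daily counts are invented, and three_model_comparison above is driven the same way.

# Hypothetical usage: fit the model and recover the switchpoint posterior.
counts = np.array([13, 24, 8, 24, 7, 35, 14, 11, 15, 11, 22, 22, 11, 57])
d_model, d_obs, d_lam, lambda_1, lambda_2, tau = two_model_comparison(counts)
mcmc = pm.MCMC(d_model)
mcmc.sample(40000, 10000)
tau_samples = mcmc.trace('tau')[:]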
Example #5
File: model.py Project: zkxshg/pymc
def make_gp_submodel(suffix,
                     mesh,
                     africa_val=None,
                     with_africa_covariate=False):

    # The partial sill.
    amp = pm.Exponential('amp_%s' % suffix, .1, value=1.)

    # The range parameter. Units are RADIANS.
    scale = pm.Exponential('scale_%s' % suffix, 1, value=.08)

    # An arc of 1 radian on the earth's surface spans one earth radius, about 6378.1 km
    scale_in_km = scale * 6378.1

    # The nugget variance, lower-bounded to preserve mixing.
    V = pm.Exponential('V_%s' % suffix, 1, value=1.)

    @pm.potential
    def V_bound(V=V):
        if V < .1:
            return -np.inf
        else:
            return 0

    # Create the covariance & its evaluation at the data locations.
    @pm.deterministic(trace=True, name='C_%s' % suffix)
    def C(amp=amp, scale=scale):
        return pm.gp.FullRankCovariance(pm.gp.exponential.geo_rad,
                                        amp=amp,
                                        scale=scale)

    # Create the mean function
    if with_africa_covariate:
        beta = pm.Normal('beta_%s' % suffix, 0, .01, value=1)

        @pm.deterministic(trace=True, name='M_%s' % suffix)
        def M(mesh=mesh, africa_val=africa_val, beta=beta):
            M = pm.gp.Mean(retrieve_africa_val,
                           meshes=[],
                           africa_vals=[],
                           beta=beta)
            store_africa_val(M, mesh, africa_val)
            return M
    else:

        @pm.deterministic(trace=True, name='M_%s' % suffix)
        def M():
            return pm.gp.Mean(pm.gp.zero_fn)

    # Create the GP submodel
    sp_sub = pm.gp.GPSubmodel('sp_sub_%s' % suffix, M, C, mesh)

    sp_sub.f_eval.value = sp_sub.f_eval.value - sp_sub.f_eval.value.mean()

    return locals()
Example #6
def param_selector(data):
    # Parameters for the Hawkes process.
    # Base rate: the number of events divided by the observation span,
    # jittered slightly so repeated runs start from different priors.
    mu = pm.Exponential(
        'mu',
        len(data) / data[-1] +
        (random.choice([0, 1]) * 0.01 * random.random()))
    # Alpha, the excitation coefficient: an exponential prior, since large
    # values are increasingly unlikely.
    alpha = pm.Exponential(
        'alpha',
        len(data) / data[-1] +
        (random.choice([0, 1]) * 0.01 * random.random()))
    # Beta must be positive: the impact of an event decays over time.
    beta = pm.Exponential('beta', 1)
    return mu, alpha, beta
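A hedged sketch of how these priors might be used; the event times are invented, and the Hawkes likelihood itself is outside this example.

# Hypothetical event times: sorted arrival times in days.
event_times = np.cumsum(np.random.exponential(0.5, size=200))
mu, alpha, beta = param_selector(event_times)
# mu, alpha, beta would then enter a Hawkes-process likelihood and pm.MCMC.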
Example #7
    def __init__(self, observed_frequencies=1.0, observed_power=1.0):
        self.observed_frequencies = observed_frequencies
        self.observed_power = observed_power

        # PyMC definitions
        # Define data and stochastics
        self.power_law_index = pymc.Uniform('power_law_index',
                                            lower=0.0,
                                            upper=6.0,
                                            doc='power law index')

        self.power_law_norm = pymc.Uniform('power_law_norm',
                                           lower=-100.0,
                                           upper=100.0,
                                           doc='power law normalization')
    
        # Model for the power law spectrum
        @pymc.deterministic(plot=False)
        def fourier_power_spectrum(p=self.power_law_index,
                                   a=self.power_law_norm,
                                   f=self.observed_frequencies):
            """A pure and simple power law model"""
            out = rnspectralmodels.power_law(f, [a, p])
            return out
    
        self.spectrum = pymc.Exponential('spectrum',
                                         beta=1.0 / fourier_power_spectrum,
                                         value=observed_power,
                                         observed=True)
    
        # MCMC model as a list
        self.pymc_model = [self.power_law_index,
                           self.power_law_norm,
                           fourier_power_spectrum,
                           self.spectrum]
Example #8
def mk_allmodels_bayes(tree, chars, nparam, pi="Equal", dbname=None):
    """
    Fit an mk model with nparam parameters distributed about the Q matrix.
    """
    if type(chars) == dict:
        chars = [chars[l] for l in [n.label for n in tree.leaves()]]
    nchar = len(set(chars))
    ncell = nchar**2 - nchar
    assert nparam <= ncell

    minp = pscore(tree, chars)
    treelen = sum([n.length for n in tree.descendants()])
    ### Parameters
    # Prior on slowest distribution (beta = 1/mean)
    slow = pymc.Exponential("slow", beta=treelen / minp)

    paramscales = [None] * (nparam - 1)
    for p in range(nparam - 1):
        paramscales[p] = pymc.Uniform(name="paramscale_{}".format(str(p)),
                                      lower=2,
                                      upper=20)
    ### Model
    paramset = list(range(nparam + 1))
    nonzeros = paramset[1:]
    all_mods = list(itertools.product(paramset, repeat=ncell))
    all_mods = [
        tuple(m) for m in all_mods if all([i in set(m) for i in nonzeros])
    ]

    mod = make_qmat_stoch_mk(all_mods, name="mod")

    l = mk.create_likelihood_function_mk(tree=tree,
                                         chars=chars,
                                         Qtype="ARD",
                                         pi=pi,
                                         findmin=False)
    Q = np.zeros([nchar, nchar])
    mask = np.ones([nchar, nchar], dtype=bool)
    mask[np.diag_indices(nchar)] = False

    @pymc.potential
    def mklik(mod=mod, slow=slow, paramscales=paramscales, name="mklik"):
        params = [0.0] * (nparam + 1)
        params[1] = slow
        for i, s in enumerate(paramscales):
            params[2 + i] = params[2 + (i - 1)] * s

        Qparams = [params[i] for i in mod]
        return l(np.array(Qparams))

    if dbname is None:
        mod_mcmc = pymc.MCMC(locals(), calc_deviance=True)
    else:
        mod_mcmc = pymc.MCMC(locals(),
                             calc_deviance=True,
                             db="pickle",
                             dbname=dbname)
    mod_mcmc.use_step_method(QmatMetropolis_mk, mod, all_mods)
    return mod_mcmc
Example #9
def _fit_beta_distribution(data, n_iter):
    alpha_var = pm.Exponential('alpha', .5)
    beta_var = pm.Exponential('beta', .5)

    observations = pm.Beta('observations',
                           alpha_var,
                           beta_var,
                           value=data,
                           observed=True)

    model = pm.Model([alpha_var, beta_var, observations])
    mcmc = pm.MCMC(model)
    mcmc.sample(n_iter)

    alphas = mcmc.trace('alpha')[:]
    betas = mcmc.trace('beta')[:]
    return alphas, betas
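A usage sketch with synthetic data; the true shape parameters 2 and 5 are arbitrary, and numpy is assumed imported as np.

# Hypothetical usage: posterior means should land near the true (2, 5).
data = np.random.beta(a=2.0, b=5.0, size=500)
alphas, betas = _fit_beta_distribution(data, n_iter=20000)
print(alphas.mean(), betas.mean())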
Example #10
def main():
    lambda_1 = pm.Exponential("lambda_1", 1)  # prior on first behaviour
    lambda_2 = pm.Exponential("lambda_2", 1)  # prior on second behaviour
    tau = pm.DiscreteUniform("tau", lower=0,
                             upper=10)  # prior on behaviour change

    print "lambda_1.value = %.3f" % lambda_1.value
    print "lambda_2.value = %.3f" % lambda_2.value
    print "tau.value = %.3f" % tau.value
    print

    lambda_1.random(), lambda_2.random(), tau.random()

    print "After calling random() on the variables..."
    print "lambda_1.value = %.3f" % lambda_1.value
    print "lambda_2.value = %.3f" % lambda_2.value
    print "tau.value = %.3f" % tau.value

    samples = [lambda_1.random() for i in range(20000)]
    plt.hist(samples, bins=70, density=True, histtype="stepfilled")
    plt.title(r"Prior distribution for $\lambda_1$")
    plt.xlim(0, 8)
    plt.show()

    data = np.array([10, 5])
    fixed_variable = pm.Poisson("fxd", 1, value=data, observed=True)
    print "value: ", fixed_variable.value
    print "calling .random()"
    fixed_variable.random()
    print "value: ", fixed_variable.value

    n_data_points = 5  # in CH1 we had ~70 data points

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(n_data_points)
        out[:tau] = lambda_1  # lambda before tau is lambda1
        out[tau:] = lambda_2  # lambda after tau is lambda2
        return out

    data = np.array([10, 25, 15, 20, 35])
    obs = pm.Poisson("obs", lambda_, value=data, observed=True)

    model = pm.Model([obs, lambda_, lambda_1, lambda_2, tau])
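
    # Hypothetical continuation (not in the original): sample the model.
    mcmc = pm.MCMC(model)
    mcmc.sample(40000, 10000, 1)
    tau_samples = mcmc.trace('tau')[:]  # posterior over the switchpoint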
Example #11
def make_model(x):
    a = pm.Exponential('a', beta=x, value=0.5)

    @pm.deterministic
    def b(a=a):
        return 100 - a

    @pm.stochastic
    def c(value=0.5, a=a, b=b):
        # Returns the log-probability of c given its parents a and b.
        return (value - a)**2 / b

    return locals()
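A usage sketch (the argument 2.0 is arbitrary):

# Hypothetical usage: the returned dict feeds directly into pm.MCMC.
mcmc = pm.MCMC(make_model(2.0))
mcmc.sample(10000, 2000)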
Example #12
def best(group1, group2):
    import pymc as pm
    # NOTE: the arguments are immediately overwritten with synthetic data,
    # so this function is a self-contained demo as written.
    group1 = np.random.normal(15, 2, 100)
    group2 = np.random.normal(15.3, 2, 100)

    # Generate Pooled Data
    pooled = np.concatenate((group1, group2))

    mu1 = pm.Normal("mu_1", mu=pooled.mean(), tau=1.0 / pooled.var() / 1000.0)
    mu2 = pm.Normal("mu_2", mu=pooled.mean(), tau=1.0 / pooled.var() / 1000.0)

    sig1 = pm.Uniform("sigma_1",
                      lower=pooled.var() / 1000.0,
                      upper=pooled.var() * 1000)
    sig2 = pm.Uniform("sigma_2",
                      lower=pooled.var() / 1000.0,
                      upper=pooled.var() * 1000)

    v = pm.Exponential("nu", beta=1.0 / 29)

    t1 = pm.NoncentralT("t_1",
                        mu=mu1,
                        lam=1.0 / sig1,
                        nu=v,
                        value=group1[:],
                        observed=True)
    t2 = pm.NoncentralT("t_2",
                        mu=mu2,
                        lam=1.0 / sig2,
                        nu=v,
                        value=group2[:],
                        observed=True)

    model = pm.Model([t1, mu1, sig1, t2, mu2, sig2, v])

    # Generate our MCMC object
    mcmc = pm.MCMC(model)

    mcmc.sample(40000, 10000, 2)

    mus1 = mcmc.trace('mu_1')[:]
    mus2 = mcmc.trace('mu_2')[:]
    sigmas1 = mcmc.trace('sigma_1')[:]
    sigmas2 = mcmc.trace('sigma_2')[:]
    nus = mcmc.trace('nu')[:]

    diff_mus = mus1 - mus2
    diff_sigmas = sigmas1 - sigmas2
    normality = np.log(nus)
    effect_size = (mus1 - mus2) / np.sqrt((sigmas1**2 + sigmas2**2) / 2.)

    print("mu_1", mus1.mean())
    print("mu_2", mus2.mean())
Example #13
def make_on_off(n_off, expo_off, n_on, expo_on, mean0):
    """
    Make a PyMC model for inferring a Poisson signal rate parameter, `s`, for
    'on-off' observations with uncertain background rate, `b`.

    Parameters
    ----------

    n_off, n_on : int
        Event counts off-source and on-source

    expo_off, expo_on : float
        Exposures off-source and on-source

    mean0 : float
        Prior mean for both background and signal rates
    """

    # PyMC's exponential dist'n uses beta = 1/scale = 1/mean.
    # Here we initialize rates to good guesses.
    b_est = float(n_off)/expo_off
    s_est = max(float(n_on)/expo_on - b_est, .1*b_est)
    b = pymc.Exponential('b', beta=1./mean0, value=b_est)
    s = pymc.Exponential('s', beta=1./mean0, value=s_est)

    # The expected number of counts on and off source, as deterministic functions.
    @pymc.deterministic
    def mu_off(b=b):
        return b*expo_off

    @pymc.deterministic
    def mu_on(s=s, b=b):
        return (s+b)*expo_on

    # Poisson likelihood functions:
    off_count = pymc.Poisson('off_count', mu=mu_off, value=n_off, observed=True)
    on_count = pymc.Poisson('on_count', mu=mu_on, value=n_on, observed=True)

    return locals()
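A hedged usage sketch with invented counts and exposures:

# Hypothetical usage of the on-off model.
on_off = make_on_off(n_off=80, expo_off=40.0, n_on=60, expo_on=10.0, mean0=5.0)
mcmc = pymc.MCMC(on_off)
mcmc.sample(iter=20000, burn=5000)
s_samples = mcmc.trace('s')[:]  # posterior for the signal rate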
Example #14
def negative_b_mcm_model(p_df):

    n_mu = pm.Normal('n_mu', mu=1650, tau=0.00001)
    n_lam = pm.Uniform('n_uni_alpha', 0, 1)
    n_alpha = pm.Exponential('n_alpha', beta=n_lam)
    n_ob = pm.NegativeBinomial('n_observed',
                               mu=n_mu,
                               alpha=n_alpha,
                               value=p_df,
                               observed=True)
    n_es = pm.NegativeBinomial('n_estimated',
                               mu=n_mu,
                               alpha=n_alpha,
                               observed=False)
    n_model = pm.Model([n_mu, n_lam, n_alpha, n_ob, n_es])

    return n_mu, n_lam, n_alpha, n_ob, n_es, n_model
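A usage sketch with synthetic overdispersed counts (all numbers invented):

# Hypothetical usage: fit the negative-binomial model to synthetic counts.
counts = 1650 + np.random.negative_binomial(5, 0.5, size=200)
n_mu, n_lam, n_alpha, n_ob, n_es, n_model = negative_b_mcm_model(counts)
mcmc = pm.MCMC(n_model)
mcmc.sample(20000, 5000)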
Example #15
def create_mk_model(tree, chars, Qtype, pi):
    """
    Create model objects to be passed to pymc.MCMC

    Creates Qparams and likelihood function
    """
    if type(chars) == dict:
        chars = [chars[l] for l in [n.label for n in tree.leaves()]]
    nchar = len(set(chars))
    if Qtype=="ER":
        N = 1
    elif Qtype=="Sym":
        N = int(binom(nchar, 2))
    elif Qtype=="ARD":
        N = int((nchar ** 2 - nchar))
    else:
        raise ValueError("Qtype must be one of: ER, Sym, ARD")

    # Setting a Dirichlet prior with Jeffrey's hyperprior of 1/2
    if N != 1:
        theta = [1.0/2.0]*N
        # PyMC2's Dirichlet is (N-1)-dimensional; CompletedDirichlet fills in
        # the last component, so the initial value needs N-1 entries.
        Qparams_init = pymc.Dirichlet("Qparams_init", theta, value=[1.0 / N] * (N - 1))
        Qparams_init_full = pymc.CompletedDirichlet("Qparams_init_full", Qparams_init)
    else:
        Qparams_init_full = [[1.0]]

    # Exponential scaling factor for Qparams
    scaling_factor = pymc.Exponential(name="scaling_factor", beta=1.0, value=1.0)

    # Scaled Qparams; we would not expect them to necessarily add
    # to 1 as would be the case in a Dirichlet distribution
    @pymc.deterministic(plot=False)
    def Qparams(q=Qparams_init_full, s=scaling_factor):
        Qs = np.empty(N)
        for i in range(N):
            Qs[i] = q[0][i]*s
        return Qs

    l = mk.create_likelihood_function_mk(tree=tree, chars=chars, Qtype=Qtype,
                                         pi=pi, findmin=False)

    @pymc.potential
    def mklik(q=Qparams, name="mklik"):
        return l(q)
    return locals()
Example #16
    def test_transformed(self):
        n = 18
        at_bats = 45 * np.ones(n, dtype=int)
        hits = np.random.randint(1, 40, size=n, dtype=int)
        draws = 50

        with pm.Model() as model:
            phi = pm.Beta("phi", alpha=1.0, beta=1.0)

            kappa_log = pm.Exponential("logkappa", lam=5.0)
            kappa = pm.Deterministic("kappa", at.exp(kappa_log))

            thetas = pm.Beta("thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, size=n)

            y = pm.Binomial("y", n=at_bats, p=thetas, observed=hits)
            gen = pm.sample_prior_predictive(draws)

        assert gen.prior["phi"].shape == (1, draws)
        assert gen.prior_predictive["y"].shape == (1, draws, n)
        assert "thetas" in gen.prior.data_vars
Example #17
def make_model():
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle  # Python 3
    with open('reaction_kinetics_data.pickle', 'rb') as fd:
        data = pickle.load(fd)
    y_obs = data['y_obs']
    # The priors for the reaction rates:
    k1 = pymc.Lognormal('k1', mu=2, tau=1./(10. ** 2), value=5.)
    k2 = pymc.Lognormal('k2', mu=4, tau=1./(10. ** 2), value=5.)
    # The noise term
    #sigma = pymc.Uninformative('sigma', value=1.)
    sigma = pymc.Exponential('sigma', beta=1.)
    # The forward model
    re_solver = ReactionKineticsSolver()
    @pymc.deterministic
    def model_output(k1=k1, k2=k2):
        return re_solver(k1, k2)

    # The likelihood term: evaluate the normal likelihood at the stochastic's
    # own value rather than at the captured y_obs.
    @pymc.stochastic(observed=True)
    def output(value=y_obs, mod_out=model_output, sigma=sigma, gamma=1.):
        return gamma * pymc.normal_like(value, mu=mod_out, tau=1 / sigma ** 2)
    return locals()
Example #18
def make_poisson(n, intvl, mean0):
    """
    Make a PyMC model for inferring a Poisson distribution rate parameter,
    for a datum consisting of `n` counts observed in an interval of size
    `intvl`.  The inference will use an exponential prior for the rate,
    with prior mean `mean0`.
    """

    # PyMC's exponential dist'n uses beta = 1/scale = 1/mean.
    # Here we initialize rate to n/intvl.
    rate = pymc.Exponential('rate', beta=1./mean0, value=float(n)/intvl)

    # The expected number of counts, mu=rate*intvl, is a deterministic function
    # of the rate RV (and the constant intvl).
    @pymc.deterministic
    def mu(rate=rate):
        return rate*intvl

    # Poisson likelihood function:
    count = pymc.Poisson('count', mu=mu, value=n, observed=True)

    return locals()
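A usage sketch (counts, interval, and prior mean are invented):

# Hypothetical usage: 16 counts in a 2-unit interval, prior mean rate 10.
model = make_poisson(n=16, intvl=2.0, mean0=10.0)
mcmc = pymc.MCMC(model)
mcmc.sample(iter=10000, burn=2000)
rate_samples = mcmc.trace('rate')[:]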
Example #19
import pymc as pm
import numpy as np
import matplotlib.pyplot as plt

parameter = pm.Exponential("poisson_param", 1)
data_generator = pm.Poisson("data_generator", parameter)
data_plus_one = data_generator + 1

# 'parents' are the variables that influence another variable;
# 'children' are the variables influenced by a parent.
print(parameter.children)
print(data_generator.parents)
print(data_generator.children)

# The 'value' attribute holds the variable's current value.
print(parameter.value)
print(data_generator.value)
print(data_plus_one.value)

# 'stochastic' variables remain random even when their parents are known;
# 'deterministic' variables are fixed once their parents are known.
# Initializing variables:
#   * name argument - used to retrieve the posterior distribution later
#   * class-specific arguments
#   * size - an array of independent, identically structured stochastic variables

some_var = pm.DiscreteUniform("discrete_uni_var", 0, 4)

betas = pm.Uniform("betas", 0, 1, size=10)
print(betas.value)
Example #20
import numpy as np
import pymc as pm
from matplotlib import pyplot as plt

#count_data = np.loadtxt("txtdata.csv")
count_data = np.loadtxt("txtdata_sim.csv")

n_count_data = len(count_data)

print(count_data.mean())

# Recall count_data is the variable that holds our text counts.
alpha = 1.0 / count_data.mean()

lambda_1 = pm.Exponential("lambda_1", alpha)
lambda_2 = pm.Exponential("lambda_2", alpha)

tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)


@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    out = np.zeros(n_count_data)
    out[:tau] = lambda_1  # lambda before tau is lambda1
    out[tau:] = lambda_2  # lambda after (and including) tau is lambda2
    return out


observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)

model = pm.Model([observation, lambda_1, lambda_2, tau])
Example #21
def compare_groups(list1, list2):

    data = list1 + list2
    count_data = np.array(data)
    n_count_data = len(count_data)
    plt.bar(np.arange(n_count_data), count_data, color="#348ABD")
    plt.xlabel("Time (days)")
    plt.ylabel("count of text-msgs received")
    plt.title(
        "Did the viewers' ad viewing increase with the number of ads shown?")
    plt.xlim(0, n_count_data)
    #plt.show()

    # Recall count_data is the variable that holds our counts.
    alpha = 1.0 / count_data.mean()
    print(alpha)
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)

    tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(n_count_data)
        out[:tau] = lambda_1  # lambda before tau is lambda1
        out[tau:] = lambda_2  # lambda after (and including) tau is lambda2
        return out

    observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)

    model = pm.Model([observation, lambda_1, lambda_2, tau])

    mcmc = pm.MCMC(model)
    mcmc.sample(40000, 10000, 1)

    lambda_1_samples = mcmc.trace('lambda_1')[:]
    lambda_2_samples = mcmc.trace('lambda_2')[:]
    tau_samples = mcmc.trace('tau')[:]

    print(tau_samples)
    # histogram of the samples:

    ax = plt.subplot(311)
    ax.set_autoscaley_on(False)

    plt.hist(lambda_1_samples,
             histtype='stepfilled',
             bins=30,
             alpha=0.85,
             label=r"posterior of $\lambda_1$",
             color="#A60628",
             density=True)
    plt.legend(loc="upper left")
    plt.title(r"""Posterior distributions of the variables
        $\lambda_1,\;\lambda_2,\;\tau$""")
    plt.xlim([0, 6])
    plt.ylim([0, 7])
    plt.xlabel(r"$\lambda_1$ value")

    ax = plt.subplot(312)
    ax.set_autoscaley_on(False)
    plt.hist(lambda_2_samples,
             histtype='stepfilled',
             bins=30,
             alpha=0.85,
             label=r"posterior of $\lambda_2$",
             color="#7A68A6",
             density=True)
    plt.legend(loc="upper left")
    plt.xlim([0, 6])
    plt.ylim([0, 7])
    plt.xlabel(r"$\lambda_2$ value")

    plt.subplot(313)
    w = 1.0 / tau_samples.shape[0] * np.ones_like(tau_samples)
    plt.hist(tau_samples,
             bins=n_count_data,
             alpha=1,
             label=r"posterior of $\tau$",
             color="#467821",
             weights=w,
             rwidth=2.)
    plt.xticks(np.arange(n_count_data))

    plt.legend(loc="upper left")
    plt.ylim([0, .75])
    plt.xlim([0, len(count_data)])
    plt.xlabel(r"$\tau$ (iterations)")
    plt.ylabel("probability")

    plt.show()
Example #22
    plt.plot(stormsYears, stormsNumbers, '-ok')
    plt.xlim(year0, year1)
    plt.xlabel("Рік")
    plt.ylabel("Кількість штормів")
    general.set_grid_to_plot()
    plt.savefig(general.folderPath2 + "exp2_storms2.png")
    plt.clf()

    switchpoint = pm.DiscreteUniform('switchpoint',
                                     lower=0,
                                     upper=len(stormsNumbers) - 1,
                                     doc='Switchpoint[year]')

    avg = np.mean(stormsNumbers)
    early_mean = pm.Exponential('early_mean', beta=1./avg)
    late_mean = pm.Exponential('late_mean', beta=1./avg)

    @pm.deterministic(plot=False)
    def rate(s=switchpoint, e=early_mean, l=late_mean):
        # Concatenate Poisson means
        out = np.zeros(len(stormsNumbers))
        out[:s] = e
        out[s:] = l
        return out

    storms = pm.Poisson('storms',
                        mu=rate,
                        value=stormsNumbers,
                        observed=True)
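
    # Hypothetical continuation (not in the original): fit the model.
    model = pm.Model([switchpoint, early_mean, late_mean, rate, storms])
    mcmc = pm.MCMC(model)
    mcmc.sample(20000, 5000)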
Example #23
# The observed data (earlier entries truncated in the source):
data = np.array([
    0.23972844, -0.78645389, -0.21687104, -0.2939634, 0.51229013, 0.04626286,
    0.18329919, -1.12775839, -1.64187249, 0.33440094, -0.95224695, 0.15650266,
    -0.54056102, 0.12240128, -0.95397459, 0.44806432, -1.02955556, 0.31740861,
    -0.8762523, 0.47377688, 0.76516415, 0.27890419, -0.07819642, -0.13399348,
    0.82877293, 0.22308624, 0.7485783, -0.14700254, -1.03145657, 0.85641097,
    0.43396285, 0.47901653, 0.80137086, 0.33566812, 0.71443253, -1.57590815,
    -0.24090179, -2.0128344, 0.34503324, 0.12944091, -1.5327008, 0.06363034,
    0.21042021, -0.81425636, 0.20209279, -1.48130423, -1.04983523, 0.16001774,
    -0.75239072, 0.33427956, -0.10224921, 0.26463561, -1.09374674, -0.72749811,
    -0.54892116, -1.89631844, -0.94393545, -0.2521341, 0.26840341, 0.23563219,
    0.35333094
])

# Model: the data are truncated-normally distributed with unknown upper bound.
mu = pm.Normal('mu', 0, .01, value=0)
tau = pm.Exponential('tau', .01, value=1)
cutoff = pm.Exponential('cutoff', 1, value=1.3)
D = pm.TruncatedNormal('D',
                       mu,
                       tau,
                       -np.inf,
                       cutoff,
                       value=data,
                       observed=True)

M = pm.MCMC([mu, tau, cutoff, D])

# Use a TruncatedMetropolis step method that will never propose jumps below D's maximum value.
M.use_step_method(TruncatedMetropolis, cutoff, D.value.max(), np.inf)
# Get a handle to the step method handling cutoff to investigate its behavior.
S = M.step_method_dict[cutoff][0]
Example #24
def mk_multi_bayes(tree, chars, nregime, qidx, pi="Equal", seglen=0.02, stepsize=0.05):
    """
    Create a Bayesian multi-mk model. User specifies which regime models
    to use and the Bayesian model finds the switchpoints.

    Args:
        tree (Node): Root node of tree.
        chars (dict): Dict mapping tip labels to discrete character
          states. Character states must be in the form of [0,1,2...]

        nregime (int): The number of distinct regimes to test. Set to
          1 for an Mk model, set to greater than 1 for a multi-regime Mk model.
        qidx (np.array): Index specifying the model to test

            columns:
                0, 1, 2 - index axes of q
                3 - index of params
            This scheme allows flexible specification of models. E.g.:
            Symmetric mk2:
                params = [0.2]; qidx = [[0,0,1,0],
                                        [0,1,0,0]]

            Asymmetric mk2:
                params = [0.2,0.6]; qidx = [[0,0,1,0],
                                            [0,1,0,1]]
           NOTE:
             The qidx corresponding to the first q matrix (first column 0)
             is always the root regime
        pi (str or np.array): Option to weight the root node by given values.
           Either a string containing the method or an array
           of weights. Weights should be given in order.

           Accepted methods of weighting root:

           Equal: flat prior
           Equilibrium: Prior equal to stationary distribution
             of Q matrix
           Fitzjohn: Root states weighted by how well they
             explain the data at the tips.
        seglen (float): Size of segments to break tree into. The smaller this
          value, the more "fine-grained" the analysis will be. Optional,
          defaults to 2% of the root-to-tip length.
        stepsize (float): Maximum size of steps for switchpoints to take.
          Optional, defaults to 5% of root-to-tip length.


    """
    if type(chars) == dict:
        data = chars.copy()
        chars = [chars[l] for l in [n.label for n in tree.leaves()]]
    else:
        data = dict(zip([n.label for n in tree.leaves()],chars))
    # Preparations
    nchar = len(set(chars))
    nparam = len(set([n[-1] for n in qidx]))
    # This model has 2 components: Q parameters and switchpoints
    # They are combined in a custom likelihood function
    ###########################################################################
    # Switchpoint:
    ###########################################################################
    # Modeling the movement of the regime shift(s) is the tricky part
    # Regime shifts will only be allowed to happen at a node
    seg_map = tree_map(tree, seglen)
    switch = [None] * (nregime - 1)
    for regime in range(nregime - 1):
        switch[regime] = make_switchpoint_stoch(seg_map, name=str("switch_{}".format(regime)))
    ###########################################################################
    # Qparams:
    ###########################################################################
    # Each Q parameter is an exponential
    Qparams = [None] * nparam
    for i in range(nparam):
        Qparams[i] = pymc.Exponential(name=str("Qparam_{}".format(i)), beta=1.0, value=0.1 * (i + 1))


    ###########################################################################
    # Likelihood
    ###########################################################################
    # The likelihood function
    l = cyexpokit.make_mklnl_func(tree, data, nchar, nregime, qidx)

    @pymc.deterministic
    def likelihood(q=Qparams, s=switch, name="likelihood"):
        return l(np.array(q),
                 np.array([x[0].ni for x in s], dtype=np.intp),
                 np.array([x[1] for x in s]))

    @pymc.potential
    def multi_mklik(lnl=likelihood):
        if not np.isnan(lnl):
            return lnl
        else:
            return -np.inf

    mod = pymc.MCMC(locals())
    for s in switch:
        mod.use_step_method(SwitchpointMetropolis, s, tree, seg_map,
                            stepsize=stepsize, seglen=seglen)
    return mod
Example #25
def run_mcmc(gp, img, compare_img, transverse_sigma=1.0, motion_angle=0.0):
    """Estimate PSF using Markov Chain Monte Carlo

    gp - Gaussian priors - array of N objects with attributes
                           a, b, sigma

    img  - image to apply PSF to
    compare_img - comparison image
    transverse_sigma - prior
    motion_angle - prior


    Model a Point Spread Function consisting of the sum of N
    collinear Gaussians, blurred in the transverse direction,
    with the result rotated.  Each of the collinear Gaussians
    is parameterized by a (amplitude), b (center), and sigma (std. deviation).

    The Point Spread Function is applied to the image img
    and the result compared with the image  compare_img.
    """

    print "gp.shape", gp.shape
    print "gp", gp

    motion_angle = np.deg2rad(motion_angle)
    motion_angle = pm.VonMises("motion_angle",
                               motion_angle,
                               1.0,
                               value=motion_angle)

    transverse_sigma = pm.Exponential("transverse_sigma",
                                      1.0,
                                      value=transverse_sigma)
    N = gp.shape[0]

    mixing_coeffs = pm.Exponential("mixing_coeffs", 1.0, size=N)
    #mixing_coeffs.set_value(gp['a'])
    mixing_coeffs.value = gp['a']
    longitudinal_sigmas = pm.Exponential("longitudinal_sigmas", 1.0, size=N)
    #longitudinal_sigmas.set_value(gp['sigma'])
    longitudinal_sigmas.value = gp['sigma']

    b = np.array(sorted(gp['b']), dtype=float)
    cut_points = (b[1:] + b[:-1]) * 0.5
    long_means = [None] * b.shape[0]
    print(long_means)
    left_mean = pm.Gamma("left_mean", 1.0, 2.5 * gp['sigma'][0])
    long_means[0] = cut_points[0] - left_mean
    right_mean = pm.Gamma("right_mean", 1.0, 2.5 * gp['sigma'][-1])
    long_means[-1] = cut_points[-1] + right_mean
    for ix in range(1, N - 1):
        long_means[ix] = pm.Uniform("mid%d_mean" % ix,
                                    lower=cut_points[ix - 1],
                                    upper=cut_points[ix])
    print "cut_points", cut_points
    print "long_means", long_means

    #longitudinal_means = pm.Normal("longitudinal_means", 0.0, 0.04, size=N)
    #longitudinal_means.value = gp['b']

    # np.float was removed from NumPy; plain float is equivalent here.
    dtype = np.dtype([('a', float), ('b', float), ('sigma', float)])

    @pm.deterministic
    def psf(mixing_coeffs=mixing_coeffs, longitudinal_sigmas=longitudinal_sigmas, \
            longitudinal_means=long_means, transverse_sigma=transverse_sigma, motion_angle=motion_angle):
        gp = np.ones((N, ), dtype=dtype)
        gp['a'] = mixing_coeffs
        gp['b'] = longitudinal_means
        gp['sigma'] = longitudinal_sigmas
        motion_angle_deg = np.rad2deg(motion_angle)
        if True:
            print "gp: a", mixing_coeffs
            print "    b", longitudinal_means
            print "    s", longitudinal_sigmas
            print "tr-sigma", transverse_sigma, "angle=", motion_angle_deg
        return generate_sum_gauss(gp, transverse_sigma, motion_angle_deg)

    @pm.deterministic
    def image_fitness(psf=psf, img=img, compare_img=compare_img):
        img_convolved = ndimage.convolve(img, psf)
        img_diff = img_convolved.astype(int) - compare_img
        return img_diff.std()

    if False:
        trial_psf = generate_sum_gauss(gp,
                                       2.0,
                                       50.0,
                                       plot_unrot_kernel=True,
                                       plot_rot_kernel=True,
                                       verbose=True)
        print "trial_psf", trial_psf.min(), trial_psf.mean(), trial_psf.max(
        ), trial_psf.std()
        obs_psf = pm.Uniform("obs_psf",
                             lower=-1.0,
                             upper=1.0,
                             doc="Point Spread Function",
                             value=trial_psf,
                             observed=True,
                             verbose=False)

    print "image_fitness value started at", image_fitness.value
    known_fitness = pm.Exponential("fitness",
                                   image_fitness + 0.001,
                                   value=0.669,
                                   observed=True)

    #mcmc = pm.MCMC([motion_angle, transverse_sigma, mixing_coeffs, longitudinal_sigmas, longitudinal_means, image_fitness, known_fitness], verbose=2)
    mcmc = pm.MCMC([
        motion_angle, transverse_sigma, mixing_coeffs, longitudinal_sigmas,
        image_fitness, known_fitness, left_mean, right_mean
    ] + long_means,
                   verbose=2)
    pm.graph.dag(mcmc, format='png')
    plt.show()
    #mcmc.sample(20000, 1000)
    mcmc.sample(2000)

    motion_angle_samples = mcmc.trace("motion_angle")[:]
    transverse_sigma_samples = mcmc.trace("transverse_sigma")[:]
    image_fitness_samples = mcmc.trace("image_fitness")[:]

    best_fit = np.percentile(image_fitness_samples, 1.0)
    best_fit_selection = image_fitness_samples < best_fit

    print(mcmc.db.trace_names)
    for k in [k for k in mcmc.stats().keys() if k != "known_fitness"]:
        #samples = mcmc.trace(k)[:]
        samples = mcmc.trace(k).gettrace()
        print(samples.shape)
        selected_samples = samples[best_fit_selection]
        print(k, samples.mean(axis=0), samples.std(axis=0),
              selected_samples.mean(axis=0), selected_samples.std(axis=0))

    ax = plt.subplot(211)
    plt.hist(motion_angle_samples,
             histtype='stepfilled',
             bins=25,
             alpha=0.85,
             label="posterior of $p_\\theta$",
             color="#A60628",
             density=True)
    plt.legend(loc="upper right")
    plt.title("Posterior distributions of $p_\\theta$, $p_\\sigma$")

    ax = plt.subplot(212)
    plt.hist(transverse_sigma_samples,
             histtype='stepfilled',
             bins=25,
             alpha=0.85,
             label="posterior of $p_\\sigma$",
             color="#467821",
             density=True)
    plt.legend(loc="upper right")
    plt.show()

    for k, v in mcmc.stats().items():
        print(k, v)
    # deprecated?  use discrepancy...  print mcmc.goodness()
    mcmc.write_csv("out.csv")
    pm.Matplot.plot(mcmc)
    plt.show()
Example #26
    def time_drug_evaluation(self):
        # fmt: off
        drug = np.array([
            101, 100, 102, 104, 102, 97, 105, 105, 98, 101, 100, 123, 105, 103,
            100, 95, 102, 106, 109, 102, 82, 102, 100, 102, 102, 101, 102, 102,
            103, 103, 97, 97, 103, 101, 97, 104, 96, 103, 124, 101, 101, 100,
            101, 101, 104, 100, 101
        ])
        placebo = np.array([
            99, 101, 100, 101, 102, 100, 97, 101, 104, 101, 102, 102, 100, 105,
            88, 101, 100, 104, 100, 100, 100, 101, 102, 103, 97, 101, 101, 100,
            101, 99, 101, 100, 100, 101, 100, 99, 101, 100, 102, 99, 100, 99
        ])
        # fmt: on

        y = pd.DataFrame({
            "value":
            np.r_[drug, placebo],
            "group":
            np.r_[["drug"] * len(drug), ["placebo"] * len(placebo)],
        })
        y_mean = y.value.mean()
        y_std = y.value.std() * 2

        sigma_low = 1
        sigma_high = 10
        with pm.Model():
            group1_mean = pm.Normal("group1_mean", y_mean, sd=y_std)
            group2_mean = pm.Normal("group2_mean", y_mean, sd=y_std)
            group1_std = pm.Uniform("group1_std",
                                    lower=sigma_low,
                                    upper=sigma_high)
            group2_std = pm.Uniform("group2_std",
                                    lower=sigma_low,
                                    upper=sigma_high)
            lambda_1 = group1_std**-2
            lambda_2 = group2_std**-2

            nu = pm.Exponential("ν_minus_one", 1 / 29.0) + 1

            pm.StudentT("drug",
                        nu=nu,
                        mu=group1_mean,
                        lam=lambda_1,
                        observed=drug)
            pm.StudentT("placebo",
                        nu=nu,
                        mu=group2_mean,
                        lam=lambda_2,
                        observed=placebo)
            diff_of_means = pm.Deterministic("difference of means",
                                             group1_mean - group2_mean)
            pm.Deterministic("difference of stds", group1_std - group2_std)
            pm.Deterministic(
                "effect size", diff_of_means / np.sqrt(
                    (group1_std**2 + group2_std**2) / 2))
            pm.sample(draws=20000,
                      cores=4,
                      chains=4,
                      progressbar=False,
                      compute_convergence_checks=False)
Example #27
def rn_model_load(analysis_frequencies, analysis_power):

#    __all__ = ['analysis_power', 'analysis_frequencies', 'power_law_index',
#           'power_law_norm', 'power_law_spectrum', 'spectrum']

    
    estimate = rn_utils.do_simple_fit(analysis_frequencies, analysis_power)
    
    c_estimate = estimate[0]
    m_estimate = estimate[1]
    
    # Define data and stochastics.  (pymc.Uniform already returns a
    # stochastic variable, so no @pymc.stochastic decorator is needed,
    # and decorating an assignment is a syntax error.)
    power_law_index = pymc.Uniform('power_law_index',
                                   value=m_estimate,
                                   lower=0.0,
                                   upper=m_estimate + 2,
                                   doc='power law index')
    power_law_norm = pymc.Uniform('power_law_norm',
                                  value=c_estimate,
                                  lower=c_estimate * 0.8,
                                  upper=c_estimate * 1.2,
                                  doc='power law normalization')
    
    
    # Model for the power law spectrum
    @pymc.deterministic(plot=False)
    def power_law_spectrum(p=power_law_index,
                           a=power_law_norm,
                           f=analysis_frequencies):
        """A pure and simple power law model"""
        out = a * (f ** (-p))
        return out
    
    #@pymc.deterministic(plot=False)
    #def power_law_spectrum_with_constant(p=power_law_index, a=power_law_norm,
    #                                     c=constant, f=frequencies):
    #    """Simple power law with a constant"""
    #    out = empty(frequencies)
    #    out = c + a/(f**p)
    #    return out
    
    #@pymc.deterministic(plot=False)
    #def broken_power_law_spectrum(p2=power_law_index_above,
    #                              p1=power_law_index_below,
    #                              bf=break_frequency,
    #                              a=power_law_norm,
    #                              f=analysis_frequencies):
    #    """A broken power law model"""
    #    out = np.empty(len(f))
    #    out[f < bf] = a * (f[f < bf] ** (-p1))
    #    out[f > bf] = a * (f[f >= bf] ** (-p2)) * bf ** (p2 - p1)
    #    return out
    
    # The PyMC observed variable we will use: the measured power spectrum is
    # modeled as exponentially distributed about the power-law model
    # (beta = 1/mean).
    spectrum = pymc.Exponential('spectrum',
                                beta=1.0 / power_law_spectrum,
                                value=analysis_power,
                                observed=True)
    return locals()
Example #28
        # (The opening of this prediction helper was truncated in the source.)
        for v in range(len(gap)):
            ret.append(chunk[v] + gap[v])
        return ret

    return predict


predictions = [
    mc.Deterministic(eval=chunkPrediction(i),
                     name='chunk%sPrediction' % i,
                     parents={'gap': gaps[i]},
                     doc='chunk %s prediction' % i)
    for i in range(len(chunks) - 1)
]

noise = mc.Exponential('noise', 1, 1)

observations = [
    mc.Normal('chunk%sObservation' % i,
              predictions[i - 1],
              noise,
              value=chunks[i - 1]['mouth'],
              observed=True) for i in range(1, len(chunks))
]


@mc.deterministic
def mouthChanges(early=mouthFactor[0], late=mouthFactor[1]):
    return late - early

Example #29
def bayes_ttest(groups=None, N=40, show=False):
    """
    Run a Bayesian t-test on sample or true data.
    """
    if groups is None:  # Generate some data
        group1, group2 = gen_data(N=N)
    elif len(groups) != 2:
        print('T-test requires only 2 groups, not %i' % len(groups))
        return None
    else:
        group1, group2 = groups

    pooled = np.concatenate((group1, group2))  # Pooled data
    # Establish priors
    mu1 = pm.Normal("mu_1", mu=pooled.mean(), tau=1.0 / pooled.var() / N)
    mu2 = pm.Normal("mu_2", mu=pooled.mean(), tau=1.0 / pooled.var() / N)
    sig1 = pm.Uniform("sigma_1",
                      lower=pooled.var() / 1000.0,
                      upper=pooled.var() * 1000)
    sig2 = pm.Uniform("sigma_2",
                      lower=pooled.var() / 1000.0,
                      upper=pooled.var() * 1000)
    v = pm.Exponential("nu", beta=1.0 / 29)

    # Set up posterior distribution
    t1 = pm.NoncentralT("t_1",
                        mu=mu1,
                        lam=1.0 / sig1,
                        nu=v,
                        value=group1,
                        observed=True)
    t2 = pm.NoncentralT("t_1",
                        mu=mu2,
                        lam=1.0 / sig2,
                        nu=v,
                        value=group2,
                        observed=True)

    # Generate the model
    model = pm.Model([t1, mu1, sig1, t2, mu2, sig2, v])  # Push priors
    mcmc = pm.MCMC(model)  # Generate MCMC object
    mcmc.sample(40000, 10000, 2)  # Run MCMC sampler # "trace"

    # Get the numerical results
    mus1 = mcmc.trace('mu_1')[:]
    mus2 = mcmc.trace('mu_2')[:]
    sigmas1 = mcmc.trace('sigma_1')[:]
    sigmas2 = mcmc.trace('sigma_2')[:]
    nus = mcmc.trace('nu')[:]
    diff_mus = mus1 - mus2  # Difference in mus
    diff_sigmas = sigmas1 - sigmas2
    normality = np.log(nus)
    effect_size = (mus1 - mus2) / np.sqrt((sigmas1**2 + sigmas2**2) / 2.)
    print('\n   Group 1 mu: %.4f\n   Group 2 mu: %.4f\n   Effect size: %.4f' %
          (mus1.mean(), mus2.mean(), effect_size.mean()))

    if show:  # Plot some basic metrics if desired
        from pymc.Matplot import plot as mcplot
        # mcplot(mcmc) # This plots 5 graphs, only useful as a benchmark.

        # Finally, what can this tell is about the null hypothesis?
        # Split distribution
        fig2 = plt.figure()
        ax2 = fig2.add_subplot(121)
        minx = min(min(mus1), min(mus2))
        maxx = max(max(mus1), max(mus2))
        xs = np.linspace(minx, maxx, 1000)
        gkde1 = stats.gaussian_kde(mus1)
        gkde2 = stats.gaussian_kde(mus2)
        ax2.plot(xs, gkde1(xs), label=r'$\mu_1$')
        ax2.plot(xs, gkde2(xs), label=r'$\mu_2$')
        ax2.set_title(r'$\mu_1$ vs $\mu_2$')
        ax2.legend()

        # Difference of mus
        ax3 = fig2.add_subplot(122)
        minx = min(diff_mus)
        maxx = max(diff_mus)
        xs = np.linspace(minx, maxx, 1000)
        gkde = stats.gaussian_kde(diff_mus)
        ax3.plot(xs, gkde(xs), label=r'$\mu_1-\mu_2$')
        ax3.legend()
        ax3.axvline(0, color='#000000', alpha=0.3, linestyle='--')
        ax3.set_title(r'$\mu_1-\mu_2$')
        plt.show()

    return
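A usage sketch with synthetic groups (means and spreads invented, numpy assumed imported as np):

# Hypothetical usage: compare two synthetic groups.
g1 = np.random.normal(15.0, 2.0, 40)
g2 = np.random.normal(15.3, 2.0, 40)
bayes_ttest(groups=(g1, g2), show=False)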
Example #30
def exponential_beta(n=2):
    with pm.Model() as model:
        pm.Beta("x", 3, 1, size=n, transform=None)
        pm.Exponential("y", 1, size=n, transform=None)
    return model.compute_initial_point(), model, None
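A usage sketch:

# Hypothetical usage: inspect the initial point of the untransformed model.
ip, model, _ = exponential_beta(3)
print(ip)  # dict mapping variable names to initial values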