def main():
    tau = pm.rdiscrete_uniform(0, 80)
    print(tau)

    alpha = 1. / 20.
    lambda_1, lambda_2 = pm.rexponential(alpha, 2)
    print(lambda_1, lambda_2)

    data = np.r_[pm.rpoisson(lambda_1, tau), pm.rpoisson(lambda_2, 80 - tau)]

    def plot_artificial_sms_dataset():
        tau = pm.rdiscrete_uniform(0, 80)
        alpha = 1. / 20.
        lambda_1, lambda_2 = pm.rexponential(alpha, 2)
        data = np.r_[pm.rpoisson(lambda_1, tau),
                     pm.rpoisson(lambda_2, 80 - tau)]
        plt.bar(np.arange(80), data, color="#348ABD")
        plt.bar(tau - 1,
                data[tau - 1],
                color="r",
                label="user behaviour changed")
        plt.xlim(0, 80)

    plt.title("More example of artificial datasets")
    for i in range(1, 5):
        plt.subplot(4, 1, i)
        plot_artificial_sms_dataset()
    plt.show()
Example #2
def plot_artificial_sms_dataset():
    tau = pm.rdiscrete_uniform(0, 80)
    alpha = 1. / 20.
    lambda_1, lambda_2 = pm.rexponential(alpha, 2)
    data = np.r_[pm.rpoisson(lambda_1, tau), pm.rpoisson(lambda_2, 80 - tau)]
    plt.bar(np.arange(80), data, color="#348ABD")
    plt.bar(tau - 1, data[tau - 1], color="r", label="user behaviour changed")
    plt.xlim(0, 80)
def plot_artificial_sms_dataset():
    maxdays = 80
    tau = pm.rdiscrete_uniform( 0, maxdays )
    alpha = 1 / 20.
    lambda_1, lambda_2 = pm.rexponential( alpha, 2 )
    data = np.r_[
            pm.rpoisson( lambda_1, tau ),
            pm.rpoisson( lambda_2, maxdays-tau )]
    plt.bar( np.arange(maxdays), data )
    plt.bar( tau - 1, data[tau-1], color = 'r', label='change point' )
    plt.xlim( 0, 80 )
def D_N(n):
    """
    Approximate D_N, the root-mean-square distance between the
    average of n samples and the true expected value.
    """
    Z = pm.rpoisson(lambda_, size=(n, N_Y))
    average_Z = Z.mean(axis=0)
    return np.sqrt(((average_Z - expected_value) ** 2).mean())
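
# A minimal driver for D_N (a sketch; the names below are assumptions about
# the surrounding script): lambda_ is the Poisson rate, expected_value its
# true mean (equal to lambda_ for a Poisson), and N_Y the number of
# independent replications averaged inside D_N.
import numpy as np
import pymc as pm
import matplotlib.pyplot as plt

lambda_ = 4.5                # assumed Poisson rate
expected_value = lambda_     # E[Z] = lambda_ for a Poisson
N_Y = 250                    # assumed number of replications per n

N_array = np.arange(1000, 50000, 2500)
D_N_results = [D_N(n) for n in N_array]

plt.plot(N_array, D_N_results, lw=2)
plt.xlabel("$N$")
plt.ylabel("expected distance from the true value")
plt.show()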
def data_gen(samples_n=10, tau_start=75, tau_end=100, gamma=0.1):
    alpha = 1.0 / gamma
    for x in range(samples_n):
        tau = pm.rdiscrete_uniform(tau_start, tau_end)
        # lam = pm.rexponential(alpha)
        lam = alpha
        yield pm.rpoisson(lam, tau)
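
# Hypothetical usage of data_gen: each yielded array is one simulated series
# of daily Poisson counts whose length tau is itself random.
for series in data_gen(samples_n=3):
    print(len(series), series.mean())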
Example #6
def plot_artificial_sms_dataset():
    # specify when the user's behaviour (amount of sms received) switches by sampling from DiscreteUniform
    tau = rdiscrete_uniform(0, 80)
    print('τ = {}'.format(tau,))
    alpha = 1. / 20.
    lambda_1, lambda_2 = rexponential(alpha, 2)
    print(lambda_1, lambda_2)

    # for days before tau, represent the user's received SMS count by sampling
    # from Poisson(lambda_1), and for days after tau by sampling from Poisson(lambda_2)
    data = np.r_[rpoisson(lambda_1, tau), rpoisson(lambda_2, 80 - tau)]
    print(data)


    # plot artificial data set
    pyplot.bar(np.arange(80), data, color="#348ABD")
    pyplot.bar(tau - 1, data[tau - 1], color="r", label="user behaviour changed")
    pyplot.xlabel("time (days)")
    pyplot.ylabel("count of sms received")
    pyplot.xlim(0, 80)
    pyplot.legend()
def main():
    tau = pm.rdiscrete_uniform(0, 80)
    print(tau)

    alpha = 1. / 20.
    lambda_1, lambda_2 = pm.rexponential(alpha, 2)
    print(lambda_1, lambda_2)

    data = np.r_[pm.rpoisson(lambda_1, tau), pm.rpoisson(lambda_2, 80 - tau)]

    def plot_artificial_sms_dataset():
        tau = pm.rdiscrete_uniform(0, 80)
        alpha = 1. / 20.
        lambda_1, lambda_2 = pm.rexponential(alpha, 2)
        data = np.r_[pm.rpoisson(lambda_1, tau), pm.rpoisson(lambda_2, 80 - tau)]
        plt.bar(np.arange(80), data, color="#348ABD")
        plt.bar(tau - 1, data[tau - 1], color="r", label="user behaviour changed")
        plt.xlim(0, 80)

    plt.title("More example of artificial datasets")
    for i in range(1, 5):
        plt.subplot(4, 1, i)
        plot_artificial_sms_dataset()
    plt.show()
Example #8
def plot_artificial_sms_dataset():
    #----------------------------------
    # initialize both deterministic and stochastic variables
    tau = pm.rdiscrete_uniform(0, 80)
    print("tau = {0}".format(tau))
    alpha = 1. / 20.
    lambda_1, lambda_2 = pm.rexponential(alpha, 2)
    print("lambda_1 = {0}\nlambda_2 = {1}".format(lambda_1, lambda_2))
    lambda_ = np.r_[lambda_1 * np.ones(tau), lambda_2 * np.ones(80 - tau)]
    print("lambda = \n{0}".format(lambda_))
    data = pm.rpoisson(lambda_)
    print("data = \n{0}".format(data))
    #-----------------------------------
    # plot the artificial dataset
    plt.bar(np.arange(80), data, color="#348ABD")
    plt.bar(tau - 1, data[tau - 1], color="r", label="user behavior changed")
    plt.xlabel("Time(days)")
    plt.ylabel("Text messages received")
    plt.xlim(0, 80)
def main():
    sample_size = 100000
    expected_value = lambda_ = 4.5
    N_samples = range(1, sample_size, 100)

    for k in range(3):
        samples = pm.rpoisson(lambda_, size=sample_size)
        partial_average = [samples[:i].mean() for i in N_samples]
        label = "average of  $n$ samples; seq. %d" % k
        plt.plot(N_samples, partial_average, lw=1.5, label=label)

    plt.plot(N_samples,
             expected_value * np.ones_like(partial_average),
             ls="--",
             label="true expected value",
             c="k")

    plt.ylim(4.35, 4.65)
    plt.title("Convergence of the average of \n random variables to its" +
              "expected value")
    plt.ylabel("average of $n$ samples")
    plt.xlabel("# of samples, $n$")
    plt.legend()
    plt.show()
Example #10
"""
zip.py

Zero-inflated Poisson example using simulated data.
"""
import numpy as np
from pymc import Uniform, Beta, observed, rpoisson, poisson_like

# True parameter values
mu_true = 5
psi_true = 0.75
n = 100

# Simulate some data
data = np.array(
    [rpoisson(mu_true) * (np.random.random() < psi_true) for i in range(n)])

# Uniform prior on Poisson mean
mu = Uniform('mu', 0, 20)
# Beta prior on psi
psi = Beta('psi', alpha=1, beta=1)


@observed(dtype=int, plot=False)
def zip(value=data, mu=mu, psi=psi):
    """ Zero-inflated Poisson likelihood """

    # Initialize likelihood
    like = 0.0

    # Loop over data
    for x in value:
        if not x:
            # a zero can come from the structural-zero or the Poisson component
            like += np.log((1. - psi) + psi * np.exp(-mu))
        else:
            # positive counts must come from the Poisson component
            like += np.log(psi) + poisson_like(x, mu)
    return like
# 
# Model class - analyze variables as a single unit
model = pm.Model( [obs, lambda_, lambda_1, lambda_2, taus] )

#
# Creating new datasets
#
maxdays = 80
tau = pm.rdiscrete_uniform( 0, maxdays )

alpha = 1 / 20.
lambda_1, lambda_2 = pm.rexponential( alpha, 2 )

data = np.r_[
        pm.rpoisson( lambda_1, tau ),
        pm.rpoisson( lambda_2, maxdays-tau )]

plt.bar( np.arange(maxdays), data )
plt.bar( tau - 1, data[tau-1], color = 'r', label='change point' )
plt.xlabel( "Time (days)" )
plt.ylabel( "count" )
plt.title( "Artificial Data" )
plt.xlim( 0, 80 )
plt.legend()
plt.show()

def plot_artificial_sms_dataset():
    maxdays = 80
    tau = pm.rdiscrete_uniform( 0, maxdays )
    alpha = 1 / 20.
def rbivariate_poisson(l_1, l_2, l_3):
    # clip each rate to eps (defined elsewhere) so rpoisson always
    # receives a strictly positive parameter
    l_1 = max(l_1, eps)
    l_2 = max(l_2, eps)
    l_3 = max(l_3, eps)
    # a shared Poisson(l_3) component x makes the two counts correlated
    x = pymc.rpoisson(l_3)
    return [pymc.rpoisson(l_1) + x, pymc.rpoisson(l_2) + x]
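
# A quick empirical check (a sketch, not from the original source): because
# the two counts share x ~ Poisson(l_3), the pair has marginal means
# l_1 + l_3 and l_2 + l_3 and covariance l_3. The rates below are illustrative.
import numpy as np
import pymc

eps = 1e-6  # assumed guard value; the original defines eps elsewhere
draws = np.array([rbivariate_poisson(2.0, 3.0, 1.5) for _ in range(20000)])
print(draws.mean(axis=0))   # approximately [3.5, 4.5]
print(np.cov(draws.T))      # off-diagonal entries approximately 1.5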
Example #13
def p_pred(pi=pi, n=n_nonzero):
    # clip the Poisson rate away from zero so rpoisson never receives an
    # invalid (non-positive) parameter, then rescale counts back to rates
    return mc.rpoisson((pi * n).clip(1.0e-9, pl.inf)) / (1.0 * n)
Example #14
#!/usr/bin/env python
"""
zip.py

Zero-inflated Poisson example using simulated data.
"""
import numpy as np
from pymc import Uniform, Beta, observed, rpoisson, poisson_like

# True parameter values
mu_true = 5
psi_true = 0.75
n = 100

# Simulate some data
data = np.array([rpoisson(mu_true) * (np.random.random() < psi_true)
                 for i in range(n)])

# Uniform prior on Poisson mean
mu = Uniform('mu', 0, 20)
# Beta prior on psi
psi = Beta('psi', alpha=1, beta=1)


@observed(dtype=int, plot=False)
def zip(value=data, mu=mu, psi=psi):
    """ Zero-inflated Poisson likelihood """

    # Initialize likelihood
    like = 0.0

    # Loop over data
    for x in value:
        if not x:
            # a zero can come from the structural-zero or the Poisson component
            like += np.log((1. - psi) + psi * np.exp(-mu))
        else:
            # positive counts must come from the Poisson component
            like += np.log(psi) + poisson_like(x, mu)
    return like
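
# A minimal sketch of fitting the model above with PyMC 2's MCMC sampler;
# the iteration and burn-in settings are illustrative, not from the original.
import pymc

M = pymc.MCMC([mu, psi, zip])
M.sample(iter=20000, burn=10000)
print(mu.stats()['mean'], psi.stats()['mean'])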
import numpy as np
import pymc as pm
from matplotlib import pyplot as plt

alpha = 1. / 20.
lambda_ = pm.rexponential(alpha)
print(lambda_)

data = np.r_[pm.rpoisson(lambda_, 80)]

np.savetxt("txtdata_sim.csv", data)

plt.bar(np.arange(80), data, color="#348ABD")
plt.xlabel("Time (days)")
plt.ylabel("count of text-msgs received")
plt.title("Artificial dataset")
plt.xlim(0, 80)

plt.show()

Example #16
def p_pred(pi=pi, n=n_nonzero):
    return mc.rpoisson((pi * n).clip(1.e-9, pl.inf)) / (1. * n)
Example #17
####################################################
#### Including observations in the model ####
figsize = (12.5, 4)
plt.figure(figsize=figsize)
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['figure.dpi'] = 300
samples = [ld1.random() for i in range(20000)]
plt.hist(samples, bins=70, density=True, histtype="stepfilled")
plt.xlim(0, 8)
plt.show()

# fixed values
data = np.array([10, 25, 15, 20, 35])
obs = pm.Poisson("obs", lambda_, value=data, observed=True)
obs.value

##################
##### Modeling #####

tau = pm.rdiscrete_uniform(0, 80)
alpha = 1. / 20.
lambda_1, lambda_2 = pm.rexponential(alpha, 2)
lambda_ = np.r_[lambda_1 * np.ones(tau), lambda_2 * np.ones(80 - tau)]
data = pm.rpoisson(lambda_)
plt.bar(np.arange(80), data, color="#348ABD")
plt.bar(tau - 1, data[tau - 1], color='r', label='behavior changed')
plt.xlabel("time")
plt.ylabel("message")
plt.xlim(0, 80)
plt.legend()
Example #18
def pred(pi=pi):
    return mc.rpoisson(pi * n_pred) / float(n_pred)
Example #19
def pred(pi=pi):
    return mc.rpoisson(pi*n_pred) / float(n_pred)
import numpy as np
import pymc as pm
from matplotlib import pyplot as plt

tau = pm.rdiscrete_uniform(0, 80)
print(tau)

alpha = 1. / 20.
lambda_1, lambda_2 = pm.rexponential(alpha, 2)
print(lambda_1, lambda_2)

data = np.r_[pm.rpoisson(lambda_1, tau), pm.rpoisson(lambda_2, 80 - tau)]

plt.bar(np.arange(80), data, color="#348ABD")
plt.bar(tau - 1, data[tau - 1], color="r", label="user behaviour changed")
plt.xlabel("Time (days)")
plt.ylabel("count of text-msgs received")
plt.title("Artificial dataset")
plt.xlim(0, 80)
plt.legend();

plt.show()

Example #21
def validate_rate_model(rate_type='neg_binom', data_type='epilepsy', replicate=0):
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)
    
    # load data
    model = dismod3.data.load('/home/j/Project/dismod/output/dm-32377/')

    data = model.get_data('p')

    #data = data.ix[:20, :]
    
    # replace data with synthetic data if requested
    if data_type == 'epilepsy':
        # no replacement needed
        pass

    elif data_type == 'schiz':
        import pandas as pd
        data = pd.read_csv('/homes/abie/gbd_dev/gbd/tests/schiz.csv')
    
    elif data_type == 'binom':
        N = 1.e6
        data['effective_sample_size'] = N
        mu = data['value'].mean()
        data['value'] = mc.rbinomial(N, mu, size=len(data.index)) / N

    elif data_type == 'poisson':
        N = 1.e6
        data['effective_sample_size'] = N
        mu = data['value'].mean()
        data['value'] = mc.rpoisson(N*mu, size=len(data.index)) / N

    elif data_type == 'normal':
        mu = data['value'].mean()
        sigma = .125*mu
        data['standard_error'] = sigma
        data['value'] = mc.rnormal(mu, sigma**-2, size=len(data.index))

    elif data_type == 'log_normal':
        mu = data['value'].mean()
        sigma = .25
        data['standard_error'] = sigma*mu
        data['value'] = pl.exp(mc.rnormal(pl.log(mu), sigma**-2, size=len(data.index)))

    else:
        raise TypeError('Unknown data type "%s"' % data_type)

    # sample prevalence data
    i_test = mc.rbernoulli(.25, size=len(data.index))
    i_nan = pl.isnan(data['effective_sample_size'])
    
    data['lower_ci'] = pl.nan
    data['upper_ci'] = pl.nan
    data.ix[i_nan, 'effective_sample_size'] = 0.
    data['standard_error'] = pl.sqrt(data['value']*(1-data['value'])) / data['effective_sample_size']
    data.ix[pl.isnan(data['standard_error']), 'standard_error'] = pl.inf

    data['standard_error'][i_test] = pl.inf
    data['effective_sample_size'][i_test] = 0.

    data['value'] = pl.maximum(data['value'], 1.e-12)

    model.input_data = data


    # create model
    # TODO: set parameters in model.parameters['p'] dict
    # then have simple method to create age specific rate model
    #model.parameters['p'] = ...
    #model.vars += dismod3.ism.age_specific_rate(model, 'p')

    model.parameters['p']['parameter_age_mesh'] = [0,100]
    model.parameters['p']['heterogeneity'] = 'Very'
    model.vars['p'] = dismod3.data_model.data_model(
        'p', model, 'p',
        'all', 'total', 'all',
        None, None, None,
        rate_type=rate_type,
        interpolation_method='zero',
        include_covariates=False)
    
    # add upper bound on sigma in log normal model to help convergence
    #if rate_type == 'log_normal':
    #    model.vars['p']['sigma'].parents['upper'] = 1.5

    # add upper bound on sigma, zeta in offset log normal
    #if rate_type == 'offset_log_normal':
    #    model.vars['p']['sigma'].parents['upper'] = .1
    #    model.vars['p']['p_zeta'].value = 5.e-9
    #    model.vars['p']['p_zeta'].parents['upper'] = 1.e-8

    # fit model
    dismod3.fit.fit_asr(model, 'p', iter=20000, thin=10, burn=10000)
    #dismod3.fit.fit_asr(model, 'p', iter=100, thin=1, burn=0)

    # compare estimate to hold-out
    data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    data['lb_pred'] = model.vars['p']['p_pred'].stats()['95% HPD interval'][:,0]
    data['ub_pred'] = model.vars['p']['p_pred'].stats()['95% HPD interval'][:,1]

    import data_simulation
    model.test = data[i_test]
    data = model.test
    data['true'] = data['value']
    data_simulation.add_quality_metrics(data)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'test')
    data_simulation.finalize_results(model)


    return model
plt.title("Prior distribution for $\lambda_1$")
plt.xlim(0, 8);

# Take the case of the SMS data from the previous chapter. Knowing what we do
# about parent and child variables, and taking an omniscient view of the
# modeling procedure, we can work backwards to create data that mimics how we
# expect the real data to have been generated, i.e.:

tau = pm.rdiscrete_uniform(0, 80)
print( tau )

alpha = 1. / 20.
lambda_1, lambda_2 = pm.rexponential(alpha, 2)
print( lambda_1, lambda_2)

data = np.r_[pm.rpoisson(lambda_1, tau), pm.rpoisson(lambda_2, 80 - tau)]

# Plot the distribution

plt.bar(np.arange(80), data, color="#348ABD")
plt.bar(tau - 1, data[tau - 1], color="r", label="user behaviour changed")
plt.xlabel("Time (days)")
plt.ylabel("count of text-msgs received")
plt.title("Artificial dataset")
plt.xlim(0, 80)
plt.legend();

# This becomes important later, when we check whether our inference was
# correct. Wrapped in a function, the generative process looks like this:
def disasters_sim(early_mean=early_mean,
                  late_mean=late_mean,
                  switchpoint=switchpoint):
    """Coal mining disasters sampled from the posterior predictive distribution"""
    return concatenate((pm.rpoisson(early_mean, size=switchpoint), pm.rpoisson(late_mean, size=n - switchpoint)))
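
# One way (a sketch, not from the original source) to track these simulated
# series during sampling is to wrap the function as a PyMC 2 deterministic
# node, assuming early_mean, late_mean, and switchpoint are the model's
# stochastics and n is the length of the observed series, as above.
import pymc as pm

@pm.deterministic
def disasters_ppc(early_mean=early_mean,
                  late_mean=late_mean,
                  switchpoint=switchpoint):
    """Posterior predictive draws, recomputed at each MCMC iteration."""
    return disasters_sim(early_mean, late_mean, switchpoint)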
Example #24
def rbivariate_poisson(l_1, l_2, l_3):
    l_1 = max(l_1, eps)
    l_2 = max(l_2, eps)
    l_3 = max(l_3, eps)
    x = pymc.rpoisson(l_3)
    return [pymc.rpoisson(l_1) + x, pymc.rpoisson(l_2) + x]
Example #25
def disasters_sim(early_mean=early_mean,
                  late_mean=late_mean,
                  switchpoint=switchpoint):
    """Coal mining disasters sampled from the posterior predictive distribution"""
    return concatenate((pm.rpoisson(early_mean, size=switchpoint),
                        pm.rpoisson(late_mean, size=n - switchpoint)))
Example #27
def validate_rate_model(rate_type='neg_binom',
                        data_type='epilepsy',
                        replicate=0):
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)

    # load data
    model = dismod3.data.load('/home/j/Project/dismod/output/dm-32377/')

    data = model.get_data('p')

    #data = data.ix[:20, :]

    # replace data with synthetic data if requested
    if data_type == 'epilepsy':
        # no replacement needed
        pass

    elif data_type == 'schiz':
        import pandas as pd
        data = pd.read_csv('/homes/abie/gbd_dev/gbd/tests/schiz.csv')

    elif data_type == 'binom':
        N = 1.e6
        data['effective_sample_size'] = N
        mu = data['value'].mean()
        data['value'] = mc.rbinomial(N, mu, size=len(data.index)) / N

    elif data_type == 'poisson':
        N = 1.e6
        data['effective_sample_size'] = N
        mu = data['value'].mean()
        data['value'] = mc.rpoisson(N * mu, size=len(data.index)) / N

    elif data_type == 'normal':
        mu = data['value'].mean()
        sigma = .125 * mu
        data['standard_error'] = sigma
        data['value'] = mc.rnormal(mu, sigma**-2, size=len(data.index))

    elif data_type == 'log_normal':
        mu = data['value'].mean()
        sigma = .25
        data['standard_error'] = sigma * mu
        data['value'] = pl.exp(
            mc.rnormal(pl.log(mu), sigma**-2, size=len(data.index)))

    else:
        raise TypeError('Unknown data type "%s"' % data_type)

    # sample prevalence data
    i_test = mc.rbernoulli(.25, size=len(data.index))
    i_nan = pl.isnan(data['effective_sample_size'])

    data['lower_ci'] = pl.nan
    data['upper_ci'] = pl.nan
    data.ix[i_nan, 'effective_sample_size'] = 0.
    data['standard_error'] = pl.sqrt(
        data['value'] * (1 - data['value'])) / data['effective_sample_size']
    data.ix[pl.isnan(data['standard_error']), 'standard_error'] = pl.inf

    data['standard_error'][i_test] = pl.inf
    data['effective_sample_size'][i_test] = 0.

    data['value'] = pl.maximum(data['value'], 1.e-12)

    model.input_data = data

    # create model
    # TODO: set parameters in model.parameters['p'] dict
    # then have simple method to create age specific rate model
    #model.parameters['p'] = ...
    #model.vars += dismod3.ism.age_specific_rate(model, 'p')

    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.parameters['p']['heterogeneity'] = 'Very'
    model.vars['p'] = dismod3.data_model.data_model(
        'p',
        model,
        'p',
        'all',
        'total',
        'all',
        None,
        None,
        None,
        rate_type=rate_type,
        interpolation_method='zero',
        include_covariates=False)

    # add upper bound on sigma in log normal model to help convergence
    #if rate_type == 'log_normal':
    #    model.vars['p']['sigma'].parents['upper'] = 1.5

    # add upper bound on sigma, zeta in offset log normal
    #if rate_type == 'offset_log_normal':
    #    model.vars['p']['sigma'].parents['upper'] = .1
    #    model.vars['p']['p_zeta'].value = 5.e-9
    #    model.vars['p']['p_zeta'].parents['upper'] = 1.e-8

    # fit model
    dismod3.fit.fit_asr(model, 'p', iter=20000, thin=10, burn=10000)
    #dismod3.fit.fit_asr(model, 'p', iter=100, thin=1, burn=0)

    # compare estimate to hold-out
    data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    data['lb_pred'] = model.vars['p']['p_pred'].stats()['95% HPD interval'][:,
                                                                            0]
    data['ub_pred'] = model.vars['p']['p_pred'].stats()['95% HPD interval'][:,
                                                                            1]

    import data_simulation
    model.test = data[i_test]
    data = model.test
    data['true'] = data['value']
    data_simulation.add_quality_metrics(data)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'test')
    data_simulation.finalize_results(model)

    return model