Example #1
 def test_zeroinflatedpoisson(self):
     with pm.Model():
         theta = pm.Beta("theta", alpha=1, beta=1)
         psi = pm.HalfNormal("psi", sd=1)
         pm.ZeroInflatedPoisson("suppliers", psi=psi, theta=theta, shape=20)
         gen_data = pm.sample_prior_predictive(samples=5000)
         assert gen_data["theta"].shape == (5000, )
         assert gen_data["psi"].shape == (5000, )
         assert gen_data["suppliers"].shape == (5000, 20)
Example #2
def aevb_model():
    with pm.Model() as model:
        pm.HalfNormal('x', shape=(2, ), total_size=5)
        pm.Normal('y', shape=(2, ))
    x = model.x
    y = model.y
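    # Shared mu/rho tensors are returned as the 'replace' parameters for the local (AEVB) approximation of x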
    mu = theano.shared(x.init_value)
    rho = theano.shared(np.zeros_like(x.init_value))
    return {'model': model, 'y': y, 'x': x, 'replace': dict(mu=mu, rho=rho)}
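Example #5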
    def Metropolis_Hastings(self):
        # True parameter values
        a, b, c = 1, 0, 2
        sigma = 0.01

        # Size of dataset
        size = 100

        # Predictor variable
        X1 = np.random.randn(size)

        # Simulate outcome variable
        Y_obs = a * X1**2 + b * X1 + c + np.random.randn(size) * sigma
        # uni= uniform.rvs(size=size)
        fig, axes = plt.subplots(1, 2, sharex=True, figsize=(10, 4))
        axes[0].scatter(X1, Y_obs)
        axes[0].set_ylabel('Y')
        axes[0].set_xlabel('X1')
        basic_model = pm.Model()
        plt.show()

        with basic_model:
            # Priors for unknown model parameters
            a = pm.Uniform('a',
                           lower=self.sampler['a']['range_min'],
                           upper=self.sampler['a']['range_max'])
            b = pm.Uniform('b',
                           lower=self.sampler['b']['range_min'],
                           upper=self.sampler['b']['range_max'])
            c = pm.Uniform('c',
                           lower=self.sampler['c']['range_min'],
                           upper=self.sampler['c']['range_max'])

            sigma = pm.HalfNormal('sigma', sd=1)

            # Expected value of outcome
            mu = a * X1**2 + b * X1 + c

            # Likelihood (sampling distribution) of observations
            Y_posterior = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=Y_obs)

            trace = pm.sample(100000, pm.Metropolis())

            # trace = pm.sample(5000)

            # # obtain starting values via MAP
            # start = pm.find_MAP(model=basic_model)
            #
            # # instantiate sampler
            # step = pm.Slice()
            #
            # # draw 5000 posterior samples
            # trace = pm.sample(5000, step=step, start=start)
        _ = pm.traceplot(trace)
        # plt.plot(trace['a'])
        plt.show()
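Example #6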
 def train(self, niter = 1000, random_seed=123, tune=500, cores = 4):
     ### model training 
     with self.scallop_model:
         # hyperparameter priors
         l = pm.InverseGamma("l", 5, 5, shape = self.dim)
         sigma_f = pm.HalfNormal("sigma_f", 1)
         
         # covariance function and marginal GP
         K = sigma_f ** 2 * pm.gp.cov.ExpQuad(self.dim, ls = l)
          
         self.gp = pm.gp.Marginal(cov_func=K)
 
         # marginal likelihood
         sigma_n = pm.HalfNormal("sigma_n",1)
         tot_catch = self.gp.marginal_likelihood("tot_catch", X = self.x, y = self.y, noise = sigma_n)
     
         # model fitting
         self.trace = pm.sample(niter, random_seed=random_seed, progressbar=True, tune=tune, cores = cores)
Example #7
def model_ggl(locations, samples, centers, cc):
    basic_model = pm.Model()
    with basic_model:
        # Priors for unknown model parameters
        s1 = pm.HalfNormal('s1', sd=20)
        m1 = centers[0]

        s2 = pm.Normal('s2', sd=20)
        m2 = centers[1]

        m3 = centers[2]
        s3 = pm.HalfNormal('s3', sd=20)

        p_x = gpdf(locations[0], m1, s1)
        p_y = gpdf(locations[1], m2, s2)
        p_theta = lpdf(locations[2], m3, s3)

        sigma = pm.HalfNormal('sigma', sd=1)

        # Expected value of outcome
        mu = cc * p_x * p_y * p_theta

        # Likelihood (sampling distribution) of observations
        Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=samples)
        trace = pm.sample(5000, njobs=4)

    pm.summary(trace)
    # values
    S1 = np.mean(trace['s1'])
    M1 = centers[0]

    S2 = np.mean(trace['s2'])
    M2 = centers[1]

    M3 = centers[2]
    S3 = np.mean(trace['s3'])

    p_x = gpdf(locations[0], M1, S1).eval()
    p_y = gpdf(locations[1], M2, S2).eval()
    p_theta = lpdf(locations[2], M3, S3).eval()
    mu = cc * p_x * p_y * p_theta
    Err = np.sum((samples - mu)**2)
    print(Err)
Example #8
 def test_start(self):
     with pm.Model() as model:
         a = pm.Poisson("a", 5)
         b = pm.HalfNormal("b", 10)
         y = pm.Normal("y", a, b, observed=[1, 2, 3, 4])
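         # Per-draw starting values for SMC; 'b_log__' addresses the log-transformed HalfNormal variable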
         start = {
             "a": np.random.poisson(5, size=500),
             "b_log__": np.abs(np.random.normal(0, 10, size=500)),
         }
         trace = pm.sample_smc(500, start=start)
Example #9
def test_init_jitter(testval, jitter_max_retries, expectation):
    with pm.Model() as m:
        pm.HalfNormal("x", transform=None, testval=testval)

    with expectation:
        # Starting value is negative (invalid) when np.random.rand returns 0 (jitter = -1)
        # and positive (valid) when it returns 1 (jitter = 1)
        with mock.patch("numpy.random.rand", side_effect=[0, 0, 0, 1, 0]):
            start = pm.sampling._init_jitter(m, chains=1, jitter_max_retries=jitter_max_retries)
            pm.util.check_start_vals(start, m)
Example #10
def solve_vi(X, Y, initial=None, batch_size=100):
    X_t = th.shared(X)  #pm.Minibatch(X,batch_size=batch_size,)
    Y_t = th.shared(Y)  #pm.Minibatch(Y,batch_size=batch_size)
    #    sigma_Y_t = th.shared(sigma_Y)#pm.Minibatch(sigma_Y,batch_size=batch_size)

    #initial=(0.3,0.5,2.)

    dx = np.max(X) - np.min(X)
    dy = np.max(Y) - np.min(Y)

    with pm.Model() as model:
        sigma_K = pm.HalfNormal('sigma_K', sd=dy / 3.)
        l_space = pm.HalfNormal('l_space', sd=dx / 3., testval=1.)
        cov_func = sigma_K**2 * pm.gp.cov.ExpQuad(
            2, active_dims=[0, 1], ls=l_space)
        gp = pm.gp.Marginal(cov_func=cov_func)
        eps = pm.Uniform('eps', 0.0, np.std(Y))
        y1 = gp.marginal_likelihood('y1', X_t, Y_t, eps)
        #y2 = gp.marginal_likelihood('y2',X[:100,:],Y[:100],eps*sigma_Y[:100])
        initial = initial or pm.find_MAP()
        approx = pm.fit(
            1000,
            start=initial,
            method='advi',
            callbacks=[
                pm.callbacks.CheckParametersConvergence(tolerance=1e-4)
            ])
        #         plt.plot(approx.hist)
        #         plt.show()
        means = approx.bij.rmap(approx.mean.eval())
        #         print(means)
        #         sds = approx.bij.rmap(approx.std.eval())
        #         print(sds)
        df = approx.sample(10000)
        summary_means = pm.summary(df)['mean']
        p = {k: summary_means[k] for k in summary_means.keys()}


#         pm.traceplot(df,lines=p)
#         plt.show()
    return p
Example #11
def draws_from_StudentT(data, uncertainties):
    #pymc3 model
    with pm.Model() as model:
        sig_prior = pm.HalfNormal('sig', 50)
        vel_prior = pm.Normal('vel', 0.0, 50.0)
        lognu_prior = pm.Uniform('lognu', -2.0, np.log(20))
        nu_prior = pm.Deterministic('nu', pm.math.exp(lognu_prior))

        vel_tracers = pm.Normal('vel-tracers',
                                mu=vel_prior,
                                sd=uncertainties,
                                shape=len(data))

        measurements = pm.StudentT('measurements',
                                   nu=nu_prior,
                                   mu=vel_tracers,
                                   sd=sig_prior,
                                   observed=data)
        trace = pm.sample(2000, tune=10000)

    #Plot these traces
    pm.traceplot(trace)
    plt.savefig('Plots/studentT_traceplot.pdf')
    plt.savefig('Plots/studentT_traceplot.jpg')
    #Make a KDE approximation to the sigma posterior
    xx = np.linspace(0.0, 30.0, 1000)
    kde_approximation = stats.gaussian_kde(trace['sig'])

    #Plot things
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(xx, kde_approximation(xx), c='r', linewidth=3.0)
    ax.hist(trace['sig'],
            100,
            facecolor='0.8',
            edgecolor='k',
            histtype='stepfilled',
            normed=True,
            linewidth=2.0)

    ax.axvline(xx[np.argmax(kde_approximation(xx))],
               c='k',
               linestyle='dashed',
               linewidth=2.0)

    ax.set_xlim([0.0, 30.0])
    ax.set_ylabel(r'PDF')
    ax.set_yticks([])
    #ax.tick_params(axis='both', which='major', labelsize=15)
    ax.set_xlabel(r'$\sigma$ (kms$^{-1}$)')

    fig.tight_layout()
    fig.savefig('Plots/studentT_pdf.pdf')
    fig.savefig('Plots/studentT_pdf.jpg')

    return trace, kde_approximation
Example #12
    def fit(self, cases_past, deaths_curr, 
        quantiles=[10, 20, 30, 40, 50, 60, 70, 80, 90]):
        '''
        Use a GP to find the relationship between cases in the past and 
        deaths today.
        '''
        # If not enough data, return all zeros.
        if len(cases_past) < 5:
            self.quantile_gp = []
            for q in quantiles:
                self.quantile_gp.append(degenerate)
            return self.quantile_gp

        cases_past2, deaths_curr2 = self.scale_data(cases_past, deaths_curr)

        # First, we do a simple linear fit, and use this as our mean prior.
        mfit = curve_fit(linear, cases_past2, deaths_curr2)
        slope = mfit[0]

        with pm.Model() as gp_model:

            ρ = pm.HalfCauchy('ρ', 5)
            η = pm.HalfCauchy('η', 5)
            
            M = pm.gp.mean.Linear(coeffs=slope)
            K = (η**2) * pm.gp.cov.ExpQuad(1, ρ)
            
            σ = pm.HalfNormal('σ', 50)
                        
            deaths_gp = pm.gp.Marginal(mean_func=M, cov_func=K)
            deaths_gp.marginal_likelihood('deaths', X=cases_past2.reshape(-1,1),
                                   y=deaths_curr2, noise=σ)

        with gp_model:
            gp_trace = pm.sample(self.draws, tune=self.tune, cores=1,
                random_seed=random.randint(30, 80))

        X_pred = np.arange(0, np.max(cases_past2)*5)
        with gp_model:
            deaths_pred = deaths_gp.conditional("deaths_pred_noise", 
                X_pred.reshape(-1, 1), pred_noise=True)
            gp_samples = pm.sample_posterior_predictive(gp_trace, 
                vars=[deaths_pred], samples=self.samples)

        quantile_gp = [np.percentile(
            gp_samples['deaths_pred_noise'] * self.scale_factor, q, axis=0) 
                for q in quantiles]

        # We interpolate our predicted function
        X_pred2 = X_pred * self.scale_factor
        self.quantile_gp = []
        for i in range(len(quantiles)):
            f = interp1d(X_pred2, quantile_gp[i], bounds_error=False,
                fill_value='extrapolate')
            self.quantile_gp.append(f)
Example #13
def test_pairplot():
    with pm.Model() as model:
        a = pm.Normal('a', shape=2)
        c = pm.HalfNormal('c', shape=2)
        b = pm.Normal('b', a, c, shape=2)
        d = pm.Normal('d', 100, 1)
        trace = pm.sample(1000)

    pairplot(trace)
    pairplot(trace, hexbin=True, plot_transformed=True)
    pairplot(trace, sub_varnames=['a_0', 'c_0', 'b_1'])
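Example #14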
def test_exec_nuts_init(method):
    with pm.Model() as model:
        pm.Normal('a', mu=0, sd=1, shape=2)
        pm.HalfNormal('b', sd=1)
    with model:
        start, _ = pm.init_nuts(init=method, n_init=10)
        assert isinstance(start, dict)
        start, _ = pm.init_nuts(init=method, n_init=10, njobs=2)
        assert isinstance(start, list)
        assert len(start) == 2
        assert isinstance(start[0], dict)
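Example #15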
def fixture_model():
    with pm.Model() as model:
        n = 5
        dim = 4
        with pm.Model():
            cov = pm.InverseGamma("cov", alpha=1, beta=1)
            x = pm.Normal("x", mu=np.ones((dim,)), sigma=pm.math.sqrt(cov), shape=(n, dim))
            eps = pm.HalfNormal("eps", np.ones((n, 1)), shape=(n, dim))
            mu = pm.Deterministic("mu", at.sum(x + eps, axis=-1))
            y = pm.Normal("y", mu=mu, sigma=1, shape=(n,))
    return model, [cov, x, eps, y]
Example #16
    def run(self):
        coloredlogs.install()
        logging.info('Fetching some data')
        with dask.set_options(get=dask.multiprocessing.get):
            data = dask.dataframe.read_csv(
                '/tmp/split_data/{}/train/*.csv'.format(self.rand_round))
            total_size = data.week_num.count().compute()
            nose.tools.assert_greater(total_size, 100, 'Not enough data!')

            unique_products = data['product_id'].unique().compute().astype(
                np.uint16)
            sample = data.head()
        logging.info('Got it!')

        product_id_var = theano.shared(value=sample.product_id.astype(
            'category', categories=unique_products).cat.codes.values,
                                       name='product_id_var')
        adjusted_demand_var = theano.shared(
            value=sample.adjusted_demand.values, name='adjusted_demand_var')

        model = pm.Model()
        with model:
            product_category = pm.Uniform('cat',
                                          0,
                                          1,
                                          shape=(unique_products.shape[0], 5))
            product_vecs = pm.Normal('vecs', 0, 100, shape=5)
            adjusted_demand_variance = pm.HalfNormal('demand_variance', 10)
            product_pred = T.dot(product_category[product_id_var],
                                 product_vecs)

            adjusted_demand = pm.Normal('adjusted_demand',
                                        product_pred,
                                        adjusted_demand_variance,
                                        observed=adjusted_demand_var)

            minibatches = map(self.expand_batch,
                              self.minibatches(unique_products))

            v_params = pm.variational.advi_minibatch(
                n=100,
                minibatch_tensors=[product_id_var, adjusted_demand_var],
                minibatch_RVs=[adjusted_demand],
                minibatches=minibatches,
                total_size=total_size,
                n_mcsamples=5,
                verbose=True)
            trace = pm.variational.sample_vp(v_params, draws=500)
            print(pm.summary(trace))

        res = trace[-100:]['cat'].mean(0)
        self.output().makedirs()
        pandas.DataFrame(res, index=unique_products.values).to_msgpack(
            self.output().path)
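Example #17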
    def _build_model(self, x, y):
        """
        """

        if self.model is not None:
            raise Exception("Overwriting previous fit.")

        input_dim = x.shape[1]
        output_dim = y.shape[1]
        ann_input = theano.shared(x)
        ann_output = theano.shared(y)

        n_hidden = 3
        with pm.Model() as neural_network:
            # Weights from input to hidden layer
            weights_in_1 = pm.Normal('w_in_1',
                                     0,
                                     sd=1,
                                     shape=(input_dim,
                                            n_hidden))  #, testval=init_1)
            weights_b_1 = pm.Normal('w_b_1', 0, sd=1,
                                    shape=(n_hidden))  #, testval=init_b_1)

            # Weights from 1st to 2nd layer
            weights_1_2 = pm.Normal('w_1_2',
                                    0,
                                    sd=1,
                                    shape=(n_hidden,
                                           n_hidden))  #, testval=init_2)
            weights_b_2 = pm.Normal('w_b_2', 0, sd=1,
                                    shape=(n_hidden))  #, testval=init_b_2)

            # Weights from hidden layer to output
            weights_2_out = pm.Normal('w_2_out',
                                      0,
                                      sd=1,
                                      shape=(n_hidden,
                                             output_dim))  #, testval=init_out)
            weights_b_out = pm.Normal(
                'w_b_out', 0, sd=1, shape=(output_dim))  #, testval=init_b_out)

            # Build neural-network using tanh activation function
            act_1 = pm.math.tanh(
                pm.math.dot(ann_input, weights_in_1) + weights_b_1)
            act_2 = pm.math.tanh(pm.math.dot(act_1, weights_1_2) + weights_b_2)
            act_out = pm.math.dot(act_2, weights_2_out) + weights_b_out

            variance = pm.HalfNormal('uncertainty', sigma=3.0)
            out = pm.Normal('out',
                            mu=act_out,
                            sigma=variance,
                            observed=ann_output)

        self.model = neural_network
Example #18
def bayesTest(mocktable, outname):
    import pymc3 as pymc
    from pymc3.backends import SQLite
    from collections import Counter

    idx = {}
    expr_vector = {}
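    # Parse the expression table: map each sample name to its column and collect per-sample expression vectors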
    for line in open(mocktable):
        if line.startswith('Gene'):
            header = line.strip().split('\t')
            for i in range(len(header)):
                if header[i] != 'Gene':
                    idx[header[i]] = i
        else:
            vals = line.strip().split('\t')
            gene = vals[0]
            for sample in idx:
                if sample not in expr_vector:
                    expr_vector[sample] = [float(vals[idx[sample]])]
                else:
                    expr_vector[sample].append(float(vals[idx[sample]]))
    for sample in expr_vector:
        if sample == 'Neurons':
            neuro = expr_vector[sample]
        if sample == 'Astrocytes':
            astro = expr_vector[sample]
        if sample == 'Oligodendrocytes':
            oligo = expr_vector[sample]
        if sample == 'Sample1':
            one = expr_vector[sample]
        if sample == 'Sample2':
            two = expr_vector[sample]
        if sample == 'Sample3':
            three = expr_vector[sample]
    samples = [one, two, three]
    for s in samples:
        with pymc.Model() as model:
            beta = pymc.Dirichlet('beta', a=np.array([1.0, 1.0, 1.0]))
            sigma = pymc.HalfNormal('sigma', sd=1)
            y_est = beta[0] * neuro + beta[1] * astro + beta[2] * oligo
            likelihood = pymc.Normal('y', mu=y_est, sd=sigma, observed=s)
            trace = pymc.sample(1000, random_seed=123, progressbar=True)
            s = pymc.summary(trace)
            #print trace['beta'] #matrix with 3 columns and 1000 rows, need to convert this and do math
            neurons = trace['beta'][:, 0]
            astrocytes = trace['beta'][:, 1]
            oligodendrocytes = trace['beta'][:, 2]
            n_avg = np.mean(neurons)
            n_med = np.median(neurons)
            data = Counter(neurons)
            data.most_common()
            n_mode = data.most_common(1)[0][0]
            print(n_avg, n_med, n_mode)
Example #19
    def test_variable_type(self):
        with pm.Model() as model:
            mu = pm.HalfNormal("mu", 1)
            a = pm.Normal("a", mu=mu, sigma=2, observed=np.array([1, 2]))
            b = pm.Poisson("b", mu, observed=np.array([1, 2]))
            trace = pm.sample()

        with model:
            ppc = pm.sample_posterior_predictive(trace, samples=1)
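            # Posterior predictive draws keep each likelihood's dtype: Normal gives floats, Poisson gives integers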
            assert ppc["a"].dtype.kind == "f"
            assert ppc["b"].dtype.kind == "i"
Example #20
    def ruqaxik_pymc(ri, rnjml):
        if rnjml is None:
            return ri.tzij

        rjchnm = pm.HalfNormal(name='sg_' + str(ri),
                               sd=10)  # rujechunem chijun
        b = pm.Normal(name='junelïk_' + str(ri), mu=0, sd=100)
        return pm.Normal(name=str(ri),
                         mu=rnjml + b,
                         sd=rjchnm,
                         observed=ri.tzij)
Example #21
def case_count_model_us_states(df):

    # Normalize inputs in a way that is sensible:

    # People per test: normalize to South Korea
    # assuming S.K. testing is "saturated"
    ppt_sk = np.log10(51500000. / 250000)
    df['people_per_test_normalized'] = (
        np.log10(df['people_per_test_7_days_ago']) - ppt_sk)

    n = len(df)

    # For each country, let:
    # c_obs = number of observed cases
    c_obs = df['num_pos_7_days_ago'].values
    # c_star = number of true cases

    # d_obs = number of observed deaths
    d_obs = df[['death', 'num_pos_7_days_ago']].min(axis=1).values
    # people per test
    people_per_test = df['people_per_test_normalized'].values

    covid_case_count_model = pm.Model()

    with covid_case_count_model:

        # Priors:
        mu_0 = pm.Beta('mu_0', alpha=1, beta=100, testval=0.01)
        # sig_0 = pm.Uniform('sig_0', lower=0.0, upper=mu_0 * (1 - mu_0))
        alpha = pm.Bound(pm.Normal, lower=0.0)(
            'alpha', mu=8, sigma=3, shape=1)
        beta = pm.Bound(pm.Normal, upper=0.0)(
            'beta', mu=-1, sigma=1, shape=1)
        # beta = pm.Normal('beta', mu=0, sigma=1, shape=3)
        sigma = pm.HalfNormal('sigma', sigma=0.5, testval=0.1)
        # sigma_1 = pm.HalfNormal('sigma_1', sigma=2, testval=0.1)

        # Model probability of case under-reporting as logistic regression:
        mu_model_logit = alpha + beta * people_per_test
        tau_logit = pm.Normal('tau_logit',
                              mu=mu_model_logit,
                              sigma=sigma,
                              shape=n)
        tau = np.exp(tau_logit) / (np.exp(tau_logit) + 1)
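        # tau is the modeled reporting rate; dividing observed cases by it gives the implied true case count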

        c_star = c_obs / tau

        # Binomial likelihood:
        d = pm.Binomial('d',
                        n=c_star,
                        p=mu_0,
                        observed=d_obs)

    return covid_case_count_model
Example #22
def estimate_student(normalized_ranks):
    """This fits a PyMC3 model. All the model does is
    fit the parameters of a t distribution, since it is clear
    (in the author's opinion) that the logit-transformed ranks
    are very well described by a t distribution. The logit
    ranks are thus the observations, and the model finds the
    ranges of parameters consistent with those observations."""

    with pm.Model() as model:
        nu = pm.HalfNormal('nu', 50)  #very broad priors
        mu = pm.Normal('mu', mu=0, sigma=50)  #very broad priors
        sigma = pm.HalfNormal('sig', 50)  #very broad priors

        lik = pm.StudentT('t',
                          nu=nu,
                          mu=mu,
                          sigma=sigma,
                          observed=logit(normalized_ranks))
        trace = pm.sample(1000, tune=1000)
    return trace, model
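Example #23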
def graded_response_model(dataset, n_categories):
    """Defines the mcmc model for the graded response model.
    
    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        n_categories: number of polytomous values (i.e. Number of Likert Levels)

    Returns:
        model: PyMC3 model to run
    """
    n_items, n_people = dataset.shape
    n_levels = n_categories - 1

    # Need small deviations in the offsets to
    # fit into the PyMC3 framework
    mu_value = linspace(-0.1, 0.1, n_levels)

    # Run through 0, K - 1
    observed = dataset - dataset.min()

    graded_mcmc_model = pm.Model()

    with graded_mcmc_model:
        # Ability Parameters
        ability = pm.Normal("Ability", mu=0, sigma=1, shape=n_people)

        # Discrimination multilevel prior
        rayleigh_scale = pm.Lognormal("Rayleigh_Scale",
                                      mu=0,
                                      sigma=1 / 4,
                                      shape=1)
        discrimination = pm.Bound(Rayleigh, lower=0.25)(name='Discrimination',
                                                        beta=rayleigh_scale,
                                                        offset=0.25,
                                                        shape=n_items)

        # Threshold multilevel prior
        sigma_difficulty = pm.HalfNormal('Difficulty_SD', sigma=1, shape=1)
        for ndx in range(n_items):
            thresholds = pm.Normal(
                f"Thresholds{ndx}",
                mu=mu_value,
                sigma=sigma_difficulty,
                shape=n_levels,
                transform=pm.distributions.transforms.ordered)

            # Compute the log likelihood
            kernel = discrimination[ndx] * ability
            probabilities = pm.OrderedLogistic(f'Log_Likelihood{ndx}',
                                               cutpoints=thresholds,
                                               eta=kernel,
                                               observed=observed[ndx])

    return graded_mcmc_model
Example #24
    def setup_class(self):
        super().setup_class()
        self.data = np.sort(np.random.normal(loc=0, scale=1, size=1000))

        def normal_sim(a, b):
            return np.sort(np.random.normal(a, b, 1000))

        with pm.Model() as self.SMABC_test:
            a = pm.Normal("a", mu=0, sd=5)
            b = pm.HalfNormal("b", sd=2)
            s = pm.Simulator("s", normal_sim, observed=self.data)
Example #25
    def infer_with_pymc3(self, n_iteration):
        with pm.Model() as linreg:
            a = pm.Normal('a', mu=0, sd=100)
            b = pm.Normal('b', mu=0, sd=100)
            sigma = pm.HalfNormal('sigma', sd=1)
            # http://docs.pymc.io/api/distributions/continuous.html#pymc3.distributions.continuous.HalfNormal

            y_est = a * self.x + b
            likelihood = pm.Normal('y', mu=y_est, sd=sigma, observed=self.y)

            self.trace = pm.sample(n_iteration, random_seed=123)
Example #26
def from_posterior(param, samples, distribution=None, half=False, freedom=10):

    if len(samples.shape) > 1:
        shape = samples.shape[1:]
    else:
        shape = None

    if (distribution is None):
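        # No parametric form requested: build an Interpolated prior from a KDE of the samples,
        # padding the support so the density drops to zero at the edge(s)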
        smin, smax = np.min(samples), np.max(samples)
        width = smax - smin
        x = np.linspace(smin, smax, 1000)
        y = stats.gaussian_kde(samples)(x)
        if half:
            x = np.concatenate([x, [x[-1] + 0.1 * width]])
            y = np.concatenate([y, [0]])
        else:
            x = np.concatenate([[x[0] - 0.1 * width], x,
                                [x[-1] + 0.1 * width]])
            y = np.concatenate([[0], y, [0]])
        return pm.distributions.Interpolated(param, x, y)
    elif (distribution == 'normal'):
        temp = stats.norm.fit(samples)
        if shape is None:
            return pm.Normal(param, mu=temp[0], sigma=freedom * temp[1])
        else:
            return pm.Normal(param,
                             mu=temp[0],
                             sigma=freedom * temp[1],
                             shape=shape)
    elif (distribution == 'hnormal'):
        temp = stats.halfnorm.fit(samples)
        if shape is None:
            return pm.HalfNormal(param, sigma=freedom * temp[1])
        else:
            return pm.HalfNormal(param, sigma=freedom * temp[1], shape=shape)
    elif (distribution == 'hcauchy'):
        temp = stats.halfcauchy.fit(samples)
        if shape is None:
            return pm.HalfCauchy(param, freedom * temp[1])
        else:
            return pm.HalfCauchy(param, freedom * temp[1], shape=shape)
Example #27
    def setup_class(self):
        super().setup_class()
        self.data = np.random.normal(loc=0, scale=1, size=1000)

        def normal_sim(a, b):
            return np.random.normal(a, b, 1000)

        with pm.Model() as self.SMABC_test:
            a = pm.Normal("a", mu=0, sigma=1)
            b = pm.HalfNormal("b", sigma=1)
            s = pm.Simulator(
                "s", normal_sim, params=(a, b), sum_stat="sort", epsilon=1, observed=self.data
            )
            self.s = s

        def quantiles(x):
            return np.quantile(x, [0.25, 0.5, 0.75])

        def abs_diff(eps, obs_data, sim_data):
            return np.mean(np.abs((obs_data - sim_data) / eps))

        with pm.Model() as self.SMABC_test2:
            a = pm.Normal("a", mu=0, sigma=1)
            b = pm.HalfNormal("b", sigma=1)
            s = pm.Simulator(
                "s",
                normal_sim,
                params=(a, b),
                distance=abs_diff,
                sum_stat=quantiles,
                epsilon=1,
                observed=self.data,
            )

        with pm.Model() as self.SMABC_potential:
            a = pm.Normal("a", mu=0, sigma=1)
            b = pm.HalfNormal("b", sigma=1)
            c = pm.Potential("c", pm.math.switch(a > 0, 0, -np.inf))
            s = pm.Simulator(
                "s", normal_sim, params=(a, b), sum_stat="sort", epsilon=1, observed=self.data
            )
Example #28
def FitMyModel(trainDM,PredDM):
    with pm.Model() as model:
        # partition dataframes df
        Ydf = trainDM[0]
        TXdf = trainDM[1]     
        PXdf = PredDM  
        ## Parameters for linear predictor
        #b0 = pm.Normal('b0',mu=0,sd=10)
        #dum_names = filter(lambda col : str(col).startswith('inegiv5name'),TXdf)
        #dumsdf = TXdf[dum_names]
        #dumshape = dumscols.shape
        #coordsdf = TXdf[['Longitude','Latitude']] 
        # Create vectors for dumi vars 
        #drvs = map(lambda col : pm.Normal(col,mu=0,sd=1.5),dum_names)
        ## Create theano vector
        dimX = len(TXdf.columns)
        b = pm.Normal('b',mu=0,sd=1.5,shape=dimX)
        #mk = pm.math.matrix_dot(TXdf.values,b.transpose())
        ## The latent function
        x_index = TXdf.columns.get_loc("Longitude")
        y_index = TXdf.columns.get_loc("Latitude")
        ## Building the covariance structure
        tau = pm.HalfNormal('tau',sd=10)
        sigma = pm.HalfNormal('sigma',sd=10)
        #phi = pm.Uniform('phi',0,15)
        phi = pm.HalfNormal('phi',sd=6)
        Tau = pm.gp.cov.Constant(tau)
        cov = (sigma * pm.gp.cov.Matern32(2,phi,active_dims=[x_index,y_index])) + Tau
        mean_f = pm.gp.mean.Linear(coeffs=b)
        gp = pm.gp.Latent(mean_func=mean_f,cov_func=cov)
        f = gp.prior("latent_field", X=TXdf.values,reparameterize=False)
        yy = pm.Bernoulli("yy",logit_p=f,observed=Ydf.values)
        #trace = pm.fit(method='advi', callbacks=[CheckParametersConvergence()],n=15000)    
        trace = pm.sample(15,init='adapt_diag')
        #trace = trace.sample(draws=5000)
        # Drop any column that doesn't appear in the training data
        ValidPreds = PredDM[TXdf.columns]
        PredX = ValidPreds.values
        f_star = gp.conditional("f_star", PredX)
        pred_samples = pm.sample_ppc(trace, vars=[f_star], samples=100)
        return pred_samples,trace
Example #29
    def create_model(self):
        """
        Creates and returns the PyMC3 model.
        Note: The size of the shared variables must match the size of the training data.
        Otherwise, setting the shared variables later will raise an error.
        See http://docs.pymc.io/advanced_theano.html
        Returns
        -------
        the PyMC3 model
        """
        model_input = theano.shared(np.zeros([self.num_training_samples, self.num_pred]))
        model_output = theano.shared(np.zeros(self.num_training_samples, dtype='int'))
        model_cats = theano.shared(np.zeros(self.num_training_samples, dtype='int'))

        self.shared_vars = {
            'model_input': model_input,
            'model_output': model_output,
            'model_cats': model_cats
        }

        model = pm.Model()

        with model:
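            # Hierarchical logistic regression: per-category intercepts and slopes share Normal hyperpriors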
            mu_alpha = pm.Normal('mu_alpha', mu=0, sd=100)
            sigma_alpha = pm.HalfNormal('sigma_alpha', sd=100)

            mu_beta = pm.Normal('mu_beta', mu=0, sd=100)
            sigma_beta = pm.HalfNormal('sigma_beta', sd=100)

            alpha = pm.Normal('alpha', mu=mu_alpha, sd=sigma_alpha, shape=(self.num_cats,))
            betas = pm.Normal('beta', mu=mu_beta, sd=sigma_beta, shape=(self.num_cats, self.num_pred))

            c = model_cats

            temp = alpha[c] + T.sum(betas[c] * model_input, 1)

            p = pm.invlogit(temp)

            o = pm.Bernoulli('o', p, observed=model_output)

        return model
Example #30
def fit_refractory_minus_duration():
    sample_data = pd.read_pickle(
        '../data/raw/refractory_prior_samples.pkl')['samples'].values
    with pm.Model() as model:
        a = pm.HalfNormal('a', 100 * 10)
        b = pm.HalfNormal('b', 100 * 10)
        pm.Wald('prior', mu=a, lam=b, observed=sample_data)
        trace = pm.sample(2000, njobs=1)
    summary_df = pm.summary(trace)
    a_est = summary_df.loc['a', 'mean']
    b_est = summary_df.loc['b', 'mean']
    n_samples = 10000
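    # Draw from a Wald at the fitted point estimates to visually compare against the raw samples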
    with pm.Model() as model:
        pm.Wald('prior_check', mu=a_est, lam=b_est)
        outcome = pm.sample(n_samples, njobs=1, nchains=1)

    samples = outcome['prior_check']
    sns.distplot(samples, kde=True)
    sns.distplot(sample_data, kde=True)
    plt.show()
    print(summary_df)