def test_sample(self):
        x = np.random.normal(size=100)
        y = x + np.random.normal(scale=1e-2, size=100)

        x_pred = np.linspace(-3, 3, 200)

        x_shared = theano.shared(x)

        with pm.Model() as model:
            b = pm.Normal('b', 0., 10.)
            pm.Normal('obs', b * x_shared, np.sqrt(1e-2), observed=y)
            prior_trace0 = pm.sample_prior_predictive(1000)

            trace = pm.sample(1000, init=None, progressbar=False)
            pp_trace0 = pm.sample_ppc(trace, 1000)

            x_shared.set_value(x_pred)
            prior_trace1 = pm.sample_prior_predictive(1000)
            pp_trace1 = pm.sample_ppc(trace, 1000)

        assert prior_trace0['b'].shape == (1000, )
        assert prior_trace0['obs'].shape == (1000, 100)
        np.testing.assert_allclose(x, pp_trace0['obs'].mean(axis=0), atol=1e-1)

        assert prior_trace1['b'].shape == (1000, )
        assert prior_trace1['obs'].shape == (1000, 200)
        np.testing.assert_allclose(x_pred,
                                   pp_trace1['obs'].mean(axis=0),
                                   atol=1e-1)
Example #2
    def test_vector_observed(self):
        # This test was initially created to check whether observed RVs
        # can infer their shape automatically from the observed data.
        # That would make sample_ppc correct for RVs like the one below
        # (where some kind of broadcasting is involved). However, doing so
        # would break the use of a `theano.shared` array as observed data,
        # because the RV shape could change after `.set_value`.
        with pm.Model() as model:
            mu = pm.Normal('mu', mu=0, sd=1)
            a = pm.Normal(
                'a',
                mu=mu,
                sd=1,
                shape=2,  # necessary to make ppc sample correct
                observed=np.array([0., 1.]))
            trace = pm.sample()

        with model:
            # test list input
            ppc0 = pm.sample_ppc([model.test_point], samples=10)
            ppc = pm.sample_ppc(trace, samples=10, vars=[])
            assert len(ppc) == 0
            ppc = pm.sample_ppc(trace, samples=10, vars=[a])
            assert 'a' in ppc
            assert ppc['a'].shape == (10, 2)

            ppc = pm.sample_ppc(trace, samples=10, vars=[a], size=4)
            assert 'a' in ppc
            assert ppc['a'].shape == (10, 4, 2)
Example #3
def test_density_dist_without_random_not_sampleable():
    with pm.Model() as model:
        mu = pm.Normal('mu',0,1)
        normal_dist = pm.Normal.dist(mu, 1)
        pm.DensityDist('density_dist', normal_dist.logp, observed=np.random.randn(100))
        trace = pm.sample(100)

    samples = 500
    with pytest.raises(ValueError):
        pm.sample_ppc(trace, samples=samples, model=model, size=100)
Example #5
    def test_normal_vector(self):
        with pm.Model() as model:
            a = pm.Normal('a', mu=0, sd=1, shape=2)
            trace = pm.sample()

        with model:
            ppc = pm.sample_ppc(trace, samples=10, vars=[])
            assert len(ppc) == 0
            ppc = pm.sample_ppc(trace, samples=10, vars=[a])
            assert 'a' in ppc
            assert ppc['a'].shape == (10, 2)

            ppc = pm.sample_ppc(trace, samples=10, vars=[a], size=4)
            assert 'a' in ppc
            assert ppc['a'].shape == (10, 4, 2)
Example #7
    def test_sum_normal(self):
        with pm.Model() as model:
            a = pm.Normal('a', sd=0.2)
            b = pm.Normal('b', mu=a)
            trace = pm.sample()

        with model:
            # test list input
            ppc0 = pm.sample_ppc([model.test_point], samples=10)
            ppc = pm.sample_ppc(trace, samples=1000, vars=[b])
            assert len(ppc) == 1
            assert ppc['b'].shape == (1000,)
            scale = np.sqrt(1 + 0.2 ** 2)
            _, pval = stats.kstest(ppc['b'], stats.norm(scale=scale).cdf)
            assert pval > 0.001
    def predict(self, new_df=None, sample_size=500):

        if new_df is not None:
            try:
                self.X_test = coordinates_converter(new_df)
                self.y_test = new_df[self.response_var]
                self.test_loc_cache = new_df[['LATITUDE', 'LONGITUDE']]
            except Exception:
                raise ValueError(
                    'The new dataframe should contain LATITUDE, LONGITUDE and the variable column, e.g., PRCP'
                )
        with self.model:
            self.X_train.set_value(self.X_test)
            self.simulated_values = pm.sample_ppc(self.trace,
                                                  samples=sample_size)
            self.predictions = np.exp(
                np.median(self.simulated_values['y'], axis=0))

        l1_loss = np.mean(np.abs(self.predictions - self.y_test))
        l2_loss = np.mean(np.square(self.predictions - self.y_test))

        self.summary = {'l1_loss': l1_loss, 'l2_loss': l2_loss}

        output_df = self.test_loc_cache.copy()
        output_df['PRED'] = self.predictions

        return self.predictions
Example #9
    def on_predict(self, X):
        # Update the theano shared variable with test data
        self.X_.set_value(X)
        # Running PPC will use the updated values and do the prediction
        self.ppc_ = pm.sample_ppc(self.trace_,
                                  model=self.model_,
                                  samples=self.sample_size)
Example #10
def main():
    config = create_configuration(filename='/regression-siso.json')
    dataset = get_dataset(config.dataset, testing=False)

    # %%
    x_train = dataset.x
    y_train = dataset.y
    x = theano.shared(x_train)
    y = theano.shared(y_train)
    nn = construct_nn(x=x, y=y, config=config)

    # ADVI
    with nn:
        inference = pm.ADVI()
        approx = pm.fit(n=50000, method=inference)
    trace = approx.sample(draws=5000)

    # with nn:
    #     inference = pm.NUTS()
    #     trace = pm.sample(2000, tune=1000, cores=4, inference=inference)
    print(pm.summary(trace))

    x.set_value(x_train)
    y.set_value(y_train)

    with nn:
        ppc = pm.sample_ppc(trace, samples=500, progressbar=False)
    def predict_proba(self, X, return_std=False):
        """
        Predicts probabilities of new data with a trained Dirichlet Process
        Mixture Model

        Parameters
        ----------
        X : numpy array, shape [n_samples, n_features]

        return_std : Boolean flag
           Boolean flag of whether to return standard deviations with mean
           probabilities. Defaults to False.
        """

        if self.trace is None:
            raise NotFittedError('Run fit on the model before predict.')

        # num_samples = X.shape[0]

        if self.cached_model is None:
            self.cached_model = self.create_model()

        self._set_shared_vars({'model_input': X})
        _vars = self.cached_model.free_RVs[8:11]

        ppc = pm.sample_ppc(self.trace,
                            model=self.cached_model,
                            vars=_vars,
                            samples=2000,
                            size=len(X))
        return (ppc)
Example #12
def test_mixture_random_shape():
    # test the shape broadcasting in mixture random
    y = np.concatenate([nr.poisson(5, size=10), nr.poisson(9, size=10)])
    with pm.Model() as m:
        comp0 = pm.Poisson.dist(mu=np.ones(2))
        w0 = pm.Dirichlet('w0', a=np.ones(2))
        like0 = pm.Mixture('like0', w=w0, comp_dists=comp0, observed=y)

        comp1 = pm.Poisson.dist(mu=np.ones((20, 2)), shape=(20, 2))
        w1 = pm.Dirichlet('w1', a=np.ones(2))
        like1 = pm.Mixture('like1', w=w1, comp_dists=comp1, observed=y)

        comp2 = pm.Poisson.dist(mu=np.ones(2))
        w2 = pm.Dirichlet('w2', a=np.ones(2), shape=(20, 2))
        like2 = pm.Mixture('like2', w=w2, comp_dists=comp2, observed=y)

        comp3 = pm.Poisson.dist(mu=np.ones(2), shape=(20, 2))
        w3 = pm.Dirichlet('w3', a=np.ones(2), shape=(20, 2))
        like3 = pm.Mixture('like3', w=w3, comp_dists=comp3, observed=y)

    rand0, rand1, rand2, rand3 = draw_values([like0, like1, like2, like3],
                                             point=m.test_point,
                                             size=100)
    assert rand0.shape == (100, 20)
    assert rand1.shape == (100, 20)
    assert rand2.shape == (100, 20)
    assert rand3.shape == (100, 20)

    with m:
        ppc = pm.sample_ppc([m.test_point], samples=200)
    assert ppc['like0'].shape == (200, 20)
    assert ppc['like1'].shape == (200, 20)
    assert ppc['like2'].shape == (200, 20)
    assert ppc['like3'].shape == (200, 20)
Example #13
    def predict_proba(self, X, cats):
        """
        Predicts probabilities of new data with a trained HLM

        Parameters
        ----------
        X : numpy array, shape [n_samples, n_features]

        cats: numpy array, shape [n_samples, ]
        """

        if self.advi_trace is None:
            raise PSToolkitError("Run fit on the model before predict.")

        num_samples = X.shape[0]

        if self.cached_model is None:
            self.cached_model, o = self.create_model()

        self._set_shared_vars(X, np.zeros(num_samples), cats)

        ppc = pm.sample_ppc(self.advi_trace,
                            model=self.cached_model,
                            samples=2000)

        return ppc['o'].mean(axis=0)
Example #14
def sample_mu_ensemble_ppc(models, weights, n_total_samples):
    """
    For given models and weights: generate posterior samples of 'mu' variable proportional to given weights.
    Will skip models with 0 weight.
    :param models: iterable of Model objects
    :param weights: iterable of floating point weights
    :param n_total_samples: int
    :return: pandas DataFrame with posterior mu samples and a 'model' column indicating which model generated each sample
    """
    post_sample_dfs = list()

    for model, weight in zip(models, weights):
        if weight == 0:
            continue

        samples = pm.sample_ppc(model.trace,
                                samples=int(weight * n_total_samples),
                                model=model.model,
                                vars=[model.model.mu])['mu']

        df = pd.DataFrame(
            data=samples,
            columns=['mu__{}'.format(i) for i in range(samples.shape[1])]
        ).assign(model=model.name)

        post_sample_dfs.append(df)

    return pd.concat(post_sample_dfs, ignore_index=True)
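
A minimal usage sketch for the function above, assuming each fitted model is wrapped in a lightweight object exposing .name, .model and .trace, and that 'mu' is a named (here deterministic) variable in each model; the wrapper and toy models below are illustrative only:

from collections import namedtuple

import numpy as np
import pymc3 as pm

FittedModel = namedtuple('FittedModel', ['name', 'model', 'trace'])

def fit_toy_model(name, data, x):
    # 'mu' is a Deterministic so that sample_ppc recomputes it from the posterior draws.
    with pm.Model() as m:
        beta = pm.Normal('beta', 0., 10.)
        pm.Deterministic('mu', beta * x)
        pm.Normal('obs', beta * x, 1., observed=data)
        trace = pm.sample(500, progressbar=False)
    return FittedModel(name=name, model=m, trace=trace)

x = np.array([1., 2., 3.])
models = [fit_toy_model('flat', np.array([0.9, 2.1, 2.9]), x),
          fit_toy_model('steep', np.array([2.1, 3.9, 6.2]), x)]

# 1000 posterior 'mu' samples, split 70/30 between the two models.
ensemble = sample_mu_ensemble_ppc(models, weights=[0.7, 0.3], n_total_samples=1000)
print(ensemble.groupby('model').size())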
Example #15
    def generate_samples(self, name, X_new, n_samples=500):
        with self.model as model:

            Kuu = pm.gp.util.stabilize(self.cov(self.Xu))
            Kuf = self.cov(self.Xu, self.X)
            Luu = tt.slinalg.cholesky(Kuu)
            A = pm.gp.util.solve_lower(Luu, Kuf)
            Qff = tt.dot(tt.transpose(A),A)
            Kffd = self.cov(self.X, diag=True)
            Lamd_inv = tt.diag(1./tt.clip(Kffd - tt.diag(Qff) + self.sigma**2, 0, np.inf))

            Sigma = pm.gp.util.stabilize(Kuu + tt.dot(Kuf.dot(Lamd_inv),tt.transpose(Kuf)))
            L_Sigma = tt.slinalg.cholesky(Sigma)


            Kus = self.cov(self.Xu,X_new)

            m1 = pm.gp.util.solve_lower(L_Sigma, Kus)
            m2 = pm.gp.util.solve_lower(L_Sigma, Kuf)

            mu_pred = tt.dot(tt.dot(tt.transpose(m1),m2),tt.dot(Lamd_inv,model.fp))

            Kss = self.cov(X_new) + 1e-6 * tt.eye(X_new.shape[0])
            As = pm.gp.util.solve_lower(Luu, Kus)
            Qss = tt.dot(tt.transpose(As),As)


            cov_pred = Kss - Qss + tt.dot(tt.transpose(m1),m1)

            f_pred = pm.MvNormal(name, mu=mu_pred, cov=cov_pred, shape=pm.gp.util.infer_shape(X_new))

        with self.model:
            pred_samples = pm.sample_ppc(self.trace, vars=[f_pred], samples=n_samples)

        return pred_samples
Example #16
    def predict(self, new_df=None, sample_size=1000):
        '''
        Args:
            new_df (pandas dataframe): the dataframe of new locations. Users can also include
                the true values of Y. Note that MSE cannot be computed if the truth is not provided.
        '''
        if new_df is not None:
            try:
                self.X_test = coordinates_converter(new_df)
                self.y_test = new_df[self.response_var]
                self.test_loc_cache = new_df[['LATITUDE', 'LONGITUDE']]
            except Exception:
                raise ValueError(
                    'The new dataframe should contain LATITUDE, LONGITUDE and the variable column, e.g., PRCP'
                )

        with self.model:
            y_pred = self.gp.conditional("y_pred", self.X_test)
            self.simulated_values = pm.sample_ppc(self.trace,
                                                  vars=[y_pred],
                                                  samples=sample_size)
            self.predictions = np.exp(
                np.median(self.simulated_values['y_pred'], axis=0))

        l1_loss = np.mean(np.abs(self.predictions - self.y_test))
        l2_loss = np.mean(np.square(self.predictions - self.y_test))
        self.summary = {'l1_loss': l1_loss, 'l2_loss': l2_loss}

        output_df = self.test_loc_cache.copy()
        output_df['PRED'] = self.predictions

        return self.predictions
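
A hedged usage sketch for the predict method above, assuming an already-fitted instance (called krig here, with response_var set to 'PRCP') and a small held-out dataframe with the columns the docstring asks for; names and values are illustrative only:

import pandas as pd

# Hypothetical held-out locations plus observed precipitation.
new_df = pd.DataFrame({
    'LATITUDE':  [44.5, 45.1, 46.0],
    'LONGITUDE': [-72.6, -73.2, -71.9],
    'PRCP':      [1.2, 0.8, 2.4],
})

preds = krig.predict(new_df, sample_size=1000)
print(preds)          # exponentiated posterior medians, one per location
print(krig.summary)   # {'l1_loss': ..., 'l2_loss': ...}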
Example #17
    def predict_proba(self, X, return_std=False):
        """ Perform Prediction

        Predicts values of new data with a trained Gaussian Process
        Regression model

        Parameters
        ----------
        X : numpy array, shape [n_samples, n_features]

        return_std : Boolean
            Whether to return standard deviations with mean values.
            Defaults to False.
        """

        if self.trace is None:
            raise NotFittedError('Run fit on the model before predict.')

        num_samples = X.shape[0]

        if self.cached_model is None:
            self.cached_model = self.create_model()

        self._set_shared_vars({
            'model_input': X,
            'model_output': np.zeros(num_samples)
        })

        ppc = pm.sample_ppc(self.trace, model=self.cached_model, samples=2000)

        if return_std:
            return ppc['y'].mean(axis=0), ppc['y'].std(axis=0)
        else:
            return ppc['y'].mean(axis=0)
Example #18
    def predict(self, X, return_std=False):
        """
        Predicts values of new data with a trained Gaussian Process Regression model

        Parameters
        ----------
        X : numpy array, shape [n_samples, n_features]

        return_std : Boolean flag of whether to return standard deviations with mean values. Defaults to False.
        """

        if self.trace is None:
            raise PyMC3ModelsError('Run fit on the model before predict.')

        num_samples = X.shape[0]

        if self.cached_model is None:
            self.cached_model = self.create_model()

        self._set_shared_vars({
            'model_input': X,
            'model_output': np.zeros(num_samples)
        })

        with self.cached_model:
            f_pred = self.gp.conditional("f_pred", X)
            self.ppc = pm.sample_ppc(self.trace, vars=[f_pred], samples=2000)

        if return_std:
            return self.ppc['f_pred'].mean(axis=0), self.ppc['f_pred'].std(
                axis=0)
        else:
            return self.ppc['f_pred'].mean(axis=0)
Example #19
    def predict(self, x, n_samples=1, progressbar=True, point_estimate=False):
        self.x.set_value(x.astype(floatX))
        try:
            # For classification tasks
            self.y.set_value(
                np.zeros((np.array(x).shape[0],
                          self.y.get_value().shape[1])).astype(floatX))
        except IndexError:
            # For regression tasks
            self.y.set_value(
                np.zeros((np.array(x).shape[0], 1)).astype(floatX))

        with self.model:
            ppc = None
            for trace in self.trace:
                _ppc = pm.sample_ppc(trace,
                                     samples=n_samples,
                                     progressbar=progressbar)['likelihood']
                if ppc is None:
                    ppc = _ppc
                else:
                    ppc = np.vstack((ppc, _ppc))
        if point_estimate:
            return np.mean(ppc, axis=0)
        return ppc
Example #20
    def predict_proba(self, X, cats, return_std=False):
        """
        Predicts probabilities of new data with a trained HLR

        Parameters
        ----------
        X : numpy array, shape [n_samples, n_features]

        cats: numpy array, shape [n_samples, ]

        return_std: Boolean flag of whether to return standard deviations with mean probabilities. Defaults to False.
        """

        if self.advi_trace is None:
            raise PSToolkitError('Run fit on the model before predict.')

        num_samples = X.shape[0]

        if self.cached_model is None:
            self.cached_model = self.create_model()

        self._set_shared_vars({
            'model_input': X,
            'model_output': np.zeros(num_samples),
            'model_cats': cats
        })

        ppc = pm.sample_ppc(self.advi_trace,
                            model=self.cached_model,
                            samples=2000)

        if return_std:
            return ppc['o'].mean(axis=0), ppc['o'].std(axis=0)
        else:
            return ppc['o'].mean(axis=0)
def v2_model(observations,
             nulls,
             null_sd,
             null_b,
             null_dispersed_prob,
             iter_count=2000,
             tune_iters=2000):
    with pm.Model() as model:
        # Probability of being a DE gene
        de_prob = pm.Beta('de_prob', alpha=1., beta=5.)

        # Probability of being downregulated
        down_prob = pm.Beta('down_prob', alpha=1., beta=1.)

        dispersed_prob = null_dispersed_prob

        mu_pos = pm.Lognormal('mu_pos', mu=-3, sd=1.)
        mu_neg = pm.Lognormal('mu_neg', mu=-3, sd=1.)
        sd_pos = pm.Gamma('sd_pos', alpha=0.01, beta=1.)
        sd_neg = pm.Gamma('sd_neg', alpha=0.01, beta=1.)
        nu_pos = pm.Gamma('nu_pos', alpha=5., beta=1.)
        nu_neg = pm.Gamma('nu_neg', alpha=5., beta=1.)

        spike_component = pm.Normal.dist(mu=0., sd=null_sd)
        slab_component = pm.Laplace.dist(mu=0., b=null_b)

        # Sample from Gaussian-Laplace mixture for null (spike-and-slab mixture)
        pm.Mixture('null',
                   comp_dists=[spike_component, slab_component],
                   w=tt.as_tensor([1. - dispersed_prob, dispersed_prob]),
                   observed=nulls)

        pos_component = pm.Bound(pm.StudentT, lower=0.).dist(mu=mu_pos,
                                                             sd=sd_pos,
                                                             nu=nu_pos)
        neg_component = pm.Bound(pm.StudentT, upper=0.).dist(mu=-mu_neg,
                                                             sd=sd_neg,
                                                             nu=nu_neg)

        pm.Mixture('obs',
                   w=tt.as_tensor([(1. - de_prob) * (1. - dispersed_prob),
                                   (1. - de_prob) * dispersed_prob,
                                   de_prob * (1. - down_prob),
                                   de_prob * down_prob]),
                   comp_dists=[
                       spike_component, slab_component, pos_component,
                       neg_component
                   ],
                   observed=observations)

        pm.Deterministic('log_prob', model.logpt)

        for RV in model.basic_RVs:
            print(RV.name, RV.logp(model.test_point))

        trace = pm.sample(iter_count, tune=tune_iters, chains=4)
        ppc = pm.sample_ppc(trace, samples=iter_count, model=model)

    return ({'trace': trace, 'ppc': ppc})
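
A call sketch with synthetic inputs, just to show the expected argument types; the effect sizes and hyperparameter values below are made up:

import numpy as np

rng = np.random.RandomState(0)
observations = np.concatenate([rng.normal(0.0, 0.05, 800),    # null-like effects
                               rng.normal(0.5, 0.10, 100),    # upregulated
                               rng.normal(-0.5, 0.10, 100)])  # downregulated
nulls = rng.normal(0.0, 0.05, 500)

result = v2_model(observations, nulls,
                  null_sd=0.05, null_b=0.2, null_dispersed_prob=0.1,
                  iter_count=1000, tune_iters=1000)
trace, ppc = result['trace'], result['ppc']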
Example #22
    def test_normal_scalar(self):
        with pm.Model() as model:
            a = pm.Normal('a', mu=0, sd=1)
            trace = pm.sample()

        with model:
            ppc = pm.sample_ppc(trace, samples=1000, vars=[])
            assert len(ppc) == 0
            ppc = pm.sample_ppc(trace, samples=1000, vars=[a])
            assert 'a' in ppc
            assert ppc['a'].shape == (1000, )
        _, pval = stats.kstest(ppc['a'], stats.norm().cdf)
        assert pval > 0.001

        with model:
            ppc = pm.sample_ppc(trace, samples=10, size=5, vars=[a])
            assert ppc['a'].shape == (10, 5)
Example #23
    def setup_class(cls):
        cls.data = eight_schools_params()
        models = load_cached_models(draws=500, chains=2)
        model, cls.short_trace = models['pymc3']
        with model:
            cls.sample_ppc = pm.sample_ppc(cls.short_trace, 100)
        cls.stan_model, cls.fit = models['pystan']
        cls.df_trace = DataFrame({'a': np.random.poisson(2.3, 100)})
Example #25
def production_step1():
    ann_input.set_value(X_test)
    ann_output.set_value(Y_test)
    with neural_network:
        ppc = pm.sample_ppc(trace, samples=500, progressbar=False)

    # Use probability of > 0.5 to assume prediction of class 1
    pred = ppc['out'].mean(axis=0) > 0.5
Example #26
    def _sample_ppc(self):
        for assignment in range(self.k):
            with temp_set(self.assignment_shared,
                          [assignment] * self._assignment_shared_size):
                # TODO: remove magic "points"
                self._ppcs[assignment] = pm.sample_ppc(
                    self.orig_trace, samples=1000,
                    vars=self.query_vars)["points"]
Example #27
    def get_posterior(self,
                      observed_data,
                      use_ppc_samples=config.USE_PPC_SAMPLES):

        # ------------------------------------------------- #
        # CREATE BERNOULLI INPUT FROM SUCCESS PROPORTION    #
        # ------------------------------------------------- #
        # Creates N_bernoulli_sims lots of (K x K) boolean grid mimicking
        # success rate in observed data. Required because pm.Bernoulli only
        # takes boolean data shaped (N x K x K) (this method is quite hacky)
        N_bernoulli_sims = 500
        data = np.ones((config.K, config.K, N_bernoulli_sims))
        change_to_zeros = np.round(
            (1 - self.p_wins) * N_bernoulli_sims).astype(int)
        data[change_to_zeros[:, :, None] > np.arange(data.shape[-1])] = 0
        data = np.transpose(data, (2, 0, 1))  # Swap axes
        data = np.take(data, np.random.rand(data.shape[0]).argsort(),
                       axis=0)  # Shuffle on N axis

        # ------------------------------------------------- #
        # USE PYMC3 TO CREATE POSTERIOR AND SAMPLE FROM IT  #
        # ------------------------------------------------- #
        model = pm.Model()
        with model:

            # Priors for unknown model parameters
            a = pm.Normal('a', mu=0, sd=10, shape=(config.K, 1))
            b = pm.Normal('b', mu=0, sd=10, shape=(1, config.K))
            offset = pm.Normal('offset', mu=0, sd=10)

            p = pm.Deterministic('p', self.sigmoid(a + b + offset))

            # Likelihood (sampling distribution) of observations
            # L = pm.Bernoulli('L', p=p, observed=data)
            L = pm.Binomial('L', self.N_data, p, observed=self.s_with_obs)

            # draw posterior samples
            trace = pm.sample(config.TRACE_LENGTH,
                              nuts_kwargs=dict(target_accept=.95),
                              chains=config.N_MCMC_CHAINS)

        if use_ppc_samples:
            # Use samples from ppc to obtain posterior point estimates
            ppc = pm.sample_ppc(trace,
                                samples=config.N_PPC_SAMPLES,
                                model=model)
            # y_post = np.mean(ppc['L'], axis=(0, 1))  # USE IF USING BERNOULLI
            y_post = np.mean(ppc['L'],
                             axis=0) / self.N_data  # USE IF USING BINOMIAL

        else:
            # Use trace to obtain posterior point estimates
            a_post = np.array(np.mean(trace[:100]['a'], axis=0))
            b_post = np.array(np.mean(trace[:100]['b'], axis=0))
            offset_post = np.mean(trace[:100]['offset'], axis=0)
            y_post = self.sigmoid(a_post + b_post + offset_post)

        return y_post, model, trace
Example #28
def run_model(model, returns_train, returns_test=None,
              bmark=None, samples=500, ppc=False):
    """Run one of the Bayesian models.

    Parameters
    ----------
    model : {'alpha_beta', 't', 'normal', 'best'}
        Which model to run
    returns_train : pd.Series
        Timeseries of simple returns
    returns_test : pd.Series (optional)
        Out-of-sample returns. Datetimes in returns_test will be added to
        returns_train as missing values and predictions will be generated
        for them.
    bmark : pd.Series or pd.DataFrame (optional)
        Only used for alpha_beta to estimate regression coefficients.
        If bmark has more recent returns than returns_train, these dates
        will be treated as missing values and predictions will be
        generated for them taking market correlations into account.
    samples : int (optional)
        Number of posterior samples to draw.
    ppc : boolean (optional)
        Whether to run a posterior predictive check. Will generate
        samples of length returns_test.  Returns a second argument
        that contains the PPC of shape samples x len(returns_test).

    Returns
    -------
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    ppc : numpy.array (if ppc==True)
       PPC of shape samples x len(returns_test).

"""

    if model == 'alpha_beta':
        model, trace = model_returns_t_alpha_beta(returns_train,
                                                  bmark, samples)
    elif model == 't':
        model, trace = model_returns_t(returns_train, samples)
    elif model == 'normal':
        model, trace = model_returns_normal(returns_train, samples)
    elif model == 'best':
        model, trace = model_best(returns_train, returns_test, samples=samples)
    else:
        raise NotImplementedError(
            'Model {} not found.'
            'Use alpha_beta, t, normal, or best.'.format(model))

    if ppc:
        ppc_samples = pm.sample_ppc(trace, samples=samples,
                                    model=model, size=len(returns_test))
        return trace, ppc_samples['returns']

    return trace
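
A usage sketch, assuming the model builders referenced above (model_returns_t and friends) are available in the same module; the synthetic return series is illustrative only:

import numpy as np
import pandas as pd

# Synthetic daily simple returns, split into in-sample / out-of-sample.
idx = pd.date_range('2018-01-02', periods=500, freq='B')
returns = pd.Series(np.random.normal(5e-4, 0.01, len(idx)), index=idx)
returns_train, returns_test = returns[:400], returns[400:]

trace, ppc_samples = run_model('t', returns_train,
                               returns_test=returns_test,
                               samples=500, ppc=True)
print(ppc_samples.shape)  # PPC draws; see the docstring above for the expected shape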
Example #30
    def decision_function(self, X) -> np.ndarray:
        skutilvalid.check_is_fitted(self, ['model_'])
        X = self._check_X_predict(X)
        self.X_shared_.set_value(X)
        self.y_shared_.set_value(np.zeros(X.shape[0], dtype=np.int))
        with self.model_:
            post_pred = pm.sample_ppc(trace=self.trace_, samples=self.nsamplesPredict,
                                      progressbar=False)['y_obs'].mean(axis=0)
        return post_pred
Example #31
    def test_vector_observed(self):
        with pm.Model() as model:
            mu = pm.Normal('mu', mu=0, sd=1)
            a = pm.Normal('a', mu=mu, sd=1, observed=np.array([0., 1.]))
            trace = pm.sample()

        with model:
            # test list input
            ppc0 = pm.sample_ppc([model.test_point], samples=10)
            ppc = pm.sample_ppc(trace, samples=10, vars=[])
            assert len(ppc) == 0
            ppc = pm.sample_ppc(trace, samples=10, vars=[a])
            assert 'a' in ppc
            assert ppc['a'].shape == (10, 2)

            ppc = pm.sample_ppc(trace, samples=10, vars=[a], size=4)
            assert 'a' in ppc
            assert ppc['a'].shape == (10, 4, 2)
Example #32
def FitMyModel(trainDM, PredDM):
    with pm.Model() as model:

        # partition dataframes df
        Ydf = trainDM[0]
        TXdf = trainDM[1]

        PXdf = PredDM

        ## Parameters for linear predictor
        #b0 = pm.Normal('b0',mu=0,sd=10)
        #dum_names = filter(lambda col : str(col).startswith('inegiv5name'),TXdf)
        #dumsdf = TXdf[dum_names]
        #dumshape = dumscols.shape
        #coordsdf = TXdf[['Longitude','Latitude']]

        # Create vectors for dumi vars
        #drvs = map(lambda col : pm.Normal(col,mu=0,sd=1.5),dum_names)
        ## Create theano vector
        dimX = len(TXdf.columns)
        b = pm.Normal('b', mu=0, sd=1.5, shape=dimX)
        #mk = pm.math.matrix_dot(TXdf.values,b.transpose())

        ## The latent function
        x_index = TXdf.columns.get_loc(b"Longitude")
        y_index = TXdf.columns.get_loc(b"Latitude")

        ## Building the covariance structure
        tau = pm.HalfNormal('tau', sd=10)
        sigma = pm.HalfNormal('sigma', sd=10)
        #phi = pm.Uniform('phi',0,15)
        phi = pm.HalfNormal('phi', sd=6)
        Tau = pm.gp.cov.Constant(tau)
        cov = (sigma * pm.gp.cov.Matern32(
            2, phi, active_dims=[x_index, y_index])) + Tau

        mean_f = pm.gp.mean.Linear(coeffs=b)

        gp = pm.gp.Latent(mean_func=mean_f, cov_func=cov)

        f = gp.prior("latent_field", X=TXdf.values, reparameterize=False)

        yy = pm.Bernoulli("yy", logit_p=f, observed=Ydf.values)

        #trace = pm.fit(method='advi', callbacks=[CheckParametersConvergence()],n=15000)
        trace = pm.sample(150, init='adapt_diag')
        #trace = trace.sample(draws=5000)

        # Keep only the columns that also appear in the training data
        ValidPreds = PredDM[TXdf.columns]
        PredX = ValidPreds.values

        f_star = gp.conditional("f_star", PredX)

        pred_samples = pm.sample_ppc(trace, vars=[f_star], samples=100)
        return pred_samples, trace
def test_density_dist_with_random_sampleable():
    with pm.Model() as model:
        mu = pm.Normal('mu',0,1)
        normal_dist = pm.Normal.dist(mu, 1)
        pm.DensityDist('density_dist', normal_dist.logp, observed=np.random.randn(100), random=normal_dist.random)
        trace = pm.sample(100)

    samples = 500
    ppc = pm.sample_ppc(trace, samples=samples, model=model, size=100)
    assert len(ppc['density_dist']) == samples
Example #34
    def test_sample_ppc(self, tmpdir_factory):
        directory = str(tmpdir_factory.mktemp('data'))
        save_dir = pm.save_trace(self.trace, directory, overwrite=True)

        assert save_dir == directory

        seed = 10
        np.random.seed(seed)
        with TestSaveLoad.model():
            ppc = pm.sample_ppc(self.trace)

        seed = 10
        np.random.seed(seed)
        with TestSaveLoad.model():
            trace2 = pm.load_trace(directory)
            ppc2 = pm.sample_ppc(trace2)

        for key, value in ppc.items():
            assert (value == ppc2[key]).all()
Example #37
def bayesian_t(df, val_col, grp_col='regulated',
               sig_fac=2, unif_l=0, unif_u=20,
               exp_mn=30, 
               plot_trace=False, plot_ppc=False,
               plot_vars=False, plot_diffs=True,
               steps=2000, mcmc='metropolis'):
    """ Simple Bayesian test for differences between two groups.
    
    Args:
        df         Dataframe. Must have a column containing values
                   and a categorical 'regulated' column that is [0, 1]
                   to define the two groups
        val_col    Name of the values column
        grp_col    Name of the categorical column defining the groups
        sig_fac    Factor applied to std. dev. of pooled data to define
                   prior std. dev. for group means
        unif_l     Lower bound for uniform prior on std. dev. of group
                   means
        unif_u     Upper bound for uniform prior on std. dev. of group
                   means
        exp_mn     Mean of exponential prior for v in Student-T 
                   distribution
        plot_trace Whether to plot the MCMC traces
        plot_ppc   Whether to perform and plot the Posterior Predictive
                   Check 
        plot_vars  Whether to plot posteriors for variables
        plot_diffs Whether to plot posteriors for differences
        steps      Number of steps to take in MCMC chains
        mcmc       Sampler to use: ['metropolis', 'slice', 'nuts']
    
    Returns:
        Creates plots showing the distribution of differences in 
        means and variances, plus optional diagnostics. Returns the 
        MCMC trace
    """
    import numpy as np
    import pymc3 as pm
    import pandas as pd
    import seaborn as sn
    import matplotlib.pyplot as plt

    # Get overall means and s.d.
    mean_all = df[val_col].mean()
    std_all = df[val_col].std()

    # Group data
    grpd = df.groupby(grp_col)
    
    # Separate groups
    reg_data = grpd.get_group(1)[val_col].values
    ureg_data = grpd.get_group(0)[val_col].values   

    # Setup model
    with pm.Model() as model:
        # Priors for means of Student-T dists
        reg_mean = pm.Normal('regulated_mean', mu=mean_all, sd=std_all*sig_fac)
        ureg_mean = pm.Normal('unregulated_mean', mu=mean_all, sd=std_all*sig_fac)

        # Priors for std. dev. of Student-T dists
        reg_std = pm.Uniform('regulated_std', lower=unif_l, upper=unif_u)
        ureg_std = pm.Uniform('unregulated_std', lower=unif_l, upper=unif_u)

        # Prior for v of Student-T dists
        nu = pm.Exponential('v_minus_one', 1. / (exp_mn - 1.)) + 1  # E[v] = exp_mn (rate 1/29 for the default)

        # Define Student-T dists
        # PyMC3 uses precision = 1 / (sd^2) to define dists rather than std. dev.
        reg_lam = reg_std**-2
        ureg_lam = ureg_std**-2

        reg = pm.StudentT('regulated', nu=nu, mu=reg_mean, lam=reg_lam, observed=reg_data)
        ureg = pm.StudentT('unregulated', nu=nu, mu=ureg_mean, lam=ureg_lam, observed=ureg_data)

        # Quantities of interest (difference of means and std. devs.)
        diff_of_means = pm.Deterministic('difference_of_means', reg_mean - ureg_mean)
        diff_of_stds = pm.Deterministic('difference_of_stds', reg_std - ureg_std)
        
        # Run sampler to approximate posterior
        if mcmc == 'metropolis':
            trace = pm.sample(steps, step=pm.Metropolis())
        elif mcmc == 'slice':
            trace = pm.sample(steps, step=pm.Slice())
        elif mcmc == 'nuts':
            trace = pm.sample(steps)
        else:
            raise ValueError("mcmc must be one of ['metropolis', 'slice', 'nuts']")

    # Plot results
    # Raw data
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14,4))
    
    for name, grp in grpd:
        sn.distplot(grp[val_col].values, ax=axes[name], kde=False)
        axes[name].set_title('Regulated = %s' % name)        

    # Traces
    if plot_trace:
        pm.traceplot(trace)
    
    # Posteriors for variables
    if plot_vars:
        pm.plot_posterior(trace[1000:],
                          varnames=['regulated_mean', 'unregulated_mean', 
                                    'regulated_std', 'unregulated_std'],
                          alpha=0.3)

    # Posteriors for differences
    if plot_diffs:
        pm.plot_posterior(trace[1000:],
                          varnames=['difference_of_means', 'difference_of_stds'],
                          ref_val=0,
                          alpha=0.3)
        
    # Posterior predictive check
    if plot_ppc:
        ppc = pm.sample_ppc(trace, samples=500, model=model, size=100)

        fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14,4))

        sn.distplot([n.mean() for n in ppc['unregulated']], ax=axes[0])
        axes[0].axvline(ureg_data.mean(), c='k')
        axes[0].set(title='Posterior predictive of the mean (unregulated)', 
                    xlabel='Mean', 
                    ylabel='Frequency')

        sn.distplot([n.mean() for n in ppc['regulated']], ax=axes[1])
        axes[1].axvline(reg_data.mean(), c='k')
        axes[1].set(title='Posterior predictive of the mean (regulated)', 
                    xlabel='Mean', 
                    ylabel='Frequency')
    
    return trace
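
A usage sketch with a toy dataframe; the two groups are drawn with slightly different means so the posterior of 'difference_of_means' should move away from zero:

import numpy as np
import pandas as pd

rng = np.random.RandomState(42)
df = pd.DataFrame({
    'conc': np.concatenate([rng.normal(10, 2, 50), rng.normal(12, 2, 50)]),
    'regulated': [0] * 50 + [1] * 50,
})

trace = bayesian_t(df, val_col='conc', steps=2000, mcmc='metropolis',
                   plot_trace=False, plot_ppc=True)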
Example #38
    def sample(self, name_chain, noncentered=False):
        """
        Sample from the hierarchical model
        p(mu) ~ U(0,1)
        p(phi) ~ U(0,pi)
        p(muB) ~ U(0.0, 1.0)
        p(muC) ~ U(0.0, 1.0)
        p(sigmaB) ~ HN(sd=1)
        p(sigmaC) ~ HN(sd=1)
        p(B|muB,sigmaB) ~ BoundedNormal(mu=muB, sd=sigmaB, lower=0, upper=1)
        p(C|muC,sigmaC) ~ BoundedNormal(mu=muC, sd=sigmaC, lower=0, upper=1)
        qobs ~ N(mu=q, sd=noise)
        q = f(B,C,mu,phi)

        """

        self.name_chain = name_chain

        A = 1.0

        # Define the probabilistic model
        self.model = pm.Model()
        with self.model:

            # Priors for orientation
            mu = pm.Uniform('mu', lower=0, upper=1.0, testval=0.5, shape=self.n_galaxies)
            phi = pm.Uniform('phi', lower=0, upper=np.pi / 2.0, testval=0.1, shape=self.n_galaxies)

            # Priors for means and standard deviations. Perhaps one should play a little with the
            # priors for sdB and sdC because they are usually not very well constrained by data
            muCB_ = pm.Uniform('muCB_', lower=0.0, upper=1.0, testval=[0.3, 0.8], shape=2)
            muCB = pm.Deterministic('muCB', tt.sort(muCB_))

            sdCB = pm.HalfNormal('sdCB', sd=0.05, shape=2)

            # Use a non-centered model (http://twiecki.github.io/blog/2017/02/08/bayesian-hierchical-non-centered/)
            if (noncentered):
                offset = pm.Normal('offset', mu=0, sd=1, shape=(self.n_galaxies,2))
                CB_ = pm.Deterministic('CB_', tt.clip(muCB + offset * sdCB, 0.0, 1.0))
                CB = pm.Deterministic('CB', tt.sort(CB_, axis=1))
            else:                
                bounded_normal = pm.Bound(pm.Normal, lower=0.0, upper=1.0)
                CB_ = bounded_normal('CB_', mu=muCB, sd=sdCB, testval=np.array([0.3,0.8]), shape=(self.n_galaxies,2))                        
                CB = pm.Deterministic('CB', tt.sort(CB_, axis=1))
          
            # Now that we have all ingredients, compute q
            sin_theta = tt.sqrt(1.0 - mu**2)
            f = ( A*CB[:,0]*sin_theta*tt.cos(phi) )**2 + ( CB[:,1]*CB[:,0]*sin_theta*tt.sin(phi) )**2 + ( A*CB[:,1]*mu )**2 
            g = A*A * (tt.cos(phi)**2 + mu**2 * tt.sin(phi)**2) + \
                CB[:,1]*CB[:,1] * (tt.sin(phi)**2 + mu**2 * tt.cos(phi)**2) + CB[:,0]*CB[:,0] * sin_theta**2    

            h = tt.sqrt(  (g - 2 * tt.sqrt(f)) / (g + 2 * tt.sqrt(f))  )
            q = (1 - h) / (1 + h)

            # And define the normal likelihood
            qobs = pm.Normal('qobs', mu=q, sd=self.sigmaq, observed=self.qobs, shape=self.n_galaxies)

            # Finally sample from the posterior and use a CSV backend for later plots
            db = pm.backends.Text(self.name_chain)
            self.trace = pm.sample(chains=4, trace=db)
            self.ppc = pm.sample_ppc(self.trace, samples=500, model=self.model, size=100)
plt.plot(v_params.elbo_vals)
plt.ylabel('ELBO')
plt.xlabel('iteration')


# Now that we trained our model, lets predict on the hold-out set using a posterior predictive check (PPC). We use `sample_ppc() <http://pymc-devs.github.io/pymc3/api.html#pymc3.sampling.sample_ppc>`__ to generate new data (in this case class predictions) from the posterior (sampled from the variational estimation).

# In[11]:


# Replace shared variables with testing set
ann_input.set_value(X_test)
ann_output.set_value(Y_test)

# Creater posterior predictive samples
ppc = pm.sample_ppc(trace, model=neural_network, samples=500)

# Use probability of > 0.5 to assume prediction of class 1
pred = ppc['out'].mean(axis=0) > 0.5
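
# A quick check one might add here (assuming, as in the cells above, that Y_test
# holds the true 0/1 labels): compare the thresholded PPC means against them.
print('Accuracy on the hold-out set: {:.1f}%'.format((pred == Y_test).mean() * 100))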


# In[12]:

fig, ax = plt.subplots()
ax.scatter(X_test[pred==0, 0], X_test[pred==0, 1])
ax.scatter(X_test[pred==1, 0], X_test[pred==1, 1], color='r')
sns.despine()
ax.set(title='Predicted labels in testing set', xlabel='X', ylabel='Y');


# In[13]: