コード例 #1
0
    def get_predictions(self, data=None, trace=None):
        """Return posterior predictive draws of ``p`` in long format.

        Parameters
        ----------
        data : pandas.DataFrame, optional
            Rows to predict for. Defaults to ``self.make_example_data()``.
        trace : optional
            Posterior samples to predict from. Defaults to ``self.trace``.

        Returns
        -------
        pandas.DataFrame
            One row per (data row, posterior sample) pair. The columns are
            the former index levels (the index and columns of ``data`` plus
            ``'sample'``) followed by ``'p_predicted'``.

        Raises
        ------
        Exception
            If the instance has no ``model`` attribute yet.
        """
        # Check for the model first so an unbuilt instance reports the helpful
        # message instead of failing on another missing attribute.
        if not hasattr(self, 'model'):
            raise Exception(
                'Model was not built yet! First run model.build_model()')

        if data is None:
            data = self.make_example_data()

        if trace is None:
            trace = self.trace

        model_input = self._get_model_input(data)

        with self.model:
            pm.set_data(model_input)

            posterior_predictive = pm.sample_posterior_predictive(
                trace, var_names=["p"])
            model_preds = posterior_predictive["p"]

        # Rows are posterior samples, columns are the rows of `data`.
        model_preds = pd.DataFrame(model_preds,
                                   index=pd.Index(np.arange(len(model_preds)),
                                                  name='sample'),
                                   columns=data.index)

        # `axis` must be passed by keyword: recent pandas releases no longer
        # accept it positionally in `concat`.
        model_preds = pd.concat((model_preds.T, data), axis=1)
        # Move the covariates into the index so only sample columns remain.
        model_preds = model_preds.set_index(data.columns.tolist(),
                                            append=True)
        model_preds.columns.name = 'sample'

        return model_preds.stack().to_frame('p_predicted').reset_index()
コード例 #2
0
    def test_sample_after_set_data(self):
        """Sampling again after ``pm.set_data`` must reflect the new data.

        The model is fitted once on the initial containers, both containers
        are replaced, and the model is sampled again. The posterior predictive
        mean of ``obs`` is expected to match the new ``y`` values (atol=0.1).
        """
        n_draws = 1000
        sampler_options = dict(
            init=None,
            tune=1000,
            chains=1,
            compute_convergence_checks=False,
        )

        with pm.Model() as model:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 10.0)
            pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
            pm.sample(n_draws, **sampler_options)

        # Swap in new data for both containers and refit.
        replacement = {"x": [5.0, 6.0, 9.0], "y": [5.0, 6.0, 9.0]}
        with model:
            pm.set_data(new_data=replacement)
            refit = pm.sample(n_draws, **sampler_options)
            predictive = pm.sample_posterior_predictive(refit, n_draws)

        obs_draws = predictive["obs"]
        assert obs_draws.shape == (1000, 3)
        np.testing.assert_allclose(replacement["y"],
                                   obs_draws.mean(axis=0),
                                   atol=1e-1)
コード例 #3
0
    def test_shared_data_as_index(self):
        """
        pm.Data containers may hold integer index values as well as floats.
        See https://github.com/pymc-devs/pymc3/issues/3813
        """
        n_draws = 1000
        expected_shape = (1000, 3)

        with pm.Model() as model:
            index = pm.Data("index", [2, 0, 1, 0, 2])
            y = pm.Data("y", [1.0, 2.0, 3.0, 2.0, 1.0])
            alpha = pm.Normal("alpha", 0, 1.5, shape=3)
            pm.Normal("obs", alpha[index], np.sqrt(1e-2), observed=y)

            prior_draws = pm.sample_prior_predictive(n_draws,
                                                     var_names=["alpha"])
            posterior = pm.sample(n_draws, init=None, tune=1000, chains=1)

        # Replace both the index container and the observations, then predict
        # with the regular and the fast posterior predictive samplers.
        replacement = {"index": np.array([0, 1, 2]), "y": [5.0, 6.0, 9.0]}
        with model:
            pm.set_data(new_data=replacement)
            regular = pm.sample_posterior_predictive(
                posterior, n_draws, var_names=["alpha", "obs"])
            fast = pm.fast_sample_posterior_predictive(
                posterior, n_draws, var_names=["alpha", "obs"])

        assert prior_draws["alpha"].shape == expected_shape
        assert posterior["alpha"].shape == expected_shape
        for predictive in (regular, fast):
            assert predictive["alpha"].shape == expected_shape
            assert predictive["obs"].shape == expected_shape
コード例 #4
0
    def test_shared_data_as_rv_input(self):
        """
        pm.Data containers can be used as parameters of other random variables.
        See https://github.com/pymc-devs/pymc3/issues/3842
        """
        initial = np.array([1.0, 2.0, 3.0])
        updated = np.array([2.0, 4.0, 6.0])

        with pm.Model() as m:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            _ = pm.Normal("y", mu=x, shape=3)
            trace = pm.sample(chains=1)

        # Both the stored data and the sampled mean of `y` follow the initial values.
        np.testing.assert_allclose(initial, x.get_value(), atol=1e-1)
        np.testing.assert_allclose(initial, trace["y"].mean(0), atol=1e-1)

        with m:
            pm.set_data({"x": updated.copy()})
            trace = pm.sample(chains=1)

        # After the update, both must follow the replacement values.
        np.testing.assert_allclose(updated, x.get_value(), atol=1e-1)
        np.testing.assert_allclose(updated, trace["y"].mean(0), atol=1e-1)
コード例 #5
0
 def test_set_data_to_non_data_container_variables(self):
     """``pm.set_data`` must reject names that are not data containers.

     The model is built from plain NumPy arrays, so ``beta`` is a random
     variable. Setting data on it is expected to raise ``TypeError`` with
     the message checked below.
     """
     with pm.Model() as model:
         predictor = np.array([1.0, 2.0, 3.0])
         outcome = np.array([1.0, 2.0, 3.0])
         beta = pm.Normal("beta", 0, 10.0)
         pm.Normal("obs", beta * predictor, np.sqrt(1e-2), observed=outcome)
         pm.sample(1000, init=None, tune=1000, chains=1)

     expected_message = "defined as `pymc3.Data` inside the model"
     with pytest.raises(TypeError, match=expected_message):
         pm.set_data({"beta": [1.1, 2.2, 3.3]}, model=model)
コード例 #6
0
def get_count_percentiles(
    counts: ndarray,
    percentiles: Sequence[float],
    lower_mu: float,
    upper_mu: float,
    num_samples: int = 2000,
) -> ndarray:
    """
    For each count in an array of counts, this function:
    * fits a poisson model to the count
    * calculates the requested percentiles of ``mu`` from the trace

    This is useful for eg. plotting a graph that gives a notion of confidence;
    we can plot the 25, 50, 75 percentile to easily do this

    Parameters
    ----------

    counts :
        1 - dimensional array of counts
    percentiles :
        sequence of percentiles to be calculated
    lower_mu :
        lower bound for uniform mu prior
    upper_mu :
        upper bound for uniform mu prior
    num_samples :
        samples to extract from the mcmc chain

    Returns
    -------

    ndarray
        (num_percentiles, num_count) - dimensional array. For empty
        ``counts`` an empty array with that layout is returned and no
        model is built or sampled.
    """

    if np.size(counts) == 0:
        # Nothing to fit. Keep the documented (num_percentiles, num_count)
        # layout rather than collapsing to a shape-(0,) array.
        return np.empty(np.shape(percentiles) + (0,))

    model = make_poisson_model()
    per_count = []

    for count in counts:
        with model:
            # The model is reused; only its data containers change per count.
            pm.set_data(
                {
                    "lower_mu": lower_mu,
                    "upper_mu": upper_mu,
                    "counts": [count],
                }
            )
            trace = pm.sample(num_samples, progressbar=False)
        per_count.append(np.percentile(trace["mu"], percentiles))

    # Rows were collected per count; transpose to put percentiles first.
    return np.array(per_count).transpose()
コード例 #7
0
def production_step1():
    """Load the test data into ``neural_network`` and draw predictions.

    Uses ``X_test``, ``Y_test``, ``neural_network`` and ``trace``, none of
    which are defined in this function, so they resolve from the enclosing
    scope. Draws 500 posterior predictive samples without a progress bar.
    """
    held_out = {'ann_input': X_test, 'ann_output': Y_test}
    pm.set_data(new_data=held_out, model=neural_network)

    predictive = pm.sample_posterior_predictive(
        trace,
        samples=500,
        progressbar=False,
        model=neural_network,
    )

    # A mean of 'out' above 0.5 across draws is read as class 1.
    # NOTE(review): `pred` is neither returned nor stored, so the function
    # returns None. Confirm whether a `return pred` was intended.
    mean_out = predictive['out'].mean(axis=0)
    pred = mean_out > 0.5
コード例 #8
0
ファイル: mlda.py プロジェクト: AlexAndorra/pymc3
    def update_error_estimate(self, accepted, skipped_logp):
        """Refresh the adaptive error model and push it to the level below.

        When a sample was accepted (and its logp was not skipped), the
        difference between this level's model output and the output of the
        model below is stored as the latest error. Once at least one such
        error exists, every call feeds the stored error into ``self.bias``
        and rewrites the ``mu_B`` and ``Sigma_B`` data of ``self.model_below``.
        """
        # Errors are recorded only for accepted samples whose logp was evaluated.
        if accepted and not skipped_logp:
            current_output = self.model.model_output.get_value()
            below_output = self.model_below.model_output.get_value()
            # Forward model output difference between this level and the one below.
            self.last_synced_output_diff = current_output - below_output
            self.adaptation_started = True

        if not self.adaptation_started:
            return

        # Feed the most recently stored error into the recursive bias estimator.
        self.bias.update(self.last_synced_output_diff)

        # mu_B / Sigma_B of the level below are the sums of the bias
        # corrections over the leading entries of `bias_all` selected here.
        n_contributing = len(self.bias_all) - self.num_levels + 2
        contributing = self.bias_all[:n_contributing]
        with self.model_below:
            pm.set_data({"mu_B": sum(b.get_mu() for b in contributing)})
            pm.set_data({"Sigma_B": sum(b.get_sigma() for b in contributing)})
コード例 #9
0
 def test_set_data_to_non_data_container_variables(self):
     """``pm.set_data`` must reject names that are not shared variables.

     The model is built from plain NumPy arrays, so ``beta`` is a random
     variable. Setting data on it is expected to raise ``TypeError`` with
     the message checked below.
     """
     sampler_options = dict(
         init=None,
         tune=1000,
         chains=1,
         compute_convergence_checks=False,
     )
     with pm.Model() as model:
         predictor = np.array([1.0, 2.0, 3.0])
         outcome = np.array([1.0, 2.0, 3.0])
         beta = pm.Normal("beta", 0, 10.0)
         pm.Normal("obs", beta * predictor, np.sqrt(1e-2), observed=outcome)
         pm.sample(1000, **sampler_options)

     expected_message = "The variable `beta` must be a `SharedVariable`"
     with pytest.raises(TypeError, match=expected_message):
         pm.set_data({"beta": [1.1, 2.2, 3.3]}, model=model)
コード例 #10
0
def pp_test(mod_name, trace, dct, params):
    """Draw posterior predictive samples after loading new data into a model.

    Args:
        mod_name: The model object itself; it is entered as a context manager.
        trace: Posterior samples passed to ``pm.sample_posterior_predictive``.
        dct (dict): Mapping passed to ``pm.set_data`` (e.g. new x values).
        params (list): Variable names to draw (e.g. alpha, beta, y_pred).

    Returns:
        The result of ``pm.sample_posterior_predictive`` restricted to ``params``.
    """
    with mod_name:
        # Replace the stored data first so the draws are made for `dct`.
        pm.set_data(dct)
        return pm.sample_posterior_predictive(trace, var_names=params)
コード例 #11
0
ファイル: phe_old.py プロジェクト: mkefly/Simulations_COVID19
    def sample_posterior_predictive_model(self, method = 'log-model', field = 'deaths', samples = 1000, number_days = 100, **kwargs):
        """Sample the posterior predictive for every country over ``number_days`` days.

        Calls ``self.sample_model`` with ``method``, ``field`` and ``kwargs``,
        resets ``self.post_preds`` to a dict holding only ``method``, and stores
        one posterior predictive result per entry of ``self.countries`` under
        ``self.post_preds[method][country]``.

        Returns:
            ``self.post_preds``.
        """
        # Only the first element of the returned pair is kept.
        models, _ = self.sample_model(method = method, field = field, **kwargs)
        self.post_preds = {}
        self.post_preds[method] = {}
        for country in self.countries:
            # NOTE(review): `model` is never assigned in this method (only
            # `models` is). Unless a global `model` exists this raises
            # NameError. Presumably a per-country entry of `models` was
            # intended; confirm against `sample_model`.
            with model:

                # Update data so that we get predictions into the future
                x_data = np.arange(0, number_days)
                # All-NaN placeholder for "y", same length as x_data.
                y_data = np.array([np.nan] * len(x_data))
                pm.set_data({"x": x_data})
                pm.set_data({"y": y_data})

                # Sample posterior predictive
                self.post_preds[method][country] = pm.sample_posterior_predictive(self.traces[method][country], samples = samples)
        return self.post_preds
コード例 #12
0
    def test_sample_posterior_predictive_after_set_data(self):
        """Posterior predictive draws must use data replaced via ``pm.set_data``.

        Only ``x`` is replaced after fitting. The posterior predictive mean of
        ``obs`` is then expected to match the new ``x`` values (atol=0.1).
        """
        with pm.Model() as model:
            x = pm.Data('x', [1., 2., 3.])
            y = pm.Data('y', [1., 2., 3.])
            beta = pm.Normal('beta', 0, 10.)
            pm.Normal('obs', beta * x, np.sqrt(1e-2), observed=y)
            posterior = pm.sample(1000, tune=1000, chains=1)

        # Predict on new predictor values.
        x_test = [5, 6, 9]
        with model:
            pm.set_data(new_data={'x': x_test})
            predictive = pm.sample_posterior_predictive(posterior)

        obs_draws = predictive['obs']
        assert obs_draws.shape == (1000, 3)
        np.testing.assert_allclose(x_test, obs_draws.mean(axis=0), atol=1e-1)
コード例 #13
0
    def test_poisson_model(self):
        """Fit the Poisson model to twenty counts of 100 and check ``mu``.

        The 5th and 95th percentiles of ``mu`` must lie on either side of 100,
        and the median must be within 10 of it.
        """
        model = make_poisson_model()
        inputs = {
            "lower_mu": 0,
            "upper_mu": 1000,
            "counts": np.array([100] * 20),
        }

        with model:
            pm.set_data(inputs)
            posterior = pm.sample(5000)

        low, median, high = np.percentile(posterior["mu"], [5, 50, 95])

        self.assertTrue(low < 100)
        self.assertTrue(high > 100)
        self.assertTrue(abs(median - 100) < 10)
コード例 #14
0
    def test_shared_data_as_rv_input(self):
        """
        pm.Data containers can be used as parameters of other random variables.
        See https://github.com/pymc-devs/pymc3/issues/3842
        """
        averaged_dims = ("chain", "draw", "y_dim_0")
        sampler_options = dict(
            chains=1,
            tune=500,
            return_inferencedata=True,
            compute_convergence_checks=False,
        )
        initial = np.array([1.0, 2.0, 3.0])
        updated = np.array([2.0, 4.0, 6.0])

        with pm.Model() as m:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Normal("y", mu=x, size=(2, 3))
            assert y.eval().shape == (2, 3)
            idata = pm.sample(draws=550, **sampler_options)
        samples = idata.posterior["y"]
        assert samples.shape == (1, 550, 2, 3)

        # Stored data and the per-column sample mean both follow the initial values.
        np.testing.assert_allclose(initial, x.get_value(), atol=1e-1)
        np.testing.assert_allclose(initial,
                                   samples.mean(averaged_dims),
                                   atol=1e-1)

        with m:
            pm.set_data({"x": updated.copy()})
            # Replacing the data must not change the shape of `y`.
            assert y.eval().shape == (2, 3)
            idata = pm.sample(draws=620, **sampler_options)
        samples = idata.posterior["y"]
        assert samples.shape == (1, 620, 2, 3)

        np.testing.assert_allclose(updated, x.get_value(), atol=1e-1)
        np.testing.assert_allclose(updated,
                                   samples.mean(averaged_dims),
                                   atol=1e-1)
コード例 #15
0
    def test_sample_after_set_data(self):
        """Sampling again after ``pm.set_data`` must reflect the new data.

        Both containers are replaced after the first fit and the model is
        sampled again with default settings. The posterior predictive mean of
        ``obs`` is expected to match the new ``y`` values (atol=0.1).
        """
        with pm.Model() as model:
            x = pm.Data('x', [1., 2., 3.])
            y = pm.Data('y', [1., 2., 3.])
            beta = pm.Normal('beta', 0, 10.)
            pm.Normal('obs', beta * x, np.sqrt(1e-2), observed=y)
            pm.sample(1000, init=None, tune=1000, chains=1)

        # Swap in new data for both containers and refit.
        replacement = {'x': [5, 6, 9], 'y': [5, 6, 9]}
        with model:
            pm.set_data(new_data=replacement)
            refit = pm.sample()
            predictive = pm.sample_posterior_predictive(refit, 1000)

        obs_draws = predictive['obs']
        assert obs_draws.shape == (1000, 3)
        np.testing.assert_allclose(replacement['y'],
                                   obs_draws.mean(axis=0),
                                   atol=1e-1)
コード例 #16
0
    def test_sample_posterior_predictive_after_set_data(self):
        """Both predictive samplers must use data replaced via ``pm.set_data``.

        Only ``x`` is replaced after fitting. The regular and the fast
        posterior predictive samplers are each expected to give ``obs`` draws
        whose mean matches the new ``x`` values (atol=0.1).
        """
        with pm.Model() as model:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 10.0)
            pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
            posterior = pm.sample(1000, tune=1000, chains=1)

        # Predict on new predictor values.
        x_test = [5, 6, 9]
        with model:
            pm.set_data(new_data={"x": x_test})
            regular = pm.sample_posterior_predictive(posterior)
            fast = pm.fast_sample_posterior_predictive(posterior)

        results = (regular, fast)
        for predictive in results:
            assert predictive["obs"].shape == (1000, 3)
        for predictive in results:
            np.testing.assert_allclose(x_test,
                                       predictive["obs"].mean(axis=0),
                                       atol=1e-1)
コード例 #17
0
    def sample_mod(
        self,
        posterior_draws=2000,
        post_pred_draws=1000,
        prior_pred_draws=1000,
        random_seed=42,
        chains=2,
    ):
        """Sample posterior, posterior predictive and prior predictive, then predict on test data.

        The draws are stored on the instance (``trace``, ``post_pred``,
        ``prior_pred``) and bundled into ``self.m_idata``. The test inputs are
        then loaded into the shared containers and predictions for them are
        added to ``self.m_idata`` in place.

        Args:
            posterior_draws (int, optional): Number of posterior draws. Defaults to 2000
                (flagged as "not enough" by the original author).
            post_pred_draws (int, optional): Number of posterior predictive draws. Defaults to 1000.
            prior_pred_draws (int, optional): Number of prior predictive draws. Defaults to 1000.
            random_seed (int, optional): Seed passed to the posterior sampler. Defaults to 42.
            chains (int, optional): Number of chains for the posterior sampler. Defaults to 2.

        Example:
            Pc.sample_mod(posterior_draws = 3000, post_pred_draws = 1500, prior_pred_draws = 55, random_seed = 13, chains = 4)
        """
        # Keep the draw counts on the instance for later use.
        self.posterior_draws = posterior_draws
        self.post_pred_draws = post_pred_draws
        self.prior_pred_draws = prior_pred_draws

        with self.model:
            self.trace = pm.sample(
                draws=posterior_draws,
                chains=chains,
                random_seed=random_seed,
                target_accept=.99,
                return_inferencedata=False,
            )
            self.post_pred = pm.sample_posterior_predictive(
                self.trace, samples=post_pred_draws)
            self.prior_pred = pm.sample_prior_predictive(
                samples=prior_pred_draws)
            self.m_idata = az.from_pymc3(
                trace=self.trace,
                posterior_predictive=self.post_pred,
                prior=self.prior_pred,
            )

        # Shared containers paired with the instance attributes holding test inputs.
        shared_to_attr = (
            ("t1_shared", "t1_test"),
            ("t2_shared", "t2_test"),
            ("idx_shared", "idx_test"),
        )
        with self.model:
            for shared_name, attr_name in shared_to_attr:
                pm.set_data({shared_name: getattr(self, attr_name)})
            pm.set_data({"t3_shared": np.array(self.t3_test)})

            predictions = pm.fast_sample_posterior_predictive(
                self.m_idata.posterior
            )
            # inplace=True attaches the predictions to the existing inference data.
            az.from_pymc3_predictions(
                predictions,
                idata_orig=self.m_idata,
                coords={'idx': self.test[self.index].values},
                inplace=True,
            )
コード例 #18
0
def test(neural_network,
         approx,
         X_test,
         Y_test,
         ppc_file,
         trace_samples=5000,
         pred_samples=5000):
    """Draw posterior predictive samples on test data and pickle them.

    Samples ``trace_samples`` draws from ``approx``, loads ``X_test`` and
    ``Y_test`` into the ``ann_input`` / ``ann_output`` data of
    ``neural_network``, draws ``pred_samples`` posterior predictive samples,
    writes them to ``ppc_file`` with pickle and returns them.
    """
    posterior = approx.sample(draws=trace_samples)

    held_out = {'ann_input': X_test, 'ann_output': Y_test}
    pm.set_data(new_data=held_out, model=neural_network)

    ppc = pm.sample_posterior_predictive(
        posterior,
        samples=pred_samples,
        progressbar=True,
        model=neural_network,
    )

    with open(ppc_file, "wb") as sink:
        pickle.dump(ppc, sink, protocol=pickle.HIGHEST_PROTOCOL)

    return ppc
コード例 #19
0
 def predict(self):
     """Load the test inputs into the model and add predictions to ``self.m_idata``.

     The shared containers ``t1_shared``, ``t2_shared``, ``idx_shared`` and
     ``t3_shared`` receive the matching ``*_test`` attributes. Predictions
     are then drawn from the stored posterior and attached in place.
     """
     # TODO (original author): make this work for only one.
     shared_to_attr = (
         ("t1_shared", "t1_test"),
         ("t2_shared", "t2_test"),
         ("idx_shared", "idx_test"),
     )
     with self.model:
         for shared_name, attr_name in shared_to_attr:
             pm.set_data({shared_name: getattr(self, attr_name)})
         pm.set_data({"t3_shared": np.array(self.t3_test)})

         predictions = pm.fast_sample_posterior_predictive(
             self.m_idata.posterior
         )
         # inplace=True attaches the predictions to the existing inference data.
         az.from_pymc3_predictions(
             predictions,
             idata_orig=self.m_idata,
             coords={'idx': self.test[self.index].values},
             inplace=True,
         )
コード例 #20
0
ファイル: richards_pymc3.py プロジェクト: jorivero83/covid19
            """.format(country, *f_values)
    print(txt)

    az.plot_trace(trace, compact=True)
    plt.savefig('results/{}/trace_plot.png'.format(country))
    az.plot_posterior(trace)
    plt.savefig('results/{}/posterior_plot.png'.format(country))

    # ========== Compute predictions =============

    h = 7  # number points to prediction ahead
    with richards_model_final:
        # Update data so that we get predictions into the future
        x_data = np.arange(0, len(y_values) + h)
        y_data = np.array([np.nan] * len(x_data))
        pm.set_data({"x_data": x_data})
        pm.set_data({"y_data": y_data})

        # Sample posterior predictive
        post_pred_final = pm.sample_posterior_predictive(trace, samples=100)

    y_min_final = np.percentile(post_pred_final['y'], 2.5, axis=0)
    y_max_final = np.percentile(post_pred_final['y'], 97.5, axis=0)
    y_fit_final = np.percentile(post_pred_final['y'], 50, axis=0)
    dy_fit_final = np.percentile(trace['rate'], 50, axis=0) * y_fit_final * (
        1 - (y_fit_final / np.percentile(trace['K'], 50, axis=0))**
        np.percentile(trace['a'], 50, axis=0))

    # Plot prediction of comulative cases
    #ymax_limit = max(max(y_fit_final), df.acumulado.astype('float64').max()) * 1.10
    yref_ycoord_0 = min(np.median(y_fit_final), df.acumulado.median()) * 0.6
コード例 #21
0
"""After fitting the Bayesian Neural Network on 5000 samples, I've drawn 100 more based on which there were done predictions for the training examples. On the training set, the model scored 0.96 accuracy, while on the test set the same model previously trained scored 0.97.
(see figure 1 attached in the email). 

* The results are taken from running this code locally.

Predict labels for the training set.
"""

# Draw 100 posterior predictive samples from the trace `mcmc` for `neural_network`.
# NOTE(review): `pm.sample_ppc` appears to be the older PyMC3 name for
# `pm.sample_posterior_predictive`. Confirm the installed version provides it.
predictions = pm.sample_ppc(mcmc, model=neural_network, samples=100) 
y_pred = predictions['out']

# NOTE(review): only `y_pred[99]` is scored, presumably the last of the 100
# draws. Confirm that the first axis of `y_pred` indexes draws.
print ("[MCMC] Train set accuracy binary classification:", accuracy_score(y_train, y_pred[99]))

"""Predict labels for the testing set."""

# Replace the model data stored under 'input_data' / 'output_data' with the
# test split, then draw another 100 posterior predictive samples.
pm.set_data(new_data={'input_data': X_test, 'output_data': y_test}, model=neural_network)
predictions = pm.sample_ppc(mcmc, samples=100, model=neural_network)
y_pred = predictions['out']

# Scored against `y_test` using the same single draw (index 99) as above.
print ("[MCMC] Test set accuracy binary classification:", accuracy_score(y_test, y_pred[99]))

"""### Sanity Check

The following plots represent the posterior values for the weights between the input layer and the hidden layer, and the hidden layer and the output unit. It can be noticed that those values finely represent the normal distribution that was implied at the beginning.

Each color represents one of the units from the hidden layer, thus 8 colors.
"""

# Histogram of the `w_in_1` values stored at index 999 of the trace `mcmc`.
# NOTE(review): the trailing `[:][:]` slices are presumably no-ops (a full
# slice of a full slice). Confirm that index 999 exists for the trace length
# in use.
plt.figure() 
plt.hist(mcmc['w_in_1'][999][:][:])
plt.title("Posteriori of w_in_1")
コード例 #22
0
def bayesian_model_comparison(df):
    # Preprocess
    df["log_v"] = log_electricity = np.log(df["total_electricity"]).values
    total_electricity = df.total_electricity.values

    # Create local variables (assign daypart, cluster and weekday values need to start from 0)
    # clusters are use profile categories, heat_clusters and cool_clusters indicate days having similar
    # temperature dependence (likely to modify this in the new version of the preprocessing)

    df.t = pd.to_datetime(pd.Series(df.t))
    df.s = df.s - 1
    df.weekday = df.weekday - 1
    clusters = df.s
    unique_clusters = clusters.unique()
    dayparts = df.daypart
    weekdays = df.weekday
    unique_dayparts = dayparts.unique()
    unique_weekdays = weekdays.unique()
    n_hours = len(df.index)
    outdoor_temp_c = df.outdoor_temp_c
    outdoor_temp_h = df.outdoor_temp_h
    outdoor_temp_lp_c = df.outdoor_temp_lp_c
    outdoor_temp_lp_h = df.outdoor_temp_lp_h
    daypart_fs_sin_1 = df.daypart_fs_sin_1
    daypart_fs_sin_2 = df.daypart_fs_sin_2
    daypart_fs_sin_3 = df.daypart_fs_sin_3
    daypart_fs_cos_1 = df.daypart_fs_cos_1
    daypart_fs_cos_2 = df.daypart_fs_cos_2
    daypart_fs_cos_3 = df.daypart_fs_cos_3

    # create coords for pymc3
    coords = {"obs_id": np.arange(total_electricity.size)}
    coords["profile_cluster"] = unique_clusters
    coords["daypart"] = unique_dayparts
    coords["weekday"] = unique_weekdays

    # Create kfold cross-validation splits

    kf = KFold(n_splits=5)
    kf.get_n_splits(df)

    # Create arrays to save model results
    partial_pool_cvrmse_list = []
    no_pool_cvrmse_list = []
    complete_pool_cvrmse_list = []

    partial_pool_coverage_list = []
    no_pool_coverage_list = []
    complete_pool_coverage_list = []

    for train_index, test_index in kf.split(df):
        coords = {"obs_id": np.arange(total_electricity[train_index].size)}
        coords["profile_cluster"] = unique_clusters
        coords["daypart"] = unique_dayparts
        coords["weekday"] = unique_weekdays

        # Partial Pooling

        with pm.Model(coords=coords) as partial_pooling:
            profile_cluster_idx = pm.Data("profile_cluster_idx",
                                          clusters[train_index],
                                          dims="obs_id")
            daypart = pm.Data("daypart", dayparts[train_index], dims="obs_id")
            weekday = pm.Data("weekday", weekdays[train_index], dims="obs_id")

            fs_sin_1 = pm.Data("fs_sin_1",
                               daypart_fs_sin_1[train_index],
                               dims="obs_id")
            fs_sin_2 = pm.Data("fs_sin_2",
                               daypart_fs_sin_2[train_index],
                               dims="obs_id")
            fs_sin_3 = pm.Data("fs_sin_3",
                               daypart_fs_sin_3[train_index],
                               dims="obs_id")

            fs_cos_1 = pm.Data("fs_cos_1",
                               daypart_fs_cos_1[train_index],
                               dims="obs_id")
            fs_cos_2 = pm.Data("fs_cos_2",
                               daypart_fs_cos_2[train_index],
                               dims="obs_id")
            fs_cos_3 = pm.Data("fs_cos_3",
                               daypart_fs_cos_3[train_index],
                               dims="obs_id")

            # cooling_temp = pm.Data("cooling_temp", outdoor_temp_c[train_index], dims="obs_id")
            # heating_temp = pm.Data("heating_temp", outdoor_temp_h[train_index], dims="obs_id")
            cooling_temp_lp = pm.Data("cooling_temp_lp",
                                      outdoor_temp_lp_c[train_index],
                                      dims="obs_id")
            heating_temp_lp = pm.Data("heating_temp_lp",
                                      outdoor_temp_lp_h[train_index],
                                      dims="obs_id")

            # Hyperpriors:
            bf = pm.Normal("bf", mu=0.0, sigma=1.0)
            sigma_bf = pm.Exponential("sigma_bf", 1.0)
            a = pm.Normal("a", mu=0.0, sigma=1.0)
            sigma_a = pm.Exponential("sigma_a", 1.0)

            # btc = pm.Normal("btc", mu=0.0, sigma=1.0, dims="daypart")
            # bth = pm.Normal("bth", mu=0.0, sigma=1.0, dims="daypart")

            btclp = pm.Normal("btclp", mu=0.0, sigma=1.0, dims="daypart")
            bthlp = pm.Normal("bthlp", mu=0.0, sigma=1.0, dims="daypart")

            # Varying intercepts
            a_cluster = pm.Normal("a_cluster",
                                  mu=a,
                                  sigma=sigma_a,
                                  dims=("daypart", "profile_cluster"))

            # Varying slopes:
            bs1 = pm.Normal("bs1",
                            mu=bf,
                            sigma=sigma_bf,
                            dims=("profile_cluster"))
            bs2 = pm.Normal("bs2",
                            mu=bf,
                            sigma=sigma_bf,
                            dims=("profile_cluster"))
            bs3 = pm.Normal("bs3",
                            mu=bf,
                            sigma=sigma_bf,
                            dims=("profile_cluster"))

            bc1 = pm.Normal("bc1",
                            mu=bf,
                            sigma=sigma_bf,
                            dims=("profile_cluster"))
            bc2 = pm.Normal("bc2",
                            mu=bf,
                            sigma=sigma_bf,
                            dims=("profile_cluster"))
            bc3 = pm.Normal("bc3",
                            mu=bf,
                            sigma=sigma_bf,
                            dims=("profile_cluster"))

            # Expected value per county:
            mu = a_cluster[daypart, profile_cluster_idx] + bs1[profile_cluster_idx] * fs_sin_1 + \
                 bs2[profile_cluster_idx] * fs_sin_2 + bs3[profile_cluster_idx] * fs_sin_3 + \
                 bc1[profile_cluster_idx] * fs_cos_1 + bc2[profile_cluster_idx] * fs_cos_2 + \
                 bc3[profile_cluster_idx] * fs_cos_3 + \
                 btclp[daypart] * cooling_temp_lp + \
                 bthlp[daypart] * heating_temp_lp
            # btc[daypart] * cooling_temp + bth[daypart] * heating_temp + \

            # Model error:
            sigma = pm.Exponential("sigma", 1.0)

            # Likelihood
            y = pm.Normal("y",
                          mu,
                          sigma=sigma,
                          observed=log_electricity[train_index],
                          dims="obs_id")

        # Fitting
        with partial_pooling:
            approx = pm.fit(
                n=50000,
                method='fullrank_advi',
                callbacks=[CheckParametersConvergence(tolerance=0.01)])
            partial_pooling_trace = approx.sample(1000)

        # Sampling from the posterior setting test data to check the predictions on unseen data

        with partial_pooling:
            pm.set_data({
                "profile_cluster_idx": clusters[test_index],
                "daypart": dayparts[test_index],  # "weekday":weekdays,
                "fs_sin_1": daypart_fs_sin_1[test_index],
                "fs_sin_2": daypart_fs_sin_2[test_index],
                "fs_sin_3": daypart_fs_sin_3[test_index],
                "fs_cos_1": daypart_fs_cos_1[test_index],
                "fs_cos_2": daypart_fs_cos_2[test_index],
                "fs_cos_3": daypart_fs_cos_3[test_index],
                # "cooling_temp":outdoor_temp_c, "heating_temp": outdoor_temp_h,
                "cooling_temp_lp": outdoor_temp_lp_c[test_index],
                "heating_temp_lp": outdoor_temp_lp_h[test_index]
            })

            partial_pool_posterior_hdi = pm.sample_posterior_predictive(
                partial_pooling_trace, keep_size=True)
            partial_pool_posterior = pm.sample_posterior_predictive(
                partial_pooling_trace)
            partial_pool_prior = pm.sample_prior_predictive(150)

        # Calculate predictions and HDI

        partial_pool_predictions = np.exp(partial_pool_posterior['y'].mean(0))
        hdi_data = az.hdi(partial_pool_posterior_hdi)
        partial_pool_lower_bound = np.array(
            np.exp(hdi_data.to_array().sel(hdi='lower'))).flatten()
        partial_pool_higher_bound = np.array(
            np.exp(hdi_data.to_array().sel(hdi='higher'))).flatten()

        # Calculate cvrmse and coverage of the HDI
        partial_pool_mse = mean_squared_error(df.total_electricity[test_index],
                                              partial_pool_predictions)
        partial_pool_rmse = sqrt(partial_pool_mse)
        partial_pool_cvrmse = partial_pool_rmse / df.total_electricity.mean()
        partial_pool_coverage = sum(
            (partial_pool_lower_bound <= df.total_electricity[test_index])
            & (df.total_electricity[test_index] <= partial_pool_higher_bound)
        ) * 100 / len(test_index)

        partial_pool_cvrmse_list.append(partial_pool_cvrmse)
        partial_pool_coverage_list.append(partial_pool_coverage)

        # No Pooling

        with pm.Model(coords=coords) as no_pooling:
            profile_cluster_idx = pm.Data("profile_cluster_idx",
                                          clusters[train_index],
                                          dims="obs_id")
            daypart = pm.Data("daypart", dayparts[train_index], dims="obs_id")
            weekday = pm.Data("weekday", weekdays[train_index], dims="obs_id")

            fs_sin_1 = pm.Data("fs_sin_1",
                               daypart_fs_sin_1[train_index],
                               dims="obs_id")
            fs_sin_2 = pm.Data("fs_sin_2",
                               daypart_fs_sin_2[train_index],
                               dims="obs_id")
            fs_sin_3 = pm.Data("fs_sin_3",
                               daypart_fs_sin_3[train_index],
                               dims="obs_id")

            fs_cos_1 = pm.Data("fs_cos_1",
                               daypart_fs_cos_1[train_index],
                               dims="obs_id")
            fs_cos_2 = pm.Data("fs_cos_2",
                               daypart_fs_cos_2[train_index],
                               dims="obs_id")
            fs_cos_3 = pm.Data("fs_cos_3",
                               daypart_fs_cos_3[train_index],
                               dims="obs_id")

            # cooling_temp = pm.Data("cooling_temp", outdoor_temp_c[train_index], dims="obs_id")
            # heating_temp = pm.Data("heating_temp", outdoor_temp_h[train_index], dims="obs_id")
            cooling_temp_lp = pm.Data("cooling_temp_lp",
                                      outdoor_temp_lp_c[train_index],
                                      dims="obs_id")
            heating_temp_lp = pm.Data("heating_temp_lp",
                                      outdoor_temp_lp_h[train_index],
                                      dims="obs_id")

            # Priors:
            a_cluster = pm.Normal("a_cluster",
                                  mu=0.0,
                                  sigma=1.0,
                                  dims=("daypart", "profile_cluster"))
            btclp = pm.Normal("btclp", mu=0.0, sigma=1.0, dims="daypart")
            bthlp = pm.Normal("bthlp", mu=0.0, sigma=1.0, dims="daypart")

            bs1 = pm.Normal("bs1", mu=0.0, sigma=1.0, dims="profile_cluster")
            bs2 = pm.Normal("bs2", mu=0.0, sigma=1.0, dims="profile_cluster")
            bs3 = pm.Normal("bs3", mu=0.0, sigma=1.0, dims="profile_cluster")
            bc1 = pm.Normal("bc1", mu=0.0, sigma=1.0, dims="profile_cluster")
            bc2 = pm.Normal("bc2", mu=0.0, sigma=1.0, dims="profile_cluster")
            bc3 = pm.Normal("bc3", mu=0.0, sigma=1.0, dims="profile_cluster")

            # Expected value per county:
            mu = a_cluster[daypart, profile_cluster_idx] + bs1[profile_cluster_idx] * fs_sin_1 + \
                 bs2[profile_cluster_idx] * fs_sin_2 + bs3[profile_cluster_idx] * fs_sin_3 + \
                 bc1[profile_cluster_idx] * fs_cos_1 + bc2[profile_cluster_idx] * fs_cos_2 + \
                 bc3[profile_cluster_idx] * fs_cos_3 + \
                 btclp[daypart] * cooling_temp_lp + \
                 bthlp[daypart] * heating_temp_lp
            # btc[daypart] * cooling_temp + bth[daypart] * heating_temp + \

            # Model error:
            sigma = pm.Exponential("sigma", 1.0)

            # Likelihood
            y = pm.Normal("y",
                          mu,
                          sigma=sigma,
                          observed=log_electricity[train_index],
                          dims="obs_id")

        # Fitting

        with no_pooling:
            approx = pm.fit(
                n=50000,
                method='fullrank_advi',
                callbacks=[CheckParametersConvergence(tolerance=0.01)])
            no_pooling_trace = approx.sample(1000)

            # Sampling from the posterior setting test data to check the predictions on unseen data

        with no_pooling:
            pm.set_data({
                "profile_cluster_idx": clusters[test_index],
                "daypart": dayparts[test_index],  # "weekday":weekdays,
                "fs_sin_1": daypart_fs_sin_1[test_index],
                "fs_sin_2": daypart_fs_sin_2[test_index],
                "fs_sin_3": daypart_fs_sin_3[test_index],
                "fs_cos_1": daypart_fs_cos_1[test_index],
                "fs_cos_2": daypart_fs_cos_2[test_index],
                "fs_cos_3": daypart_fs_cos_3[test_index],
                # "cooling_temp":outdoor_temp_c, "heating_temp": outdoor_temp_h,
                "cooling_temp_lp": outdoor_temp_lp_c[test_index],
                "heating_temp_lp": outdoor_temp_lp_h[test_index]
            })

            no_pool_posterior_hdi = pm.sample_posterior_predictive(
                no_pooling_trace, keep_size=True)
            no_pool_posterior = pm.sample_posterior_predictive(
                no_pooling_trace)

            no_pool_prior = pm.sample_prior_predictive(150)

            # Calculate predictions and HDI

        no_pool_predictions = np.exp(no_pool_posterior['y'].mean(0))
        no_pool_hdi_data = az.hdi(no_pool_posterior_hdi)
        no_pool_lower_bound = np.array(
            np.exp(no_pool_hdi_data.to_array().sel(hdi='lower'))).flatten()
        no_pool_higher_bound = np.array(
            np.exp(no_pool_hdi_data.to_array().sel(hdi='higher'))).flatten()

        # Calculate cvrmse and coverage of the HDI
        no_pool_mse = mean_squared_error(df.total_electricity[test_index],
                                         no_pool_predictions)
        no_pool_rmse = sqrt(no_pool_mse)
        no_pool_cvrmse = no_pool_rmse / df.total_electricity.mean()
        no_pool_coverage = sum(
            (no_pool_lower_bound <= df.total_electricity[test_index])
            & (df.total_electricity[test_index] <= no_pool_higher_bound)
        ) * 100 / len(test_index)

        no_pool_cvrmse_list.append(no_pool_cvrmse)
        no_pool_coverage_list.append(no_pool_coverage)

        # Complete pooling

        with pm.Model(coords=coords) as complete_pooling:

            fs_sin_1 = pm.Data("fs_sin_1",
                               daypart_fs_sin_1[train_index],
                               dims="obs_id")
            fs_sin_2 = pm.Data("fs_sin_2",
                               daypart_fs_sin_2[train_index],
                               dims="obs_id")
            fs_sin_3 = pm.Data("fs_sin_3",
                               daypart_fs_sin_3[train_index],
                               dims="obs_id")

            fs_cos_1 = pm.Data("fs_cos_1",
                               daypart_fs_cos_1[train_index],
                               dims="obs_id")
            fs_cos_2 = pm.Data("fs_cos_2",
                               daypart_fs_cos_2[train_index],
                               dims="obs_id")
            fs_cos_3 = pm.Data("fs_cos_3",
                               daypart_fs_cos_3[train_index],
                               dims="obs_id")

            # cooling_temp = pm.Data("cooling_temp", outdoor_temp_c[train_index], dims="obs_id")
            # heating_temp = pm.Data("heating_temp", outdoor_temp_h[train_index], dims="obs_id")
            cooling_temp_lp = pm.Data("cooling_temp_lp",
                                      outdoor_temp_lp_c[train_index],
                                      dims="obs_id")
            heating_temp_lp = pm.Data("heating_temp_lp",
                                      outdoor_temp_lp_h[train_index],
                                      dims="obs_id")

            # Priors:
            a = pm.Normal("a", mu=0.0, sigma=1.0)
            btclp = pm.Normal("btclp", mu=0.0, sigma=1.0)
            bthlp = pm.Normal("bthlp", mu=0.0, sigma=1.0)

            bs1 = pm.Normal("bs1", mu=0.0, sigma=1.0)
            bs2 = pm.Normal("bs2", mu=0.0, sigma=1.0)
            bs3 = pm.Normal("bs3", mu=0.0, sigma=1.0)
            bc1 = pm.Normal("bc1", mu=0.0, sigma=1.0)
            bc2 = pm.Normal("bc2", mu=0.0, sigma=1.0)
            bc3 = pm.Normal("bc3", mu=0.0, sigma=1.0)

            # Expected value per county:
            mu = a + bs1 * fs_sin_1 + bs2 * fs_sin_2 + bs3 * fs_sin_3 + bc1 * fs_cos_1 + bc2 * fs_cos_2 + \
                 bc3 * fs_cos_3 + btclp * cooling_temp_lp + bthlp * heating_temp_lp
            # btc[daypart] * cooling_temp + bth[daypart] * heating_temp + \

            # Model error:
            sigma = pm.Exponential("sigma", 1.0)

            # Likelihood
            y = pm.Normal("y",
                          mu,
                          sigma=sigma,
                          observed=log_electricity[train_index],
                          dims="obs_id")

        # Fitting

        with complete_pooling:
            approx = pm.fit(
                n=50000,
                method='fullrank_advi',
                callbacks=[CheckParametersConvergence(tolerance=0.01)])
            complete_pooling_trace = approx.sample(1000)

            # Sampling from the posterior setting test data to check the predictions on unseen data

        with complete_pooling:
            pm.set_data({
                "fs_sin_1": daypart_fs_sin_1[test_index],
                "fs_sin_2": daypart_fs_sin_2[test_index],
                "fs_sin_3": daypart_fs_sin_3[test_index],
                "fs_cos_1": daypart_fs_cos_1[test_index],
                "fs_cos_2": daypart_fs_cos_2[test_index],
                "fs_cos_3": daypart_fs_cos_3[test_index],
                # "cooling_temp":outdoor_temp_c, "heating_temp": outdoor_temp_h,
                "cooling_temp_lp": outdoor_temp_lp_c[test_index],
                "heating_temp_lp": outdoor_temp_lp_h[test_index]
            })

            complete_pool_posterior_hdi = pm.sample_posterior_predictive(
                complete_pooling_trace, keep_size=True)
            complete_pool_posterior = pm.sample_posterior_predictive(
                complete_pooling_trace)

            complete_pool_prior = pm.sample_prior_predictive(150)

            # Calculate predictions and HDI

        complete_pool_predictions = np.exp(
            complete_pool_posterior['y'].mean(0))
        complete_pool_hdi_data = az.hdi(complete_pool_posterior_hdi)
        complete_pool_lower_bound = np.array(
            np.exp(
                complete_pool_hdi_data.to_array().sel(hdi='lower'))).flatten()
        complete_pool_higher_bound = np.array(
            np.exp(complete_pool_hdi_data.to_array().sel(
                hdi='higher'))).flatten()

        # Calculate cvrmse and coverage of the HDI
        complete_pool_mse = mean_squared_error(
            df.total_electricity[test_index], complete_pool_predictions)
        complete_pool_rmse = sqrt(complete_pool_mse)
        complete_pool_cvrmse = complete_pool_rmse / df.total_electricity.mean()
        complete_pool_coverage = sum(
            (complete_pool_lower_bound <= df.total_electricity[test_index])
            & (df.total_electricity[test_index] <= complete_pool_higher_bound)
        ) * 100 / len(test_index)

        complete_pool_cvrmse_list.append(complete_pool_cvrmse)
        complete_pool_coverage_list.append(complete_pool_coverage)

    # Export Results
    np_cvrmse = np.mean(no_pool_cvrmse_list)
    cp_cvrmse = np.mean(complete_pool_cvrmse_list)
    pp_cvrmse = np.mean(partial_pool_cvrmse_list)

    np_coverage = np.mean(no_pool_coverage_list)
    cp_coverage = np.mean(complete_pool_coverage_list)
    pp_coverage = np.mean(partial_pool_coverage_list)

    export_data = {
        'partial_pooling_cvrmse': [pp_cvrmse],
        'no_pooling_cvrmse': [np_cvrmse],
        'complete_pooling_cvrmse': [cp_cvrmse],
        'partial_pooling_coverage': [pp_coverage],
        'no_pooling_coverage': [np_coverage],
        'complete_pooling_coverage': [cp_coverage]
    }
    export_df = pd.DataFrame(data=export_data)
    return export_df
コード例 #23
0
# Distinct group ids present in the test set (`t_unique_test` is expected to
# have been computed earlier in the script).
idx_unique_test = np.unique(test.idx.values)

# Grid dimensions used to fold the flat test columns into 2-D arrays.
n_idx_test = len(idx_unique_test)
n_time_test = len(t_unique_test)
test_grid_shape = (n_idx_test, n_time_test)

# Coordinates handed to ArviZ for the predictions group.
prediction_coords = dict(idx=idx_unique_test, t=t_unique_test)

# Fold every flat test column into an (n_idx_test, n_time_test) array.
t_test = test.t.values.reshape(test_grid_shape)
y_test = test.y.values.reshape(test_grid_shape)
idx_test = test.idx.values.reshape(test_grid_shape)

# Swap the test inputs into the model, draw posterior predictive samples and
# attach them to the existing inference data in place.
test_inputs = {"t_shared": t_test, "idx_shared": idx_test}
with m:
    pm.set_data(test_inputs)
    stl_pred = pm.fast_sample_posterior_predictive(
        m_idata.posterior, random_seed=RANDOM_SEED)
    az.from_pymc3_predictions(
        stl_pred,
        idata_orig=m_idata,
        inplace=True,
        coords=prediction_coords,
    )

# Plot the HDI of the predictions.
hdi_plot_options = dict(
    model_type="covariation",
    prior_level="generic",
    kind="predictions",
)
fh.plot_hdi(t=t_test,
            y=y_test,
            n_idx=n_idx_test,
            m_idata=m_idata,
            **hdi_plot_options)
コード例 #24
0
ファイル: tvatojpower.py プロジェクト: jeti182/tvatoj-power
def sim_and_fit(setup,
                model_func,
                iterations,
                condition_func,
                goal_var_names=None,
                log_var_names=['C_mu', 'wp_mu'],
                single_C=False,
                single_wp=False,
                outfile='out.csv',
                turn_off_warnings=True,
                tune=1000,
                target_accept=0.85,
                init='adapt_diag'):
    """Simulate, fit and score data sets, logging a running success rate.

    Each iteration draws a data set with ``simulate_tojs(setup)``, swaps its
    ``'probe_first_count'`` entry into the model, samples the posterior and
    asks ``condition_func`` whether the posterior summary meets the goals.
    The model is built only once, from the first simulated data set.  After
    every iteration the success rate so far, an HDI for it (from
    ``HDIofICDF`` with a ``1 + successes`` / ``1 + failures`` beta) and the
    summary of ``log_var_names`` are written to ``outfile``.

    Args:
        setup: Passed unchanged to ``simulate_tojs``.
        model_func: Called as ``model_func(data, single_C=..., single_wp=...)``
            to build the model on the first iteration.
        iterations: Number of simulate-and-fit rounds.
        condition_func: Called with the summary of ``goal_var_names``; its
            result multiplied by 1 is counted as the success value.
        goal_var_names: Variable names summarised for ``condition_func``.
        log_var_names: Variable names summarised into ``outfile``; must be
            non-empty.
        single_C: Forwarded to ``model_func``.
        single_wp: Forwarded to ``model_func``.
        outfile: CSV path; created on the first iteration, appended to
            (without header) afterwards.
        turn_off_warnings: If true, install a global "ignore" warnings filter.
        tune: Forwarded to ``pymc3.sample``.
        target_accept: Forwarded to ``pymc3.sample``.
        init: Forwarded to ``pymc3.sample``.

    Returns:
        None.  Results are only written to ``outfile`` and the log.

    Raises:
        SystemExit: If ``log_var_names`` is ``None`` or empty.
    """
    # NOTE: the list default of ``log_var_names`` is shared between calls; it
    # is only read here, never mutated.
    if turn_off_warnings:
        warnings.filterwarnings("ignore")
        logging.warning('Attention: Warnings turned off. '
                        )  # There is so much from pymc3 and theano ..

    # Identity check instead of ``== None`` so that objects overloading
    # ``__eq__`` are not asked to compare themselves with None.
    if log_var_names is None or len(log_var_names) < 1:
        sys.exit(
            'log_var_names should not be empty or None! Log at least one variable!'
        )
    num_success = 0
    model = None
    for i in tqdm(range(iterations), desc='Overall progress'):
        data = simulate_tojs(setup)
        if model is None:
            # Build the model once; later iterations only swap the data in.
            model = model_func(data, single_C=single_C, single_wp=single_wp)
        with model:
            pymc3.set_data({'probe_first_count': data['probe_first_count']})
            trace = pymc3.sample(2000,
                                 tune=tune,
                                 cores=4,
                                 init=init,
                                 target_accept=target_accept)
            summary_stats = pymc3.summary(trace,
                                          var_names=goal_var_names,
                                          hdi_prob=0.95)
            print(summary_stats)
        success = condition_func(
            summary_stats
        ) * 1  # Either 0 or 1, depending on reaching our goals.
        num_success += success
        attempts = (i + 1)
        success_rate = num_success / attempts
        hdi = HDIofICDF(beta,
                        a=1 + num_success,
                        b=1 + (attempts - num_success))
        logging.info(('[ESTIMATE] Success rate: %.2f' % success_rate +
                      ' [95 %% HDI: %.2f to %.2f]' % (hdi[0], hdi[1]) + '\n' +
                      '-' * 20))

        out_df = pymc3.summary(trace, var_names=log_var_names, hdi_prob=0.95)
        out_df.insert(0, 'iteration', attempts)
        out_df.insert(1, 'success', success)
        out_df.insert(2, 'power_est', success_rate)
        out_df.insert(3, 'power_hdi_2.5%', hdi[0])
        out_df.insert(4, 'power_hdi_97.5%', hdi[1])
        # The first iteration creates the file with a header; later ones
        # append bare rows.
        if attempts == 1:
            out_df.to_csv(outfile)
        else:
            out_df.to_csv(outfile, mode='a', header=False)
コード例 #25
0
# Question 'PyMC3: Different predictions for identical inputs' ----
# (available online: https://stackoverflow.com/questions/59288938/pymc3-different-predictions-for-identical-inputs)

import seaborn as sns
import pymc3 as pm
import pandas as pd
import numpy as np

### . training ----

# Fit on every iris row except the last one.
dat = sns.load_dataset('iris')
trn = dat.head(-1)

model = pm.Model()
with model:
    # Shared container so the predictor can be swapped out after fitting.
    s_data = pm.Data('s_data', trn['petal_width'])
    outcome = pm.glm.GLM(
        x=s_data,
        y=trn['petal_length'],
        labels='petal_width',
    )
    trace = pm.sample(500, cores=1, random_seed=1899)

### . testing ----

# Two identical copies of the held-out last row.
tst = dat.tail(1)
tst = pd.concat([tst] * 2, axis=0, ignore_index=True)

with model:
    pm.set_data({'s_data': tst['petal_width']})
    ppc = pm.sample_posterior_predictive(trace, random_seed=1900)

# Mean prediction per test row, averaged over the first axis of the draws.
np.mean(ppc['y'], axis=0)
コード例 #26
0
# Noise-free line: y = a + b*x
true_regression_line = true_slope * x + true_intercept

# Observations are the line plus Gaussian noise.
noise = np.random.normal(scale=0.5, size=size)
y = true_regression_line + noise

data = {'x': x, 'y': y}

# Train the model.
model = pm.Model()
with model:
    s_data = pm.Data('s_data', data['x'])
    # The GLM helper adds the linear model, its likelihood and all of its
    # parameters to the surrounding model context.
    outcome = pm.glm.GLM(x=s_data, y=data['y'], labels=['x'])
    trace = pm.sample(250, cores=1)

# Predict for new data (1 observation).
single_observation = data['x'][:1]
with model:
    pm.set_data({'s_data': single_observation})
    ppc = pm.sample_posterior_predictive(trace, samples=20, random_seed=1899)

len(ppc['y'][0])  # 200 (value recorded by the snippet's author)

# Predict for new data (2+ observations).
two_observations = data['x'][:2]
with model:
    pm.set_data({'s_data': two_observations})
    ppc = pm.sample_posterior_predictive(trace, samples=20, random_seed=1899)

len(ppc['y'][0])  # 2 (value recorded by the snippet's author)
コード例 #27
0
def main(args):
    """Fit a team attack/defence Poisson model and print score tables.

    Loads the data with ``load_data()``, fits the model on the rows whose
    ``split`` is ``"train"``, plots the posterior, then swaps in the rows
    whose ``split`` is ``"predict"`` and fills their scores with the rounded
    posterior predictive mean before printing two score tables.

    Args:
        args: Object providing ``num_samples``, ``num_warmup``,
            ``num_chains``, ``num_cores`` and ``rng_seed``, all forwarded to
            ``pm.sample``.

    Returns:
        None.
    """
    # Local import: only needed for the final concatenation, and keeps this
    # function independent of the file's import block.
    import pandas as pd

    print("Loading data...")
    teams, df = load_data()
    nt = len(teams)
    train = df[df["split"] == "train"]

    print("Starting inference...")
    with pm.Model() as model:
        # priors
        alpha = pm.Normal("alpha", mu=0, sigma=1)
        sd_att = pm.HalfStudentT("sd_att", nu=3, sigma=2.5)
        sd_def = pm.HalfStudentT("sd_def", nu=3, sigma=2.5)

        home = pm.Normal("home", mu=0, sigma=1)  # home advantage

        # team-specific model parameters
        attack = pm.Normal("attack", mu=0, sigma=sd_att, shape=nt)
        defend = pm.Normal("defend", mu=0, sigma=sd_def, shape=nt)

        # data containers, replaced with the "predict" rows further down
        home_id = pm.Data("home_data", train["Home_id"])
        away_id = pm.Data("away_data", train["Away_id"])

        # likelihood
        theta1 = tt.exp(alpha + home + attack[home_id] - defend[away_id])
        theta2 = tt.exp(alpha + attack[away_id] - defend[home_id])

        pm.Poisson("s1", mu=theta1, observed=train["score1"])
        pm.Poisson("s2", mu=theta2, observed=train["score2"])

    with model:
        fit = pm.sample(
            draws=args.num_samples,
            tune=args.num_warmup,
            chains=args.num_chains,
            cores=args.num_cores,
            random_seed=args.rng_seed,
        )

    print("Analyse posterior...")
    az.plot_forest(
        fit,
        var_names=("alpha", "home", "sd_att", "sd_def"),
        backend="bokeh",
    )

    az.plot_trace(
        fit,
        var_names=("alpha", "home", "sd_att", "sd_def"),
        backend="bokeh",
    )

    # Attack and defence: posterior mean and standard deviation per team,
    # plus a mean +/- one standard deviation band.
    quality = teams.copy()
    quality = quality.assign(
        attack=fit["attack"].mean(axis=0),
        attacksd=fit["attack"].std(axis=0),
        defend=fit["defend"].mean(axis=0),
        defendsd=fit["defend"].std(axis=0),
    )
    quality = quality.assign(
        attack_low=quality["attack"] - quality["attacksd"],
        attack_high=quality["attack"] + quality["attacksd"],
        defend_low=quality["defend"] - quality["defendsd"],
        defend_high=quality["defend"] + quality["defendsd"],
    )

    plot_quality(quality)

    # Predicted goals and table
    predict = df[df["split"] == "predict"]

    with model:
        pm.set_data({
            "home_data": predict["Home_id"],
            "away_data": predict["Away_id"],
        })

        predicted_score = pm.sample_posterior_predictive(
            fit, var_names=["s1", "s2"], random_seed=1)

    predicted_full = predict.copy()
    predicted_full = predicted_full.assign(
        score1=predicted_score["s1"].mean(axis=0).round(),
        score1error=predicted_score["s1"].std(axis=0),
        score2=predicted_score["s2"].mean(axis=0).round(),
        score2error=predicted_score["s2"].std(axis=0),
    )

    # DataFrame.append was removed in pandas 2.0; pd.concat with its default
    # arguments stacks the rows the same way.
    predicted_full = pd.concat(
        [train,
         predicted_full.drop(columns=["score1error", "score2error"])])

    print(score_table(df))
    print(score_table(predicted_full))
コード例 #28
0
    y_lik = pm.Normal('y_lik', mu=theta, sigma=sigma, observed=y)
    trace_linear = pm.sample(tune=2000, chains=1, cores=1)
    pp_samples = pm.sample_posterior_predictive(trace=trace_linear, random_seed=123)

# Posterior predictive mean per observation.
y_pred = pp_samples['y_lik'].mean(axis=0)

_, axi = plt.subplots(1, 4, figsize=(8, 5))
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally, and the keyword form also works on older releases.
sns.scatterplot(x=x, y=y_obs, ax=axi[0]).set_title("Data")
sns.lineplot(x=x, y=y_pred, ax=axi[0])
az.plot_hdi(x, trace_linear['theta'], hdi_prob=0.98, ax=axi[0], color='gray')
az.plot_posterior(trace_linear, var_names=['intercept', 'coefx'], ax=axi[1])
az.plot_posterior(trace_linear, var_names=['coefx'], ax=axi[2])
az.plot_posterior(trace_linear, var_names=['coefxSqd'], ax=axi[3])
plt.show()


# Swap three new inputs into the fitted model and print the mean prediction
# for each of them.
with linear_Model:
    pm.set_data({'xs': [1, 5.6, 4]})
    y_test = pm.sample_posterior_predictive(trace=trace_linear)
print(y_test['y_lik'].mean(axis=0))
# Hand-computed reference value printed for comparison.
print(1 + 3.2 * 1 + 4 * 1**2)









コード例 #29
0
ファイル: BNN.py プロジェクト: moeketsims/nn
                           testval=initial_out)

    layer_1 = pm.math.tanh(pm.math.dot(X, weight_1))
    layer_2 = pm.math.tanh(pm.math.dot(layer_1, weight_2))
    output = pm.math.sigmoid(pm.math.dot(layer_2, weight_Out))

    y_lik = pm.Bernoulli('y_lik', output, observed=Y)
    inference = pm.ADVI()
    approx = pm.fit(3000, method=inference)
    trace = approx.sample(draws=500)
    ppc = pm.sample_posterior_predictive(trace)

# Majority vote over the posterior predictive draws. The mean of 0/1 draws is
# the predicted probability of class 1, so the cut-off is 0.5; a cut-off of 0
# would label a point as 1 as soon as a single draw is 1.
pred = ppc['y_lik'].mean(axis=0) > 0.5
pred = np.round(pred)
print(f'Accuracy is {(pred == y).mean() * 100}')

# 15 x 15 grid over [-1, 1]^2, flattened to one (x1, x2) row per grid point.
grid = np.mgrid[-1:1:15j, -1:1:15j]
grid_2d = grid.reshape(2, -1).T
# One placeholder label per grid point (grid.shape[1] is only the side
# length of the grid, not the number of points).
dummy_out = np.ones(grid_2d.shape[0], dtype=np.int8)

with BNN:
    pm.set_data({'x': grid_2d})
    pm.set_data({'Y': dummy_out})
    # sample_ppc is the deprecated name; use the same call as the rest of
    # this script.
    ppc = pm.sample_posterior_predictive(trace, samples=5000)

# Predicted probability of class 1 at every grid point.
pred = ppc['y_lik'].mean(axis=0)

# sns.heatmap(pred.reshape(15, 15).T)
#sns.heatmap(ppc['y_lik'].std(axis=0).reshape(15, 15).T)
plt.show()
コード例 #30
0
def worker(task):
    """Compute a per-star probability for one index range and cache it.

    Args:
        task: Tuple ``((i1, i2), data, model_kw, basename)``.  ``i1``, ``i2``
            and ``basename`` only name the cache file, ``data`` is wrapped in
            ``GaiaData`` and ``model_kw`` is forwarded to
            ``ComovingHelper.get_model``.

    Returns:
        The absolute path of the cache file, either already existing or just
        written.  An empty string is returned when the model could not be
        built after all retries.

    Notes:
        ``pid`` is not defined in this function and is read from the
        enclosing scope.  Stars for which optimisation or sampling raises
        keep ``NaN`` as their probability.
    """
    (i1, i2), data, model_kw, basename = task

    g = GaiaData(data)

    cache_filename = os.path.abspath(f'../cache/tmp-{basename}_{i1}-{i2}.fits')
    if os.path.exists(cache_filename):
        print(f"({pid}) cache filename exists for index range: "
              f"{cache_filename}")
        return cache_filename

    print(f"({pid}) setting up model")
    helper = ComovingHelper(g)

    # Building the model can fail with OSError; retry a bounded number of
    # times.  The printed delay comes from the same constant that is slept,
    # so the two cannot drift apart again.
    max_attempts = 10
    retry_delay = 5  # seconds
    for _ in range(max_attempts):
        try:
            model = helper.get_model(**model_kw)
            break
        except OSError:
            print(f"{pid} failed to compile - trying again in "
                  f"{retry_delay}sec...")
            time.sleep(retry_delay)
    else:
        # Every attempt failed: report where this ran and give up.
        print(f"{pid} never successfully compiled. aborting")
        import socket
        print(socket.gethostname(), socket.getfqdn(),
              os.path.exists("/cm/shared/sw/pkg/devel/gcc/7.4.0/bin/g++"))
        return ''

    print(f"({pid}) done init model - running {len(g)} stars")

    # NaN marks stars that were never successfully sampled.
    probs = np.full(helper.N, np.nan)
    for n in range(helper.N):
        with model:
            # Swap this star's inputs into the shared model.
            pm.set_data({
                'y': helper.ys[n],
                'Cinv': helper.Cinvs[n],
                'M': helper.Ms[n]
            })

            test_pt = {
                'vxyz': helper.test_vxyz[n],
                'r': helper.test_r[n],
                'w': np.array([0.5, 0.5])
            }
            try:
                print("starting optimize")
                res = xo.optimize(start=test_pt,
                                  progress_bar=False,
                                  verbose=False)

                print("done optimize - starting sample")
                trace = pm.sample(
                    start=res,
                    tune=2000,
                    draws=1000,
                    cores=1,
                    chains=1,
                    step=xo.get_dense_nuts_step(target_accept=0.95),
                    progressbar=False)
            except Exception as e:
                # Best effort: report the failure and leave this star at NaN.
                print(str(e))
                continue

            # print("done sample - computing prob")
            ll_fg = trace.get_values(model.group_logp)
            ll_bg = trace.get_values(model.field_logp)
            # exp(ll_fg) / (exp(ll_fg) + exp(ll_bg)), evaluated in log space,
            # then averaged over the draws.
            post_prob = np.exp(ll_fg - np.logaddexp(ll_fg, ll_bg))
            probs[n] = post_prob.sum() / len(post_prob)

    # write probs to cache filename
    tbl = at.Table()
    tbl['source_id'] = g.source_id
    tbl['prob'] = probs
    tbl.write(cache_filename)

    return cache_filename