def sample_mod(
    self,
    posterior_draws=2000,  # this is not enough
    post_pred_draws=1000,
    prior_pred_draws=1000,
    random_seed=42,
    chains=2,
):
    """Sample the posterior, the posterior predictive and the prior predictive distribution.

    Results are stored on the instance as ``self.trace``, ``self.post_pred``,
    ``self.prior_pred`` and, combined, ``self.m_idata``. Afterwards the shared
    data containers of the model are set to the ``*_test`` arrays held on the
    instance and predictions for them are added to ``self.m_idata`` in place.

    ``random_seed`` is forwarded to every sampling call (posterior, posterior
    predictive, prior predictive and predictions), so that the whole run is
    reproducible rather than only the posterior draws.

    Args:
        posterior_draws (int, optional): Number of draws for the posterior.
            Defaults to 2000.
        post_pred_draws (int, optional): Number of draws from the posterior
            predictive distribution. Defaults to 1000.
        prior_pred_draws (int, optional): Number of draws for the prior
            predictive distribution. Defaults to 1000.
        random_seed (int, optional): Random seed for ensuring reproducibility.
            Defaults to 42.
        chains (int, optional): Number of chains used for sampling the
            posterior. Defaults to 2.

    Example:
        Pc.sample_mod(posterior_draws = 3000,
                      post_pred_draws = 1500,
                      prior_pred_draws = 55,
                      random_seed = 13,
                      chains = 4)
    """
    # we need these for later
    self.posterior_draws = posterior_draws
    self.post_pred_draws = post_pred_draws
    self.prior_pred_draws = prior_pred_draws

    with self.model:
        self.trace = pm.sample(
            return_inferencedata=False,
            draws=posterior_draws,
            target_accept=.99,
            random_seed=random_seed,
            chains=chains,
        )
        # Seed the predictive draws too; otherwise only the posterior is
        # reproducible and the documented contract of random_seed is broken.
        self.post_pred = pm.sample_posterior_predictive(
            self.trace,
            samples=post_pred_draws,
            random_seed=random_seed,
        )
        self.prior_pred = pm.sample_prior_predictive(
            samples=prior_pred_draws,
            random_seed=random_seed,
        )
        self.m_idata = az.from_pymc3(
            trace=self.trace,
            posterior_predictive=self.post_pred,
            prior=self.prior_pred,
        )

    # NOTE(review): this prediction step repeats the body of ``predict``;
    # confirm both live on the same class before delegating to it.
    with self.model:
        # Swap the shared containers over to the test arrays before predicting.
        pm.set_data({
            "t1_shared": self.t1_test,
            "t2_shared": self.t2_test,
            "idx_shared": self.idx_test,
            "t3_shared": np.array(self.t3_test),
        })
        predictions = pm.fast_sample_posterior_predictive(
            self.m_idata.posterior,
            random_seed=random_seed,
        )
        # inplace=True: the predictions are added to self.m_idata itself.
        az.from_pymc3_predictions(
            predictions,
            idata_orig=self.m_idata,
            coords={'idx': self.test[self.index].values},
            inplace=True,
        )
def predict(self):
    """Add predictions for the ``*_test`` arrays to ``self.m_idata``.

    The model's shared data containers (``t1_shared``, ``t2_shared``,
    ``idx_shared``, ``t3_shared``) are set to the test arrays stored on the
    instance, predictions are drawn from ``self.m_idata.posterior`` and the
    result is attached to ``self.m_idata`` in place. Nothing is returned.
    """
    # TODO (carried over from the original): make this work for only one.
    test_inputs = {
        "t1_shared": self.t1_test,
        "t2_shared": self.t2_test,
        "idx_shared": self.idx_test,
        "t3_shared": np.array(self.t3_test),
    }
    index_coords = {'idx': self.test[self.index].values}

    with self.model:
        pm.set_data(test_inputs)
        draws = pm.fast_sample_posterior_predictive(self.m_idata.posterior)
        # inplace=True mutates self.m_idata rather than returning a new object.
        az.from_pymc3_predictions(
            draws,
            idata_orig=self.m_idata,
            coords=index_coords,
            inplace=True,
        )
def test_predictions_constant_data(self):
    """Predictions built from a second model keep the posterior and record its constant data."""
    # First model: three data points, sampled and converted as usual.
    with pm.Model():
        x_fit = pm.Data("x", [1.0, 2.0, 3.0])
        y_fit = pm.Data("y", [1.0, 2.0, 3.0])
        slope = pm.Normal("beta", 0, 1)
        pm.Normal("obs", x_fit * slope, 1, observed=y_fit)
        trace = pm.sample(100, tune=100)
        inference_data = from_pymc3(trace=trace)

    fit_groups = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
    assert not check_multiple_attrs(fit_groups, inference_data)

    # Second model: same structure, two data points, reusing the first trace.
    with pm.Model():
        x_new = pm.Data("x", [1.0, 2.0])
        y_new = pm.Data("y", [1.0, 2.0])
        slope = pm.Normal("beta", 0, 1)
        pm.Normal("obs", x_new * slope, 1, observed=y_new)
        predictive_trace = pm.sample_posterior_predictive(trace)
        assert set(predictive_trace.keys()) == {"obs"}
        # The shape of predictive_trace["obs"] is expected to be (400, 2),
        # i.e. four chains of 100 samples, but it is not asserted because
        # it seems to vary between pymc3 versions.
        inference_data = from_pymc3_predictions(predictive_trace, posterior_trace=trace)

    # Each entry: groups to look for, and the message used if any are missing.
    group_checks = (
        (
            {"posterior": ["beta"], "~observed_data": ""},
            "Posterior data not copied over as expected.",
        ),
        (
            {"predictions": ["obs"]},
            "Predictions not instantiated as expected.",
        ),
        (
            {"predictions_constant_data": ["x"]},
            "Predictions constant data not instantiated as expected.",
        ),
    )
    for wanted_groups, failure_message in group_checks:
        fails = check_multiple_attrs(wanted_groups, inference_data)
        assert not fails, failure_message
def forecast_election(self, idata: arviz.InferenceData) -> arviz.InferenceData:
    """
    Generate out-of-sample predictions for ``election_to_predict`` specified in ``__init__``.

    Parameters
    ----------
    idata: arviz.InferenceData
        Posterior trace generated by ``self.sample_all`` on the training dataset.
        The dataset used for predictions is generated automatically: one observation for each
        of the days in ``self.coords["countdown"]``.
        The corresponding values of predictors are handled automatically.

    Returns
    -------
    arviz.InferenceData
        Result of ``arviz.from_pymc3_predictions`` called with ``idata_orig=idata`` and
        ``inplace=False``.
    """
    forecast_dates, oos_frame = self._generate_oos_data(idata)
    oos_frame = self._join_with_continuous_predictors(oos_frame)

    # One zero-filled column per party (placeholder values only), indexed by
    # the out-of-sample rows, then flattened back to ordinary columns.
    forecast_frame = pd.DataFrame(
        data=0,
        index=pd.MultiIndex.from_frame(oos_frame),
        columns=self.parties_complete,
    ).reset_index()

    sampled_vars = [
        "party_intercept",
        "latent_popularity",
        "noisy_popularity",
        "N_approve",
        "latent_pop_t0",
        "R",
    ]
    prediction_coords = {"observations": forecast_dates}
    prediction_dims = {
        var: ["observations", "parties_complete"]
        for var in ("latent_popularity", "noisy_popularity", "N_approve")
    }

    # The same frame serves as both polls and continuous predictors.
    model = self.build_model(
        polls=forecast_frame,
        continuous_predictors=forecast_frame,
    )
    with model:
        raw_draws = pm.fast_sample_posterior_predictive(idata, var_names=sampled_vars)
        forecast = arviz.from_pymc3_predictions(
            raw_draws,
            idata_orig=idata,
            inplace=False,
            coords=prediction_coords,
            dims=prediction_dims,
        )
    return forecast
def make_predictions_inference_data(
    self, data, eight_schools_params
) -> Tuple[InferenceData, Dict[str, np.ndarray]]:
    """Build an InferenceData from posterior predictive draws and the posterior trace.

    Samples the posterior predictive from ``data.obj`` inside ``data.model``
    and converts it with ``from_pymc3_predictions``, passing ``data.obj`` as
    ``posterior_trace``.

    Returns:
        The converted InferenceData and the raw posterior predictive samples.
    """
    with data.model:
        pp_samples = pm.sample_posterior_predictive(data.obj)
        school_ids = np.arange(eight_schools_params["J"])
        converted = from_pymc3_predictions(
            pp_samples,
            posterior_trace=data.obj,
            coords={"school": school_ids},
            dims={"theta": ["school"], "eta": ["school"]},
        )
        assert isinstance(converted, InferenceData)
    return converted, pp_samples
def get_predictions_inference_data(
    self, data, eight_schools_params, inplace
) -> Tuple[InferenceData, Dict[str, np.ndarray]]:
    """Convert a trace, then extend the result with predictions.

    An InferenceData is first created from ``data.obj`` and prior predictive
    samples; ``from_pymc3_predictions`` is then called with it as
    ``idata_orig`` and the given ``inplace`` flag. The method asserts that
    the object handed back is the original one exactly when ``inplace`` is
    true.

    Returns:
        The extended InferenceData and the raw posterior predictive samples.
    """
    with data.model:
        prior_samples = pm.sample_prior_predictive()
        pp_samples = pm.sample_posterior_predictive(data.obj)

        school_ids = np.arange(eight_schools_params["J"])
        base_idata = from_pymc3(
            trace=data.obj,
            prior=prior_samples,
            coords={"school": school_ids},
            dims={"theta": ["school"], "eta": ["school"]},
        )
        assert isinstance(base_idata, InferenceData)

        extended = from_pymc3_predictions(
            pp_samples, idata_orig=base_idata, inplace=inplace
        )
        assert isinstance(extended, InferenceData)
        # Identity check: same object if and only if inplace was requested.
        assert (base_idata is extended) == inplace
    return extended, pp_samples
n_idx_test = len(idx_unique_test) # new coords as well prediction_coords = {'idx': idx_unique_test, 't': t_unique_test} # test data in correct format. t_test = test.t.values.reshape((n_idx_test, n_time_test)) y_test = test.y.values.reshape((n_idx_test, n_time_test)) idx_test = test.idx.values.reshape((n_idx_test, n_time_test)) with m: pm.set_data({"t_shared": t_test, "idx_shared": idx_test}) stl_pred = pm.fast_sample_posterior_predictive(m_idata.posterior, random_seed=RANDOM_SEED) az.from_pymc3_predictions(stl_pred, idata_orig=m_idata, inplace=True, coords=prediction_coords) # plot hdi for prediction fh.plot_hdi(t=t_test, y=y_test, n_idx=n_idx_test, m_idata=m_idata, model_type="covariation", prior_level="generic", kind="predictions") model_type = "covariation" prior_level = "generic" # plot hdi for individual aliens
# Evaluate generate_harmonic_sample once per entry of the trace and stack
# the results into a single array.
y_ar = []
for tt in trace:
    y_ar.append(generate_harmonic_sample(tdaynew, omega, tt))
a0_pred = np.array(y_ar)

#######
# Save the samples
#######
print('Saving the output to ', outputnc)

# Convert the data to arviz structure
# Wrap the predictions under the variable name 'a0'.
# NOTE(review): dims names three axes (chain, draw, time) and the chain
# coordinate has a single entry; confirm a0_pred has a matching shape.
dims = ('chain','draw','time')
ds = az.from_pymc3_predictions({'a0':a0_pred}, \
        coords={'time':predtime,'chain':np.array([1])}, dims={'a0':dims})

# Convert the posterior trace
ds2 = az.from_pymc3(trace=trace)

# Update the observed data because it comes out as a theano.tensor in the way
# our particular model is specified
ds2.observed_data['X_obs'] = xr.DataArray(X, dims=('time',), coords={'time':timein})

# This merges the data sets
ds2.extend(ds)

# Save to netCDF and print the merged object
ds2.to_netcdf(outputnc)
print(ds2)
'idx': idx_unique_test, 't': t_unique_test } # test data in correct format. t_test = test.t.values.reshape((n_idx_test, n_time_test)) y_test = test.y.values.reshape((n_idx_test, n_time_test)) idx_test = test.idx.values.reshape((n_idx_test, n_time_test)) with m_covariation: pm.set_data({"t_shared": t_test, "idx_shared": idx_test}) stl_pred = pm.fast_sample_posterior_predictive( idata_covariation.posterior, random_seed=RANDOM_SEED ) az.from_pymc3_predictions( stl_pred, idata_orig=idata_covariation, inplace=True, coords=prediction_coords ) ### python: plot hdi (full uncertainty) ### # take posterior predictive out of idata for convenience ppc = idata_covariation.posterior_predictive # take out predictions (mean over chains). y_pred = ppc["y_pred"].mean(axis = 0).values # calculate mean y predicted (mean over draws and idx) y_mean = y_pred.mean(axis = (0, 1)) # THE DIFFERENCE: base it on the actual predictions of the full model outcome = y_pred.reshape((4000*n_idx, n_time)) # 4000 = 2000 (draws) * 2 (chains)