def test_sample_posterior_predictive_after_set_data_with_coords(self):
    y = np.array([1.0, 2.0, 3.0])
    with pm.Model() as model:
        x = pm.MutableData("x", [1.0, 2.0, 3.0], dims="obs_id")
        beta = pm.Normal("beta", 0, 10.0)
        pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y, dims="obs_id")
        idata = pm.sample(
            10,
            tune=100,
            chains=1,
            return_inferencedata=True,
            compute_convergence_checks=False,
        )
    # Predict on new data.
    with model:
        x_test = [5, 6]
        pm.set_data(new_data={"x": x_test}, coords={"obs_id": ["a", "b"]})
        pm.sample_posterior_predictive(idata, extend_inferencedata=True, predictions=True)

    assert idata.predictions["obs"].shape == (1, 10, 2)
    assert np.all(idata.predictions["obs_id"].values == np.array(["a", "b"]))
    np.testing.assert_allclose(
        x_test, idata.predictions["obs"].mean(("chain", "draw")), atol=1e-1
    )
def test_shared_data_as_index(self):
    """
    Allow pm.Data to be used for index variables, i.e. with integers as well as floats.
    See https://github.com/pymc-devs/pymc/issues/3813
    """
    with pm.Model() as model:
        index = pm.MutableData("index", [2, 0, 1, 0, 2])
        y = pm.MutableData("y", [1.0, 2.0, 3.0, 2.0, 1.0])
        alpha = pm.Normal("alpha", 0, 1.5, size=3)
        pm.Normal("obs", alpha[index], np.sqrt(1e-2), observed=y)

        prior_trace = pm.sample_prior_predictive(1000)
        idata = pm.sample(
            1000,
            tune=1000,
            chains=1,
            compute_convergence_checks=False,
        )

    # Predict on new data
    new_index = np.array([0, 1, 2])
    new_y = [5.0, 6.0, 9.0]
    with model:
        pm.set_data(new_data={"index": new_index, "y": new_y})
        pp_trace = pm.sample_posterior_predictive(idata, var_names=["alpha", "obs"])

    assert prior_trace.prior["alpha"].shape == (1, 1000, 3)
    assert idata.posterior["alpha"].shape == (1, 1000, 3)
    assert pp_trace.posterior_predictive["alpha"].shape == (1, 1000, 3)
    assert pp_trace.posterior_predictive["obs"].shape == (1, 1000, 3)
def test_sample_after_set_data(self):
    with pm.Model() as model:
        x = pm.MutableData("x", [1.0, 2.0, 3.0])
        y = pm.MutableData("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 10.0)
        pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
        pm.sample(
            1000,
            tune=1000,
            chains=1,
            compute_convergence_checks=False,
        )
    # Predict on new data.
    new_x = [5.0, 6.0, 9.0]
    new_y = [5.0, 6.0, 9.0]
    with model:
        pm.set_data(new_data={"x": new_x, "y": new_y})
        new_idata = pm.sample(
            1000,
            tune=1000,
            chains=1,
            compute_convergence_checks=False,
        )
        pp_trace = pm.sample_posterior_predictive(new_idata)

    assert pp_trace.posterior_predictive["obs"].shape == (1, 1000, 3)
    np.testing.assert_allclose(
        new_y, pp_trace.posterior_predictive["obs"].mean(("chain", "draw")), atol=1e-1
    )
def test_explicit_coords(self):
    N_rows = 5
    N_cols = 7
    data = np.random.uniform(size=(N_rows, N_cols))
    coords = {
        "rows": [f"R{r+1}" for r in range(N_rows)],
        "columns": [f"C{c+1}" for c in range(N_cols)],
    }

    # pass coordinates explicitly, use numpy array in Data container
    with pm.Model(coords=coords) as pmodel:
        # Dims created from coords are constant by default
        assert isinstance(pmodel.dim_lengths["rows"], TensorConstant)
        assert isinstance(pmodel.dim_lengths["columns"], TensorConstant)
        pm.MutableData("observations", data, dims=("rows", "columns"))
        # new data with same (!) shape
        pm.set_data({"observations": data + 1})
        # new data with same (!) shape and coords
        pm.set_data({"observations": data}, coords=coords)

    assert "rows" in pmodel.coords
    assert pmodel.coords["rows"] == ("R1", "R2", "R3", "R4", "R5")
    assert "rows" in pmodel.dim_lengths
    assert pmodel.dim_lengths["rows"].eval() == 5
    assert "columns" in pmodel.coords
    assert pmodel.coords["columns"] == ("C1", "C2", "C3", "C4", "C5", "C6", "C7")
    assert pmodel.RV_dims == {"observations": ("rows", "columns")}
    assert "columns" in pmodel.dim_lengths
    assert pmodel.dim_lengths["columns"].eval() == 7
def test_valueerror_from_resize_without_coords_update():
    """
    Resizing a mutable dimension that had coords, without passing new coords,
    raises a ValueError.
    """
    with pm.Model() as pmodel:
        pmodel.add_coord("shared", [1, 2, 3], mutable=True)
        pm.MutableData("m", [1, 2, 3], dims="shared")
        with pytest.raises(ValueError, match="'m' variable already had 3"):
            # tries to resize m, but without passing coords, so raises a ValueError
            pm.set_data({"m": [1, 2, 3, 4]})
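# A hypothetical companion sketch (not part of the original suite) showing the
# passing counterpart of the test above: resizing the same mutable dim succeeds
# when updated coords are passed along with the new data, as
# test_sample_posterior_predictive_after_set_data_with_coords also exercises.
# The function name below is illustrative only.
def test_resize_with_coords_update_sketch():
    with pm.Model() as pmodel:
        pmodel.add_coord("shared", [1, 2, 3], mutable=True)
        pm.MutableData("m", [1, 2, 3], dims="shared")
        # Passing matching coords lets the "shared" dim grow from 3 to 4.
        pm.set_data({"m": [1, 2, 3, 4]}, coords={"shared": [1, 2, 3, 4]})
    assert pmodel.dim_lengths["shared"].eval() == 4
    assert len(pmodel.coords["shared"]) == 4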
def test_set_data_to_non_data_container_variables(self):
    with pm.Model() as model:
        x = np.array([1.0, 2.0, 3.0])
        y = np.array([1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 10.0)
        pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
        pm.sample(
            1000,
            tune=1000,
            chains=1,
            compute_convergence_checks=False,
        )
    with pytest.raises(TypeError) as error:
        pm.set_data({"beta": [1.1, 2.2, 3.3]}, model=model)
    error.match("The variable `beta` must be a `SharedVariable`")
def test_shared_data_as_rv_input(self):
    """
    Allow pm.Data to be used as input for other RVs.
    See https://github.com/pymc-devs/pymc/issues/3842
    """
    with pm.Model() as m:
        x = pm.MutableData("x", [1.0, 2.0, 3.0])
        y = pm.Normal("y", mu=x, size=(2, 3))
        assert y.eval().shape == (2, 3)
        idata = pm.sample(
            chains=1,
            tune=500,
            draws=550,
            return_inferencedata=True,
            compute_convergence_checks=False,
        )
    samples = idata.posterior["y"]
    assert samples.shape == (1, 550, 2, 3)

    np.testing.assert_allclose(np.array([1.0, 2.0, 3.0]), x.get_value(), atol=1e-1)
    np.testing.assert_allclose(
        np.array([1.0, 2.0, 3.0]), samples.mean(("chain", "draw", "y_dim_0")), atol=1e-1
    )

    with m:
        pm.set_data({"x": np.array([2.0, 4.0, 6.0])})
        assert y.eval().shape == (2, 3)
        idata = pm.sample(
            chains=1,
            tune=500,
            draws=620,
            return_inferencedata=True,
            compute_convergence_checks=False,
        )
    samples = idata.posterior["y"]
    assert samples.shape == (1, 620, 2, 3)

    np.testing.assert_allclose(np.array([2.0, 4.0, 6.0]), x.get_value(), atol=1e-1)
    np.testing.assert_allclose(
        np.array([2.0, 4.0, 6.0]), samples.mean(("chain", "draw", "y_dim_0")), atol=1e-1
    )
def test_shapeerror_from_resize_immutable_dims():
    """
    Trying to resize an immutable dimension should raise a ShapeError,
    even if the variable being updated is a SharedVariable and
    has other dimensions that are mutable.
    """
    with pm.Model() as pmodel:
        a = pm.Normal("a", mu=[1, 2, 3], dims="fixed")
        m = pm.MutableData("m", [[1, 2, 3]], dims=("one", "fixed"))

        # This is fine because the "fixed" dim is not resized
        pm.set_data({"m": [[1, 2, 3], [3, 4, 5]]})

    with pytest.raises(ShapeError, match="was initialized from 'a'"):
        # Can't work because the "fixed" dimension is linked to a constant shape;
        # note that the new data tries to change both dimensions
        with pmodel:
            pm.set_data({"m": [[1, 2], [3, 4]]})
def update_error_estimate(self, accepted, skipped_logp):
    """Updates the adaptive error model estimate with the latest accepted
    forward model output difference. Also updates the model variables
    mu_B and Sigma_B.

    The current level estimates and stores the error model between
    the current level and the level below."""

    # only save errors when a sample is accepted (excluding skipped_logp)
    if accepted and not skipped_logp:
        # this is the error (i.e. forward model output difference)
        # between the current level's model and the model in the level below
        self.last_synced_output_diff = (
            self.model.model_output.get_value() - self.model_below.model_output.get_value()
        )
        self.adaptation_started = True

    if self.adaptation_started:
        # update the internal recursive bias estimator with the last saved error
        self.bias.update(self.last_synced_output_diff)

        # Update the model variables in the level below the current one.
        # Each level has its own bias correction (i.e. bias object) that
        # estimates the error between that level and the one below.
        # The model variables mu_B and Sigma_B of a level are the
        # sum of the bias corrections of all levels below and including
        # that level. This sum is updated here.
        with self.model_below:
            pm.set_data(
                {
                    "mu_B": sum(
                        bias.get_mu()
                        for bias in self.bias_all[: len(self.bias_all) - self.num_levels + 2]
                    )
                }
            )
            pm.set_data(
                {
                    "Sigma_B": sum(
                        bias.get_sigma()
                        for bias in self.bias_all[: len(self.bias_all) - self.num_levels + 2]
                    )
                }
            )
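# A minimal, hypothetical sketch of the coarse-level model that the method above
# relies on: "mu_B" and "Sigma_B" must exist as MutableData containers in
# self.model_below so that pm.set_data() can update them. The data (xs, y_obs),
# the parameter theta, and the linear stand-in for the forward model are
# illustrative only and not taken from the original code.
import numpy as np
import pymc as pm

xs = np.linspace(0.0, 1.0, 3)
y_obs = np.zeros(3)

with pm.Model() as coarse_model:
    # Containers that update_error_estimate() overwrites during adaptation.
    mu_B = pm.MutableData("mu_B", np.zeros_like(y_obs))
    Sigma_B = pm.MutableData("Sigma_B", np.zeros((y_obs.size, y_obs.size)))
    theta = pm.Normal("theta", 0.0, 1.0, size=2)
    # Stand-in for the coarse forward model output.
    f = theta[0] + theta[1] * xs
    # The bias mean mu_B shifts the coarse output and Sigma_B inflates the
    # observation covariance, which is the adaptive error correction.
    pm.MvNormal("obs", mu=f + mu_B, cov=np.eye(y_obs.size) + Sigma_B, observed=y_obs)

# With such a model as model_below, the pm.set_data({"mu_B": ...}) and
# pm.set_data({"Sigma_B": ...}) calls above resolve to these SharedVariables.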
def test_sample_posterior_predictive_after_set_data(self):
    with pm.Model() as model:
        x = pm.MutableData("x", [1.0, 2.0, 3.0])
        y = pm.ConstantData("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 10.0)
        pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
        trace = pm.sample(
            1000,
            tune=1000,
            chains=1,
            return_inferencedata=False,
            compute_convergence_checks=False,
        )
    # Predict on new data.
    with model:
        x_test = [5, 6, 9]
        pm.set_data(new_data={"x": x_test})
        y_test = pm.sample_posterior_predictive(trace)

    assert y_test.posterior_predictive["obs"].shape == (1, 1000, 3)
    np.testing.assert_allclose(
        x_test, y_test.posterior_predictive["obs"].mean(("chain", "draw")), atol=1e-1
    )