Beispiel #1
0
    def test_no_trace(self):
        with pm.Model() as model:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            idata = pm.sample(100, tune=100)
            prior = pm.sample_prior_predictive(return_inferencedata=False)
            posterior_predictive = pm.sample_posterior_predictive(
                idata, return_inferencedata=False)

        # Only prior
        inference_data = to_inference_data(prior=prior, model=model)
        test_dict = {"prior": ["beta"], "prior_predictive": ["obs"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
        # Only posterior_predictive
        inference_data = to_inference_data(
            posterior_predictive=posterior_predictive, model=model)
        test_dict = {"posterior_predictive": ["obs"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
        # Prior and posterior_predictive but no trace
        inference_data = to_inference_data(
            prior=prior,
            posterior_predictive=posterior_predictive,
            model=model)
        test_dict = {
            "prior": ["beta"],
            "prior_predictive": ["obs"],
            "posterior_predictive": ["obs"],
        }
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
Beispiel #2
0
    def test_shared_data_as_index(self):
        """
        Allow pm.Data to be used for index variables, i.e with integers as well as floats.
        See https://github.com/pymc-devs/pymc/issues/3813
        """
        with pm.Model() as model:
            index = pm.Data("index", [2, 0, 1, 0, 2])
            y = pm.Data("y", [1.0, 2.0, 3.0, 2.0, 1.0])
            alpha = pm.Normal("alpha", 0, 1.5, size=3)
            pm.Normal("obs", alpha[index], np.sqrt(1e-2), observed=y)

            prior_trace = pm.sample_prior_predictive(1000)
            idata = pm.sample(
                1000,
                init=None,
                tune=1000,
                chains=1,
                compute_convergence_checks=False,
            )

        # Predict on new data
        new_index = np.array([0, 1, 2])
        new_y = [5.0, 6.0, 9.0]
        with model:
            pm.set_data(new_data={"index": new_index, "y": new_y})
            pp_trace = pm.sample_posterior_predictive(
                idata, 1000, var_names=["alpha", "obs"])

        assert prior_trace.prior["alpha"].shape == (1, 1000, 3)
        assert idata.posterior["alpha"].shape == (1, 1000, 3)
        assert pp_trace.posterior_predictive["alpha"].shape == (1, 1000, 3)
        assert pp_trace.posterior_predictive["obs"].shape == (1, 1000, 3)
Beispiel #3
0
    def test_sample_after_set_data(self):
        with pm.Model() as model:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 10.0)
            pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
            pm.sample(
                1000,
                init=None,
                tune=1000,
                chains=1,
                compute_convergence_checks=False,
            )
        # Predict on new data.
        new_x = [5.0, 6.0, 9.0]
        new_y = [5.0, 6.0, 9.0]
        with model:
            pm.set_data(new_data={"x": new_x, "y": new_y})
            new_idata = pm.sample(
                1000,
                init=None,
                tune=1000,
                chains=1,
                compute_convergence_checks=False,
            )
            pp_trace = pm.sample_posterior_predictive(new_idata, 1000)

        assert pp_trace.posterior_predictive["obs"].shape == (1, 1000, 3)
        np.testing.assert_allclose(new_y,
                                   pp_trace.posterior_predictive["obs"].mean(
                                       ("chain", "draw")),
                                   atol=1e-1)
Beispiel #4
0
def model_with_dims():
    with pm.Model(coords={"city": ["Aachen", "Maastricht", "London", "Bergheim"]}) as pmodel:
        economics = pm.Uniform("economics", lower=-1, upper=1, shape=(1,))

        population = pm.HalfNormal("population", sd=5, dims=("city"))

        time = pm.Data("year", [2014, 2015, 2016], dims="year")

        n = pm.Deterministic(
            "tax revenue", economics * population[None, :] * time[:, None], dims=("year", "city")
        )

        yobs = pm.Data("observed", np.ones((3, 4)))
        L = pm.Normal("L", n, observed=yobs)

    compute_graph = {
        "economics": set(),
        "population": set(),
        "year": set(),
        "tax revenue": {"economics", "population", "year"},
        "L": {"tax revenue"},
        "observed": {"L"},
    }
    plates = {
        "1": {"economics"},
        "city (4)": {"population"},
        "year (3)": {"year"},
        "year (3) x city (4)": {"tax revenue"},
        "3 x 4": {"L", "observed"},
    }

    return pmodel, compute_graph, plates
Beispiel #5
0
def radon_model():
    """Similar in shape to the Radon model"""
    n_homes = 919
    counties = 85
    uranium = np.random.normal(-0.1, 0.4, size=n_homes)
    xbar = np.random.normal(1, 0.1, size=n_homes)
    floor_measure = np.random.randint(0, 2, size=n_homes)

    d, r = divmod(919, 85)
    county = np.hstack((np.tile(np.arange(counties, dtype=int), d), np.arange(r)))
    with pm.Model() as model:
        sigma_a = pm.HalfCauchy("sigma_a", 5)
        gamma = pm.Normal("gamma", mu=0.0, sigma=1e5, shape=3)
        mu_a = pm.Deterministic("mu_a", gamma[0] + gamma[1] * uranium + gamma[2] * xbar)
        eps_a = pm.Normal("eps_a", mu=0, sigma=sigma_a, shape=counties)
        a = pm.Deterministic("a", mu_a + eps_a[county])
        b = pm.Normal("b", mu=0.0, sigma=1e15)
        sigma_y = pm.Uniform("sigma_y", lower=0, upper=100)

        # Anonymous SharedVariables don't show up
        floor_measure = aesara.shared(floor_measure)
        floor_measure_offset = pm.Data("floor_measure_offset", 1)
        y_hat = a + b * floor_measure + floor_measure_offset
        log_radon = pm.Data("log_radon", np.random.normal(1, 1, size=n_homes))
        y_like = pm.Normal("y_like", mu=y_hat, sigma=sigma_y, observed=log_radon)

    compute_graph = {
        # variable_name : set of named parents in the graph
        "sigma_a": set(),
        "gamma": set(),
        "mu_a": {"gamma"},
        "eps_a": {"sigma_a"},
        "a": {"mu_a", "eps_a"},
        "b": set(),
        "sigma_y": set(),
        "y_like": {"a", "b", "sigma_y", "floor_measure_offset"},
        "floor_measure_offset": set(),
        # observed data don't have parents in the model graph, but are shown as decendants
        # of the model variables that the observations belong to:
        "log_radon": {"y_like"},
    }
    plates = {
        "": {"b", "sigma_a", "sigma_y", "floor_measure_offset"},
        "3": {"gamma"},
        "85": {"eps_a"},
        "919": {"a", "mu_a", "y_like", "log_radon"},
    }
    return model, compute_graph, plates
Beispiel #6
0
    def test_sample(self):
        x = np.random.normal(size=100)
        y = x + np.random.normal(scale=1e-2, size=100)

        x_pred = np.linspace(-3, 3, 200, dtype="float32")

        with pm.Model():
            x_shared = pm.Data("x_shared", x)
            b = pm.Normal("b", 0.0, 10.0)
            pm.Normal("obs", b * x_shared, np.sqrt(1e-2), observed=y)

            prior_trace0 = pm.sample_prior_predictive(1000)
            idata = pm.sample(1000, init=None, tune=1000, chains=1)
            pp_trace0 = pm.sample_posterior_predictive(idata, 1000)

            x_shared.set_value(x_pred)
            prior_trace1 = pm.sample_prior_predictive(1000)
            pp_trace1 = pm.sample_posterior_predictive(idata, samples=1000)

        assert prior_trace0.prior["b"].shape == (1, 1000)
        assert prior_trace0.prior_predictive["obs"].shape == (1, 1000, 100)
        assert prior_trace1.prior_predictive["obs"].shape == (1, 1000, 200)

        assert pp_trace0.posterior_predictive["obs"].shape == (1, 1000, 100)
        np.testing.assert_allclose(x,
                                   pp_trace0.posterior_predictive["obs"].mean(
                                       ("chain", "draw")),
                                   atol=1e-1)

        assert pp_trace1.posterior_predictive["obs"].shape == (1, 1000, 200)
        np.testing.assert_allclose(x_pred,
                                   pp_trace1.posterior_predictive["obs"].mean(
                                       ("chain", "draw")),
                                   atol=1e-1)
Beispiel #7
0
 def test_data_mutable_default_warning(self):
     with pm.Model():
         with pytest.warns(FutureWarning,
                           match="`mutable` kwarg was not specified"):
             data = pm.Data("x", [1, 2, 3])
         assert isinstance(data, SharedVariable)
     pass
Beispiel #8
0
 def test_data_defined_size_dimension_can_register_dimname(self):
     with pm.Model() as pmodel:
         x = pm.Data("x", [[1, 2, 3, 4]], dims=("first", "second"))
         assert "first" in pmodel.dim_lengths
         assert "second" in pmodel.dim_lengths
         # two dimensions are implied; a "third" dimension is created
         y = pm.Normal("y", mu=x, size=2, dims=("third", "first", "second"))
         assert "third" in pmodel.dim_lengths
         assert y.eval().shape() == (2, 1, 4)
Beispiel #9
0
def test_data_naming():
    """
    This is a test for issue #3793 -- `Data` objects in named models are
    not given model-relative names.
    """
    with pm.Model("named_model") as model:
        x = pm.Data("x", [1.0, 2.0, 3.0])
        y = pm.Normal("y")
    assert y.name == "named_model_y"
    assert x.name == "named_model_x"
Beispiel #10
0
    def test_constant_data(self, use_context):
        """Test constant_data group behaviour."""
        with pm.Model() as model:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            trace = pm.sample(100, tune=100, return_inferencedata=False)
            if use_context:
                inference_data = to_inference_data(trace=trace)

        if not use_context:
            inference_data = to_inference_data(trace=trace, model=model)
        test_dict = {
            "posterior": ["beta"],
            "observed_data": ["obs"],
            "constant_data": ["x"]
        }
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
Beispiel #11
0
 def test_data_kwargs(self):
     strict_value = True
     allow_downcast_value = False
     with pm.Model():
         data = pm.Data(
             "data",
             value=[[1.0], [2.0], [3.0]],
             strict=strict_value,
             allow_downcast=allow_downcast_value,
         )
     assert data.container.strict is strict_value
     assert data.container.allow_downcast is allow_downcast_value
Beispiel #12
0
    def test_priors_separation(self, use_context):
        """Test model is enough to get prior, prior predictive and observed_data."""
        with pm.Model() as model:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            prior = pm.sample_prior_predictive(return_inferencedata=False)

        test_dict = {
            "prior": ["beta", "~obs"],
            "observed_data": ["obs"],
            "prior_predictive": ["obs"],
        }
        if use_context:
            with model:
                inference_data = to_inference_data(prior=prior)
        else:
            inference_data = to_inference_data(prior=prior, model=model)
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
Beispiel #13
0
    def test_predictions_constant_data(self):
        with pm.Model():
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            trace = pm.sample(100, tune=100, return_inferencedata=False)
            inference_data = to_inference_data(trace)

        test_dict = {
            "posterior": ["beta"],
            "observed_data": ["obs"],
            "constant_data": ["x"]
        }
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails

        with pm.Model():
            x = pm.Data("x", [1.0, 2.0])
            y = pm.Data("y", [1.0, 2.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            predictive_trace = pm.sample_posterior_predictive(
                inference_data, return_inferencedata=False)
            assert set(predictive_trace.keys()) == {"obs"}
            # this should be four chains of 100 samples
            # assert predictive_trace["obs"].shape == (400, 2)
            # but the shape seems to vary between pymc versions
            inference_data = predictions_to_inference_data(
                predictive_trace, posterior_trace=trace)
        test_dict = {"posterior": ["beta"], "~observed_data": ""}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails, "Posterior data not copied over as expected."
        test_dict = {"predictions": ["obs"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails, "Predictions not instantiated as expected."
        test_dict = {"predictions_constant_data": ["x"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails, "Predictions constant data not instantiated as expected."
Beispiel #14
0
 def test_can_resize_data_defined_size(self):
     with pm.Model() as pmodel:
         x = pm.Data("x", [[1, 2, 3, 4]], dims=("first", "second"))
         y = pm.Normal("y", mu=0, dims=("first", "second"))
         z = pm.Normal("z", mu=y, observed=np.ones((1, 4)))
         assert x.eval().shape == (1, 4)
         assert y.eval().shape == (1, 4)
         assert z.eval().shape == (1, 4)
         assert "first" in pmodel.dim_lengths
         assert "second" in pmodel.dim_lengths
         pmodel.set_data("x", [[1, 2], [3, 4], [5, 6]])
         assert x.eval().shape == (3, 2)
         assert y.eval().shape == (3, 2)
         assert z.eval().shape == (3, 2)
Beispiel #15
0
    def test_no_resize_of_implied_dimensions(self):
        with pm.Model() as pmodel:
            # Imply a dimension through RV params
            pm.Normal("n", mu=[1, 2, 3], dims="city")
            # _Use_ the dimension for a data variable
            inhabitants = pm.Data("inhabitants", [100, 200, 300], dims="city")

            # Attempting to re-size the dimension through the data variable would
            # cause shape problems in InferenceData conversion, because the RV remains (3,).
            with pytest.raises(
                    ShapeError,
                    match=
                    "was initialized from 'n' which is not a shared variable"):
                pmodel.set_data("inhabitants", [1, 2, 3, 4])
Beispiel #16
0
    def test_model_to_graphviz_for_model_with_data_container(self):
        with pm.Model() as model:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 10.0)
            obs_sigma = floatX(np.sqrt(1e-2))
            pm.Normal("obs", beta * x, obs_sigma, observed=y)
            pm.sample(
                1000,
                init=None,
                tune=1000,
                chains=1,
                compute_convergence_checks=False,
            )

        for formatting in {"latex", "latex_with_params"}:
            with pytest.raises(ValueError, match="Unsupported formatting"):
                pm.model_to_graphviz(model, formatting=formatting)

        exp_without = [
            'x [label="x\n~\nData" shape=box style="rounded, filled"]',
            'beta [label="beta\n~\nNormal"]',
            'obs [label="obs\n~\nNormal" style=filled]',
        ]
        exp_with = [
            'x [label="x\n~\nData" shape=box style="rounded, filled"]',
            'beta [label="beta\n~\nNormal(mu=0.0, sigma=10.0)"]',
            f'obs [label="obs\n~\nNormal(mu=f(f(beta), x), sigma={obs_sigma})" style=filled]',
        ]
        for formatting, expected_substrings in [
            ("plain", exp_without),
            ("plain_with_params", exp_with),
        ]:
            g = pm.model_to_graphviz(model, formatting=formatting)
            # check formatting of RV nodes
            for expected in expected_substrings:
                assert expected in g.source
Beispiel #17
0
    def test_sample_posterior_predictive_after_set_data(self):
        with pm.Model() as model:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 10.0)
            pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
            trace = pm.sample(
                1000,
                tune=1000,
                chains=1,
                return_inferencedata=False,
                compute_convergence_checks=False,
            )
        # Predict on new data.
        with model:
            x_test = [5, 6, 9]
            pm.set_data(new_data={"x": x_test})
            y_test = pm.sample_posterior_predictive(trace)

        assert y_test.posterior_predictive["obs"].shape == (1, 1000, 3)
        np.testing.assert_allclose(x_test,
                                   y_test.posterior_predictive["obs"].mean(
                                       ("chain", "draw")),
                                   atol=1e-1)
Beispiel #18
0
    def test_autodetect_coords_from_model(self, use_context):
        df_data = pd.DataFrame(columns=["date"]).set_index("date")
        dates = pd.date_range(start="2020-05-01", end="2020-05-20")
        for city, mu in {"Berlin": 15, "San Marino": 18, "Paris": 16}.items():
            df_data[city] = np.random.normal(loc=mu, size=len(dates))
        df_data.index = dates
        df_data.index.name = "date"

        coords = {"date": df_data.index, "city": df_data.columns}
        with pm.Model(coords=coords) as model:
            europe_mean = pm.Normal("europe_mean_temp", mu=15.0, sd=3.0)
            city_offset = pm.Normal("city_offset", mu=0.0, sd=3.0, dims="city")
            city_temperature = pm.Deterministic("city_temperature",
                                                europe_mean + city_offset,
                                                dims="city")

            data_dims = ("date", "city")
            data = pm.Data("data", df_data, dims=data_dims)
            _ = pm.Normal("likelihood",
                          mu=city_temperature,
                          sd=0.5,
                          observed=data,
                          dims=data_dims)

            trace = pm.sample(
                return_inferencedata=False,
                compute_convergence_checks=False,
                cores=1,
                chains=1,
                tune=20,
                draws=30,
                step=pm.Metropolis(),
            )
            if use_context:
                idata = to_inference_data(trace=trace)
        if not use_context:
            idata = to_inference_data(trace=trace, model=model)

        assert "city" in list(idata.posterior.dims)
        assert "city" in list(idata.observed_data.dims)
        assert "date" in list(idata.observed_data.dims)

        np.testing.assert_array_equal(idata.posterior.coords["city"],
                                      coords["city"])
        np.testing.assert_array_equal(idata.observed_data.coords["date"],
                                      coords["date"])
        np.testing.assert_array_equal(idata.observed_data.coords["city"],
                                      coords["city"])
Beispiel #19
0
 def test_eval_rv_shapes(self):
     with pm.Model(coords={
             "city": ["Sydney", "Las Vegas", "Düsseldorf"],
     }) as pmodel:
         pm.Data("budget", [1, 2, 3, 4], dims="year")
         pm.Normal("untransformed", size=(1, 2))
         pm.Uniform("transformed", size=(7, ))
         obs = pm.Uniform("observed", size=(3, ), observed=[0.1, 0.2, 0.3])
         pm.LogNormal("lognorm", mu=at.log(obs))
         pm.Normal("from_dims", dims=("city", "year"))
     shapes = pmodel.eval_rv_shapes()
     assert shapes["untransformed"] == (1, 2)
     assert shapes["transformed"] == (7, )
     assert shapes["transformed_interval__"] == (7, )
     assert shapes["lognorm"] == (3, )
     assert shapes["lognorm_log__"] == (3, )
     assert shapes["from_dims"] == (3, 4)
Beispiel #20
0
    def test_ignores_observed(self):
        observed = np.random.normal(10, 1, size=200)
        with pm.Model():
            # Use a prior that's way off to show we're ignoring the observed variables
            observed_data = pm.Data("observed_data", observed)
            mu = pm.Normal("mu", mu=-100, sigma=1)
            positive_mu = pm.Deterministic("positive_mu", np.abs(mu))
            z = -1 - positive_mu
            pm.Normal("x_obs", mu=z, sigma=1, observed=observed_data)
            prior = pm.sample_prior_predictive(return_inferencedata=False)

        assert "observed_data" not in prior
        assert (prior["mu"] < -90).all()
        assert (prior["positive_mu"] > 90).all()
        assert (prior["x_obs"] < -90).all()
        assert prior["x_obs"].shape == (500, 200)
        npt.assert_array_almost_equal(prior["positive_mu"], np.abs(prior["mu"]), decimal=4)
Beispiel #21
0
    def test_shared_data_as_rv_input(self):
        """
        Allow pm.Data to be used as input for other RVs.
        See https://github.com/pymc-devs/pymc/issues/3842
        """
        with pm.Model() as m:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Normal("y", mu=x, size=(2, 3))
            assert y.eval().shape == (2, 3)
            idata = pm.sample(
                chains=1,
                tune=500,
                draws=550,
                return_inferencedata=True,
                compute_convergence_checks=False,
            )
        samples = idata.posterior["y"]
        assert samples.shape == (1, 550, 2, 3)

        np.testing.assert_allclose(np.array([1.0, 2.0, 3.0]),
                                   x.get_value(),
                                   atol=1e-1)
        np.testing.assert_allclose(np.array([1.0, 2.0, 3.0]),
                                   samples.mean(("chain", "draw", "y_dim_0")),
                                   atol=1e-1)

        with m:
            pm.set_data({"x": np.array([2.0, 4.0, 6.0])})
            assert y.eval().shape == (2, 3)
            idata = pm.sample(
                chains=1,
                tune=500,
                draws=620,
                return_inferencedata=True,
                compute_convergence_checks=False,
            )
        samples = idata.posterior["y"]
        assert samples.shape == (1, 620, 2, 3)

        np.testing.assert_allclose(np.array([2.0, 4.0, 6.0]),
                                   x.get_value(),
                                   atol=1e-1)
        np.testing.assert_allclose(np.array([2.0, 4.0, 6.0]),
                                   samples.mean(("chain", "draw", "y_dim_0")),
                                   atol=1e-1)
Beispiel #22
0
    def test_implicit_coords_series(self):
        ser_sales = pd.Series(
            data=np.random.randint(low=0, high=30, size=22),
            index=pd.date_range(start="2020-05-01",
                                periods=22,
                                freq="24H",
                                name="date"),
            name="sales",
        )
        with pm.Model() as pmodel:
            pm.Data("sales",
                    ser_sales,
                    dims="date",
                    export_index_as_coords=True)

        assert "date" in pmodel.coords
        assert len(pmodel.coords["date"]) == 22
        assert pmodel.RV_dims == {"sales": ("date", )}
Beispiel #23
0
    def test_symbolic_coords(self):
        """
        In v4 dimensions can be created without passing coordinate values.
        Their lengths are then automatically linked to the corresponding Tensor dimension.
        """
        with pm.Model() as pmodel:
            intensity = pm.Data("intensity",
                                np.ones((2, 3)),
                                dims=("row", "column"))
            assert "row" in pmodel.dim_lengths
            assert "column" in pmodel.dim_lengths
            assert isinstance(pmodel.dim_lengths["row"], TensorVariable)
            assert isinstance(pmodel.dim_lengths["column"], TensorVariable)
            assert pmodel.dim_lengths["row"].eval() == 2
            assert pmodel.dim_lengths["column"].eval() == 3

            intensity.set_value(floatX(np.ones((4, 5))))
            assert pmodel.dim_lengths["row"].eval() == 4
            assert pmodel.dim_lengths["column"].eval() == 5
Beispiel #24
0
    def test_implicit_coords_dataframe(self):
        N_rows = 5
        N_cols = 7
        df_data = pd.DataFrame()
        for c in range(N_cols):
            df_data[f"Column {c+1}"] = np.random.normal(size=(N_rows, ))
        df_data.index.name = "rows"
        df_data.columns.name = "columns"

        # infer coordinates from index and columns of the DataFrame
        with pm.Model() as pmodel:
            pm.Data("observations",
                    df_data,
                    dims=("rows", "columns"),
                    export_index_as_coords=True)

        assert "rows" in pmodel.coords
        assert "columns" in pmodel.coords
        assert pmodel.RV_dims == {"observations": ("rows", "columns")}
Beispiel #25
0
    def test_ovewrite_model_coords_dims(self):
        """Check coords and dims from model object can be partially overwrited."""
        dim1 = ["a", "b"]
        new_dim1 = ["c", "d"]
        coords = {"dim1": dim1, "dim2": ["c1", "c2"]}
        x_data = np.arange(4).reshape((2, 2))
        y = x_data + np.random.normal(size=(2, 2))
        with pm.Model(coords=coords):
            x = pm.Data("x", x_data, dims=("dim1", "dim2"))
            beta = pm.Normal("beta", 0, 1, dims="dim1")
            _ = pm.Normal("obs",
                          x * beta,
                          1,
                          observed=y,
                          dims=("dim1", "dim2"))
            trace = pm.sample(100, tune=100, return_inferencedata=False)
            idata1 = to_inference_data(trace)
            idata2 = to_inference_data(trace,
                                       coords={"dim1": new_dim1},
                                       dims={"beta": ["dim2"]})

        test_dict = {
            "posterior": ["beta"],
            "observed_data": ["obs"],
            "constant_data": ["x"]
        }
        fails1 = check_multiple_attrs(test_dict, idata1)
        assert not fails1
        fails2 = check_multiple_attrs(test_dict, idata2)
        assert not fails2
        assert "dim1" in list(idata1.posterior.beta.dims)
        assert "dim2" in list(idata2.posterior.beta.dims)
        assert np.all(idata1.constant_data.x.dim1.values == np.array(dim1))
        assert np.all(
            idata1.constant_data.x.dim2.values == np.array(["c1", "c2"]))
        assert np.all(idata2.constant_data.x.dim1.values == np.array(new_dim1))
        assert np.all(
            idata2.constant_data.x.dim2.values == np.array(["c1", "c2"]))
Beispiel #26
0
    def test_explicit_coords(self):
        N_rows = 5
        N_cols = 7
        data = np.random.uniform(size=(N_rows, N_cols))
        coords = {
            "rows": [f"R{r+1}" for r in range(N_rows)],
            "columns": [f"C{c+1}" for c in range(N_cols)],
        }
        # pass coordinates explicitly, use numpy array in Data container
        with pm.Model(coords=coords) as pmodel:
            pm.Data("observations", data, dims=("rows", "columns"))

        assert "rows" in pmodel.coords
        assert pmodel.coords["rows"] == ("R1", "R2", "R3", "R4", "R5")
        assert "rows" in pmodel.dim_lengths
        assert isinstance(pmodel.dim_lengths["rows"], ScalarSharedVariable)
        assert pmodel.dim_lengths["rows"].eval() == 5
        assert "columns" in pmodel.coords
        assert pmodel.coords["columns"] == ("C1", "C2", "C3", "C4", "C5", "C6",
                                            "C7")
        assert pmodel.RV_dims == {"observations": ("rows", "columns")}
        assert "columns" in pmodel.dim_lengths
        assert isinstance(pmodel.dim_lengths["columns"], ScalarSharedVariable)
        assert pmodel.dim_lengths["columns"].eval() == 7
Beispiel #27
0
 def test_creation_of_data_outside_model_context(self):
     with pytest.raises((IndexError, TypeError)) as error:
         pm.Data("data", [1.1, 2.2, 3.3])
     error.match("No model on context stack")
Beispiel #28
0
 def test_deterministic(self):
     data_values = np.array([0.5, 0.4, 5, 2])
     with pm.Model() as model:
         X = pm.Data("X", data_values)
         pm.Normal("y", 0, 1, observed=X)
         model.logp(model.initial_point)