Example 1
    def test_predictions_constant_data(self):
        with pm.Model():
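            # "x" (ConstantData) should be exported to the InferenceData "constant_data" group,
            # while the observed values of "obs" go to "observed_data" (asserted below).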
            x = pm.ConstantData("x", [1.0, 2.0, 3.0])
            y = pm.MutableData("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            trace = pm.sample(100, tune=100, return_inferencedata=False)
            inference_data = to_inference_data(trace)

        test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails

        with pm.Model():
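            # A second model for out-of-sample prediction: "x" now holds the prediction inputs
            # and should end up in the "predictions_constant_data" group (asserted below).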
            x = pm.MutableData("x", [1.0, 2.0])
            y = pm.ConstantData("y", [1.0, 2.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            predictive_trace = pm.sample_posterior_predictive(
                inference_data, return_inferencedata=False
            )
            assert set(predictive_trace.keys()) == {"obs"}
            # this should be four chains of 100 samples
            # assert predictive_trace["obs"].shape == (400, 2)
            # but the shape seems to vary between pymc versions
            inference_data = predictions_to_inference_data(predictive_trace, posterior_trace=trace)
        test_dict = {"posterior": ["beta"], "~observed_data": ""}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails, "Posterior data not copied over as expected."
        test_dict = {"predictions": ["obs"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails, "Predictions not instantiated as expected."
        test_dict = {"predictions_constant_data": ["x"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails, "Predictions constant data not instantiated as expected."
Example 2
    def test_simultaneous_shape_and_dims(self, ellipsis_in):
        with pm.Model() as pmodel:
            x = pm.ConstantData("x", [1, 2, 3], dims="ddata")

            if ellipsis_in == "none":
                # The shape and dims tuples correspond to each other.
                # Note: No checks are performed that implied shape (x), shape and dims actually match.
                y = pm.Normal("y",
                              mu=x,
                              shape=(2, 3),
                              dims=("dshape", "ddata"))
                assert pmodel.RV_dims["y"] == ("dshape", "ddata")
            elif ellipsis_in == "shape":
                y = pm.Normal("y",
                              mu=x,
                              shape=(2, ...),
                              dims=("dshape", "ddata"))
                assert pmodel.RV_dims["y"] == ("dshape", "ddata")
            elif ellipsis_in == "dims":
                y = pm.Normal("y", mu=x, shape=(2, 3), dims=("dshape", ...))
                assert pmodel.RV_dims["y"] == ("dshape", None)
            elif ellipsis_in == "both":
                y = pm.Normal("y", mu=x, shape=(2, ...), dims=("dshape", ...))
                assert pmodel.RV_dims["y"] == ("dshape", None)

            assert "dshape" in pmodel.dim_lengths
            assert y.eval().shape == (2, 3)
Example 3
    def test_no_trace(self):
        with pm.Model() as model:
            x = pm.ConstantData("x", [1.0, 2.0, 3.0])
            y = pm.MutableData("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            idata = pm.sample(100, tune=100)
            prior = pm.sample_prior_predictive(return_inferencedata=False)
            posterior_predictive = pm.sample_posterior_predictive(idata, return_inferencedata=False)

        # Only prior
        inference_data = to_inference_data(prior=prior, model=model)
        test_dict = {"prior": ["beta"], "prior_predictive": ["obs"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
        # Only posterior_predictive
        inference_data = to_inference_data(posterior_predictive=posterior_predictive, model=model)
        test_dict = {"posterior_predictive": ["obs"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
        # Prior and posterior_predictive but no trace
        inference_data = to_inference_data(
            prior=prior, posterior_predictive=posterior_predictive, model=model
        )
        test_dict = {
            "prior": ["beta"],
            "prior_predictive": ["obs"],
            "posterior_predictive": ["obs"],
        }
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
Example 4
    def test_overwrite_model_coords_dims(self):
        """Check coords and dims from model object can be partially overwritten."""
        dim1 = ["a", "b"]
        new_dim1 = ["c", "d"]
        coords = {"dim1": dim1, "dim2": ["c1", "c2"]}
        x_data = np.arange(4).reshape((2, 2))
        y = x_data + np.random.normal(size=(2, 2))
        with pm.Model(coords=coords):
            x = pm.ConstantData("x", x_data, dims=("dim1", "dim2"))
            beta = pm.Normal("beta", 0, 1, dims="dim1")
            _ = pm.Normal("obs", x * beta, 1, observed=y, dims=("dim1", "dim2"))
            trace = pm.sample(100, tune=100, return_inferencedata=False)
            idata1 = to_inference_data(trace)
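            # idata2 overrides the "dim1" coordinate values and remaps "beta" onto "dim2",
            # while idata1 keeps the coords and dims declared on the model (asserted below).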
            idata2 = to_inference_data(trace, coords={"dim1": new_dim1}, dims={"beta": ["dim2"]})

        test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
        fails1 = check_multiple_attrs(test_dict, idata1)
        assert not fails1
        fails2 = check_multiple_attrs(test_dict, idata2)
        assert not fails2
        assert "dim1" in list(idata1.posterior.beta.dims)
        assert "dim2" in list(idata2.posterior.beta.dims)
        assert np.all(idata1.constant_data.x.dim1.values == np.array(dim1))
        assert np.all(idata1.constant_data.x.dim2.values == np.array(["c1", "c2"]))
        assert np.all(idata2.constant_data.x.dim1.values == np.array(new_dim1))
        assert np.all(idata2.constant_data.x.dim2.values == np.array(["c1", "c2"]))
Example 5
def model_with_dims():
    with pm.Model(
        coords={"city": ["Aachen", "Maastricht", "London", "Bergheim"]}
    ) as pmodel:
        economics = pm.Uniform("economics", lower=-1, upper=1, shape=(1, ))

        population = pm.HalfNormal("population", sigma=5, dims=("city"))

        time = pm.ConstantData("time", [2014, 2015, 2016], dims="year")
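        # "time" registers a new "year" dimension of length 3, which shows up as the "year (3)" plate below.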

        n = pm.Deterministic("tax revenue",
                             economics * population[None, :] * time[:, None],
                             dims=("year", "city"))

        yobs = pm.MutableData("observed", np.ones((3, 4)))
        L = pm.Normal("L", n, observed=yobs)

    compute_graph = {
        "economics": set(),
        "population": set(),
        "time": set(),
        "tax revenue": {"economics", "population", "time"},
        "L": {"tax revenue"},
        "observed": {"L"},
    }
    plates = {
        "1": {"economics"},
        "city (4)": {"population"},
        "year (3)": {"time"},
        "year (3) x city (4)": {"tax revenue"},
        "3 x 4": {"L", "observed"},
    }

    return pmodel, compute_graph, plates
Example 6
    def test_data_defined_size_dimension_can_register_dimname(self):
        with pm.Model() as pmodel:
            x = pm.ConstantData("x", [[1, 2, 3, 4]], dims=("first", "second"))
            assert "first" in pmodel.dim_lengths
            assert "second" in pmodel.dim_lengths
            # two dimensions are implied; a "third" dimension is created
            y = pm.Normal("y", mu=x, size=2, dims=("third", "first", "second"))
            assert "third" in pmodel.dim_lengths
            assert y.eval().shape == (2, 1, 4)
Example 7
def test_data_naming():
    """
    Regression test for issue #3793, where `Data` objects in named models
    were not given model-relative names.
    """
    with pm.Model("named_model") as model:
        x = pm.ConstantData("x", [1.0, 2.0, 3.0])
        y = pm.Normal("y")
    assert y.name == "named_model::y"
    assert x.name == "named_model::x"
Example 8
def test_coords_and_constantdata_create_immutable_dims():
    """
    When created from `pm.Model(coords=...)` or `pm.ConstantData`
    a dimension should not be resizable; its length is registered as a constant.
    """
    with pm.Model(coords={"group": ["A", "B"]}) as m:
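        # Both dimension lengths are fixed: "group" from the model coords, "feature" from the
        # shape of the ConstantData value; both are stored as TensorConstant lengths (asserted below).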
        x = pm.ConstantData("x", [0], dims="feature")
        y = pm.Normal("y", x, 1, dims=("group", "feature"))
    assert isinstance(m._dim_lengths["feature"], TensorConstant)
    assert isinstance(m._dim_lengths["group"], TensorConstant)
    assert x.eval().shape == (1,)
    assert y.eval().shape == (2, 1)
Example 9
    def test_autodetect_coords_from_model(self, use_context):
        pd = pytest.importorskip("pandas")
        df_data = pd.DataFrame(columns=["date"]).set_index("date")
        dates = pd.date_range(start="2020-05-01", end="2020-05-20")
        for city, mu in {"Berlin": 15, "San Marino": 18, "Paris": 16}.items():
            df_data[city] = np.random.normal(loc=mu, size=len(dates))
        df_data.index = dates
        df_data.index.name = "date"

        coords = {"date": df_data.index, "city": df_data.columns}
        with pm.Model(coords=coords) as model:
            europe_mean = pm.Normal("europe_mean_temp", mu=15.0, sigma=3.0)
            city_offset = pm.Normal("city_offset",
                                    mu=0.0,
                                    sigma=3.0,
                                    dims="city")
            city_temperature = pm.Deterministic("city_temperature",
                                                europe_mean + city_offset,
                                                dims="city")

            data_dims = ("date", "city")
            data = pm.ConstantData("data", df_data, dims=data_dims)
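            # The likelihood only declares dim *names*; the coordinate values for "date" and "city"
            # are picked up automatically from the model coords when converting to InferenceData.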
            _ = pm.Normal("likelihood",
                          mu=city_temperature,
                          sigma=0.5,
                          observed=data,
                          dims=data_dims)

            trace = pm.sample(
                return_inferencedata=False,
                compute_convergence_checks=False,
                cores=1,
                chains=1,
                tune=20,
                draws=30,
                step=pm.Metropolis(),
            )
            if use_context:
                idata = to_inference_data(trace=trace)
        if not use_context:
            idata = to_inference_data(trace=trace, model=model)

        assert "city" in list(idata.posterior.dims)
        assert "city" in list(idata.observed_data.dims)
        assert "date" in list(idata.observed_data.dims)

        np.testing.assert_array_equal(idata.posterior.coords["city"],
                                      coords["city"])
        np.testing.assert_array_equal(idata.observed_data.coords["date"],
                                      coords["date"])
        np.testing.assert_array_equal(idata.observed_data.coords["city"],
                                      coords["city"])
Example 10
    def test_set_coords_through_pmdata(self):
        with pm.Model() as pmodel:
            pm.ConstantData(
                "population",
                [100, 200],
                dims="city",
                coords={"city": ["Tinyvil", "Minitown"]},
            )
            pm.MutableData(
                "temperature",
                [[15, 20, 22, 17], [18, 22, 21, 12]],
                dims=("city", "season"),
                coords={"season": ["winter", "spring", "summer", "fall"]},
            )
        assert "city" in pmodel.coords
        assert "season" in pmodel.coords
        assert pmodel.coords["city"] == ("Tinyvil", "Minitown")
        assert pmodel.coords["season"] == ("winter", "spring", "summer", "fall")
Example 11
    def test_simultaneous_size_and_dims(self, with_dims_ellipsis):
        with pm.Model() as pmodel:
            x = pm.ConstantData("x", [1, 2, 3], dims="ddata")
            assert "ddata" in pmodel.dim_lengths

            # Size does not include support dims, so this test must use a dist with support dims.
            kwargs = dict(name="y", size=2, mu=at.ones((3, 4)), cov=at.eye(4))
            if with_dims_ellipsis:
                y = pm.MvNormal(**kwargs, dims=("dsize", ...))
                assert pmodel.RV_dims["y"] == ("dsize", None, None)
            else:
                y = pm.MvNormal(**kwargs, dims=("dsize", "ddata", "dsupport"))
                assert pmodel.RV_dims["y"] == ("dsize", "ddata", "dsupport")

            assert "dsize" in pmodel.dim_lengths
            assert y.eval().shape == (2, 3, 4)
Example 12
    def test_constant_data(self, use_context):
        """Test constant_data group behaviour."""
        with pm.Model() as model:
            x = pm.ConstantData("x", [1.0, 2.0, 3.0])
            y = pm.MutableData("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            trace = pm.sample(100, chains=2, tune=100, return_inferencedata=False)
            if use_context:
                inference_data = to_inference_data(trace=trace)

        if not use_context:
            inference_data = to_inference_data(trace=trace, model=model)
        test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
        assert inference_data.log_likelihood["obs"].shape == (2, 100, 3)
Example 13
def model_with_different_descendants():
    """
    Model proposed by Michael to test variable selection functionality
    From here: https://github.com/pymc-devs/pymc/pull/5634#pullrequestreview-916297509
    """
    with pm.Model() as pmodel2:
        a = pm.Normal("a")
        b = pm.Normal("b")
        pm.Normal("c", a * b)
        intermediate = pm.Deterministic("intermediate", a + b)
        pred = pm.Deterministic("pred", intermediate * 3)

        obs = pm.ConstantData("obs", 1.75)
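        # The scalar ConstantData "obs" is passed directly as the observed value of "L" below.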

        L = pm.Normal("L", mu=1 + 0.5 * pred, observed=obs)

    return pmodel2
Example 14
    def test_implicit_coords_series(self):
        ser_sales = pd.Series(
            data=np.random.randint(low=0, high=30, size=22),
            index=pd.date_range(start="2020-05-01",
                                periods=22,
                                freq="24H",
                                name="date"),
            name="sales",
        )
        with pm.Model() as pmodel:
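            # export_index_as_coords=True turns the Series' "date" index into the "date" coordinate (asserted below).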
            pm.ConstantData("sales",
                            ser_sales,
                            dims="date",
                            export_index_as_coords=True)

        assert "date" in pmodel.coords
        assert len(pmodel.coords["date"]) == 22
        assert pmodel.RV_dims == {"sales": ("date", )}
Example 15
    def test_implicit_coords_dataframe(self):
        N_rows = 5
        N_cols = 7
        df_data = pd.DataFrame()
        for c in range(N_cols):
            df_data[f"Column {c+1}"] = np.random.normal(size=(N_rows, ))
        df_data.index.name = "rows"
        df_data.columns.name = "columns"

        # infer coordinates from index and columns of the DataFrame
        with pm.Model() as pmodel:
            pm.ConstantData("observations",
                            df_data,
                            dims=("rows", "columns"),
                            export_index_as_coords=True)

        assert "rows" in pmodel.coords
        assert "columns" in pmodel.coords
        assert pmodel.RV_dims == {"observations": ("rows", "columns")}
Example 16
    def test_priors_separation(self, use_context):
        """Test model is enough to get prior, prior predictive and observed_data."""
        with pm.Model() as model:
            x = pm.MutableData("x", [1.0, 2.0, 3.0])
            y = pm.ConstantData("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            prior = pm.sample_prior_predictive(return_inferencedata=False)

        test_dict = {
            "prior": ["beta", "~obs"],
            "observed_data": ["obs"],
            "prior_predictive": ["obs"],
        }
        if use_context:
            with model:
                inference_data = to_inference_data(prior=prior)
        else:
            inference_data = to_inference_data(prior=prior, model=model)
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
Example 17
    def test_model_to_graphviz_for_model_with_data_container(self):
        with pm.Model() as model:
            x = pm.ConstantData("x", [1.0, 2.0, 3.0])
            y = pm.MutableData("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 10.0)
            obs_sigma = floatX(np.sqrt(1e-2))
            pm.Normal("obs", beta * x, obs_sigma, observed=y)
            pm.sample(
                1000,
                init=None,
                tune=1000,
                chains=1,
                compute_convergence_checks=False,
            )

        for formatting in {"latex", "latex_with_params"}:
            with pytest.raises(ValueError, match="Unsupported formatting"):
                pm.model_to_graphviz(model, formatting=formatting)
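        # Both data containers are rendered as rounded box nodes; only their
        # ConstantData/MutableData labels differ (expected substrings below).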

        exp_without = [
            'x [label="x\n~\nConstantData" shape=box style="rounded, filled"]',
            'y [label="x\n~\nMutableData" shape=box style="rounded, filled"]',
            'beta [label="beta\n~\nNormal"]',
            'obs [label="obs\n~\nNormal" style=filled]',
        ]
        exp_with = [
            'x [label="x\n~\nConstantData" shape=box style="rounded, filled"]',
            'y [label="x\n~\nMutableData" shape=box style="rounded, filled"]',
            'beta [label="beta\n~\nNormal(mu=0.0, sigma=10.0)"]',
            f'obs [label="obs\n~\nNormal(mu=f(f(beta), x), sigma={obs_sigma})" style=filled]',
        ]
        for formatting, expected_substrings in [
            ("plain", exp_without),
            ("plain_with_params", exp_with),
        ]:
            g = pm.model_to_graphviz(model, formatting=formatting)
            # check formatting of RV nodes
            for expected in expected_substrings:
                assert expected in g.source
Example 18
    def test_sample_posterior_predictive_after_set_data(self):
        with pm.Model() as model:
            x = pm.MutableData("x", [1.0, 2.0, 3.0])
            y = pm.ConstantData("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 10.0)
            pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
            trace = pm.sample(
                1000,
                tune=1000,
                chains=1,
                return_inferencedata=False,
                compute_convergence_checks=False,
            )
        # Predict on new data.
        with model:
            x_test = [5, 6, 9]
            pm.set_data(new_data={"x": x_test})
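            # "x" is MutableData, so its values can be swapped in place; the posterior predictive
            # below is drawn for the three new test points.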
            y_test = pm.sample_posterior_predictive(trace)

        assert y_test.posterior_predictive["obs"].shape == (1, 1000, 3)
        np.testing.assert_allclose(x_test,
                                   y_test.posterior_predictive["obs"].mean(
                                       ("chain", "draw")),
                                   atol=1e-1)
Example 19
    def test_creation_of_data_outside_model_context(self):
        with pytest.raises((IndexError, TypeError)) as error:
            pm.ConstantData("data", [1.1, 2.2, 3.3])
        error.match("No model on context stack")
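
For contrast, a minimal usage sketch (not part of the test suite above, assuming a recent PyMC release): pm.ConstantData attaches the data container to the model found on the context stack, so it must be created inside a `with pm.Model()` block, which is exactly what the error in this test enforces.

# Hypothetical minimal sketch -- assumes a recent PyMC version where
# pm.ConstantData registers the data on the active model context.
import pymc as pm

with pm.Model() as model:
    data = pm.ConstantData("data", [1.1, 2.2, 3.3])  # no error: a model context is active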