Example 1
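All snippets in this section assume module-level imports rather than per-function ones. A minimal sketch of those imports, assuming the Model and Prior classes come from the Bambi library and that PerfectSeparationError is the statsmodels exception of that name (both are assumptions, not stated in the examples):

import arviz as az
import numpy as np
import pandas as pd
import pytest

from bambi import Model, Prior  # assumed package; the snippets only reference the names Model and Prior
from statsmodels.tools.sm_exceptions import PerfectSeparationError  # assumed origin of this exception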
def test_logistic_regression_good_numeric():
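    # A response already coded as 0/1 should be accepted by the Bernoulli family without error.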
    data = pd.DataFrame({
        "y": np.random.choice([1, 0], 50),
        "x": np.random.normal(size=50)
    })
    model = Model(data)
    model.fit("y ~ x", family="bernoulli")
Example 2
def test_logistic_regression_empty_index():
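    # A two-level string response should also be accepted by the Bernoulli family.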
    data = pd.DataFrame({
        "y": np.random.choice(["a", "b"], 50),
        "x": np.random.normal(size=50)
    })
    model = Model(data)
    model.fit("y ~ x", family="bernoulli")
Example 3
def test_categorical_term():
    data = pd.DataFrame(
        {
            "y": np.random.normal(size=6),
            "x1": np.random.normal(size=6),
            "x2": [1, 1, 0, 0, 1, 1],
            "g1": ["a"] * 3 + ["b"] * 3,
            "g2": ["x", "x", "z", "z", "y", "y"],
        }
    )
    model = Model("y ~ x1 + x2 + g1 + (g1|g2) + (x2|g2)", data)
    fitted = model.fit(draws=10)
    df = az.summary(fitted)
    names = [
        "Intercept",
        "x1",
        "x2",
        "g1[b]",
        "1|g2_sigma",
        "1|g2[x]",
        "1|g2[y]",
        "1|g2[z]",
        "g1|g2_sigma[b]",
        "g1|g2[b, x]",
        "g1|g2[b, y]",
        "g1|g2[b, z]",
        "x2|g2_sigma",
        "x2|g2[x]",
        "x2|g2[y]",
        "x2|g2[z]",
        "y_sigma",
    ]
    assert list(df.index) == names
Example 4
def test_response_prior():
    data = pd.DataFrame({
        "y": np.random.randint(3, 10, size=50),
        "x": np.random.normal(size=50)
    })
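    # Custom priors for the auxiliary response parameters: "sigma" for the default Gaussian
    # family, "alpha" for the negative binomial and gamma families.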

    priors = {"sigma": Prior("Uniform", lower=0, upper=50)}
    model = Model("y ~ x", data, priors=priors)
    assert model.response.prior.args["sigma"] == priors["sigma"]

    priors = {"alpha": Prior("Uniform", lower=1, upper=20)}
    model = Model("y ~ x", data, family="negativebinomial", priors=priors)
    assert model.response.prior.args["alpha"] == priors["alpha"]

    priors = {"alpha": Prior("Uniform", lower=0, upper=50)}
    model = Model("y ~ x", data, family="gamma", priors=priors)
    assert model.response.prior.args["alpha"] == Prior("Uniform",
                                                       lower=0,
                                                       upper=50)

    priors = {"alpha": Prior("Uniform", lower=0, upper=50)}
    model = Model("y ~ x", data, family="gamma", priors=priors)
    assert model.response.prior.args["alpha"] == Prior("Uniform",
                                                       lower=0,
                                                       upper=50)
Example 5
def test_bad_links():
    """Passes names of links that are not suitable for the family."""
    data = pd.DataFrame(
        {
            "g": np.random.choice([0, 1], size=100),
            "y": np.random.randint(3, 10, size=100),
            "x": np.random.randint(3, 10, size=100),
        }
    )
    FAMILIES = {
        "bernoulli": ["inverse", "inverse_squared", "log"],
        "beta": ["inverse", "inverse_squared", "log"],
        "gamma": ["logit", "probit", "cloglog"],
        "gaussian": ["logit", "probit", "cloglog"],
        "negativebinomial": ["logit", "probit", "inverse", "inverse_squared"],
        "poisson": ["logit", "probit", "cloglog", "inverse", "inverse_squared"],
        "wald": ["logit", "probit", "cloglog"],
    }
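    # The binary response "g" is used with the Bernoulli family; the count response "y" with the rest.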

    for family, links in FAMILIES.items():
        for link in links:
            with pytest.raises(ValueError):
                if family == "bernoulli":
                    Model("g ~ x", data, family=family, link=link)
                else:
                    Model("y ~ x", data, family=family, link=link)
Example 6
def test_prior_shape():
    data = pd.DataFrame(
        {
            "score": np.random.normal(size=100),
            "q": np.random.choice(["1", "2", "3", "4", "5"], size=100),
            "s": np.random.choice(["a", "b", "c"], size=100),
            "g": np.random.choice(["A", "B", "C"], size=100),
        }
    )

    model = Model("score ~ 0 + q", data)
    assert model.terms["q"].prior.args["mu"].shape == (5,)
    assert model.terms["q"].prior.args["sigma"].shape == (5,)

    model = Model("score ~ q", data)
    assert model.terms["q"].prior.args["mu"].shape == (4,)
    assert model.terms["q"].prior.args["sigma"].shape == (4,)

    model = Model("score ~ 0 + q:s", data)
    assert model.terms["q:s"].prior.args["mu"].shape == (15,)
    assert model.terms["q:s"].prior.args["sigma"].shape == (15,)

    # "s" is automatically added to ensure full rank matrix
    model = Model("score ~ q:s", data)
    assert model.terms["Intercept"].prior.args["mu"].shape == ()
    assert model.terms["Intercept"].prior.args["sigma"].shape == ()

    assert model.terms["s"].prior.args["mu"].shape == (2,)
    assert model.terms["s"].prior.args["sigma"].shape == (2,)

    assert model.terms["q:s"].prior.args["mu"].shape == (12,)
    assert model.terms["q:s"].prior.args["sigma"].shape == (12,)
Example 7
def test_auto_scale(diabetes_data):

    # By default, should scale everything except custom Prior() objects
    priors = {"S1": 0.3, "BP": Prior("Cauchy", alpha=1, beta=17.5)}
    model = Model("BMI ~ S1 + S2 + BP", diabetes_data, priors=priors)
    p1 = model.terms["S1"].prior
    p2 = model.terms["S2"].prior
    p3 = model.terms["BP"].prior
    assert p1.name == p2.name == "Normal"
    assert 0 < p1.args["sigma"] < 1
    assert p2.args["sigma"] > p1.args["sigma"]
    assert p3.name == "Cauchy"
    assert p3.args["beta"] == 17.5

    # With auto_scale off, custom Prior() objects are still honored, but numeric scale
    # factors are not: the 0.3 passed for S1 has no effect, while the Cauchy prior for BP does.
    priors = {"S1": 0.3, "BP": Prior("Cauchy", alpha=1, beta=17.5)}
    model = Model("BMI ~ S1 + S2 + BP", diabetes_data, priors=priors, auto_scale=False)
    p1_off = model.terms["S1"].prior
    p2_off = model.terms["S2"].prior
    p3_off = model.terms["BP"].prior
    assert p1_off.name == "Normal"
    assert p2_off.name == "Flat"
    assert p1_off.args["sigma"] == 1
    assert "sigma" not in p2_off.args
    assert p3_off.name == "Cauchy"
Example 8
def test_model_term_names_property_interaction(crossed_data):
    crossed_data["fourcats"] = sum([[x] * 10
                                    for x in ["a", "b", "c", "d"]], list()) * 3
    model = Model("Y ~ threecats*fourcats", crossed_data)
    model.build()
    assert model.term_names == [
        "Intercept", "threecats", "fourcats", "threecats:fourcats"
    ]
Example 9
def test_logistic_regression_bad_numeric():
    data = pd.DataFrame({
        "y": np.random.choice([1, 2], 50),
        "x": np.random.normal(size=50)
    })
    with pytest.raises(ValueError):
        model = Model(data)
        model.fit("y ~ x", family="bernoulli")
Example 10
def test_complete_separation():
    data = pd.DataFrame({"y": [0] * 5 + [1] * 5, "g": ["a"] * 5 + ["b"] * 5})
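    # "y" is perfectly predicted by "g", so the MLE used for automatic priors does not exist.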

    with pytest.raises(PerfectSeparationError):
        Model("y ~ g", data, family="bernoulli", automatic_priors="mle")

    # No error is raised
    priors = {"common": Prior("Normal", mu=0, sigma=10)}
    Model("y ~ g", data, family="bernoulli", priors=priors)
Example 11
def test_laplace():
    data = pd.DataFrame(np.repeat((0, 1), (30, 60)), columns=["w"])
    priors = {"Intercept": Prior("Uniform", lower=0, upper=1)}
    model = Model("w ~ 1", data=data, family="bernoulli", priors=priors, link="identity")
    results = model.fit(method="laplace")
    mode_n = np.round(results["Intercept"][0], 2)
    std_n = np.round(results["Intercept"][1][0], 2)
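    # With the flat (uniform) prior and identity link, the Laplace mode and standard deviation
    # should recover the sample mean and its standard error.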
    mode_a = data.mean()
    std_a = data.std() / len(data) ** 0.5
    np.testing.assert_array_almost_equal((mode_n, std_n), (mode_a.item(), std_a.item()), decimal=2)
Example 12
def test_omit_offsets_true():
    data = pd.DataFrame({
        "y": np.random.normal(size=100),
        "x1": np.random.normal(size=100),
        "g1": ["a"] * 50 + ["b"] * 50,
    })
    model = Model(data)
    fitted = model.fit("y ~ x1 + (x1|g1)", omit_offsets=True)
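    # omit_offsets=True should drop the internal offset variables of the group-specific
    # terms from the posterior.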
    offsets = [v for v in fitted.posterior.dims if "offset" in v]
    assert not offsets
Example 13
def test_omit_offsets_false():
    data = pd.DataFrame({
        "y": np.random.normal(size=100),
        "x1": np.random.normal(size=100),
        "g1": ["a"] * 50 + ["b"] * 50,
    })
    model = Model("y ~ x1 + (x1|g1)", data)
    fitted = model.fit(omit_offsets=False)
    offsets = [v for v in fitted.posterior.dims if "offset" in v]
    assert offsets == ["1|g1_offset_dim_0", "x1|g1_offset_dim_0"]
Example 14
def test_beta_regression():
    from os.path import dirname, join

    data_dir = join(dirname(__file__), "data")
    data = pd.read_csv(join(data_dir, "gasoline.csv"))
    model = Model("yield ~  temp + batch",
                  data,
                  family="beta",
                  categorical="batch")
    idata = model.fit(target_accept=0.9)
Example 15
def test_model_init_and_intercept(diabetes_data):

    model = Model(diabetes_data, intercept=True)
    assert hasattr(model, 'data')
    assert 'Intercept' in model.terms
    assert len(model.terms) == 1
    assert model.y is None
    assert hasattr(model, 'backend')
    model = Model(diabetes_data)
    assert 'Intercept' not in model.terms
    assert not model.terms
Example 16
def test_family_bad_type():
    data = pd.DataFrame({"x": [1], "y": [1]})

    with pytest.raises(ValueError):
        Model("y ~ x", data, family=0)

    with pytest.raises(ValueError):
        Model("y ~ x", data, family=set("gaussian"))

    with pytest.raises(ValueError):
        Model("y ~ x", data, family={"family": "gaussian"})
Example 17
def test_set_prior_unexisting_term():
    data = pd.DataFrame(
        {
            "y": np.random.normal(size=100),
            "x": np.random.normal(size=100),
        }
    )
    prior = Prior("Uniform", lower=0, upper=50)
    model = Model("y ~ x", data)
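    # "z" is not a term in the model, so set_priors() must raise.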
    with pytest.raises(ValueError):
        model.set_priors(priors={("x", "z"): prior})
Example 18
def test_posterior_predictive(crossed_data):
    crossed_data["count"] = (crossed_data["Y"] - crossed_data["Y"].min()).round()
    model = Model("count ~ threecats + continuous + dummy", crossed_data, family="poisson")
    fitted = model.fit(tune=0, draws=2)
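    # inplace=False returns a new InferenceData with a posterior_predictive group;
    # inplace=True adds that group to `fitted` and returns None.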
    pps = model.posterior_predictive(fitted, draws=500, inplace=False)

    assert pps.posterior_predictive["count"].shape == (1, 500, 120)

    pps = model.posterior_predictive(fitted, draws=500, inplace=True)

    assert pps is None
    assert fitted.posterior_predictive["count"].shape == (1, 500, 120)
Example 19
def test_omit_offsets_true():
    data = pd.DataFrame(
        {
            "y": np.random.normal(size=100),
            "x1": np.random.normal(size=100),
            "g1": ["a"] * 50 + ["b"] * 50,
        }
    )
    model = Model("y ~ x1 + (x1|g1)", data)
    fitted = model.fit(omit_offsets=True)
    offsets = [var for var in fitted.posterior.var() if var.endswith("_offset")]
    assert not offsets
Example 20
def test_model_terms_cleaned_levels_interaction(crossed_data):
    crossed_data["fourcats"] = sum([[x] * 10
                                    for x in ["a", "b", "c", "d"]], list()) * 3
    model = Model(crossed_data)
    model.fit("Y ~ threecats*fourcats", run=False)
    assert model.terms["threecats:fourcats"].cleaned_levels == [
        "threecats[b]:fourcats[b]",
        "threecats[b]:fourcats[c]",
        "threecats[b]:fourcats[d]",
        "threecats[c]:fourcats[b]",
        "threecats[c]:fourcats[c]",
        "threecats[c]:fourcats[d]",
    ]
Example 21
def test_model_term_names_property(diabetes_data):
    model = Model(diabetes_data)
    model.add('BMI ~ age_grp')
    model.add('BP')
    model.add('S1')
    model.build(backend='pymc')
    assert model.term_names == ['Intercept', 'age_grp', 'BP', 'S1']
Example 22
def test_auto_scale(diabetes_data):

    # By default, should scale everything except custom Prior() objects
    priors = {"S1": 0.3, "BP": Prior("Cauchy", alpha=1, beta=17.5)}
    model = Model("BMI ~ S1 + S2 + BP", diabetes_data, priors=priors)
    model.build(backend="pymc3")
    p1 = model.terms["S1"].prior
    p2 = model.terms["S2"].prior
    p3 = model.terms["BP"].prior
    assert p1.name == p2.name == "Normal"
    assert 0 < p1.args["sigma"] < 1
    assert p2.args["sigma"] > p1.args["sigma"]
    assert p3.name == "Cauchy"
    assert p3.args["beta"] == 17.5

    # With auto_scale off, everything should be flat unless explicitly named in priors
    model = Model("BMI ~ S1 + S2 + BP",
                  diabetes_data,
                  priors=priors,
                  auto_scale=False)
    model.build(backend="pymc3")
    p1_off = model.terms["S1"].prior
    p2_off = model.terms["S2"].prior
    p3_off = model.terms["BP"].prior
    assert p1_off.name == "Normal"
    assert p2_off.name == "Flat"
    assert 0 < p1_off.args["sigma"] < 1
    assert "sigma" not in p2_off.args
    assert p3_off.name == "Cauchy"
    assert p3_off.args["beta"] == 17.5
Example 23
def test_set_prior_with_tuple():
    data = pd.DataFrame(
        {
            "y": np.random.normal(size=100),
            "x": np.random.normal(size=100),
            "z": np.random.normal(size=100),
        }
    )
    prior = Prior("Uniform", lower=0, upper=50)
    model = Model("y ~ x + z", data)
    model.set_priors(priors={("x", "z"): prior})

    assert model.terms["x"].prior == prior
    assert model.terms["z"].prior == prior
Example 24
def test_model_term_names_property(diabetes_data):
    model = Model(diabetes_data)
    model.add("BMI ~ age_grp")
    model.add("BP")
    model.add("S1")
    model.build(backend="pymc")
    assert model.term_names == ["Intercept", "age_grp", "BP", "S1"]
Example 25
def test_model_graph(crossed_data):
    model = Model("Y ~ 0 + threecats", crossed_data)
    # Graph cannot be plotted until model is built.
    with pytest.raises(ValueError):
        model.graph()
    model.build()
    model.graph()
Example 26
def test_plot_priors(crossed_data):
    model = Model("Y ~ 0 + threecats", crossed_data)
    # Priors cannot be plotted until model is built.
    with pytest.raises(ValueError):
        model.plot_priors()
    model.build()
    model.plot_priors()
Example 27
def test_distribute_group_specific_effect_over(diabetes_data):
    # 163 unique levels of BMI in diabetes_data
    # With intercept
    model = Model("BP ~ (C(age_grp)|BMI)", diabetes_data)
    model.build()

    # Treatment encoding because of the intercept
    lvls = sorted(list(diabetes_data["age_grp"].unique()))[1:]

    assert "C(age_grp)|BMI" in model.terms
    assert "1|BMI" in model.terms
    assert model.terms["C(age_grp)|BMI"].pymc_coords["C(age_grp)_coord_group_expr"] == lvls

    # This is equal to the sub-matrix of Z that corresponds to this term.
    # 442 is the number of observations. 163 the number of groups.
    # 2 is the number of levels of the categorical variable 'C(age_grp)' after removing
    # the reference level. Then the number of columns is 326 = 163 * 2.
    assert model.terms["C(age_grp)|BMI"].data.shape == (442, 326)

    # Without intercept. Reference level is not removed.
    model = Model("BP ~ (0 + C(age_grp)|BMI)", diabetes_data)
    model.build()

    assert "C(age_grp)|BMI" in model.terms
    assert not "1|BMI" in model.terms
    assert model.terms["C(age_grp)|BMI"].data.shape == (442, 489)
Example 28
def test_constant_terms():
    data = pd.DataFrame(
        {
            "y": np.random.normal(size=10),
            "x": np.random.choice([1], size=10),
            "z": np.random.choice(["A"], size=10),
        }
    )
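    # Both "x" and "z" are constant columns, so a model using either as the only predictor must fail.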

    with pytest.raises(ValueError):
        Model("y ~ 0 + x", data)

    with pytest.raises(ValueError):
        Model("y ~ 0 + z", data)
Example 29
def test_hyperprior_on_common_effect():
    data = pd.DataFrame({
        "y": np.random.normal(size=100),
        "x1": np.random.normal(size=100),
        "g1": ["a"] * 50 + ["b"] * 50,
    })
    slope = Prior("Normal", mu=0, sd=Prior("HalfCauchy", beta=2))
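    # A prior whose own parameter is a Prior (a hyperprior) is only valid for group-specific
    # effects, so assigning it to a common effect must raise.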

    priors = {"x1": slope}
    with pytest.raises(ValueError):
        Model("y ~ x1 + (x1|g1)", data, priors=priors)

    priors = {"common": slope}
    with pytest.raises(ValueError):
        Model("y ~ x1 + (x1|g1)", data, priors=priors)
Example 30
def test_model_categorical_argument():
    data = pd.DataFrame(
        {
            "y": np.random.normal(size=100),
            "x": np.random.randint(2, size=100),
            "z": np.random.randint(2, size=100),
        }
    )
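    # The "categorical" argument forces the integer-valued columns to be treated as categorical terms.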
    model = Model("y ~ 0 + x", data, categorical="x")
    assert model.terms["x"].categorical

    model = Model("y ~ 0 + x*z", data, categorical=["x", "z"])
    assert model.terms["x"].categorical
    assert model.terms["z"].categorical
    assert model.terms["x:z"].categorical