def test_intercept_only_model(crossed_data):
    """Build an intercept-only model through ``fit`` and through ``add``.

    The final assertion compares the *names* of the non-random terms of the
    two models; the prior arguments are collected as dictionary values but
    only the dictionary keys take part in the comparison.
    """

    def fixed_prior_args(model):
        # Map each non-random term name to its prior arguments.
        return {
            term.name: term.prior.args
            for term in model.terms.values()
            if not term.random
        }

    # Route 1: formula passed to fit(), then an explicit build and fit.
    via_fit = Model(crossed_data)
    via_fit.fit("Y ~ 1", run=False)
    via_fit.build(backend="pymc3")
    via_fit.fit(samples=1)

    # Route 2: response and intercept added in two separate add() calls.
    via_add = Model(crossed_data)
    via_add.add("Y ~ 0")
    via_add.add("1")
    via_add.build(backend="pymc3")
    via_add.fit(samples=1)

    # Key views compare like sets, so this checks the term names only.
    assert fixed_prior_args(via_fit).keys() == fixed_prior_args(via_add).keys()
Example #2
0
def test_distribute_group_specific_effect_over(diabetes_data):
    """Check the design of a categorical group-specific effect over ``BMI``.

    The expected shapes assume ``diabetes_data`` has 442 observations and
    163 unique ``BMI`` values.
    """
    slope_name = "C(age_grp)|BMI"
    intercept_name = "1|BMI"

    # --- With a group-specific intercept ---
    with_intercept = Model("BP ~ (C(age_grp)|BMI)", diabetes_data)
    with_intercept.build()

    # The intercept implies treatment encoding, so the first sorted level of
    # age_grp (the reference) is left out of the expected coordinates.
    expected_levels = sorted(diabetes_data["age_grp"].unique())[1:]

    terms = with_intercept.terms
    assert slope_name in terms
    assert intercept_name in terms

    slope_term = terms[slope_name]
    assert slope_term.pymc_coords["C(age_grp)_coord_group_expr"] == expected_levels

    # Sub-matrix of Z for this term: 442 rows (observations) and
    # 163 groups * 2 non-reference levels = 326 columns.
    assert slope_term.data.shape == (442, 326)

    # --- Without intercept: the reference level is kept ---
    without_intercept = Model("BP ~ (0 + C(age_grp)|BMI)", diabetes_data)
    without_intercept.build()

    terms = without_intercept.terms
    assert slope_name in terms
    assert intercept_name not in terms
    # 163 groups * 3 levels = 489 columns.
    assert terms[slope_name].data.shape == (442, 489)
Example #3
0
def test_cell_means_with_covariate(crossed_data):
    """Compare a formula-built and an ``add_term``-built cell-means model.

    Checks that both models expose the same fixed-effect design columns,
    that the ``threecats`` prior ``sd`` contains no infinite entries, and
    that the non-random term names agree (only names are compared in the
    final assertion, not the prior arguments themselves).
    """

    def design_columns(model):
        # Each design column becomes a tuple so that the comparison ignores
        # term names, level names and column order.
        return {
            tuple(term.data[:, col])
            for term in model.fixed_terms.values()
            for col in range(len(term.levels))
        }

    def fixed_prior_args(model):
        return {
            term.name: term.prior.args
            for term in model.terms.values()
            if not term.random
        }

    # Model specified with a formula.
    formula_model = Model(crossed_data)
    formula_model.fit("Y ~ 0 + threecats + continuous", run=False)
    formula_model.build()
    formula_model.fit(samples=1)

    # Model specified term by term.
    term_model = Model(crossed_data)
    term_model.add_y("Y")
    term_model.add_term("threecats", drop_first=False)
    term_model.add_term("continuous")
    term_model.build()
    term_model.fit(samples=1)

    assert design_columns(formula_model) == design_columns(term_model)

    # The threecats prior must have finite variance.
    threecats_sd = formula_model.terms["threecats"].prior.args["sd"]
    assert not any(np.isinf(threecats_sd))

    assert fixed_prior_args(formula_model).keys() == fixed_prior_args(term_model).keys()
Example #4
0
def test_many_fixed_effects(crossed_data):
    """Compare formula and ``add_term`` construction with several fixed effects.

    Verifies matching term names, matching fixed-effect design columns and
    matching non-random term names (the last check looks at dictionary keys
    only, not at the prior arguments stored as values).
    """
    # Formula-based construction.
    by_formula = Model(crossed_data)
    by_formula.fit("Y ~ continuous + dummy + threecats", run=False)
    by_formula.build()
    by_formula.fit(samples=1)

    # Term-by-term construction.
    by_terms = Model(crossed_data)
    by_terms.add_y("Y")
    by_terms.add_intercept()
    for predictor in ("continuous", "dummy", "threecats"):
        by_terms.add_term(predictor)
    by_terms.build()
    by_terms.fit(samples=1)

    # Term names must agree, irrespective of order.
    assert set(by_formula.term_names) == set(by_terms.term_names)

    # Design matrices must contain the same columns, even if term names,
    # level names or column order differ.
    columns_formula = {
        tuple(term.data[:, col])
        for term in by_formula.fixed_terms.values()
        for col in range(len(term.levels))
    }
    columns_terms = {
        tuple(term.data[:, col])
        for term in by_terms.fixed_terms.values()
        for col in range(len(term.levels))
    }
    assert columns_formula == columns_terms

    # Same set of fixed-effect terms carrying priors in both models.
    priors_formula = {t.name: t.prior.args for t in by_formula.terms.values() if not t.random}
    priors_terms = {t.name: t.prior.args for t in by_terms.terms.values() if not t.random}
    assert priors_formula.keys() == priors_terms.keys()
Example #5
0
def test_cell_means_with_random_intercepts(crossed_data):
    """Compare two ways of building a cell-means model with random intercepts.

    Checks the random term names, the fixed-effect design columns and the
    names of the fixed and random terms that carry priors. The prior
    comparisons look at term names only; the prior arguments are gathered as
    dictionary values but do not enter the assertions.
    """

    def design_columns(model):
        return {
            tuple(term.data[:, col])
            for term in model.fixed_terms.values()
            for col in range(len(term.levels))
        }

    # Formula plus a ``random`` specification.
    formula_model = Model(crossed_data)
    formula_model.fit("Y ~ 0 + threecats", random=["subj"], run=False)
    formula_model.build()
    formula_model.fit(samples=1)

    # Explicit add_term calls on a model created without an intercept.
    term_model = Model(crossed_data, intercept=False)
    term_model.add_y("Y")
    term_model.add_term("threecats", categorical=True, drop_first=False)
    term_model.add_term("subj", categorical=True, random=True, drop_first=False)
    term_model.build()
    term_model.fit(samples=1)

    # Same random terms in both models.
    assert set(formula_model.random_terms) == set(term_model.random_terms)

    # Same fixed-effect design columns, regardless of naming or ordering.
    assert design_columns(formula_model) == design_columns(term_model)

    # Fixed effects: same set of term names.
    fixed_formula = {t.name: t.prior.args for t in formula_model.terms.values() if not t.random}
    fixed_terms = {t.name: t.prior.args for t in term_model.terms.values() if not t.random}
    assert fixed_formula.keys() == fixed_terms.keys()

    # Random effects: same set of term names (values hold the hyperprior args).
    random_formula = {
        t.name: t.prior.args["sd"].args for t in formula_model.terms.values() if t.random
    }
    random_terms = {
        t.name: t.prior.args["sd"].args for t in term_model.terms.values() if t.random
    }
    assert random_formula.keys() == random_terms.keys()
Example #6
0
def test_one_shot_formula_fit(diabetes_data):
    """A formula given to ``fit`` must yield the expected backend variables.

    After building with the ``pymc3`` backend, the backend model's named
    variables must include ``S3``, ``S1`` and ``Intercept``.
    """
    model = Model(diabetes_data)
    model.fit("S3 ~ S1 + S2", samples=50, run=False)
    model.build(backend="pymc3")

    named_vars = model.backend.model.named_vars
    expected = {"S3", "S1", "Intercept"}
    # All three expected names have to be present among the named variables.
    assert expected.issubset(named_vars.keys())
Example #7
0
def test_cell_means_parameterization(crossed_data):
    """Compare ``fit``-based and ``add``-based cell-means models.

    Both models must produce the same fixed-effect design columns and the
    same set of non-random term names (prior arguments are collected but
    only the names are compared).
    """

    def design_columns(model):
        # Columns as tuples: the comparison is insensitive to term names,
        # level names and column order.
        return {
            tuple(term.data[:, col])
            for term in model.fixed_terms.values()
            for col in range(len(term.levels))
        }

    def fixed_prior_args(model):
        return {t.name: t.prior.args for t in model.terms.values() if not t.random}

    # Built through fit().
    fit_model = Model(crossed_data)
    fit_model.fit("Y ~ 0 + threecats", run=False)
    fit_model.build(backend="pymc3")
    fit_model.fit(tune=0, samples=1, init=None)

    # Built through add().
    add_model = Model(crossed_data)
    add_model.add("Y ~ 0")
    add_model.add("0 + threecats")
    add_model.build(backend="pymc3")
    add_model.fit(tune=0, samples=1)

    assert design_columns(fit_model) == design_columns(add_model)
    assert fixed_prior_args(fit_model).keys() == fixed_prior_args(add_model).keys()
Example #8
0
def test_one_shot_formula_fit(diabetes_data):
    """Check the named variables of the backend model after a formula ``fit``."""
    model = Model(diabetes_data)
    model.fit("S3 ~ S1 + S2", samples=50, run=False)
    model.build(backend="pymc3")

    available = set(model.backend.model.named_vars.keys())
    wanted = ("S3", "S1", "Intercept")
    # None of the wanted names may be missing from the backend model.
    missing = [name for name in wanted if name not in available]
    assert not missing
Example #9
0
def test_model_term_names_property(diabetes_data):
    """``term_names`` must list the intercept and predictors in insertion order."""
    model = Model(diabetes_data)
    # The first formula sets the response; the others append one predictor each.
    for formula in ("BMI ~ age_grp", "BP", "S1"):
        model.add(formula)
    model.build(backend="pymc")

    expected_names = ["Intercept", "age_grp", "BP", "S1"]
    assert model.term_names == expected_names
Example #10
0
def test_plot_priors(crossed_data):
    """``plot_priors`` raises ``ValueError`` before ``build`` and works after it."""
    cell_means = Model("Y ~ 0 + threecats", crossed_data)

    # Unbuilt model: plotting priors must be rejected.
    with pytest.raises(ValueError):
        cell_means.plot_priors()

    # Built model: the same call must complete without raising.
    cell_means.build()
    cell_means.plot_priors()
Example #11
0
def test_model_graph(crossed_data):
    """``graph`` raises ``ValueError`` before ``build`` and works after it."""
    cell_means = Model("Y ~ 0 + threecats", crossed_data)

    # Unbuilt model: requesting the graph must be rejected.
    with pytest.raises(ValueError):
        cell_means.graph()

    # Built model: the same call must complete without raising.
    cell_means.build()
    cell_means.graph()
Example #12
0
def test_auto_scale(diabetes_data):
    """Check prior selection with ``auto_scale`` left at its default and off.

    ``S1`` is given a numeric prior (0.3), ``BP`` an explicit Cauchy
    ``Prior`` and ``S2`` nothing at all.
    """
    formula = "BMI ~ S1 + S2 + BP"

    def build_and_get_priors(model, priors):
        # Register the formula, build on the pymc3 backend and return the
        # priors of S1, S2 and BP in that order.
        model.fit(formula, run=False, priors=priors)
        model.build(backend="pymc3")
        return tuple(model.terms[name].prior for name in ("S1", "S2", "BP"))

    # By default, everything is scaled except custom Prior() objects.
    scaled_model = Model(diabetes_data)
    priors = {"S1": 0.3, "BP": Prior("Cauchy", alpha=1, beta=17.5)}
    s1_prior, s2_prior, bp_prior = build_and_get_priors(scaled_model, priors)

    assert s1_prior.name == s2_prior.name
    assert s2_prior.name == "Normal"
    assert 0 < s1_prior.args["sd"] < 1
    assert s2_prior.args["sd"] > s1_prior.args["sd"]
    assert bp_prior.name == "Cauchy"
    assert bp_prior.args["beta"] == 17.5

    # With auto_scale off, everything is flat unless explicitly named in priors.
    unscaled_model = Model(diabetes_data, auto_scale=False)
    s1_prior, s2_prior, bp_prior = build_and_get_priors(unscaled_model, priors)

    assert s1_prior.name == "Normal"
    assert s2_prior.name == "Flat"
    assert 0 < s1_prior.args["sd"] < 1
    assert "sd" not in s2_prior.args
    assert bp_prior.name == "Cauchy"
    assert bp_prior.args["beta"] == 17.5
Example #13
0
def test_slope_only_model(crossed_data):
    """Build a slope-only model through ``fit`` and through ``add``.

    The two models must agree on their term names and on the names of the
    non-random terms carrying priors (prior arguments are gathered but only
    the names enter the comparison).
    """

    def fixed_prior_args(model):
        collected = {}
        for term in model.terms.values():
            if not term.random:
                collected[term.name] = term.prior.args
        return collected

    # Formula passed to fit().
    fit_model = Model(crossed_data)
    fit_model.fit("Y ~ 0 + continuous", run=False)
    fit_model.build(backend="pymc3")
    fit_model.fit(tune=0, samples=1, init=None)

    # Response and slope added via add().
    add_model = Model(crossed_data)
    add_model.add("Y ~ 0")
    add_model.add("0 + continuous")
    add_model.build(backend="pymc3")
    add_model.fit(tune=0, samples=1)

    # Term names agree, ignoring order.
    assert set(fit_model.term_names) == set(add_model.term_names)

    # Same fixed-effect term names on both sides.
    assert fixed_prior_args(fit_model).keys() == fixed_prior_args(add_model).keys()
Example #14
0
def test_model_term_names_property(diabetes_data):
    """Term names are reported as intercept first, then predictors as added."""
    predictors_in_order = ["age_grp", "BP", "S1"]

    model = Model(diabetes_data)
    model.add("BMI ~ age_grp")
    model.add("BP")
    model.add("S1")
    model.build(backend="pymc")

    assert model.term_names == ["Intercept"] + predictors_in_order
Example #15
0
def test_auto_scale(diabetes_data):
    """Verify which priors are produced with and without ``auto_scale``.

    The user supplies a numeric prior for ``S1`` and a Cauchy ``Prior`` for
    ``BP``; ``S2`` receives no explicit prior.
    """
    formula = "BMI ~ S1 + S2 + BP"

    # Default behaviour: everything is scaled except custom Prior() objects.
    model = Model(diabetes_data)
    priors = {"S1": 0.3, "BP": Prior("Cauchy", alpha=1, beta=17.5)}
    model.fit(formula, run=False, priors=priors)
    model.build(backend="pymc3")

    terms = model.terms
    s1, s2, bp = terms["S1"].prior, terms["S2"].prior, terms["BP"].prior
    assert s1.name == s2.name == "Normal"
    s1_sd = s1.args["sd"]
    assert 0 < s1_sd < 1
    assert s2.args["sd"] > s1_sd
    assert (bp.name, bp.args["beta"]) == ("Cauchy", 17.5)

    # auto_scale disabled: priors stay flat unless explicitly named.
    model = Model(diabetes_data, auto_scale=False)
    model.fit(formula, run=False, priors=priors)
    model.build(backend="pymc3")

    terms = model.terms
    s1_off, s2_off, bp_off = terms["S1"].prior, terms["S2"].prior, terms["BP"].prior
    assert s1_off.name == "Normal"
    assert s2_off.name == "Flat"
    assert 0 < s1_off.args["sd"] < 1
    assert "sd" not in s2_off.args
    assert (bp_off.name, bp_off.args["beta"]) == ("Cauchy", 17.5)
Example #16
0
def test_cell_means_with_covariate(crossed_data):
    """Compare ``fit``- and ``add``-built cell-means models with a covariate.

    The models are only built here; no sampling call is made. Checks cover
    the fixed-effect design columns, the finiteness of the ``threecats``
    prior ``sd`` and the names of the non-random terms (prior arguments are
    collected but only the names are compared).
    """

    def design_columns(model):
        columns = set()
        for term in model.fixed_terms.values():
            for col in range(len(term.levels)):
                columns.add(tuple(term.data[:, col]))
        return columns

    def fixed_prior_args(model):
        return {t.name: t.prior.args for t in model.terms.values() if not t.random}

    # Built through fit().
    fit_model = Model(crossed_data)
    fit_model.fit("Y ~ 0 + threecats + continuous", run=False)
    fit_model.build(backend="pymc3")

    # Built through successive add() calls.
    add_model = Model(crossed_data)
    for formula in ("Y ~ 0", "0 + threecats", "0 + continuous"):
        add_model.add(formula)
    add_model.build(backend="pymc3")

    # Same design columns, whatever the term names, level names or ordering.
    assert design_columns(fit_model) == design_columns(add_model)

    # The threecats prior must have finite variance.
    threecats_sd = fit_model.terms["threecats"].prior.args["sd"]
    assert not any(np.isinf(threecats_sd))

    # Same fixed-effect term names on both sides.
    assert fixed_prior_args(fit_model).keys() == fixed_prior_args(add_model).keys()
Example #17
0
def test_model_term_names_property_interaction(crossed_data):
    """``term_names`` lists main effects before their interaction."""
    # Four-level factor: ten consecutive rows per level, repeated three times.
    # Note that the column is added to the fixture's data frame in place.
    one_cycle = [level for level in ("a", "b", "c", "d") for _ in range(10)]
    crossed_data["fourcats"] = one_cycle * 3

    model = Model("Y ~ threecats*fourcats", crossed_data)
    model.build()

    expected_names = ["Intercept", "threecats", "fourcats", "threecats:fourcats"]
    assert model.term_names == expected_names
Example #18
0
def test_many_random_effects(crossed_data):
    """Compare formula and ``add_term`` construction with many random effects.

    Both models are only built (no sampling call is made). The test checks:

    * the random-effects design matrices have the same shape and contain the
      same columns,
    * the fixed-effect design matrices contain the same columns,
    * the fixed and random terms carrying priors have the same names (only
      names are compared; the prior arguments are collected as dictionary
      values but do not take part in the assertions).
    """

    def random_design(model):
        # Concatenate, column-wise, the design data of every random term.
        # A term whose data is a dict contributes one frame per dict value.
        frames = []
        for term in model.random_terms.values():
            if isinstance(term.data, dict):
                frames.append(
                    pd.concat([pd.DataFrame(part) for part in term.data.values()], axis=1)
                )
            else:
                frames.append(pd.DataFrame(term.data))
        return pd.concat(frames, axis=1)

    def fixed_design_columns(model):
        return {
            tuple(term.data[:, col])
            for term in model.fixed_terms.values()
            for col in range(len(term.levels))
        }

    # build model using formula
    model0 = Model(crossed_data)
    model0.fit(
        'Y ~ continuous',
        random=['0+threecats|subj', 'continuous|item', 'dummy|item', 'threecats|site'],
        run=False,
    )
    model0.build()

    # build model using add_term
    model1 = Model(crossed_data)
    model1.add_y('Y')
    # fixed effects
    model1.add_intercept()
    model1.add_term('continuous')
    # random effects
    model1.add_term('threecats', over='subj', drop_first=False, random=True,
                    categorical=True)
    model1.add_term('item', random=True, categorical=True)
    model1.add_term('continuous', over='item', random=True)
    model1.add_term('dummy', over='item', random=True)
    model1.add_term('site', random=True, categorical=True)
    model1.add_term('threecats', over='site', random=True, categorical=True)
    model1.build()

    # check that the random effects design matrices have the same shape.
    # The second matrix must come from model1: building both from model0
    # would compare model0 with itself and could never fail.
    X0 = random_design(model0)
    X1 = random_design(model1)
    assert X0.shape == X1.shape

    # check that the random effect design matrices contain the same columns,
    # even if term names / column names / order of columns is different
    X0_set = {tuple(X0.iloc[:, i]) for i in range(len(X0.columns))}
    X1_set = {tuple(X1.iloc[:, i]) for i in range(len(X1.columns))}
    assert X0_set == X1_set

    # check that fixed effect design matrices are the same,
    # even if term names / level names / order of columns is different
    assert fixed_design_columns(model0) == fixed_design_columns(model1)

    # check that both models have priors on the same fixed-effect terms
    priors0 = {x.name: x.prior.args for x in model0.terms.values() if not x.random}
    priors1 = {x.name: x.prior.args for x in model1.terms.values() if not x.random}
    assert set(priors0) == set(priors1)

    # check that both models have priors on the same random-effect terms
    priors0 = {x.name: x.prior.args['sd'].args for x in model0.terms.values() if x.random}
    priors1 = {x.name: x.prior.args['sd'].args for x in model1.terms.values() if x.random}
    assert set(priors0) == set(priors1)
Example #19
0
def test_add_formula_append(diabetes_data):
    """``add(..., append=False)`` discards previously added terms and response."""
    model = Model(diabetes_data)
    model.add("S3 ~ 0")
    model.add("S1")
    model.build(backend="pymc")

    # The response is set and named S3; the predictor S1 is registered.
    assert hasattr(model, "y")
    assert model.y is not None
    assert model.y.name == "S3"
    assert "S1" in model.terms

    # Adding without appending clears the response ...
    model.add("S2", append=False)
    assert model.y is None

    # ... and, once the response is restored, only S2 remains as a term.
    model.add("S3 ~ 0")
    model.build(backend="pymc")
    terms = model.terms
    assert "S2" in terms
    assert "S1" not in terms
Example #20
0
def test_add_formula_append(diabetes_data):
    """Check that ``append=False`` replaces, rather than extends, the model spec."""
    response_formula = "S3 ~ 0"

    model = Model(diabetes_data)
    model.add(response_formula)
    model.add("S1")
    model.build(backend="pymc")

    # A missing ``y`` attribute and a ``y`` of None are both failures.
    response = getattr(model, "y", None)
    assert response is not None and response.name == "S3"
    assert "S1" in model.terms

    # Replace the specification instead of appending to it.
    model.add("S2", append=False)
    assert model.y is None

    model.add(response_formula)
    model.build(backend="pymc")
    assert "S2" in model.terms
    assert "S1" not in model.terms
Example #21
0
def test_distribute_random_effect_over(diabetes_data):
    """Check the design shape of a random effect distributed over ``BMI``.

    Each inspected term is expected to have data of shape (442, 163).
    """
    expected_shape = (442, 163)
    model = Model(diabetes_data)

    # Random slopes
    model.add("BP ~ 1")
    model.add(random="C(age_grp)|BMI")
    model.build(backend="pymc")
    slope_term = model.terms["C(age_grp)[T.1]|BMI"]
    assert slope_term.data.shape == expected_shape

    # Nested or crossed random intercepts, on the same model after a reset
    model.reset()
    model.add("BP ~ 1")
    model.add(random="0+C(age_grp)|BMI")
    model.build(backend="pymc")
    level_term = model.terms["C(age_grp)[0]|BMI"]
    assert level_term.data.shape == expected_shape
Example #22
0
def test_model_terms_cleaned_levels_interaction(crossed_data):
    """``cleaned_levels`` of an interaction term lists every level pairing.

    The expected names combine ``threecats`` levels ``b`` and ``c`` with
    ``fourcats`` levels ``b``, ``c`` and ``d``, with ``threecats`` varying
    slowest.
    """
    # Four-level factor: ten consecutive rows per level, repeated three times.
    # Note that the column is added to the fixture's data frame in place.
    one_cycle = [level for level in ("a", "b", "c", "d") for _ in range(10)]
    crossed_data["fourcats"] = one_cycle * 3

    model = Model("Y ~ threecats*fourcats", crossed_data)
    model.build()

    expected_levels = [
        f"threecats[{three}]:fourcats[{four}]"
        for three in ("b", "c")
        for four in ("b", "c", "d")
    ]
    assert model.terms["threecats:fourcats"].cleaned_levels == expected_levels
Example #23
0
def test_empty_model(crossed_data):
    """A model with no predictors can be built via ``add`` or via ``fit``.

    Both routes must yield the same set of non-random term names (prior
    arguments are collected, but only the names are compared).
    """

    def fixed_prior_args(model):
        return {
            term.name: term.prior.args
            for term in model.terms.values()
            if not term.random
        }

    # Route 1: response declared with add().
    added = Model(crossed_data)
    added.add("Y ~ 0")
    added.build(backend="pymc3")
    added.fit(tune=0, samples=1)

    # Route 2: the same formula handed to fit().
    fitted = Model(crossed_data)
    fitted.fit("Y ~ 0", run=False)
    fitted.build(backend="pymc3")
    fitted.fit(tune=0, samples=1)

    assert fixed_prior_args(added).keys() == fixed_prior_args(fitted).keys()
Example #24
0
def test_categorical_term():
    """Each built term reports the expected ``categorical`` flag."""
    columns = {
        "y": np.random.normal(size=6),
        "x1": np.random.normal(size=6),
        "x2": [1, 1, 0, 0, 1, 1],
        "g1": ["a"] * 3 + ["b"] * 3,
        "g2": ["x", "x", "z", "z", "y", "y"],
    }
    data = pd.DataFrame(columns)

    model = Model("y ~ x1 + x2 + g1 + (g1|g2) + (x2|g2)", data)
    model.build()

    # Term name -> whether the term is expected to be flagged as categorical.
    expected_flags = {
        "x1": False,
        "x2": False,
        "g1": True,
        "1|g2": False,
        "g1[b]|g2": True,
        "x2|g2": False,
    }
    for term_name, is_categorical in expected_flags.items():
        assert model.terms[term_name].categorical is is_categorical
Example #25
0
def test_derived_term_search(diabetes_data):
    """Check ``Model._match_derived_terms`` lookups on a built model.

    A random-effect specification must resolve to all of its derived terms,
    an exact derived name must resolve to that single term, and names that
    match no random-effect specification must yield ``None``.
    """
    model = Model(diabetes_data)
    model.add('BMI ~ 1', random='age_grp|BP', categorical=['age_grp'])
    model.build(backend='pymc')

    # The full specification resolves to the intercept and both slope terms.
    terms = model._match_derived_terms('age_grp|BP')
    names = {t.name for t in terms}
    assert names == {'1|BP', 'age_grp[T.1]|BP', 'age_grp[T.2]|BP'}

    # An exact derived name resolves to that term, returned first.
    term = model._match_derived_terms('1|BP')[0]
    assert term.name == '1|BP'

    # All of these should find nothing. Each query is checked once; the
    # previous version repeated the 'BP' assertion verbatim.
    for query in ('1|ZZZ', 'ZZZ|BP', 'BP'):
        assert model._match_derived_terms(query) is None
Example #26
0
def test_derived_term_search(diabetes_data):
    """Exercise ``Model._match_derived_terms`` on a model with random slopes.

    Covers three cases: a specification that expands to several derived
    terms, an exact derived-term name, and queries that match nothing (for
    which ``None`` is expected).
    """
    model = Model(diabetes_data)
    model.add("BMI ~ 1", random="age_grp|BP", categorical=["age_grp"])
    model.build(backend="pymc")

    # The specification expands to the intercept plus one term per
    # non-reference level of age_grp.
    matched = model._match_derived_terms("age_grp|BP")
    assert {t.name for t in matched} == {"1|BP", "age_grp[T.1]|BP", "age_grp[T.2]|BP"}

    # An exact derived-term name comes back as the first match.
    first = model._match_derived_terms("1|BP")[0]
    assert first.name == "1|BP"

    # All of these should find nothing. The 'BP' query used to be asserted
    # twice in a row; one check per distinct query is enough.
    for unmatched in ("1|ZZZ", "ZZZ|BP", "BP"):
        assert model._match_derived_terms(unmatched) is None
Example #27
0
def test_3x4_fixed_anova(crossed_data):
    """Fit a 3x4 fixed-effects ANOVA with and without an intercept.

    The posterior is expected to hold 5 data variables with the intercept
    and 4 without it.
    """
    # Add a four-level category that's perfectly crossed with threecats:
    # ten consecutive rows per level, repeated three times. The column is
    # added to the fixture's data frame in place.
    one_cycle = [level for level in ("a", "b", "c", "d") for _ in range(10)]
    crossed_data["fourcats"] = one_cycle * 3

    # With intercept.
    with_intercept = Model(crossed_data)
    with_intercept.fit("Y ~ threecats*fourcats", run=False)
    with_intercept.build(backend="pymc3")
    result = with_intercept.fit(tune=0, samples=1, init=None)
    assert len(result.posterior.data_vars) == 5

    # Without intercept (i.e., 2-factor cell means model).
    cell_means = Model(crossed_data)
    cell_means.fit("Y ~ 0 + threecats*fourcats", run=False)
    cell_means.build(backend="pymc3")
    result = cell_means.fit(tune=0, samples=1)
    assert len(result.posterior.data_vars) == 4
Example #28
0
def test_empty_model(crossed_data):
    """A predictor-free model built via ``add_y`` matches one built via ``fit``.

    Only the names of the non-random terms are compared; the prior arguments
    are collected as dictionary values but do not enter the assertion.
    """

    def fixed_prior_args(model):
        return {
            term.name: term.prior.args
            for term in model.terms.values()
            if not term.random
        }

    # Response declared directly with add_y().
    via_add_y = Model(crossed_data)
    via_add_y.add_y("Y")
    via_add_y.build()
    via_add_y.fit(samples=1)

    # Response declared through a formula handed to fit().
    via_formula = Model(crossed_data)
    via_formula.fit("Y ~ 0", run=False)
    via_formula.build()
    via_formula.fit(samples=1)

    assert fixed_prior_args(via_add_y).keys() == fixed_prior_args(via_formula).keys()
def test_nan_handling(crossed_data):
    """Check how ``build`` reacts to NaN values in a predictor.

    Without ``dropna`` the build must raise ``ValueError``; with
    ``dropna=True`` it must emit a ``UserWarning`` whose message mentions
    the 3 affected rows.
    """

    def model_with_nan_predictor(**model_kwargs):
        # Every model gets its own copy of the fixture data. The previous
        # version created a copy but never passed it to either model.
        model = Model(crossed_data.copy(), **model_kwargs)
        model.fit('Y ~ continuous', run=False)
        # Inject NaN into three rows of the predictor's design data.
        model.terms['continuous'].data[[4, 6, 8], :] = np.nan
        return model

    # Should fail because predictor has NaN
    model_fail_na = model_with_nan_predictor()
    with pytest.raises(ValueError):
        model_fail_na.build(backend='pymc3')

    # Should drop 3 rows with warning
    model_drop_na = model_with_nan_predictor(dropna=True)
    with pytest.warns(UserWarning) as w:
        model_drop_na.build(backend='pymc3')
    assert '3 rows' in w[0].message.args[0]
Example #30
0
def test_distribute_group_specific_effect_over(diabetes_data):
    """Check the per-level terms of a categorical effect distributed over ``BMI``.

    Every per-level term is expected to have data of shape (442, 163);
    163 is the number of unique ``BMI`` values in ``diabetes_data``.
    """
    expected_shape = (442, 163)

    # --- With intercept: treatment encoding drops the first sorted level ---
    with_intercept = Model("BP ~ (C(age_grp)|BMI)", diabetes_data)
    with_intercept.build()

    non_reference_levels = sorted(diabetes_data["age_grp"].unique())[1:]
    for level in non_reference_levels:
        term = with_intercept.terms[f"C(age_grp)[{level}]|BMI"]
        assert term.data.shape == expected_shape
    assert "1|BMI" in with_intercept.terms

    # --- Without intercept: one term per level, no group-specific intercept ---
    without_intercept = Model("BP ~ (0 + C(age_grp)|BMI)", diabetes_data)
    without_intercept.build()

    for level in (0, 1, 2):
        term = without_intercept.terms[f"C(age_grp)[{level}]|BMI"]
        assert term.data.shape == expected_shape
    assert "1|BMI" not in without_intercept.terms