예제 #1
0
def test_slope_only_model(crossed_data):
    # using fit
    model0 = Model(crossed_data)
    model0.fit('Y ~ 0 + continuous', run=False)
    model0.build(backend='pymc3')
    model0.fit(tune=0, samples=1, init=None)

    # using add
    model1 = Model(crossed_data)
    model1.add('Y ~ 0')
    model1.add('0 + continuous')
    model1.build(backend='pymc3')
    model1.fit(tune=0, samples=1)

    # check that term names agree
    assert set(model0.term_names) == set(model1.term_names)

    # check that fit and add models have same priors for fixed
    # effects
    priors0 = {
        x.name: x.prior.args
        for x in model0.terms.values() if not x.random
    }
    priors1 = {
        x.name: x.prior.args
        for x in model1.terms.values() if not x.random
    }
    assert set(priors0) == set(priors1)
예제 #2
0
def test_model_term_names_property(diabetes_data):
    model = Model(diabetes_data)
    model.add('BMI ~ age_grp')
    model.add('BP')
    model.add('S1')
    model.build(backend='pymc')
    assert model.term_names == ['Intercept', 'age_grp', 'BP', 'S1']
def test_intercept_only_model(crossed_data):
    # using fit
    model0 = Model(crossed_data)
    model0.fit('Y ~ 1', run=False)
    model0.build(backend='pymc3')
    model0.fit(samples=1)

    # using add
    model1 = Model(crossed_data)
    model1.add('Y ~ 0')
    model1.add('1')
    model1.build(backend='pymc3')
    model1.fit(samples=1)

    # check that fit and add models have same priors for fixed
    # effects
    priors0 = {
        x.name: x.prior.args
        for x in model0.terms.values() if not x.random
    }
    priors1 = {
        x.name: x.prior.args
        for x in model1.terms.values() if not x.random
    }
    assert set(priors0) == set(priors1)
예제 #4
0
def test_model_term_names_property(diabetes_data):
    model = Model(diabetes_data)
    model.add("BMI ~ age_grp")
    model.add("BP")
    model.add("S1")
    model.build(backend="pymc")
    assert model.term_names == ["Intercept", "age_grp", "BP", "S1"]
예제 #5
0
def test_cell_means_parameterization(crossed_data):
    # build model using fit
    model0 = Model(crossed_data)
    model0.fit("Y ~ 0 + threecats", run=False)
    model0.build(backend="pymc3")
    model0.fit(tune=0, samples=1, init=None)

    # build model using add
    model1 = Model(crossed_data)
    model1.add("Y ~ 0")
    model1.add("0 + threecats")
    model1.build(backend="pymc3")
    model1.fit(tune=0, samples=1)

    # check that design matrices are the same,
    # even if term names / level names / order of columns is different
    X0 = set(
        [tuple(t.data[:, lev]) for t in model0.fixed_terms.values() for lev in range(len(t.levels))]
    )
    X1 = set(
        [tuple(t.data[:, lev]) for t in model1.fixed_terms.values() for lev in range(len(t.levels))]
    )
    assert X0 == X1

    # check that fit and add models have same priors for fixed
    # effects
    priors0 = {x.name: x.prior.args for x in model0.terms.values() if not x.random}
    priors1 = {x.name: x.prior.args for x in model1.terms.values() if not x.random}
    assert set(priors0) == set(priors1)
예제 #6
0
def test_distribute_random_effect_over(diabetes_data):
    # Random slopes
    model = Model(diabetes_data)
    model.add(random='C(age_grp)|BMI')
    assert model.terms['C(age_grp)[T.1]|BMI'].data.shape == (442, 163)
    # Nested or crossed random intercepts
    model.reset()
    model.add(random='0+C(age_grp)|BMI')
    assert model.terms['C(age_grp)[0]|BMI'].data.shape == (442, 163)
예제 #7
0
def test_update_term_priors_after_init(diabetes_data):
    model = Model(diabetes_data)
    model.add('BMI')
    model.add('S1')
    model.add(random='age_grp|BP')

    p1 = Prior('Normal', mu=-10, sd=10)
    p2 = Prior('Beta', alpha=2, beta=2)

    model.set_priors({'BMI': 0.3, 'S1': p2})
    assert model.terms['S1'].prior.args['beta'] == 2
    assert model.terms['BMI'].prior == 0.3

    model.set_priors({('S1', 'BMI'): p1})
    assert model.terms['S1'].prior.args['sd'] == 10
    assert model.terms['BMI'].prior.args['mu'] == -10

    p3 = Prior('Normal', mu=0, sd=Prior('Normal', mu=0, sd=7))
    model.set_priors(fixed=0.4, random=p3)
    assert model.terms['BMI'].prior == 0.4
    assert model.terms['age_grp|BP'].prior.args['sd'].args['sd'] == 7

    # Invalid names should raise error
    with pytest.raises(ValueError):
        model.set_priors({'nonexistent_term': 0.3})

    # Test for partial names, e.g., 'threecats' should match 'threecats[0]'.
    model = Model(diabetes_data)
    model.add(random='age_grp|BP', categorical='age_grp')
    model.set_priors({'age_grp|BP': 0.5})
    assert model.terms['age_grp[T.1]|BP'].prior == 0.5
    assert model.terms['1|BP'].prior == 0.5
예제 #8
0
def test_empty_model(crossed_data):
    model0 = Model(crossed_data)
    model0.add("Y ~ 0")
    model0.build(backend="pymc3")
    model0.fit(tune=0, samples=1)

    model1 = Model(crossed_data)
    model1.fit("Y ~ 0", run=False)
    model1.build(backend="pymc3")
    model1.fit(tune=0, samples=1)

    # check that both models have same priors for fixed effects
    priors0 = {x.name: x.prior.args for x in model0.terms.values() if not x.random}
    priors1 = {x.name: x.prior.args for x in model1.terms.values() if not x.random}
    assert set(priors0) == set(priors1)
예제 #9
0
def test_derived_term_search(diabetes_data):
    model = Model(diabetes_data)
    model.add(random='age_grp|BP', categorical=['age_grp'])
    terms = model._match_derived_terms('age_grp|BP')
    names = set([t.name for t in terms])
    assert names == {'1|BP', 'age_grp[T.1]|BP', 'age_grp[T.2]|BP'}

    term = model._match_derived_terms('1|BP')[0]
    assert term.name == '1|BP'

    # All of these should find nothing
    assert model._match_derived_terms('1|ZZZ') is None
    assert model._match_derived_terms('ZZZ|BP') is None
    assert model._match_derived_terms('BP') is None
    assert model._match_derived_terms('BP') is None
예제 #10
0
def test_derived_term_search(diabetes_data):
    model = Model(diabetes_data)
    model.add("BMI ~ 1", random="age_grp|BP", categorical=["age_grp"])
    model.build(backend="pymc")
    terms = model._match_derived_terms("age_grp|BP")
    names = set([t.name for t in terms])
    assert names == {"1|BP", "age_grp[T.1]|BP", "age_grp[T.2]|BP"}

    term = model._match_derived_terms("1|BP")[0]
    assert term.name == "1|BP"

    # All of these should find nothing
    assert model._match_derived_terms("1|ZZZ") is None
    assert model._match_derived_terms("ZZZ|BP") is None
    assert model._match_derived_terms("BP") is None
    assert model._match_derived_terms("BP") is None
예제 #11
0
def test_distribute_random_effect_over(diabetes_data):
    # Random slopes
    model = Model(diabetes_data)
    model.add("BP ~ 1")
    model.add(random="C(age_grp)|BMI")
    model.build(backend="pymc")
    assert model.terms["C(age_grp)[T.1]|BMI"].data.shape == (442, 163)
    # Nested or crossed random intercepts
    model.reset()
    model.add("BP ~ 1")
    model.add(random="0+C(age_grp)|BMI")
    model.build(backend="pymc")
    assert model.terms["C(age_grp)[0]|BMI"].data.shape == (442, 163)
예제 #12
0
def test_add_formula_append(diabetes_data):
    model = Model(diabetes_data)
    model.add('S3 ~ 0')
    model.add('S1')
    model.build(backend='pymc')
    assert hasattr(model, 'y') and model.y is not None and model.y.name == 'S3'
    assert 'S1' in model.terms
    model.add('S2', append=False)
    assert model.y is None
    model.add('S3 ~ 0')
    model.build(backend='pymc')
    assert 'S2' in model.terms
    assert 'S1' not in model.terms
예제 #13
0
def test_add_formula_append(diabetes_data):
    model = Model(diabetes_data)
    model.add("S3 ~ 0")
    model.add("S1")
    model.build(backend="pymc")
    assert hasattr(model, "y") and model.y is not None and model.y.name == "S3"
    assert "S1" in model.terms
    model.add("S2", append=False)
    assert model.y is None
    model.add("S3 ~ 0")
    model.build(backend="pymc")
    assert "S2" in model.terms
    assert "S1" not in model.terms
예제 #14
0
def test_cell_means_with_covariate(crossed_data):
    # build model using fit
    model0 = Model(crossed_data)
    model0.fit("Y ~ 0 + threecats + continuous", run=False)
    model0.build(backend="pymc3")
    # model0.fit(tune=0, samples=1)

    # build model using add
    model1 = Model(crossed_data)
    model1.add("Y ~ 0")
    model1.add("0 + threecats")
    model1.add("0 + continuous")
    model1.build(backend="pymc3")
    # model1.fit(tune=0, samples=1)

    # check that design matrices are the same,
    # even if term names / level names / order of columns is different
    X0 = set(
        [tuple(t.data[:, lev]) for t in model0.fixed_terms.values() for lev in range(len(t.levels))]
    )
    X1 = set(
        [tuple(t.data[:, lev]) for t in model1.fixed_terms.values() for lev in range(len(t.levels))]
    )
    assert X0 == X1

    # check that threecats priors have finite variance
    assert not any(np.isinf(model0.terms["threecats"].prior.args["sd"]))

    # check that fit and add models have same priors for fixed
    # effects
    priors0 = {x.name: x.prior.args for x in model0.terms.values() if not x.random}
    priors1 = {x.name: x.prior.args for x in model1.terms.values() if not x.random}
    assert set(priors0) == set(priors1)
예제 #15
0
def test_add_to_model(diabetes_data):
    model = Model(diabetes_data)
    model.add('BMI')
    assert isinstance(model.terms['BMI'], Term)
    model.add('age_grp')
    assert set(model.terms.keys()) == {'Intercept', 'BMI', 'age_grp'}
    # Test that arguments are passed appropriately onto Term initializer
    model.add(random='C(age_grp)|BP')
    assert isinstance(model.terms['C(age_grp)[T.1]|BP'], Term)
    assert 'BP[108.0]' in model.terms['C(age_grp)[T.1]|BP'].levels
def test_update_term_priors_after_init(diabetes_data):
    model = Model(diabetes_data)
    model.add("Y ~ BMI")
    model.add("S1")
    model.add(random="age_grp|BP")

    p1 = Prior("Normal", mu=-10, sigma=10)
    p2 = Prior("Beta", alpha=2, beta=2)

    model.set_priors({"BMI": 0.3, "S1": p2})
    model.build(backend="pymc")
    assert model.terms["S1"].prior.args["beta"] == 2
    assert model.terms["BMI"].prior.scale == 0.3
    assert np.isclose(model.terms["BMI"].prior.args["sigma"], 4.7, rtol=0.1)[0]

    model.set_priors({("S1", "BMI"): p1})
    model.build(backend="pymc")
    assert model.terms["S1"].prior.args["sigma"] == 10
    assert model.terms["BMI"].prior.args["mu"] == -10

    p3 = Prior("Normal", mu=0, sigma=Prior("Normal", mu=0, sigma=7))
    model.set_priors(fixed=0.3, random=p3)
    model.build(backend="pymc")
    assert model.terms["BMI"].prior.scale == 0.3
    assert np.isclose(model.terms["BMI"].prior.args["sigma"], 4.7, rtol=0.1)[0]
    assert model.terms["age_grp|BP"].prior.args["sigma"].args["sigma"] == 7

    # Invalid names should raise error
    with pytest.raises(ValueError):
        model.set_priors({"nonexistent_term": 0.3})
        model.build(backend="pymc")

    # Test for partial names, e.g., 'threecats' should match 'threecats[0]'.
    model = Model(diabetes_data)
    model.add("Y ~ 1", random="age_grp|BP", categorical="age_grp")
    model.set_priors({"age_grp|BP": 0.5})
    model.build(backend="pymc")
    assert model.terms["age_grp[T.1]|BP"].prior.scale == 0.5
    assert np.isclose(
        model.terms["age_grp[T.1]|BP"].prior.args["sigma"].args["sigma"], 94, rtol=0.2
    )
    assert model.terms["1|BP"].prior.scale == 0.5
예제 #17
0
def test_add_to_model(diabetes_data):
    model = Model(diabetes_data)
    model.add("BP ~ BMI")
    model.build(backend="pymc")
    assert isinstance(model.terms["BMI"], Term)
    model.add("age_grp")
    model.build(backend="pymc")
    assert set(model.terms.keys()) == {"Intercept", "BMI", "age_grp"}
    # Test that arguments are passed appropriately onto Term initializer
    model.add(random="C(age_grp)|BP")
    model.build(backend="pymc")
    assert isinstance(model.terms["C(age_grp)[T.1]|BP"], Term)
    assert "BP[108.0]" in model.terms["C(age_grp)[T.1]|BP"].levels
def test_poisson_regression(crossed_data):
    # build model using fit and pymc3
    crossed_data['count'] = (crossed_data['Y'] -
                             crossed_data['Y'].min()).round()
    model0 = Model(crossed_data)
    fitted = model0.fit('count ~ threecats + continuous + dummy',
                        family='poisson',
                        backend='pymc3',
                        samples=1,
                        init=None)
    # model0.build()
    # model0.fit()

    # build model using add
    model1 = Model(crossed_data)
    model1.add('count ~ 1', family='poisson')
    model1.add('threecats')
    model1.add('continuous')
    model1.add('dummy')
    model1.build(backend='pymc3')
    model1.fit(samples=1, init=None)

    # build model using fit and stan
    model2 = Model(crossed_data)
    fitted2 = model2.fit('count ~ threecats + continuous + dummy',
                         family='poisson',
                         backend='stan',
                         samples=1)

    # check that term names agree
    assert set(model0.term_names) == set(model1.term_names)
    assert set(model0.term_names) == set(model2.term_names)
    assert set(model1.term_names) == set(model2.term_names)

    # check that fixed effect design matrices are the same,
    # even if term names / level names / order of columns is different
    X0 = set([
        tuple(t.data[:, lev]) for t in model0.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    X1 = set([
        tuple(t.data[:, lev]) for t in model1.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    X2 = set([
        tuple(t.data[:, lev]) for t in model2.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    assert X0 == X1
    assert X0 == X2
    assert X1 == X2

    # check that models have same priors for fixed effects
    priors0 = {
        x.name: x.prior.args
        for x in model0.terms.values() if not x.random
    }
    priors1 = {
        x.name: x.prior.args
        for x in model1.terms.values() if not x.random
    }
    priors2 = {
        x.name: x.prior.args
        for x in model2.terms.values() if not x.random
    }
    # check dictionary keys
    assert set(priors0) == set(priors1)
    assert set(priors0) == set(priors2)
    assert set(priors1) == set(priors2)

    # check dictionary values
    def dicts_close(a, b):
        if set(a) != set(b):
            return False
        else:
            return [
                np.allclose(a[x], b[x], atol=0, rtol=.01) for x in a.keys()
            ]

    assert all([dicts_close(priors0[x], priors1[x]) for x in priors0.keys()])
    assert all([dicts_close(priors0[x], priors2[x]) for x in priors0.keys()])
    assert all([dicts_close(priors1[x], priors2[x]) for x in priors0.keys()])
예제 #19
0
def test_many_fixed_many_random(crossed_data):
    # delete a few values to also test dropna=True functionality
    crossed_data_missing = crossed_data.copy()
    crossed_data_missing.loc[0, "Y"] = np.nan
    crossed_data_missing.loc[1, "continuous"] = np.nan
    crossed_data_missing.loc[2, "threecats"] = np.nan

    # build model using fit
    model0 = Model(crossed_data_missing, dropna=True)
    fitted = model0.fit(
        "Y ~ continuous + dummy + threecats",
        random=["0+threecats|subj", "1|item", "0+continuous|item", "dummy|item", "threecats|site"],
        backend="pymc3",
        init=None,
        tune=10,
        samples=10,
        chains=2,
    )
    # model0.build(backend='pymc3')
    # model0.fit(tune=0, samples=1)

    # build model using add(append=True)
    model1 = Model(crossed_data_missing, dropna=True)
    model1.add("Y ~ 1")
    model1.add("continuous")
    model1.add("dummy")
    model1.add("threecats")
    model1.add(random="0+threecats|subj")
    model1.add(random="1|item")
    model1.add(random="0+continuous|item")
    model1.add(random="dummy|item")
    model1.add(random="threecats|site")
    model1.build(backend="pymc3")
    # model1.fit(tune=0, samples=1)

    # build model using stan backend
    model2 = Model(crossed_data_missing, dropna=True)
    fitted2 = model2.fit(
        "Y ~ continuous + dummy + threecats",
        random=["0+threecats|subj", "1|item", "0+continuous|item", "dummy|item", "threecats|site"],
        backend="stan",
        samples=100,
        chains=2,
    )

    # check that the random effects design matrices have the same shape
    X0 = pd.concat(
        [
            pd.DataFrame(t.data)
            if not isinstance(t.data, dict)
            else pd.concat([pd.DataFrame(t.data[x]) for x in t.data.keys()], axis=1)
            for t in model0.random_terms.values()
        ],
        axis=1,
    )
    X1 = pd.concat(
        [
            pd.DataFrame(t.data)
            if not isinstance(t.data, dict)
            else pd.concat([pd.DataFrame(t.data[x]) for x in t.data.keys()], axis=1)
            for t in model1.random_terms.values()
        ],
        axis=1,
    )
    X2 = pd.concat(
        [
            pd.DataFrame(t.data)
            if not isinstance(t.data, dict)
            else pd.concat([pd.DataFrame(t.data[x]) for x in t.data.keys()], axis=1)
            for t in model2.random_terms.values()
        ],
        axis=1,
    )
    assert X0.shape == X1.shape
    assert X0.shape == X2.shape
    assert X1.shape == X2.shape

    # check that the random effect design matrix contain the same columns,
    # even if term names / columns names / order of columns is different
    X0_set = set(tuple(X0.iloc[:, i]) for i in range(len(X0.columns)))
    X1_set = set(tuple(X1.iloc[:, i]) for i in range(len(X1.columns)))
    X2_set = set(tuple(X1.iloc[:, i]) for i in range(len(X2.columns)))
    assert X0_set == X1_set
    assert X0_set == X2_set
    assert X1_set == X2_set

    # check that fixed effect design matrices are the same,
    # even if term names / level names / order of columns is different
    X0 = set(
        [tuple(t.data[:, lev]) for t in model0.fixed_terms.values() for lev in range(len(t.levels))]
    )
    X1 = set(
        [tuple(t.data[:, lev]) for t in model1.fixed_terms.values() for lev in range(len(t.levels))]
    )
    X2 = set(
        [tuple(t.data[:, lev]) for t in model2.fixed_terms.values() for lev in range(len(t.levels))]
    )
    assert X0 == X1
    assert X0 == X2
    assert X1 == X2

    # check that models have same priors for fixed effects
    priors0 = {x.name: x.prior.args for x in model0.terms.values() if not x.random}
    priors1 = {x.name: x.prior.args for x in model1.terms.values() if not x.random}
    priors2 = {x.name: x.prior.args for x in model2.terms.values() if not x.random}
    # check dictionary keys
    assert set(priors0) == set(priors1)
    assert set(priors0) == set(priors2)
    assert set(priors1) == set(priors2)
    # check dictionary values
    def dicts_close(a, b):
        if set(a) != set(b):
            return False
        else:
            return [np.allclose(a[x], b[x], atol=0, rtol=0.01) for x in a.keys()]

    assert all([dicts_close(priors0[x], priors1[x]) for x in priors0.keys()])
    assert all([dicts_close(priors0[x], priors2[x]) for x in priors0.keys()])
    assert all([dicts_close(priors1[x], priors2[x]) for x in priors0.keys()])

    # check that fit and add models have same priors for random effects
    priors0 = {x.name: x.prior.args["sd"].args for x in model0.terms.values() if x.random}
    priors1 = {x.name: x.prior.args["sd"].args for x in model1.terms.values() if x.random}
    priors2 = {x.name: x.prior.args["sd"].args for x in model2.terms.values() if x.random}
    # check dictionary keys
    assert set(priors0) == set(priors1)
    assert set(priors0) == set(priors2)
    assert set(priors1) == set(priors2)
    # check dictionary values
    def dicts_close(a, b):
        if set(a) != set(b):
            return False
        else:
            return [np.allclose(a[x], b[x], atol=0, rtol=0.01) for x in a.keys()]

    assert all([dicts_close(priors0[x], priors1[x]) for x in priors0.keys()])
    assert all([dicts_close(priors0[x], priors2[x]) for x in priors0.keys()])
    assert all([dicts_close(priors1[x], priors2[x]) for x in priors0.keys()])

    # test consistency between summary and to_df for pymc3
    assert len(set(fitted.to_df().columns)) == 15

    # test consistency between summary and to_df for stan
    assert len(set(fitted2.to_df().columns)) == 15

    # test consistenct between pymc3 and stan
    assert set(fitted.to_df().columns) == set(fitted2.to_df().columns)

#    # check hide_transformed for pymc3
#    # it looks like some versions of pymc3 add a trailing '_' to transformed
#    # vars and some dont. so here for consistency we strip out any trailing '_'
#    # that we find
#    full = fitted.summary(ranefs=True, transformed=True).index
#    full = set([re.sub(r"_+$", r"", x) for x in full])
#    test_set = fitted.summary(ranefs=True, transformed=False).index
#    test_set = set([re.sub(r"_+$", r"", x) for x in test_set])
#    answer = {
#        "1|item_offset[0]",
#        "1|item_offset[10]",
#        "1|item_offset[11]",
#        "1|item_offset[1]",
#        "1|item_offset[2]",
#        "1|item_offset[3]",
#        "1|item_offset[4]",
#        "1|item_offset[5]",
#        "1|item_offset[6]",
#        "1|item_offset[7]",
#        "1|item_offset[8]",
#        "1|item_offset[9]",
#        "1|item_sd_log",
#        "1|site_offset[0]",
#        "1|site_offset[1]",
#        "1|site_offset[2]",
#        "1|site_offset[3]",
#        "1|site_offset[4]",
#        "1|site_sd_log",
#        "Y_sd_interval",
#        "continuous|item_offset[0]",
#        "continuous|item_offset[10]",
#        "continuous|item_offset[11]",
#        "continuous|item_offset[1]",
#        "continuous|item_offset[2]",
#        "continuous|item_offset[3]",
#        "continuous|item_offset[4]",
#        "continuous|item_offset[5]",
#        "continuous|item_offset[6]",
#        "continuous|item_offset[7]",
#        "continuous|item_offset[8]",
#        "continuous|item_offset[9]",
#        "continuous|item_sd_log",
#        "dummy|item_offset[0]",
#        "dummy|item_offset[10]",
#        "dummy|item_offset[11]",
#        "dummy|item_offset[1]",
#        "dummy|item_offset[2]",
#        "dummy|item_offset[3]",
#        "dummy|item_offset[4]",
#        "dummy|item_offset[5]",
#        "dummy|item_offset[6]",
#        "dummy|item_offset[7]",
#        "dummy|item_offset[8]",
#        "dummy|item_offset[9]",
#        "dummy|item_sd_log",
#        "threecats[T.b]|site_offset[0]",
#        "threecats[T.b]|site_offset[1]",
#        "threecats[T.b]|site_offset[2]",
#        "threecats[T.b]|site_offset[3]",
#        "threecats[T.b]|site_offset[4]",
#        "threecats[T.b]|site_sd_log",
#        "threecats[T.c]|site_offset[0]",
#        "threecats[T.c]|site_offset[1]",
#        "threecats[T.c]|site_offset[2]",
#        "threecats[T.c]|site_offset[3]",
#        "threecats[T.c]|site_offset[4]",
#        "threecats[T.c]|site_sd_log",
#        "threecats[a]|subj_offset[0]",
#        "threecats[a]|subj_offset[1]",
#        "threecats[a]|subj_offset[2]",
#        "threecats[a]|subj_offset[3]",
#        "threecats[a]|subj_offset[4]",
#        "threecats[a]|subj_offset[5]",
#        "threecats[a]|subj_offset[6]",
#        "threecats[a]|subj_offset[7]",
#        "threecats[a]|subj_offset[8]",
#        "threecats[a]|subj_offset[9]",
#        "threecats[a]|subj_sd_log",
#        "threecats[b]|subj_offset[0]",
#        "threecats[b]|subj_offset[1]",
#        "threecats[b]|subj_offset[2]",
#        "threecats[b]|subj_offset[3]",
#        "threecats[b]|subj_offset[4]",
#        "threecats[b]|subj_offset[5]",
#        "threecats[b]|subj_offset[6]",
#        "threecats[b]|subj_offset[7]",
#        "threecats[b]|subj_offset[8]",
#        "threecats[b]|subj_offset[9]",
#        "threecats[b]|subj_sd_log",
#        "threecats[c]|subj_offset[0]",
#        "threecats[c]|subj_offset[1]",
#        "threecats[c]|subj_offset[2]",
#        "threecats[c]|subj_offset[3]",
#        "threecats[c]|subj_offset[4]",
#        "threecats[c]|subj_offset[5]",
#        "threecats[c]|subj_offset[6]",
#        "threecats[c]|subj_offset[7]",
#        "threecats[c]|subj_offset[8]",
#        "threecats[c]|subj_offset[9]",
#        "threecats[c]|subj_sd_log",
#    }
#    assert full.difference(test_set) == answer

#    # check hide_transformed for stan
#    full2 = fitted2.summary(ranefs=True, transformed=True)
#    full2 = set(full2.index)
#    test_set2 = fitted2.summary(ranefs=True, transformed=False)
#    test_set2 = set(test_set2.index)
#    answer = set(
#        ["lp__"]
#        + ["yhat[{}]".format(i) for i in range(1, len(crossed_data.index) - 2)]
#        + [
#            "1|item_offset[0]",
#            "1|item_offset[10]",
#            "1|item_offset[11]",
#            "1|item_offset[1]",
#            "1|item_offset[2]",
#            "1|item_offset[3]",
#            "1|item_offset[4]",
#            "1|item_offset[5]",
#            "1|item_offset[6]",
#            "1|item_offset[7]",
#            "1|item_offset[8]",
#            "1|item_offset[9]",
#            "1|site_offset[0]",
#            "1|site_offset[1]",
#            "1|site_offset[2]",
#            "1|site_offset[3]",
#            "1|site_offset[4]",
#            "continuous|item_offset[0]",
#            "continuous|item_offset[10]",
#            "continuous|item_offset[11]",
#            "continuous|item_offset[1]",
#            "continuous|item_offset[2]",
#            "continuous|item_offset[3]",
#            "continuous|item_offset[4]",
#            "continuous|item_offset[5]",
#            "continuous|item_offset[6]",
#            "continuous|item_offset[7]",
#            "continuous|item_offset[8]",
#            "continuous|item_offset[9]",
#            "dummy|item_offset[0]",
#            "dummy|item_offset[10]",
#            "dummy|item_offset[11]",
#            "dummy|item_offset[1]",
#            "dummy|item_offset[2]",
#            "dummy|item_offset[3]",
#            "dummy|item_offset[4]",
#            "dummy|item_offset[5]",
#            "dummy|item_offset[6]",
#            "dummy|item_offset[7]",
#            "dummy|item_offset[8]",
#            "dummy|item_offset[9]",
#            "threecats[T.b]|site_offset[0]",
#            "threecats[T.b]|site_offset[1]",
#            "threecats[T.b]|site_offset[2]",
#            "threecats[T.b]|site_offset[3]",
#            "threecats[T.b]|site_offset[4]",
#            "threecats[T.c]|site_offset[0]",
#            "threecats[T.c]|site_offset[1]",
#            "threecats[T.c]|site_offset[2]",
#            "threecats[T.c]|site_offset[3]",
#            "threecats[T.c]|site_offset[4]",
#            "threecats[a]|subj_offset[0]",
#            "threecats[a]|subj_offset[1]",
#            "threecats[a]|subj_offset[2]",
#            "threecats[a]|subj_offset[3]",
#            "threecats[a]|subj_offset[4]",
#            "threecats[a]|subj_offset[5]",
#            "threecats[a]|subj_offset[6]",
#            "threecats[a]|subj_offset[7]",
#            "threecats[a]|subj_offset[8]",
#            "threecats[a]|subj_offset[9]",
#            "threecats[b]|subj_offset[0]",
#            "threecats[b]|subj_offset[1]",
#            "threecats[b]|subj_offset[2]",
#            "threecats[b]|subj_offset[3]",
#            "threecats[b]|subj_offset[4]",
#            "threecats[b]|subj_offset[5]",
#            "threecats[b]|subj_offset[6]",
#            "threecats[b]|subj_offset[7]",
#            "threecats[b]|subj_offset[8]",
#            "threecats[b]|subj_offset[9]",
#            "threecats[c]|subj_offset[0]",
#            "threecats[c]|subj_offset[1]",
#            "threecats[c]|subj_offset[2]",
#            "threecats[c]|subj_offset[3]",
#            "threecats[c]|subj_offset[4]",
#            "threecats[c]|subj_offset[5]",
#            "threecats[c]|subj_offset[6]",
#            "threecats[c]|subj_offset[7]",
#            "threecats[c]|subj_offset[8]",
#            "threecats[c]|subj_offset[9]",
#        ]
#    )
#    assert full2.difference(test_set2) == answer

#    # check for consistency in parameter names between pymc3 and stan,
#    # minus the end diffs due to Stan's 1-based indexing
#    assert test_set == test_set2

#    # check exclude_ranefs for pymc3
#    test_set = fitted.summary(ranefs=False, transformed=True).index
#    test_set = set([re.sub(r"_+$", r"", x) for x in test_set])
#    answer = {
#        "1|item[0]",
#        "1|item[10]",
#        "1|item[11]",
#        "1|item[1]",
#        "1|item[2]",
#        "1|item[3]",
#        "1|item[4]",
#        "1|item[5]",
#        "1|item[6]",
#        "1|item[7]",
#        "1|item[8]",
#        "1|item[9]",
#        "1|item_offset[0]",
#        "1|item_offset[10]",
#        "1|item_offset[11]",
#        "1|item_offset[1]",
#        "1|item_offset[2]",
#        "1|item_offset[3]",
#        "1|item_offset[4]",
#        "1|item_offset[5]",
#        "1|item_offset[6]",
#        "1|item_offset[7]",
#        "1|item_offset[8]",
#        "1|item_offset[9]",
#        "1|site[0]",
#        "1|site[1]",
#        "1|site[2]",
#        "1|site[3]",
#        "1|site[4]",
#        "1|site_offset[0]",
#        "1|site_offset[1]",
#        "1|site_offset[2]",
#        "1|site_offset[3]",
#        "1|site_offset[4]",
#        "continuous|item[0]",
#        "continuous|item[10]",
#        "continuous|item[11]",
#        "continuous|item[1]",
#        "continuous|item[2]",
#        "continuous|item[3]",
#        "continuous|item[4]",
#        "continuous|item[5]",
#        "continuous|item[6]",
#        "continuous|item[7]",
#        "continuous|item[8]",
#        "continuous|item[9]",
#        "continuous|item_offset[0]",
#        "continuous|item_offset[10]",
#        "continuous|item_offset[11]",
#        "continuous|item_offset[1]",
#        "continuous|item_offset[2]",
#        "continuous|item_offset[3]",
#        "continuous|item_offset[4]",
#        "continuous|item_offset[5]",
#        "continuous|item_offset[6]",
#        "continuous|item_offset[7]",
#        "continuous|item_offset[8]",
#        "continuous|item_offset[9]",
#        "dummy|item[0]",
#        "dummy|item[10]",
#        "dummy|item[11]",
#        "dummy|item[1]",
#        "dummy|item[2]",
#        "dummy|item[3]",
#        "dummy|item[4]",
#        "dummy|item[5]",
#        "dummy|item[6]",
#        "dummy|item[7]",
#        "dummy|item[8]",
#        "dummy|item[9]",
#        "dummy|item_offset[0]",
#        "dummy|item_offset[10]",
#        "dummy|item_offset[11]",
#        "dummy|item_offset[1]",
#        "dummy|item_offset[2]",
#        "dummy|item_offset[3]",
#        "dummy|item_offset[4]",
#        "dummy|item_offset[5]",
#        "dummy|item_offset[6]",
#        "dummy|item_offset[7]",
#        "dummy|item_offset[8]",
#        "dummy|item_offset[9]",
#        "threecats[T.b]|site[0]",
#        "threecats[T.b]|site[1]",
#        "threecats[T.b]|site[2]",
#        "threecats[T.b]|site[3]",
#        "threecats[T.b]|site[4]",
#        "threecats[T.b]|site_offset[0]",
#        "threecats[T.b]|site_offset[1]",
#        "threecats[T.b]|site_offset[2]",
#        "threecats[T.b]|site_offset[3]",
#        "threecats[T.b]|site_offset[4]",
#        "threecats[T.c]|site[0]",
#        "threecats[T.c]|site[1]",
#        "threecats[T.c]|site[2]",
#        "threecats[T.c]|site[3]",
#        "threecats[T.c]|site[4]",
#        "threecats[T.c]|site_offset[0]",
#        "threecats[T.c]|site_offset[1]",
#        "threecats[T.c]|site_offset[2]",
#        "threecats[T.c]|site_offset[3]",
#        "threecats[T.c]|site_offset[4]",
#        "threecats[a]|subj[0]",
#        "threecats[a]|subj[1]",
#        "threecats[a]|subj[2]",
#        "threecats[a]|subj[3]",
#        "threecats[a]|subj[4]",
#        "threecats[a]|subj[5]",
#        "threecats[a]|subj[6]",
#        "threecats[a]|subj[7]",
#        "threecats[a]|subj[8]",
#        "threecats[a]|subj[9]",
#        "threecats[a]|subj_offset[0]",
#        "threecats[a]|subj_offset[1]",
#        "threecats[a]|subj_offset[2]",
#        "threecats[a]|subj_offset[3]",
#        "threecats[a]|subj_offset[4]",
#        "threecats[a]|subj_offset[5]",
#        "threecats[a]|subj_offset[6]",
#        "threecats[a]|subj_offset[7]",
#        "threecats[a]|subj_offset[8]",
#        "threecats[a]|subj_offset[9]",
#        "threecats[b]|subj[0]",
#        "threecats[b]|subj[1]",
#        "threecats[b]|subj[2]",
#        "threecats[b]|subj[3]",
#        "threecats[b]|subj[4]",
#        "threecats[b]|subj[5]",
#        "threecats[b]|subj[6]",
#        "threecats[b]|subj[7]",
#        "threecats[b]|subj[8]",
#        "threecats[b]|subj[9]",
#        "threecats[b]|subj_offset[0]",
#        "threecats[b]|subj_offset[1]",
#        "threecats[b]|subj_offset[2]",
#        "threecats[b]|subj_offset[3]",
#        "threecats[b]|subj_offset[4]",
#        "threecats[b]|subj_offset[5]",
#        "threecats[b]|subj_offset[6]",
#        "threecats[b]|subj_offset[7]",
#        "threecats[b]|subj_offset[8]",
#        "threecats[b]|subj_offset[9]",
#        "threecats[c]|subj[0]",
#        "threecats[c]|subj[1]",
#        "threecats[c]|subj[2]",
#        "threecats[c]|subj[3]",
#        "threecats[c]|subj[4]",
#        "threecats[c]|subj[5]",
#        "threecats[c]|subj[6]",
#        "threecats[c]|subj[7]",
#        "threecats[c]|subj[8]",
#        "threecats[c]|subj[9]",
#        "threecats[c]|subj_offset[0]",
#        "threecats[c]|subj_offset[1]",
#        "threecats[c]|subj_offset[2]",
#        "threecats[c]|subj_offset[3]",
#        "threecats[c]|subj_offset[4]",
#        "threecats[c]|subj_offset[5]",
#        "threecats[c]|subj_offset[6]",
#        "threecats[c]|subj_offset[7]",
#        "threecats[c]|subj_offset[8]",
#        "threecats[c]|subj_offset[9]",
#    }
#    assert full.difference(test_set) == answer

#    # check exclude_ranefs for stan
#    test_set2 = set(fitted2.summary(ranefs=False, transformed=True).index)
#    answer = set(
#        [
#            "1|item[0]",
#            "1|item[10]",
#            "1|item[11]",
#            "1|item[1]",
#            "1|item[2]",
#            "1|item[3]",
#            "1|item[4]",
#            "1|item[5]",
#            "1|item[6]",
#            "1|item[7]",
#            "1|item[8]",
#            "1|item[9]",
#            "1|site[0]",
#            "1|site[1]",
#            "1|site[2]",
#            "1|site[3]",
#            "1|site[4]",
#            "continuous|item[0]",
#            "continuous|item[10]",
#            "continuous|item[11]",
#            "continuous|item[1]",
#            "continuous|item[2]",
#            "continuous|item[3]",
#            "continuous|item[4]",
#            "continuous|item[5]",
#            "continuous|item[6]",
#            "continuous|item[7]",
#            "continuous|item[8]",
#            "continuous|item[9]",
#            "dummy|item[0]",
#            "dummy|item[10]",
#            "dummy|item[11]",
#            "dummy|item[1]",
#            "dummy|item[2]",
#            "dummy|item[3]",
#            "dummy|item[4]",
#            "dummy|item[5]",
#            "dummy|item[6]",
#            "dummy|item[7]",
#            "dummy|item[8]",
#            "dummy|item[9]",
#            "threecats[T.b]|site[0]",
#            "threecats[T.b]|site[1]",
#            "threecats[T.b]|site[2]",
#            "threecats[T.b]|site[3]",
#            "threecats[T.b]|site[4]",
#            "threecats[T.c]|site[0]",
#            "threecats[T.c]|site[1]",
#            "threecats[T.c]|site[2]",
#            "threecats[T.c]|site[3]",
#            "threecats[T.c]|site[4]",
#            "threecats[a]|subj[0]",
#            "threecats[a]|subj[1]",
#            "threecats[a]|subj[2]",
#            "threecats[a]|subj[3]",
#            "threecats[a]|subj[4]",
#            "threecats[a]|subj[5]",
#            "threecats[a]|subj[6]",
#            "threecats[a]|subj[7]",
#            "threecats[a]|subj[8]",
#            "threecats[a]|subj[9]",
#            "threecats[b]|subj[0]",
#            "threecats[b]|subj[1]",
#            "threecats[b]|subj[2]",
#            "threecats[b]|subj[3]",
#            "threecats[b]|subj[4]",
#            "threecats[b]|subj[5]",
#            "threecats[b]|subj[6]",
#            "threecats[b]|subj[7]",
#            "threecats[b]|subj[8]",
#            "threecats[b]|subj[9]",
#            "threecats[c]|subj[0]",
#            "threecats[c]|subj[1]",
#            "threecats[c]|subj[2]",
#            "threecats[c]|subj[3]",
#            "threecats[c]|subj[4]",
#            "threecats[c]|subj[5]",
#            "threecats[c]|subj[6]",
#            "threecats[c]|subj[7]",
#            "threecats[c]|subj[8]",
#            "threecats[c]|subj[9]",
#        ]
#        + [
#            "1|item_offset[0]",
#            "1|item_offset[10]",
#            "1|item_offset[11]",
#            "1|item_offset[1]",
#            "1|item_offset[2]",
#            "1|item_offset[3]",
#            "1|item_offset[4]",
#            "1|item_offset[5]",
#            "1|item_offset[6]",
#            "1|item_offset[7]",
#            "1|item_offset[8]",
#            "1|item_offset[9]",
#            "1|site_offset[0]",
#            "1|site_offset[1]",
#            "1|site_offset[2]",
#            "1|site_offset[3]",
#            "1|site_offset[4]",
#            "continuous|item_offset[0]",
#            "continuous|item_offset[10]",
#            "continuous|item_offset[11]",
#            "continuous|item_offset[1]",
#            "continuous|item_offset[2]",
#            "continuous|item_offset[3]",
#            "continuous|item_offset[4]",
#            "continuous|item_offset[5]",
#            "continuous|item_offset[6]",
#            "continuous|item_offset[7]",
#            "continuous|item_offset[8]",
#            "continuous|item_offset[9]",
#            "dummy|item_offset[0]",
#            "dummy|item_offset[10]",
#            "dummy|item_offset[11]",
#            "dummy|item_offset[1]",
#            "dummy|item_offset[2]",
#            "dummy|item_offset[3]",
#            "dummy|item_offset[4]",
#            "dummy|item_offset[5]",
#            "dummy|item_offset[6]",
#            "dummy|item_offset[7]",
#            "dummy|item_offset[8]",
#            "dummy|item_offset[9]",
#            "threecats[T.b]|site_offset[0]",
#            "threecats[T.b]|site_offset[1]",
#            "threecats[T.b]|site_offset[2]",
#            "threecats[T.b]|site_offset[3]",
#            "threecats[T.b]|site_offset[4]",
#            "threecats[T.c]|site_offset[0]",
#            "threecats[T.c]|site_offset[1]",
#            "threecats[T.c]|site_offset[2]",
#            "threecats[T.c]|site_offset[3]",
#            "threecats[T.c]|site_offset[4]",
#            "threecats[a]|subj_offset[0]",
#            "threecats[a]|subj_offset[1]",
#            "threecats[a]|subj_offset[2]",
#            "threecats[a]|subj_offset[3]",
#            "threecats[a]|subj_offset[4]",
#            "threecats[a]|subj_offset[5]",
#            "threecats[a]|subj_offset[6]",
#            "threecats[a]|subj_offset[7]",
#            "threecats[a]|subj_offset[8]",
#            "threecats[a]|subj_offset[9]",
#            "threecats[b]|subj_offset[0]",
#            "threecats[b]|subj_offset[1]",
#            "threecats[b]|subj_offset[2]",
#            "threecats[b]|subj_offset[3]",
#            "threecats[b]|subj_offset[4]",
#            "threecats[b]|subj_offset[5]",
#            "threecats[b]|subj_offset[6]",
#            "threecats[b]|subj_offset[7]",
#            "threecats[b]|subj_offset[8]",
#            "threecats[b]|subj_offset[9]",
#            "threecats[c]|subj_offset[0]",
#            "threecats[c]|subj_offset[1]",
#            "threecats[c]|subj_offset[2]",
#            "threecats[c]|subj_offset[3]",
#            "threecats[c]|subj_offset[4]",
#            "threecats[c]|subj_offset[5]",
#            "threecats[c]|subj_offset[6]",
#            "threecats[c]|subj_offset[7]",
#            "threecats[c]|subj_offset[8]",
#            "threecats[c]|subj_offset[9]",
#        ]
#    )
#    assert full2.difference(test_set2) == answer

    # test plots for pymc3
    fitted.plot(kind="priors")
    fitted.plot()

    # test plots for stan
    fitted2.plot(kind="priors")
    fitted2.plot()
def test_logistic_regression(crossed_data):
    # build model using fit and pymc3
    model0 = Model(crossed_data)
    fitted = model0.fit('threecats[b] ~ continuous + dummy',
                        family='bernoulli',
                        link='logit',
                        backend='pymc3',
                        samples=100)
    # model0.build()
    # fitted = model0.fit()

    # build model using add
    model1 = Model(crossed_data)
    model1.add('threecats[b] ~ 1', family='bernoulli', link='logit')
    model1.add('continuous')
    model1.add('dummy')
    model1.build(backend='pymc3')
    model1.fit(samples=1)

    # build model using fit and stan
    model2 = Model(crossed_data)
    fitted2 = model2.fit('threecats[b] ~ continuous + dummy',
                         family='bernoulli',
                         link='logit',
                         backend='stan',
                         samples=100)

    # check that term names agree
    assert set(model0.term_names) == set(model1.term_names)
    assert set(model0.term_names) == set(model2.term_names)
    assert set(model1.term_names) == set(model2.term_names)

    # check that fixed effect design matrices are the same,
    # even if term names / level names / order of columns is different
    X0 = set([
        tuple(t.data[:, lev]) for t in model0.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    X1 = set([
        tuple(t.data[:, lev]) for t in model1.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    X2 = set([
        tuple(t.data[:, lev]) for t in model2.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    assert X0 == X1
    assert X0 == X2
    assert X1 == X2

    # check that models have same priors for fixed effects
    priors0 = {
        x.name: x.prior.args
        for x in model0.terms.values() if not x.random
    }
    priors1 = {
        x.name: x.prior.args
        for x in model1.terms.values() if not x.random
    }
    priors2 = {
        x.name: x.prior.args
        for x in model2.terms.values() if not x.random
    }
    # check dictionary keys
    assert set(priors0) == set(priors1)
    assert set(priors0) == set(priors2)
    assert set(priors1) == set(priors2)

    # check dictionary values
    def dicts_close(a, b):
        if set(a) != set(b):
            return False
        else:
            return [
                np.allclose(a[x], b[x], atol=0, rtol=.01) for x in a.keys()
            ]

    assert all([dicts_close(priors0[x], priors1[x]) for x in priors0.keys()])
    assert all([dicts_close(priors0[x], priors2[x]) for x in priors0.keys()])
    assert all([dicts_close(priors1[x], priors2[x]) for x in priors0.keys()])

    # test that summary reminds user which event is being modeled
    fitted.summary(quantiles=.5)

    # test that traceplot reminds user which event is being modeled
    fitted.plot()
def test_cell_means_with_many_random_effects(crossed_data):
    # build model using fit
    model0 = Model(crossed_data)
    model0.fit('Y ~ 0 + threecats',
               random=[
                   '0+threecats|subj', 'continuous|item', 'dummy|item',
                   'threecats|site'
               ],
               run=False)
    model0.build(backend='pymc3')
    # model0.fit(samples=1)

    # build model using add(append=True)
    model1 = Model(crossed_data)
    model1.add('Y ~ 0')
    model1.add('0 + threecats')
    model1.add(random='0+threecats|subj')
    model1.add(random='1|item')
    model1.add(random='0+continuous|item')
    model1.add(random='dummy|item')
    model1.add(random='threecats|site')
    model1.build(backend='pymc3')
    # model1.fit(samples=1)

    # check that the random effects design matrices have the same shape
    X0 = pd.concat([
        pd.DataFrame(t.data) if not isinstance(t.data, dict) else pd.concat(
            [pd.DataFrame(t.data[x]) for x in t.data.keys()], axis=1)
        for t in model0.random_terms.values()
    ],
                   axis=1)
    X1 = pd.concat([
        pd.DataFrame(t.data) if not isinstance(t.data, dict) else pd.concat(
            [pd.DataFrame(t.data[x]) for x in t.data.keys()], axis=1)
        for t in model0.random_terms.values()
    ],
                   axis=1)
    assert X0.shape == X1.shape

    # check that the random effect design matrix contain the same columns,
    # even if term names / columns names / order of columns is different
    X0_set = set(tuple(X0.iloc[:, i]) for i in range(len(X0.columns)))
    X1_set = set(tuple(X1.iloc[:, i]) for i in range(len(X1.columns)))
    assert X0_set == X1_set

    # check that fixed effect design matrices are the same,
    # even if term names / level names / order of columns is different
    X0 = set([
        tuple(t.data[:, lev]) for t in model0.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    X1 = set([
        tuple(t.data[:, lev]) for t in model1.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    assert X0 == X1

    # check that fit and add models have same priors for fixed
    # effects
    priors0 = {
        x.name: x.prior.args
        for x in model0.terms.values() if not x.random
    }
    priors1 = {
        x.name: x.prior.args
        for x in model1.terms.values() if not x.random
    }
    assert set(priors0) == set(priors1)

    # check that fit and add models have same priors for random
    # effects
    priors0 = {
        x.name: x.prior.args['sd'].args
        for x in model0.terms.values() if x.random
    }
    priors1 = {
        x.name: x.prior.args['sd'].args
        for x in model1.terms.values() if x.random
    }
    assert set(priors0) == set(priors1)
def test_poisson_regression(crossed_data):
    # build model using fit and pymc3
    crossed_data["count"] = (crossed_data["Y"] -
                             crossed_data["Y"].min()).round()
    model0 = Model(crossed_data)
    fitted = model0.fit(
        "count ~ threecats + continuous + dummy",
        family="poisson",
        backend="pymc3",
        tune=0,
        samples=1,
        init=None,
    )
    # model0.build()
    # model0.fit()

    # build model using add
    model1 = Model(crossed_data)
    model1.add("count ~ 1", family="poisson")
    model1.add("threecats")
    model1.add("continuous")
    model1.add("dummy")
    model1.build(backend="pymc3")
    model1.fit(tune=0, samples=1, init=None)

    # check that term names agree
    assert set(model0.term_names) == set(model1.term_names)

    # check that fixed effect design matrices are the same,
    # even if term names / level names / order of columns is different
    X0 = set([
        tuple(t.data[:, lev]) for t in model0.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    X1 = set([
        tuple(t.data[:, lev]) for t in model1.fixed_terms.values()
        for lev in range(len(t.levels))
    ])

    assert X0 == X1

    # check that models have same priors for fixed effects
    priors0 = {
        x.name: x.prior.args
        for x in model0.terms.values() if not x.random
    }
    priors1 = {
        x.name: x.prior.args
        for x in model1.terms.values() if not x.random
    }
    # check dictionary keys
    assert set(priors0) == set(priors1)

    # check dictionary values
    def dicts_close(a, b):
        if set(a) != set(b):
            return False
        else:
            return [
                np.allclose(a[x], b[x], atol=0, rtol=0.01) for x in a.keys()
            ]

    assert all([dicts_close(priors0[x], priors1[x]) for x in priors0.keys()])
def test_logistic_regression(crossed_data):
    # build model using fit and pymc3
    model0 = Model(crossed_data)
    fitted0 = model0.fit(
        "threecats[b] ~ continuous + dummy",
        family="bernoulli",
        link="logit",
        backend="pymc3",
        tune=0,
        samples=1000,
    )
    # model0.build()
    # fitted0 = model0.fit()

    # build model using add
    model1 = Model(crossed_data)
    model1.add("threecats[b] ~ 1", family="bernoulli", link="logit")
    model1.add("continuous")
    model1.add("dummy")
    model1.build(backend="pymc3")
    model1.fit(tune=0, samples=1)

    # build model using fit, pymc3 and theano link function
    model3 = Model(crossed_data)
    fitted3 = model3.fit(
        "threecats[b] ~ continuous + dummy",
        family="bernoulli",
        link=tt.nnet.sigmoid,
        backend="pymc3",
        tune=0,
        samples=1000,
    )

    # check that using a theano link function works
    assert np.allclose(az.summary(fitted0)["mean"],
                       az.summary(fitted3)["mean"],
                       atol=0.2)

    # check that term names agree
    assert set(model0.term_names) == set(model1.term_names)

    # check that fixed effect design matrices are the same,
    # even if term names / level names / order of columns is different
    X0 = set([
        tuple(t.data[:, lev]) for t in model0.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    X1 = set([
        tuple(t.data[:, lev]) for t in model1.fixed_terms.values()
        for lev in range(len(t.levels))
    ])

    assert X0 == X1

    # check that models have same priors for fixed effects
    priors0 = {
        x.name: x.prior.args
        for x in model0.terms.values() if not x.random
    }
    priors1 = {
        x.name: x.prior.args
        for x in model1.terms.values() if not x.random
    }
    # check dictionary keys
    assert set(priors0) == set(priors1)

    # check dictionary values
    def dicts_close(a, b):
        if set(a) != set(b):
            return False
        else:
            return [
                np.allclose(a[x], b[x], atol=0, rtol=0.01) for x in a.keys()
            ]

    assert all([dicts_close(priors0[x], priors1[x]) for x in priors0.keys()])
def test_many_fixed_many_random(crossed_data):
    # delete a few values to also test dropna=True functionality
    crossed_data_missing = crossed_data.copy()
    crossed_data_missing.loc[0, "Y"] = np.nan
    crossed_data_missing.loc[1, "continuous"] = np.nan
    crossed_data_missing.loc[2, "threecats"] = np.nan

    # build model using fit
    model0 = Model(crossed_data_missing, dropna=True)
    fitted = model0.fit(
        "Y ~ continuous + dummy + threecats",
        random=[
            "0+threecats|subj", "1|item", "0+continuous|item", "dummy|item",
            "threecats|site"
        ],
        backend="pymc3",
        init=None,
        tune=10,
        samples=10,
        chains=2,
    )
    # model0.build(backend='pymc3')
    # model0.fit(tune=0, samples=1)

    # build model using add(append=True)
    model1 = Model(crossed_data_missing, dropna=True)
    model1.add("Y ~ 1")
    model1.add("continuous")
    model1.add("dummy")
    model1.add("threecats")
    model1.add(random="0+threecats|subj")
    model1.add(random="1|item")
    model1.add(random="0+continuous|item")
    model1.add(random="dummy|item")
    model1.add(random="threecats|site")
    model1.build(backend="pymc3")

    # check that the random effects design matrices have the same shape
    X0 = pd.concat(
        [
            pd.DataFrame(t.data) if not isinstance(t.data, dict) else
            pd.concat([pd.DataFrame(t.data[x]) for x in t.data.keys()], axis=1)
            for t in model0.random_terms.values()
        ],
        axis=1,
    )
    X1 = pd.concat(
        [
            pd.DataFrame(t.data) if not isinstance(t.data, dict) else
            pd.concat([pd.DataFrame(t.data[x]) for x in t.data.keys()], axis=1)
            for t in model1.random_terms.values()
        ],
        axis=1,
    )
    assert X0.shape == X1.shape

    # check that the random effect design matrix contain the same columns,
    # even if term names / columns names / order of columns is different
    X0_set = set(tuple(X0.iloc[:, i]) for i in range(len(X0.columns)))
    X1_set = set(tuple(X1.iloc[:, i]) for i in range(len(X1.columns)))
    assert X0_set == X1_set

    # check that fixed effect design matrices are the same,
    # even if term names / level names / order of columns is different
    X0 = set([
        tuple(t.data[:, lev]) for t in model0.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    X1 = set([
        tuple(t.data[:, lev]) for t in model1.fixed_terms.values()
        for lev in range(len(t.levels))
    ])

    assert X0 == X1

    # check that models have same priors for fixed effects
    priors0 = {
        x.name: x.prior.args
        for x in model0.terms.values() if not x.random
    }
    priors1 = {
        x.name: x.prior.args
        for x in model1.terms.values() if not x.random
    }
    # check dictionary keys
    assert set(priors0) == set(priors1)

    # check dictionary values
    def dicts_close(a, b):
        if set(a) != set(b):
            return False
        else:
            return [
                np.allclose(a[x], b[x], atol=0, rtol=0.01) for x in a.keys()
            ]

    assert all([dicts_close(priors0[x], priors1[x]) for x in priors0.keys()])

    # check that fit and add models have same priors for random effects
    priors0 = {
        x.name: x.prior.args["sigma"].args
        for x in model0.terms.values() if x.random
    }
    priors1 = {
        x.name: x.prior.args["sigma"].args
        for x in model1.terms.values() if x.random
    }
    # check dictionary keys
    assert set(priors0) == set(priors1)

    # check dictionary values
    def dicts_close(a, b):
        if set(a) != set(b):
            return False
        else:
            return [
                np.allclose(a[x], b[x], atol=0, rtol=0.01) for x in a.keys()
            ]

    assert all([dicts_close(priors0[x], priors1[x]) for x in priors0.keys()])
def test_many_fixed_many_random(crossed_data):
    # delete a few values to also test dropna=True functionality
    crossed_data_missing = crossed_data.copy()
    crossed_data_missing['Y'][10] = np.nan
    crossed_data_missing['continuous'][20] = np.nan

    # build model using fit
    model0 = Model(crossed_data_missing, dropna=True)
    fitted = model0.fit('Y ~ continuous + dummy + threecats',
                        random=[
                            '0+threecats|subj', '1|item', '0+continuous|item',
                            'dummy|item', 'threecats|site'
                        ],
                        backend='pymc3',
                        samples=10,
                        chains=2)
    # model0.build(backend='pymc3')
    # model0.fit(samples=1)

    # build model using add(append=True)
    model1 = Model(crossed_data_missing, dropna=True)
    model1.add('Y ~ 1')
    model1.add('continuous')
    model1.add('dummy')
    model1.add('threecats')
    model1.add(random='0+threecats|subj')
    model1.add(random='1|item')
    model1.add(random='0+continuous|item')
    model1.add(random='dummy|item')
    model1.add(random='threecats|site')
    model1.build(backend='pymc3')
    # model1.fit(samples=1)

    # build model using stan backend
    model2 = Model(crossed_data_missing, dropna=True)
    fitted2 = model2.fit('Y ~ continuous + dummy + threecats',
                         random=[
                             '0+threecats|subj', '1|item', '0+continuous|item',
                             'dummy|item', 'threecats|site'
                         ],
                         backend='stan',
                         samples=100,
                         chains=2)

    # check that the random effects design matrices have the same shape
    X0 = pd.concat([
        pd.DataFrame(t.data) if not isinstance(t.data, dict) else pd.concat(
            [pd.DataFrame(t.data[x]) for x in t.data.keys()], axis=1)
        for t in model0.random_terms.values()
    ],
                   axis=1)
    X1 = pd.concat([
        pd.DataFrame(t.data) if not isinstance(t.data, dict) else pd.concat(
            [pd.DataFrame(t.data[x]) for x in t.data.keys()], axis=1)
        for t in model1.random_terms.values()
    ],
                   axis=1)
    X2 = pd.concat([
        pd.DataFrame(t.data) if not isinstance(t.data, dict) else pd.concat(
            [pd.DataFrame(t.data[x]) for x in t.data.keys()], axis=1)
        for t in model2.random_terms.values()
    ],
                   axis=1)
    assert X0.shape == X1.shape
    assert X0.shape == X2.shape
    assert X1.shape == X2.shape

    # check that the random effect design matrix contain the same columns,
    # even if term names / columns names / order of columns is different
    X0_set = set(tuple(X0.iloc[:, i]) for i in range(len(X0.columns)))
    X1_set = set(tuple(X1.iloc[:, i]) for i in range(len(X1.columns)))
    X2_set = set(tuple(X1.iloc[:, i]) for i in range(len(X2.columns)))
    assert X0_set == X1_set
    assert X0_set == X2_set
    assert X1_set == X2_set

    # check that fixed effect design matrices are the same,
    # even if term names / level names / order of columns is different
    X0 = set([
        tuple(t.data[:, lev]) for t in model0.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    X1 = set([
        tuple(t.data[:, lev]) for t in model1.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    X2 = set([
        tuple(t.data[:, lev]) for t in model2.fixed_terms.values()
        for lev in range(len(t.levels))
    ])
    assert X0 == X1
    assert X0 == X2
    assert X1 == X2

    # check that models have same priors for fixed effects
    priors0 = {
        x.name: x.prior.args
        for x in model0.terms.values() if not x.random
    }
    priors1 = {
        x.name: x.prior.args
        for x in model1.terms.values() if not x.random
    }
    priors2 = {
        x.name: x.prior.args
        for x in model2.terms.values() if not x.random
    }
    # check dictionary keys
    assert set(priors0) == set(priors1)
    assert set(priors0) == set(priors2)
    assert set(priors1) == set(priors2)

    # check dictionary values
    def dicts_close(a, b):
        if set(a) != set(b):
            return False
        else:
            return [
                np.allclose(a[x], b[x], atol=0, rtol=.01) for x in a.keys()
            ]

    assert all([dicts_close(priors0[x], priors1[x]) for x in priors0.keys()])
    assert all([dicts_close(priors0[x], priors2[x]) for x in priors0.keys()])
    assert all([dicts_close(priors1[x], priors2[x]) for x in priors0.keys()])

    # check that fit and add models have same priors for random effects
    priors0 = {
        x.name: x.prior.args['sd'].args
        for x in model0.terms.values() if x.random
    }
    priors1 = {
        x.name: x.prior.args['sd'].args
        for x in model1.terms.values() if x.random
    }
    priors2 = {
        x.name: x.prior.args['sd'].args
        for x in model2.terms.values() if x.random
    }
    # check dictionary keys
    assert set(priors0) == set(priors1)
    assert set(priors0) == set(priors2)
    assert set(priors1) == set(priors2)

    # check dictionary values
    def dicts_close(a, b):
        if set(a) != set(b):
            return False
        else:
            return [
                np.allclose(a[x], b[x], atol=0, rtol=.01) for x in a.keys()
            ]

    assert all([dicts_close(priors0[x], priors1[x]) for x in priors0.keys()])
    assert all([dicts_close(priors0[x], priors2[x]) for x in priors0.keys()])
    assert all([dicts_close(priors1[x], priors2[x]) for x in priors0.keys()])

    # test consistency between summary and to_df for pymc3
    assert len(set(fitted.to_df().columns)) == 15
    assert set(fitted.to_df().columns) == set(fitted.summary().index)

    # test consistency between summary and to_df for stan
    assert len(set(fitted2.to_df().columns)) == 15
    assert set(fitted2.to_df().columns) == set(fitted2.summary().index)

    # test consistenct between pymc3 and stan
    assert set(fitted.to_df().columns) == set(fitted2.to_df().columns)
    assert set(fitted.summary().index) == set(fitted2.summary().index)

    # check hide_transformed for pymc3
    # it looks like some versions of pymc3 add a trailing '_' to transformed
    # vars and some dont. so here for consistency we strip out any trailing '_'
    # that we find
    full = fitted.summary(ranefs=True, transformed=True).index
    full = set([re.sub(r'_$', r'', x) for x in full])
    test_set = fitted.summary(ranefs=True, transformed=False).index
    test_set = set([re.sub(r'_$', r'', x) for x in test_set])
    answer = {
        '1|item_offset[0]', '1|item_offset[10]', '1|item_offset[11]',
        '1|item_offset[1]', '1|item_offset[2]', '1|item_offset[3]',
        '1|item_offset[4]', '1|item_offset[5]', '1|item_offset[6]',
        '1|item_offset[7]', '1|item_offset[8]', '1|item_offset[9]',
        '1|item_sd_log', '1|site_offset[0]', '1|site_offset[1]',
        '1|site_offset[2]', '1|site_offset[3]', '1|site_offset[4]',
        '1|site_sd_log', 'Y_sd_interval', 'continuous|item_offset[0]',
        'continuous|item_offset[10]', 'continuous|item_offset[11]',
        'continuous|item_offset[1]', 'continuous|item_offset[2]',
        'continuous|item_offset[3]', 'continuous|item_offset[4]',
        'continuous|item_offset[5]', 'continuous|item_offset[6]',
        'continuous|item_offset[7]', 'continuous|item_offset[8]',
        'continuous|item_offset[9]', 'continuous|item_sd_log',
        'dummy|item_offset[0]', 'dummy|item_offset[10]',
        'dummy|item_offset[11]', 'dummy|item_offset[1]',
        'dummy|item_offset[2]', 'dummy|item_offset[3]', 'dummy|item_offset[4]',
        'dummy|item_offset[5]', 'dummy|item_offset[6]', 'dummy|item_offset[7]',
        'dummy|item_offset[8]', 'dummy|item_offset[9]', 'dummy|item_sd_log',
        'threecats[T.b]|site_offset[0]', 'threecats[T.b]|site_offset[1]',
        'threecats[T.b]|site_offset[2]', 'threecats[T.b]|site_offset[3]',
        'threecats[T.b]|site_offset[4]', 'threecats[T.b]|site_sd_log',
        'threecats[T.c]|site_offset[0]', 'threecats[T.c]|site_offset[1]',
        'threecats[T.c]|site_offset[2]', 'threecats[T.c]|site_offset[3]',
        'threecats[T.c]|site_offset[4]', 'threecats[T.c]|site_sd_log',
        'threecats[a]|subj_offset[0]', 'threecats[a]|subj_offset[1]',
        'threecats[a]|subj_offset[2]', 'threecats[a]|subj_offset[3]',
        'threecats[a]|subj_offset[4]', 'threecats[a]|subj_offset[5]',
        'threecats[a]|subj_offset[6]', 'threecats[a]|subj_offset[7]',
        'threecats[a]|subj_offset[8]', 'threecats[a]|subj_offset[9]',
        'threecats[a]|subj_sd_log', 'threecats[b]|subj_offset[0]',
        'threecats[b]|subj_offset[1]', 'threecats[b]|subj_offset[2]',
        'threecats[b]|subj_offset[3]', 'threecats[b]|subj_offset[4]',
        'threecats[b]|subj_offset[5]', 'threecats[b]|subj_offset[6]',
        'threecats[b]|subj_offset[7]', 'threecats[b]|subj_offset[8]',
        'threecats[b]|subj_offset[9]', 'threecats[b]|subj_sd_log',
        'threecats[c]|subj_offset[0]', 'threecats[c]|subj_offset[1]',
        'threecats[c]|subj_offset[2]', 'threecats[c]|subj_offset[3]',
        'threecats[c]|subj_offset[4]', 'threecats[c]|subj_offset[5]',
        'threecats[c]|subj_offset[6]', 'threecats[c]|subj_offset[7]',
        'threecats[c]|subj_offset[8]', 'threecats[c]|subj_offset[9]',
        'threecats[c]|subj_sd_log'
    }
    assert full.difference(test_set) == answer

    # check hide_transformed for stan
    full2 = fitted2.summary(ranefs=True, transformed=True)
    full2 = set(full2.index)
    test_set2 = fitted2.summary(ranefs=True, transformed=False)
    test_set2 = set(test_set2.index)
    answer = set(['lp__'] \
        + ['yhat[{}]'.format(i) for i in range(len(crossed_data.index)-2)] \
        + ['1|item_offset[0]','1|item_offset[10]','1|item_offset[11]',
        '1|item_offset[1]','1|item_offset[2]','1|item_offset[3]',
        '1|item_offset[4]','1|item_offset[5]','1|item_offset[6]',
        '1|item_offset[7]','1|item_offset[8]','1|item_offset[9]',
        '1|site_offset[0]','1|site_offset[1]',
        '1|site_offset[2]','1|site_offset[3]','1|site_offset[4]',
        'continuous|item_offset[0]',
        'continuous|item_offset[10]','continuous|item_offset[11]',
        'continuous|item_offset[1]','continuous|item_offset[2]',
        'continuous|item_offset[3]','continuous|item_offset[4]',
        'continuous|item_offset[5]','continuous|item_offset[6]',
        'continuous|item_offset[7]','continuous|item_offset[8]',
        'continuous|item_offset[9]',
        'dummy|item_offset[0]','dummy|item_offset[10]','dummy|item_offset[11]',
        'dummy|item_offset[1]','dummy|item_offset[2]','dummy|item_offset[3]',
        'dummy|item_offset[4]','dummy|item_offset[5]','dummy|item_offset[6]',
        'dummy|item_offset[7]','dummy|item_offset[8]','dummy|item_offset[9]',
        'threecats[T.b]|site_offset[0]',
        'threecats[T.b]|site_offset[1]','threecats[T.b]|site_offset[2]',
        'threecats[T.b]|site_offset[3]','threecats[T.b]|site_offset[4]',
        'threecats[T.c]|site_offset[0]',
        'threecats[T.c]|site_offset[1]','threecats[T.c]|site_offset[2]',
        'threecats[T.c]|site_offset[3]','threecats[T.c]|site_offset[4]',
        'threecats[a]|subj_offset[0]',
        'threecats[a]|subj_offset[1]','threecats[a]|subj_offset[2]',
        'threecats[a]|subj_offset[3]','threecats[a]|subj_offset[4]',
        'threecats[a]|subj_offset[5]','threecats[a]|subj_offset[6]',
        'threecats[a]|subj_offset[7]','threecats[a]|subj_offset[8]',
        'threecats[a]|subj_offset[9]',
        'threecats[b]|subj_offset[0]','threecats[b]|subj_offset[1]',
        'threecats[b]|subj_offset[2]','threecats[b]|subj_offset[3]',
        'threecats[b]|subj_offset[4]','threecats[b]|subj_offset[5]',
        'threecats[b]|subj_offset[6]','threecats[b]|subj_offset[7]',
        'threecats[b]|subj_offset[8]','threecats[b]|subj_offset[9]',
        'threecats[c]|subj_offset[0]',
        'threecats[c]|subj_offset[1]','threecats[c]|subj_offset[2]',
        'threecats[c]|subj_offset[3]','threecats[c]|subj_offset[4]',
        'threecats[c]|subj_offset[5]','threecats[c]|subj_offset[6]',
        'threecats[c]|subj_offset[7]','threecats[c]|subj_offset[8]',
        'threecats[c]|subj_offset[9]'])
    assert full2.difference(test_set2) == answer

    # check for consistency in parameter names between pymc3 and stan
    assert test_set == test_set2

    # check exclude_ranefs for pymc3
    test_set = fitted.summary(ranefs=False, transformed=True).index
    test_set = set([re.sub(r'_$', r'', x) for x in test_set])
    answer = {
        '1|item[0]', '1|item[10]', '1|item[11]', '1|item[1]', '1|item[2]',
        '1|item[3]', '1|item[4]', '1|item[5]', '1|item[6]', '1|item[7]',
        '1|item[8]', '1|item[9]', '1|item_offset[0]', '1|item_offset[10]',
        '1|item_offset[11]', '1|item_offset[1]', '1|item_offset[2]',
        '1|item_offset[3]', '1|item_offset[4]', '1|item_offset[5]',
        '1|item_offset[6]', '1|item_offset[7]', '1|item_offset[8]',
        '1|item_offset[9]', '1|site[0]', '1|site[1]', '1|site[2]', '1|site[3]',
        '1|site[4]', '1|site_offset[0]', '1|site_offset[1]',
        '1|site_offset[2]', '1|site_offset[3]', '1|site_offset[4]',
        'continuous|item[0]', 'continuous|item[10]', 'continuous|item[11]',
        'continuous|item[1]', 'continuous|item[2]', 'continuous|item[3]',
        'continuous|item[4]', 'continuous|item[5]', 'continuous|item[6]',
        'continuous|item[7]', 'continuous|item[8]', 'continuous|item[9]',
        'continuous|item_offset[0]', 'continuous|item_offset[10]',
        'continuous|item_offset[11]', 'continuous|item_offset[1]',
        'continuous|item_offset[2]', 'continuous|item_offset[3]',
        'continuous|item_offset[4]', 'continuous|item_offset[5]',
        'continuous|item_offset[6]', 'continuous|item_offset[7]',
        'continuous|item_offset[8]', 'continuous|item_offset[9]',
        'dummy|item[0]', 'dummy|item[10]', 'dummy|item[11]', 'dummy|item[1]',
        'dummy|item[2]', 'dummy|item[3]', 'dummy|item[4]', 'dummy|item[5]',
        'dummy|item[6]', 'dummy|item[7]', 'dummy|item[8]', 'dummy|item[9]',
        'dummy|item_offset[0]', 'dummy|item_offset[10]',
        'dummy|item_offset[11]', 'dummy|item_offset[1]',
        'dummy|item_offset[2]', 'dummy|item_offset[3]', 'dummy|item_offset[4]',
        'dummy|item_offset[5]', 'dummy|item_offset[6]', 'dummy|item_offset[7]',
        'dummy|item_offset[8]', 'dummy|item_offset[9]',
        'threecats[T.b]|site[0]', 'threecats[T.b]|site[1]',
        'threecats[T.b]|site[2]', 'threecats[T.b]|site[3]',
        'threecats[T.b]|site[4]', 'threecats[T.b]|site_offset[0]',
        'threecats[T.b]|site_offset[1]', 'threecats[T.b]|site_offset[2]',
        'threecats[T.b]|site_offset[3]', 'threecats[T.b]|site_offset[4]',
        'threecats[T.c]|site[0]', 'threecats[T.c]|site[1]',
        'threecats[T.c]|site[2]', 'threecats[T.c]|site[3]',
        'threecats[T.c]|site[4]', 'threecats[T.c]|site_offset[0]',
        'threecats[T.c]|site_offset[1]', 'threecats[T.c]|site_offset[2]',
        'threecats[T.c]|site_offset[3]', 'threecats[T.c]|site_offset[4]',
        'threecats[a]|subj[0]', 'threecats[a]|subj[1]', 'threecats[a]|subj[2]',
        'threecats[a]|subj[3]', 'threecats[a]|subj[4]', 'threecats[a]|subj[5]',
        'threecats[a]|subj[6]', 'threecats[a]|subj[7]', 'threecats[a]|subj[8]',
        'threecats[a]|subj[9]', 'threecats[a]|subj_offset[0]',
        'threecats[a]|subj_offset[1]', 'threecats[a]|subj_offset[2]',
        'threecats[a]|subj_offset[3]', 'threecats[a]|subj_offset[4]',
        'threecats[a]|subj_offset[5]', 'threecats[a]|subj_offset[6]',
        'threecats[a]|subj_offset[7]', 'threecats[a]|subj_offset[8]',
        'threecats[a]|subj_offset[9]', 'threecats[b]|subj[0]',
        'threecats[b]|subj[1]', 'threecats[b]|subj[2]', 'threecats[b]|subj[3]',
        'threecats[b]|subj[4]', 'threecats[b]|subj[5]', 'threecats[b]|subj[6]',
        'threecats[b]|subj[7]', 'threecats[b]|subj[8]', 'threecats[b]|subj[9]',
        'threecats[b]|subj_offset[0]', 'threecats[b]|subj_offset[1]',
        'threecats[b]|subj_offset[2]', 'threecats[b]|subj_offset[3]',
        'threecats[b]|subj_offset[4]', 'threecats[b]|subj_offset[5]',
        'threecats[b]|subj_offset[6]', 'threecats[b]|subj_offset[7]',
        'threecats[b]|subj_offset[8]', 'threecats[b]|subj_offset[9]',
        'threecats[c]|subj[0]', 'threecats[c]|subj[1]', 'threecats[c]|subj[2]',
        'threecats[c]|subj[3]', 'threecats[c]|subj[4]', 'threecats[c]|subj[5]',
        'threecats[c]|subj[6]', 'threecats[c]|subj[7]', 'threecats[c]|subj[8]',
        'threecats[c]|subj[9]', 'threecats[c]|subj_offset[0]',
        'threecats[c]|subj_offset[1]', 'threecats[c]|subj_offset[2]',
        'threecats[c]|subj_offset[3]', 'threecats[c]|subj_offset[4]',
        'threecats[c]|subj_offset[5]', 'threecats[c]|subj_offset[6]',
        'threecats[c]|subj_offset[7]', 'threecats[c]|subj_offset[8]',
        'threecats[c]|subj_offset[9]'
    }
    assert full.difference(test_set) == answer

    # check exclude_ranefs for stan
    test_set2 = set(fitted2.summary(ranefs=False, transformed=True).index)
    answer = set(['1|item[0]','1|item[10]','1|item[11]','1|item[1]','1|item[2]',
        '1|item[3]','1|item[4]','1|item[5]','1|item[6]','1|item[7]','1|item[8]',
        '1|item[9]','1|site[0]','1|site[1]','1|site[2]','1|site[3]','1|site[4]',
        'continuous|item[0]','continuous|item[10]','continuous|item[11]',
        'continuous|item[1]','continuous|item[2]','continuous|item[3]',
        'continuous|item[4]','continuous|item[5]','continuous|item[6]',
        'continuous|item[7]','continuous|item[8]','continuous|item[9]',
        'dummy|item[0]','dummy|item[10]','dummy|item[11]','dummy|item[1]',
        'dummy|item[2]','dummy|item[3]','dummy|item[4]','dummy|item[5]',
        'dummy|item[6]','dummy|item[7]','dummy|item[8]','dummy|item[9]',
        'threecats[T.b]|site[0]','threecats[T.b]|site[1]',
        'threecats[T.b]|site[2]','threecats[T.b]|site[3]',
        'threecats[T.b]|site[4]','threecats[T.c]|site[0]',
        'threecats[T.c]|site[1]','threecats[T.c]|site[2]',
        'threecats[T.c]|site[3]','threecats[T.c]|site[4]',
        'threecats[a]|subj[0]','threecats[a]|subj[1]','threecats[a]|subj[2]',
        'threecats[a]|subj[3]','threecats[a]|subj[4]','threecats[a]|subj[5]',
        'threecats[a]|subj[6]','threecats[a]|subj[7]','threecats[a]|subj[8]',
        'threecats[a]|subj[9]','threecats[b]|subj[0]','threecats[b]|subj[1]',
        'threecats[b]|subj[2]','threecats[b]|subj[3]','threecats[b]|subj[4]',
        'threecats[b]|subj[5]','threecats[b]|subj[6]','threecats[b]|subj[7]',
        'threecats[b]|subj[8]','threecats[b]|subj[9]','threecats[c]|subj[0]',
        'threecats[c]|subj[1]','threecats[c]|subj[2]','threecats[c]|subj[3]',
        'threecats[c]|subj[4]','threecats[c]|subj[5]','threecats[c]|subj[6]',
        'threecats[c]|subj[7]','threecats[c]|subj[8]','threecats[c]|subj[9]'] \
        + ['1|item_offset[0]','1|item_offset[10]','1|item_offset[11]',
        '1|item_offset[1]','1|item_offset[2]','1|item_offset[3]',
        '1|item_offset[4]','1|item_offset[5]','1|item_offset[6]',
        '1|item_offset[7]','1|item_offset[8]','1|item_offset[9]',
        '1|site_offset[0]','1|site_offset[1]',
        '1|site_offset[2]','1|site_offset[3]','1|site_offset[4]',
        'continuous|item_offset[0]',
        'continuous|item_offset[10]','continuous|item_offset[11]',
        'continuous|item_offset[1]','continuous|item_offset[2]',
        'continuous|item_offset[3]','continuous|item_offset[4]',
        'continuous|item_offset[5]','continuous|item_offset[6]',
        'continuous|item_offset[7]','continuous|item_offset[8]',
        'continuous|item_offset[9]',
        'dummy|item_offset[0]','dummy|item_offset[10]','dummy|item_offset[11]',
        'dummy|item_offset[1]','dummy|item_offset[2]','dummy|item_offset[3]',
        'dummy|item_offset[4]','dummy|item_offset[5]','dummy|item_offset[6]',
        'dummy|item_offset[7]','dummy|item_offset[8]','dummy|item_offset[9]',
        'threecats[T.b]|site_offset[0]',
        'threecats[T.b]|site_offset[1]','threecats[T.b]|site_offset[2]',
        'threecats[T.b]|site_offset[3]','threecats[T.b]|site_offset[4]',
        'threecats[T.c]|site_offset[0]',
        'threecats[T.c]|site_offset[1]','threecats[T.c]|site_offset[2]',
        'threecats[T.c]|site_offset[3]','threecats[T.c]|site_offset[4]',
        'threecats[a]|subj_offset[0]',
        'threecats[a]|subj_offset[1]','threecats[a]|subj_offset[2]',
        'threecats[a]|subj_offset[3]','threecats[a]|subj_offset[4]',
        'threecats[a]|subj_offset[5]','threecats[a]|subj_offset[6]',
        'threecats[a]|subj_offset[7]','threecats[a]|subj_offset[8]',
        'threecats[a]|subj_offset[9]',
        'threecats[b]|subj_offset[0]','threecats[b]|subj_offset[1]',
        'threecats[b]|subj_offset[2]','threecats[b]|subj_offset[3]',
        'threecats[b]|subj_offset[4]','threecats[b]|subj_offset[5]',
        'threecats[b]|subj_offset[6]','threecats[b]|subj_offset[7]',
        'threecats[b]|subj_offset[8]','threecats[b]|subj_offset[9]',
        'threecats[c]|subj_offset[0]',
        'threecats[c]|subj_offset[1]','threecats[c]|subj_offset[2]',
        'threecats[c]|subj_offset[3]','threecats[c]|subj_offset[4]',
        'threecats[c]|subj_offset[5]','threecats[c]|subj_offset[6]',
        'threecats[c]|subj_offset[7]','threecats[c]|subj_offset[8]',
        'threecats[c]|subj_offset[9]'])
    assert full2.difference(test_set2) == answer

    # test plots for pymc3
    fitted.plot(kind='priors')
    fitted.plot()

    # test plots for stan
    fitted2.plot(kind='priors')
    fitted2.plot()
예제 #26
0
def test_model_term_names_property(diabetes_data):
    model = Model(diabetes_data)
    model.add('BMI')
    model.add('BP')
    model.add('S1')
    assert model.term_names == ['Intercept', 'BMI', 'BP', 'S1']