def test_auto_scale(diabetes_data):
    """Check term priors with automatic scaling on (default) and switched off."""
    formula = "BMI ~ S1 + S2 + BP"
    term_names = ("S1", "S2", "BP")

    # Default settings: S1 gets a numeric prior, BP an explicit Prior object,
    # S2 is left unspecified.
    scaled_priors = {"S1": 0.3, "BP": Prior("Cauchy", alpha=1, beta=17.5)}
    scaled_model = Model(formula, diabetes_data, priors=scaled_priors)
    s1_prior, s2_prior, bp_prior = [scaled_model.terms[label].prior for label in term_names]

    assert s1_prior.name == "Normal"
    assert s2_prior.name == "Normal"
    s1_sigma = s1_prior.args["sigma"]
    assert 0 < s1_sigma < 1
    assert s2_prior.args["sigma"] > s1_sigma
    # The explicit Prior object must come through untouched.
    assert bp_prior.name == "Cauchy"
    assert bp_prior.args["beta"] == 17.5

    # auto_scale=False: the expectations below are a Normal with sigma == 1
    # for S1, a Flat prior for S2, and the explicit Cauchy for BP.
    unscaled_priors = {"S1": 0.3, "BP": Prior("Cauchy", alpha=1, beta=17.5)}
    unscaled_model = Model(formula, diabetes_data, priors=unscaled_priors, auto_scale=False)
    s1_off, s2_off, bp_off = [unscaled_model.terms[label].prior for label in term_names]

    assert s1_off.name == "Normal"
    assert s2_off.name == "Flat"
    assert s1_off.args["sigma"] == 1
    assert "sigma" not in s2_off.args
    assert bp_off.name == "Cauchy"
def test_prior_class():
    """Check that a Prior stores its name and arguments and that ``update`` works.

    ``update`` is called with one argument that already exists (``taste``)
    and one that is new (``return_to_store``); both are verified afterwards.
    """
    prior = Prior("CheeseWhiz", holes=0, taste=-10)
    assert prior.name == "CheeseWhiz"
    assert isinstance(prior.args, dict)
    assert prior.args["taste"] == -10

    prior.update(taste=-100, return_to_store=1)
    # The existing argument must be overwritten, not only the new one added.
    assert prior.args["taste"] == -100
    assert prior.args["return_to_store"] == 1
def test_prior_class():
    """Check that a Prior stores its name and arguments and that ``update`` works.

    ``update`` receives one pre-existing argument (``taste``) and one new
    argument (``return_to_store``); the test verifies both afterwards.
    """
    prior = Prior('CheeseWhiz', holes=0, taste=-10)
    assert prior.name == 'CheeseWhiz'
    assert isinstance(prior.args, dict)
    assert prior.args['taste'] == -10

    prior.update(taste=-100, return_to_store=1)
    # Verify the overwritten value as well as the newly added key.
    assert prior.args['taste'] == -100
    assert prior.args['return_to_store'] == 1
def test_prior_str():
    """Check the text form of a Prior, including a nested one, and that str equals repr."""
    flat = Prior("Normal", mu=0, sigma=1)
    nested = Prior("Normal", mu=0, sigma=Prior("HalfNormal", sigma=1))

    expected_text = (
        (flat, "Normal(mu: 0, sigma: 1)"),
        (nested, "Normal(mu: 0, sigma: HalfNormal(sigma: 1))"),
    )
    for prior, text in expected_text:
        assert str(prior) == text

    # __repr__ is expected to give the same text as __str__.
    assert str(flat) == repr(flat)
def test_update_term_priors_after_init(diabetes_data):
    """Exercise ``set_priors`` on a model whose terms were added beforehand."""
    model = Model(diabetes_data)
    for fixed_term in ('BMI', 'S1'):
        model.add(fixed_term)
    model.add(random='age_grp|BP')

    normal_prior = Prior('Normal', mu=-10, sd=10)
    beta_prior = Prior('Beta', alpha=2, beta=2)

    # Per-term assignment: a number for BMI, a Prior object for S1.
    model.set_priors({'BMI': 0.3, 'S1': beta_prior})
    assert model.terms['S1'].prior.args['beta'] == 2
    assert model.terms['BMI'].prior == 0.3

    # A tuple key assigns the same prior to several terms at once.
    model.set_priors({('S1', 'BMI'): normal_prior})
    assert model.terms['S1'].prior.args['sd'] == 10
    assert model.terms['BMI'].prior.args['mu'] == -10

    # Assignment by term type through the fixed= / random= keywords.
    nested_prior = Prior('Normal', mu=0, sd=Prior('Normal', mu=0, sd=7))
    model.set_priors(fixed=0.4, random=nested_prior)
    assert model.terms['BMI'].prior == 0.4
    assert model.terms['age_grp|BP'].prior.args['sd'].args['sd'] == 7

    # A name that matches no term must be rejected.
    with pytest.raises(ValueError):
        model.set_priors({'nonexistent_term': 0.3})

    # Partial names: 'age_grp|BP' is expected to reach the expanded terms.
    partial_model = Model(diabetes_data)
    partial_model.add(random='age_grp|BP', categorical='age_grp')
    partial_model.set_priors({'age_grp|BP': 0.5})
    for expanded_term in ('age_grp[T.1]|BP', '1|BP'):
        assert partial_model.terms[expanded_term].prior == 0.5
def test_prior_eq():
    """Check Prior equality: self-equal, unequal to a different prior or a non-Prior."""
    simple = Prior("Normal", mu=0, sigma=1)
    hierarchical = Prior("Normal", mu=0, sigma=Prior("HalfNormal", sigma=1))

    # Each prior compares equal to itself.
    assert simple == simple
    assert hierarchical == hierarchical

    # Different arguments, or a different type altogether, compare unequal.
    assert simple != hierarchical
    assert simple != "Prior"
def test_hyperprior_on_common_effect():
    """A prior carrying a hyperprior must be rejected for common effects."""
    n_obs = 100
    response = np.random.normal(size=n_obs)
    predictor = np.random.normal(size=n_obs)
    groups = ["a"] * 50 + ["b"] * 50
    frame = pd.DataFrame({"y": response, "x1": predictor, "g1": groups})

    hyper_slope = Prior("Normal", mu=0, sd=Prior("HalfCauchy", beta=2))

    # Once addressed by term name, once through the "common" key.
    for key in ("x1", "common"):
        with pytest.raises(ValueError):
            Model("y ~ x1 + (x1|g1)", frame, priors={key: hyper_slope})
def test_auto_scale(diabetes_data):
    """Compare the priors produced with automatic scaling enabled and disabled."""
    formula = "BMI ~ S1 + S2 + BP"
    term_names = ("S1", "S2", "BP")

    # Automatic scaling enabled (the default).
    scaled_model = Model(diabetes_data)
    # One numeric prior (S1) and one explicit Prior object (BP); S2 is left
    # unspecified. The same dict is reused for the second model below.
    user_priors = {"S1": 0.3, "BP": Prior("Cauchy", alpha=1, beta=17.5)}
    scaled_model.fit(formula, run=False, priors=user_priors)
    scaled_model.build(backend="pymc3")
    s1_prior, s2_prior, bp_prior = [scaled_model.terms[label].prior for label in term_names]

    assert s1_prior.name == "Normal"
    assert s2_prior.name == "Normal"
    s1_sd = s1_prior.args["sd"]
    assert 0 < s1_sd < 1
    assert s2_prior.args["sd"] > s1_sd
    assert bp_prior.name == "Cauchy"
    assert bp_prior.args["beta"] == 17.5

    # Automatic scaling disabled: terms without an entry in the dict are
    # expected to come out Flat.
    unscaled_model = Model(diabetes_data, auto_scale=False)
    unscaled_model.fit(formula, run=False, priors=user_priors)
    unscaled_model.build(backend="pymc3")
    s1_off, s2_off, bp_off = [unscaled_model.terms[label].prior for label in term_names]

    assert s1_off.name == "Normal"
    assert s2_off.name == "Flat"
    assert 0 < s1_off.args["sd"] < 1
    assert "sd" not in s2_off.args
    assert bp_off.name == "Cauchy"
    assert bp_off.args["beta"] == 17.5
def test_set_priors():
    """Check ``set_priors`` by effect type (common / group-specific) and by term name."""
    n_obs = 100
    response = np.random.normal(size=n_obs)
    predictor = np.random.normal(size=n_obs)
    group = np.random.choice(["A", "B"], size=n_obs)
    frame = pd.DataFrame({"y": response, "x": predictor, "g": group})

    formula = "y ~ x + (1|g)"
    typed_model = Model(formula, frame)
    uniform = Prior("Uniform", lower=0, upper=50)

    # common= is checked against both the intercept and the slope.
    typed_model.set_priors(common=uniform)
    for label in ("Intercept", "x"):
        assert typed_model.terms[label].prior == uniform

    # group_specific= is checked against the group-level term.
    typed_model.set_priors(group_specific=uniform)
    assert typed_model.terms["1|g"].prior == uniform

    # Fresh model: assign by explicit term name, one call per term.
    named_model = Model(formula, frame)
    named_terms = ("x", "1|g")
    for label in named_terms:
        named_model.set_priors(priors={label: uniform})
    for label in named_terms:
        assert named_model.terms[label].prior == uniform
def test_auto_scale(diabetes_data):
    """Build the same model with and without auto-scaling and inspect the term priors."""
    spec = 'BMI ~ S1 + S2 + BP'

    def term_priors(fitted):
        # Collect the priors of the three predictors, in formula order.
        return tuple(fitted.terms[label].prior for label in ('S1', 'S2', 'BP'))

    # Scaling on (default settings).
    with_scaling = Model(diabetes_data)
    # Shared by both models: a number for S1, a Prior object for BP.
    requested = {'S1': 0.3, 'BP': Prior('Cauchy', alpha=1, beta=17.5)}
    with_scaling.fit(spec, run=False, priors=requested)
    with_scaling.build(backend='pymc3')
    on_s1, on_s2, on_bp = term_priors(with_scaling)

    assert on_s1.name == 'Normal'
    assert on_s2.name == 'Normal'
    assert 0 < on_s1.args['sd'] < 1
    assert on_s2.args['sd'] > on_s1.args['sd']
    assert on_bp.name == 'Cauchy'
    assert on_bp.args['beta'] == 17.5

    # Scaling off: S2 has no entry in the dict and is expected to be Flat.
    without_scaling = Model(diabetes_data, auto_scale=False)
    without_scaling.fit(spec, run=False, priors=requested)
    without_scaling.build(backend='pymc3')
    off_s1, off_s2, off_bp = term_priors(without_scaling)

    assert off_s1.name == 'Normal'
    assert off_s2.name == 'Flat'
    assert 0 < off_s1.args['sd'] < 1
    assert 'sd' not in off_s2.args
    assert off_bp.name == 'Cauchy'
    assert off_bp.args['beta'] == 17.5
def test_family_class():
    """A Family built from a Likelihood exposes ``name``, ``likelihood`` and ``link``."""
    cheese_prior = Prior("CheeseWhiz", holes=0, taste=-10)
    cheese_likelihood = Likelihood("Cheese", parent="holes", cheese=cheese_prior)
    family = Family("cheese", likelihood=cheese_likelihood, link="ferment")

    for attribute in ("name", "likelihood", "link"):
        assert hasattr(family, attribute)
def test_response_prior_fail():
    """Model construction must fail when a predictor shares its name with a response parameter."""
    counts = np.random.randint(3, 10, size=50)
    noise = np.random.normal(size=50)
    frame = pd.DataFrame({"y": counts, "sigma": noise})

    # Default family: the predictor is called "sigma".
    sigma_priors = {"sigma": Prior("Uniform", lower=0, upper=50)}
    with pytest.raises(ValueError):
        Model("y ~ sigma", frame, priors=sigma_priors)

    # Same check with the predictor renamed to "alpha" for two other families.
    frame = frame.rename(columns={"sigma": "alpha"})
    alpha_priors = {"alpha": Prior("Uniform", lower=0, upper=50)}
    for family_name in ("negativebinomial", "gamma"):
        with pytest.raises(ValueError):
            Model("y ~ alpha", frame, family=family_name, priors=alpha_priors)
def test_complete_separation():
    """Perfectly separated data raises with MLE-based priors but not with explicit ones."""
    outcome = [0] * 5 + [1] * 5
    group = ["a"] * 5 + ["b"] * 5
    frame = pd.DataFrame({"y": outcome, "g": group})

    with pytest.raises(PerfectSeparationError):
        Model("y ~ g", frame, family="bernoulli", automatic_priors="mle")

    # With an explicit prior for the common effects, construction must succeed.
    explicit = {"common": Prior("Normal", mu=0, sigma=10)}
    Model("y ~ g", frame, family="bernoulli", priors=explicit)
def test_laplace():
    """Compare the Laplace fit of an intercept-only Bernoulli model with sample statistics."""
    # 30 zeros followed by 60 ones in a single column "w".
    frame = pd.DataFrame(np.repeat((0, 1), (30, 60)), columns=["w"])
    intercept_prior = {"Intercept": Prior("Uniform", lower=0, upper=1)}
    model = Model(
        "w ~ 1",
        data=frame,
        family="bernoulli",
        priors=intercept_prior,
        link="identity",
    )
    fit = model.fit(method="laplace")

    # Numerical results, rounded to two decimals.
    numeric_mode = np.round(fit["Intercept"][0], 2)
    numeric_std = np.round(fit["Intercept"][1][0], 2)

    # Reference values: sample mean and standard error of the mean.
    analytic_mode = frame.mean()
    analytic_std = frame.std() / len(frame) ** 0.5

    np.testing.assert_array_almost_equal(
        (numeric_mode, numeric_std),
        (analytic_mode.item(), analytic_std.item()),
        decimal=2,
    )
def add_y(self, variable, prior=None, family='gaussian', link=None, *args,
          **kwargs):
    '''
    Add a dependent (or outcome) variable to the model.

    The variable is added through add_term() and then moved out of
    self.terms into self.y. As side effects, self.family is set and
    self.built is reset to False.

    Args:
        variable (str): the name of the dataset column containing the
            y values.
        prior (Prior, int, float, str): Optional specification of prior.
            Can be an instance of class Prior, a numeric value, or a string
            describing the width. In the numeric case, the distribution
            specified in the defaults will be used, and the passed value
            will be used to scale the appropriate variance parameter. For
            strings (e.g., 'wide', 'narrow', 'medium', or 'superwide'),
            predefined values will be used. If None, the family's own
            prior is used.
        family (str, Family): A specification of the model family
            (analogous to the family object in R). Either a string, or an
            instance of class priors.Family. If a string is passed, a
            family with the corresponding name must be defined in the
            defaults loaded at Model initialization. Valid pre-defined
            families are 'gaussian', 'binomial', 'poisson', and 't'.
        link (str): The model link function to use. Can be either a string
            (must be one of the options defined in the current backend;
            typically this will include at least 'identity', 'logit',
            'inverse', and 'exp'), or a callable that takes a 1D ndarray
            or theano tensor as the sole argument and returns one with
            the same shape.
        args, kwargs: Optional positional and keyword arguments to pass
            onto add_term().
    '''
    # A family given by name is looked up in the model's default priors.
    if isinstance(family, string_types):
        family = self.default_priors.get(family=family)
    self.family = family

    # Override family's link if another is explicitly passed
    # NOTE(review): this assigns onto the family object itself; if
    # default_priors.get() hands out a shared instance, the override would
    # leak to later users of that family -- confirm it returns a copy.
    if link is not None:
        self.family.link = link

    if prior is None:
        prior = self.family.prior

    # implement default Uniform [0, sd(Y)] prior for residual SD
    # NOTE(review): assumed to run for every gaussian family, including when
    # the caller supplied `prior` -- confirm the intended nesting. A numeric
    # or string prior has no .update() method, and update() modifies the
    # prior object in place (the family's own prior when none was passed).
    if self.family.name == 'gaussian':
        prior.update(
            sd=Prior('Uniform', lower=0, upper=self.data[variable].std()))

    # `prior` is passed by keyword; any extra positional args are still
    # forwarded positionally after `variable`.
    self.add_term(variable, prior=prior, *args, **kwargs)
    # use last-added term name b/c it could have been changed by add_term
    name = list(self.terms.values())[-1].name
    # Move the response out of the regular terms and keep it as self.y.
    self.y = self.terms.pop(name)
    # Mark the model as not built after this change.
    self.built = False
def test_update_term_priors_after_init(diabetes_data):
    """Exercise ``set_priors`` after terms were added, rebuilding before each check.

    Covers assignment by term name, by a tuple of names, by the ``fixed`` /
    ``random`` keywords, rejection of unknown names, and partial-name matching.
    """
    model = Model(diabetes_data)
    model.add("Y ~ BMI")
    model.add("S1")
    model.add(random="age_grp|BP")

    p1 = Prior("Normal", mu=-10, sigma=10)
    p2 = Prior("Beta", alpha=2, beta=2)

    # Numeric prior for BMI, explicit Prior object for S1.
    model.set_priors({"BMI": 0.3, "S1": p2})
    model.build(backend="pymc")
    assert model.terms["S1"].prior.args["beta"] == 2
    # After the build, the number is found in prior.scale and sigma is a
    # data-dependent value (about 4.7 here).
    assert model.terms["BMI"].prior.scale == 0.3
    # The isclose() result is indexed, so sigma is presumably array-like -- TODO confirm.
    assert np.isclose(model.terms["BMI"].prior.args["sigma"], 4.7, rtol=0.1)[0]

    # A tuple key assigns the same prior to several terms.
    model.set_priors({("S1", "BMI"): p1})
    model.build(backend="pymc")
    assert model.terms["S1"].prior.args["sigma"] == 10
    assert model.terms["BMI"].prior.args["mu"] == -10

    # Assignment by term type; p3 nests a Prior as its sigma.
    p3 = Prior("Normal", mu=0, sigma=Prior("Normal", mu=0, sigma=7))
    model.set_priors(fixed=0.3, random=p3)
    model.build(backend="pymc")
    assert model.terms["BMI"].prior.scale == 0.3
    assert np.isclose(model.terms["BMI"].prior.args["sigma"], 4.7, rtol=0.1)[0]
    assert model.terms["age_grp|BP"].prior.args["sigma"].args["sigma"] == 7

    # Invalid names should raise error
    # NOTE(review): build() is assumed to belong inside the raises block, so
    # the test passes whether set_priors() or build() raises -- confirm.
    with pytest.raises(ValueError):
        model.set_priors({"nonexistent_term": 0.3})
        model.build(backend="pymc")

    # Test for partial names, e.g., 'threecats' should match 'threecats[0]'.
    model = Model(diabetes_data)
    model.add("Y ~ 1", random="age_grp|BP", categorical="age_grp")
    model.set_priors({"age_grp|BP": 0.5})
    model.build(backend="pymc")
    assert model.terms["age_grp[T.1]|BP"].prior.scale == 0.5
    assert np.isclose(
        model.terms["age_grp[T.1]|BP"].prior.args["sigma"].args["sigma"], 94, rtol=0.2
    )
    assert model.terms["1|BP"].prior.scale == 0.5
def test_set_prior_unexisting_term():
    """``set_priors`` must raise when a tuple key names a term the model lacks."""
    response = np.random.normal(size=100)
    predictor = np.random.normal(size=100)
    frame = pd.DataFrame({"y": response, "x": predictor})

    uniform = Prior("Uniform", lower=0, upper=50)
    model = Model("y ~ x", frame)

    # "x" exists in the model, "z" does not.
    requested = {("x", "z"): uniform}
    with pytest.raises(ValueError):
        model.set_priors(priors=requested)
def test_update_term_priors_after_init(diabetes_data):
    """Exercise ``set_priors`` on terms that were added with ``add_term``."""
    model = Model(diabetes_data)
    for fixed_term in ('BMI', 'S1'):
        model.add_term(fixed_term)
    model.add_term('age_grp', random=True, over='BP')

    normal_prior = Prior('Normal', mu=-10, sd=10)
    beta_prior = Prior('Beta', alpha=2, beta=2)

    # By name: a number for BMI, a Prior object for S1.
    model.set_priors({'BMI': 0.3, 'S1': beta_prior})
    assert model.terms['S1'].prior.args['beta'] == 2
    assert model.terms['BMI'].prior == 0.3

    # By tuple of names: both terms receive the same Prior.
    model.set_priors({('S1', 'BMI'): normal_prior})
    assert model.terms['S1'].prior.args['sd'] == 10
    assert model.terms['BMI'].prior.args['mu'] == -10

    # By term type, with a Prior nested inside the random-effect prior.
    nested_prior = Prior('Normal', mu=0, sd=Prior('Normal', mu=0, sd=7))
    model.set_priors(fixed=0.4, random=nested_prior)
    assert model.terms['BMI'].prior == 0.4
    assert model.terms['age_grp|BP'].prior.args['sd'].args['sd'] == 7
def test_set_response_prior():
    """``set_priors`` must store priors for the response's auxiliary parameters."""
    counts = np.random.randint(3, 10, size=50)
    predictor = np.random.normal(size=50)
    frame = pd.DataFrame({"y": counts, "x": predictor})

    # (extra Model keyword arguments, parameter name, lower bound, upper bound)
    scenarios = (
        ({}, "sigma", 0, 50),
        ({"family": "negativebinomial"}, "alpha", 1, 20),
        ({"family": "gamma"}, "alpha", 0, 50),
    )
    for model_kwargs, parameter, lower, upper in scenarios:
        requested = {parameter: Prior("Uniform", lower=lower, upper=upper)}
        model = Model("y ~ x", frame, **model_kwargs)
        model.set_priors(requested)
        # Compare against a freshly constructed, equal Prior.
        expected = Prior("Uniform", lower=lower, upper=upper)
        assert model.response.prior.args[parameter] == expected
def test_response_prior():
    """Priors passed for response parameters must end up on the family's likelihood.

    Three cases are covered: ``sigma`` with the default family, and ``alpha``
    with the ``negativebinomial`` and ``gamma`` families. Each case is checked
    once; an earlier version repeated the gamma case verbatim.
    """
    data = pd.DataFrame({"y": np.random.randint(3, 10, size=50), "x": np.random.normal(size=50)})

    # (extra Model keyword arguments, parameter name, lower bound, upper bound)
    cases = (
        ({}, "sigma", 0, 50),
        ({"family": "negativebinomial"}, "alpha", 1, 20),
        ({"family": "gamma"}, "alpha", 0, 50),
    )
    for model_kwargs, name, lower, upper in cases:
        priors = {name: Prior("Uniform", lower=lower, upper=upper)}
        model = Model("y ~ x", data, priors=priors, **model_kwargs)
        # The prior held by the model has auto_scale set to False; set the
        # local one to match before comparing.
        priors[name].auto_scale = False
        assert model.family.likelihood.priors[name] == priors[name]
def test_likelihood_class():
    """A Likelihood exposes ``name``, ``priors`` and ``parent`` for known and unknown names."""
    sigma = Prior("HalfNormal", sigma=100)

    # "Normal" is a recognized likelihood. "Magic" is not, and the class is
    # expected to accept it anyway; whether it works in PyMC3 is up to the user.
    configurations = (("Normal", "mu"), ("Magic", "Wizard"))
    for likelihood_name, parent_name in configurations:
        likelihood = Likelihood(likelihood_name, parent=parent_name, sigma=sigma)
        for attribute in ("name", "priors", "parent"):
            assert hasattr(likelihood, attribute)
def test_set_prior_with_tuple():
    """A tuple key in ``set_priors`` assigns the same prior to every named term."""
    n_obs = 100
    response = np.random.normal(size=n_obs)
    first = np.random.normal(size=n_obs)
    second = np.random.normal(size=n_obs)
    frame = pd.DataFrame({"y": response, "x": first, "z": second})

    uniform = Prior("Uniform", lower=0, upper=50)
    model = Model("y ~ x + z", frame)
    model.set_priors(priors={("x", "z"): uniform})

    for label in ("x", "z"):
        assert model.terms[label].prior == uniform
def test_likelihood_bad_priors():
    """Likelihood must raise ValueError for missing, mistyped or unexpected priors."""
    half_normal = Prior("HalfNormal", sigma=100)

    # No prior given although one is required.
    with pytest.raises(ValueError):
        Likelihood("Normal", parent="mu")

    # A plain string where a Prior instance is expected.
    with pytest.raises(ValueError):
        Likelihood("Normal", parent="mu", sigma="HalfNormal")

    # A prior supplied to a likelihood that takes none.
    with pytest.raises(ValueError):
        Likelihood("Bernoulli", sigma=half_normal)

    # A prior supplied, but not the one this likelihood needs.
    with pytest.raises(ValueError):
        Likelihood("Gamma", sigma=half_normal)
def test_family_class():
    """A Family built from a prior exposes name, prior, link and parent."""
    cheese_prior = Prior('CheeseWhiz', holes=0, taste=-10)
    cheese_family = Family('cheese', cheese_prior, link='ferment', parent='holes')

    for attribute in ('name', 'prior', 'link', 'parent'):
        assert hasattr(cheese_family, attribute)
def test_family_link_unsupported():
    """``Family._set_link`` must raise ValueError for the link name "Empty"."""
    cheese_prior = Prior("CheeseWhiz", holes=0, taste=-10)
    cheese_likelihood = Likelihood("Cheese", parent="holes", cheese=cheese_prior)
    cheese_family = Family("cheese", likelihood=cheese_likelihood, link="ferment")

    with pytest.raises(ValueError):
        cheese_family._set_link("Empty")
def test_family_class():
    """Check that a Family instance carries its four basic attributes."""
    whiz = Prior("CheeseWhiz", holes=0, taste=-10)
    family = Family("cheese", whiz, link="ferment", parent="holes")

    expected_attributes = ("name", "prior", "link", "parent")
    for attribute in expected_attributes:
        assert hasattr(family, attribute)
def test_likelihood_parent_inferred():
    """Omitting ``parent`` must give the same parent as passing "mu" for a Normal likelihood."""
    half_normal = Prior("HalfNormal", sigma=100)

    explicit = Likelihood("Normal", parent="mu", sigma=half_normal)
    inferred = Likelihood("Normal", sigma=half_normal)

    assert explicit.parent == inferred.parent
def test_likelihood_bad_parent():
    """Likelihood must raise ValueError for the two parent names used here."""
    # "Mu" (capitalised) passed as the parent of a Normal likelihood.
    half_normal = Prior("HalfNormal", sigma=100)
    with pytest.raises(ValueError):
        Likelihood("Normal", parent="Mu", sigma=half_normal)

    # "mu" passed as the parent of a Bernoulli likelihood.
    with pytest.raises(ValueError):
        Likelihood("Bernoulli", parent="mu")