def test_marginals_fitted_smoke(fitargs, formula_str, non_real_cols, family, contrasts):
    """Smoke-test the `marginals` and `fitted` outputs of a fitted model.

    A model is built from `formula_str` over a dummy data frame, fitted
    with the keyword arguments produced by `fitargs(num_samples)`, and the
    resulting arrays are checked for shape and finiteness only; no
    numerical values are compared.
    """
    num_rows = 10
    num_samples = 4

    columns = expand_columns(parse(formula_str), non_real_cols)
    data = dummy_df(columns, num_rows)
    fit = brm(formula_str, data, family, [], contrasts).fit(**fitargs(num_samples))

    # Sanity check output for `marginals`: one row per scalar parameter
    # and nine summary statistics per row.
    summary = fit.marginals().array
    expected_rows = len(scalar_parameter_names(fit.model_desc))
    assert summary.shape == (expected_rows, 9)
    # The trailing two columns (n_eff and r_hat) are frequently nan with
    # so few samples, so they are left out of the finiteness check.
    assert np.all(np.isfinite(summary[:, :-2]))

    # Sanity check output of `fitted`: every variant is expected to
    # produce one row per sample and one column per data row.
    fitted_shape = (num_samples, num_rows)

    def check_fitted(values):
        assert np.all(np.isfinite(values))
        assert values.shape == fitted_shape

    check_fitted(fit.fitted())
    for what in ('linear', 'response', 'sample'):
        check_fitted(fit.fitted(what))
    # Also exercise the code path that takes explicitly supplied data.
    check_fitted(fit.fitted(data=dummy_df(columns, num_rows)))
def test_mu_correctness(formula_str, cols, backend, expected):
    """Compare the linear predictor from `fitted` with a reference value.

    A single prior sample is drawn on `backend`. `expected` is called with
    the data frame and the fit's `get_scalar_param` accessor and must
    return an object with a `to_numpy` method; its result is compared to
    `fitted(what='linear')` with `np.allclose`.
    """
    design_cols = expand_columns(parse(formula_str), cols)
    df = dummy_df(design_cols, 10)
    prior_fit = brm(formula_str, df).prior(num_samples=1, backend=backend)

    # Only one sample was requested, so index 0 is the whole draw.
    linear_samples = prior_fit.fitted(what='linear')
    actual_mu = linear_samples[0]

    # `expected` is assumed to return a data frame.
    reference = expected(df, prior_fit.get_scalar_param)
    expected_mu = reference.to_numpy(np.float32)

    assert np.allclose(actual_mu, expected_mu)
def test_expectation_correctness(cols, family, expected, backend):
    """Check that `fitted(what='expectation')` agrees with `expected(mu)`.

    The model `y ~ 1 + x` is built with the given `family`, one prior
    sample is drawn on `backend`, and the expectation reported by `fitted`
    is compared against `expected` applied to the linear predictor.
    """
    formula_str = 'y ~ 1 + x'
    design_cols = expand_columns(parse(formula_str), cols)
    df = dummy_df(design_cols, 10)
    prior_fit = brm(formula_str, df, family=family).prior(
        num_samples=1, backend=backend)

    expectation_samples = prior_fit.fitted(what='expectation')
    actual_expectation = expectation_samples[0]

    # The linear predictor `mu` is taken on trust here (its correctness is
    # tested elsewhere); this test only covers the step from `mu` to the
    # expectation.
    mu = prior_fit.fitted('linear')[0]
    expected_expectation = expected(mu)

    assert np.allclose(actual_expectation, expected_expectation)
def test_rng_seed(fitargs):
    """Check that the `seed` argument controls reproducibility.

    Two fits with the same seed must give identical `fitted()` output and
    a fit with a different seed must not. The same is then checked for
    the `seed` argument of `fitted(what='sample')` on a single fit.
    `fitargs` is unpacked directly as extra keyword arguments to `fit`.
    """
    def identical(lhs, rhs):
        # Element-wise equality reduced to a single truth value.
        return (lhs == rhs).all()

    model = brm('y ~ 1', pd.DataFrame({'y': [0., 0.1, 0.2]}))

    # Seeds 0, 0, 1: the first two fits should match, the third should not.
    fit_a, fit_b, fit_c = [model.fit(seed=s, **fitargs) for s in (0, 0, 1)]
    assert identical(fit_a.fitted(), fit_b.fitted())
    assert not identical(fit_b.fitted(), fit_c.fitted())

    # Same pattern, but varying the seed passed to `fitted` on one fit.
    draws_a, draws_b, draws_c = [
        fit_a.fitted(what='sample', seed=s) for s in (0, 0, 1)
    ]
    assert identical(draws_a, draws_b)
    assert not identical(draws_b, draws_c)
def test_fitted_on_new_data(N2):
    """Check `fitted` on new data of `N2` rows when custom contrasts are used.

    The model is fitted on a dummy data frame and `fitted` is then called
    with a second dummy data frame; the result must be finite and have
    one row per sample and one column per new data row.
    """
    num_samples = 4
    num_train_rows = 10
    formula_str = 'y ~ 1 + a'

    # Using this contrast means `a` is coded as two columns rather
    # than (the default) one. Because of this, it's crucial that
    # `fitted` uses the contrast when coding *new data*. This test
    # would fail if that didn't happen.
    contrasts = {'a': np.array([[-1, -1], [1, 1]])}

    factor = Categorical('a', ['a0', 'a1'])
    cols = expand_columns(parse(formula_str), [factor])
    train_df = dummy_df(cols, num_train_rows)
    model = brm(formula_str, train_df, Normal, contrasts=contrasts)
    fit = model.fit(iter=num_samples, backend=pyro_backend)

    new_data = dummy_df(cols, N2, allow_non_exhaustive=True)
    predictions = fit.fitted(data=new_data)

    assert np.all(np.isfinite(predictions))
    assert predictions.shape == (num_samples, N2)