def test_marginals_fitted_smoke(fitargs, formula_str, non_real_cols, family, contrasts):
    """Smoke test: `marginals` and `fitted` run and produce finite,
    correctly-shaped output on a freshly fitted model."""
    num_rows = 10
    num_samples = 4
    columns = expand_columns(parse(formula_str), non_real_cols)
    data_frame = dummy_df(columns, num_rows)
    model = brm(formula_str, data_frame, family, [], contrasts)
    fit = model.fit(**fitargs(num_samples))

    # Sanity check output for `marginals`.
    marginals_arr = fit.marginals().array
    coef_count = len(scalar_parameter_names(fit.model_desc))
    assert marginals_arr.shape == (coef_count, 9)  # num coefs x num stats
    # Don't check finiteness of n_eff and r_hat, which are frequently
    # nan with few samples
    assert np.isfinite(marginals_arr[:, :-2]).all()

    # Sanity check output of `fitted`.
    def check(arr, expected_shape):
        assert np.isfinite(arr).all()
        assert arr.shape == expected_shape

    check(fit.fitted(), (num_samples, num_rows))
    for what in ('linear', 'response', 'sample'):
        check(fit.fitted(what), (num_samples, num_rows))
    check(fit.fitted(data=dummy_df(columns, num_rows)), (num_samples, num_rows))
def test_numpyro_codegen(N, formula_str, non_real_cols, contrasts, family, priors, expected):
    """Check that the numpyro backend generates a model whose sample sites
    and site distributions/parameters match the test spec in `expected`."""
    # Make dummy data.
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))

    # Generate model function and data.
    modelfn = numpyro_backend.gen(desc).fn
    df = dummy_df(cols, N)
    data = data_from_numpy(numpyro_backend, makedata(formula, df, metadata, contrasts))

    # Check sample sites.
    rng = random.PRNGKey(0)
    trace = numpyro.trace(numpyro.seed(modelfn, rng)).get_trace(**data)
    expected_sites = [site for (site, _, _) in expected]
    sample_sites = [name for name, node in trace.items() if not node['is_observed']]
    assert set(sample_sites) == set(expected_sites)
    for (site, family_name, maybe_params) in expected:
        # brmp calls the distribution `LKJ`; numpyro names it `LKJCholesky`.
        numpyro_family_name = dict(LKJ='LKJCholesky').get(family_name, family_name)
        fn = trace[site]['fn']
        params = maybe_params or default_params[family_name]
        assert type(fn).__name__ == numpyro_family_name
        for (name, expected_val) in params.items():
            if family_name == 'LKJ':
                # numpyro uses `concentration` for the parameter brmp
                # names `eta`.
                assert name == 'eta'
                name = 'concentration'
            # `getattr` is the idiomatic dynamic attribute lookup; calling
            # `__getattribute__` directly is discouraged.
            val = getattr(fn, name)
            assert_equal(val._value, np.broadcast_to(expected_val, val.shape))
def test_parameter_shapes(formula_str, non_real_cols, contrasts, family, priors, expected, fitargs):
    """Check that each parameter drawn from the prior has its declared
    shape, with and without the chain dimension."""
    # Make dummy data.
    num_rows = 5
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    df = dummy_df(cols, num_rows, allow_non_exhaustive=True)

    # Define model, and generate a single posterior sample.
    metadata = metadata_from_cols(cols)
    model = define_model(formula_str, metadata, family, priors, contrasts).gen(fitargs['backend'])
    data = model.encode(df)
    fit = model.run_algo('prior', data, num_samples=1, seed=None)

    num_chains = fitargs.get('num_chains', 1)

    # Check parameter sizes.
    for parameter in parameters(fit.model_desc):
        param_shape = parameter.shape
        # A single sample is collected by each chain for all cases.
        draws = fit.get_param(parameter.name)
        assert draws.shape == (num_chains,) + param_shape
        draws_with_chain_dim = fit.get_param(parameter.name, True)
        assert draws_with_chain_dim.shape == (num_chains, 1) + param_shape
def test_pyro_codegen(N, formula_str, non_real_cols, contrasts, family, priors, expected):
    """Check that the pyro backend generates a model whose sample sites
    and site distributions/parameters match the test spec in `expected`."""
    # Make dummy data.
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    # Generate the model from the column information rather than from
    # the metadata extracted from `df`. Since N is small, the metadata
    # extracted from `df` might lose information compared to the full
    # metadata derived from `cols` (e.g. levels of a categorical
    # column) leading to unexpected results. e.g. Missing levels might
    # cause correlations not to be modelled, even though they ought
    # to be given the full metadata.
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))

    # Generate model function and data.
    modelfn = pyro_backend.gen(desc).fn
    df = dummy_df(cols, N)
    data = data_from_numpy(pyro_backend, makedata(formula, df, metadata, contrasts))

    # Check sample sites.
    trace = poutine.trace(modelfn).get_trace(**data)
    expected_sites = [site for (site, _, _) in expected]
    assert set(trace.stochastic_nodes) - {'obs'} == set(expected_sites)
    for (site, family_name, maybe_params) in expected:
        # brmp calls the distribution `LKJ`; pyro names it `LKJCorrCholesky`.
        pyro_family_name = dict(LKJ='LKJCorrCholesky').get(family_name, family_name)
        fn = unwrapfn(trace.nodes[site]['fn'])
        params = maybe_params or default_params[family_name]
        assert type(fn).__name__ == pyro_family_name
        for (name, expected_val) in params.items():
            # `getattr` is the idiomatic dynamic attribute lookup; calling
            # `__getattribute__` directly is discouraged.
            val = getattr(fn, name)
            assert_equal(val, torch.tensor(expected_val).expand(val.shape))
def test_mu_correctness(formula_str, cols, backend, expected):
    """Check that `fitted(what='linear')` agrees with a hand-computed mu
    (supplied by the `expected` fixture)."""
    data = dummy_df(expand_columns(parse(formula_str), cols), 10)
    fit = brm(formula_str, data).prior(num_samples=1, backend=backend)
    # Pick out the one (and only) sample drawn.
    actual_mu = fit.fitted(what='linear')[0]
    # `expected` is assumed to return a data frame.
    reference_mu = expected(data, fit.get_scalar_param).to_numpy(np.float32)
    assert np.allclose(actual_mu, reference_mu)
def test_fitted_on_new_data(N2):
    """Check that `fitted` applies the model's contrast coding when given
    *new* data rather than the data the model was fitted on."""
    num_samples = 4
    num_rows = 10
    formula_str = 'y ~ 1 + a'
    # Using this contrast means `a` is coded as two columns rather
    # than (the default) one. Because of this, it's crucial that
    # `fitted` uses the contrast when coding *new data*. This test
    # would fail if that didn't happen.
    contrasts = {'a': np.array([[-1, -1], [1, 1]])}
    cols = expand_columns(parse(formula_str), [Categorical('a', ['a0', 'a1'])])
    fit = brm(formula_str, dummy_df(cols, num_rows), Normal,
              contrasts=contrasts).fit(iter=num_samples, backend=pyro_backend)
    result = fit.fitted(data=dummy_df(cols, N2, allow_non_exhaustive=True))
    assert np.isfinite(result).all()
    assert result.shape == (num_samples, N2)
def test_expectation_correctness(cols, family, expected, backend):
    """Check `fitted(what='expectation')` against a reference computed
    from the linear predictor by the `expected` fixture."""
    formula_str = 'y ~ 1 + x'
    data = dummy_df(expand_columns(parse(formula_str), cols), 10)
    fit = brm(formula_str, data, family=family).prior(num_samples=1, backend=backend)
    actual_expectation = fit.fitted(what='expectation')[0]
    # We assume (since it's tested elsewhere) that `mu` is computed
    # correctly by `fitted`. So given that, we check that `fitted`
    # computes the correct expectation.
    reference_expectation = expected(fit.fitted('linear')[0])
    assert np.allclose(actual_expectation, reference_expectation)
def test_sampling_from_prior_smoke(N, backend, formula_str, non_real_cols, contrasts, family, priors, expected):
    """Smoke test: drawing from the prior on the given backend succeeds
    and returns a `Samples` instance."""
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    # Use full metadata for same reason given in comment in codegen test.
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))
    model = backend.gen(desc)
    df = dummy_df(cols, N, allow_non_exhaustive=True)
    data = data_from_numpy(backend, makedata(formula, df, metadata, contrasts))
    samples = backend.prior(data, model, num_samples=10, seed=None)
    # `isinstance` is preferred over `type(x) == T` for type checks.
    assert isinstance(samples, Samples)
def test_marginals_fitted_smoke(fitargs, formula_str, non_real_cols, family, contrasts, N2):
    """Smoke test: `marginals` and `fitted` (including on new data) run
    and produce finite, correctly-shaped output."""
    num_rows = 10
    num_samples = 4
    cols = expand_columns(parse(formula_str), non_real_cols)
    df = dummy_df(cols, num_rows)
    print(df)
    fit = defm(formula_str, df, family, contrasts=contrasts).fit(**fitargs(num_samples))

    def check(arr, expected_shape):
        assert np.isfinite(arr).all()
        assert arr.shape == expected_shape

    coef_count = len(scalar_parameter_names(fit.model_desc))
    check(marginals(fit).array, (coef_count, 7))  # num coefs x num stats
    check(fitted(fit), (num_samples, num_rows))
    for what in ('linear', 'response', 'sample'):
        check(fitted(fit, what), (num_samples, num_rows))
    # Applying `fitted` to new data.
    df2 = dummy_df(cols, N2)
    print(df2)
    check(fitted(fit, data=df2), (num_samples, N2))
def test_parameter_shapes(formula_str, non_real_cols, contrasts, family, priors, expected, fitargs):
    """Check that each fitted parameter carries its declared shape, with
    exactly one sample drawn per the test spec."""
    # Make dummy data.
    num_rows = 5
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    df = dummy_df(cols, num_rows)

    # Define model, and generate a single posterior sample.
    model = defm(formula_str, df, family, priors, contrasts)
    fit = model.fit(**fitargs)

    # Check parameter sizes.
    for parameter in parameters(model.desc):
        # Get the first (and only) sample.
        draws = get_param(fit, parameter.name)
        # Check the test spec. only generated one sample.
        assert draws.shape[0] == 1
        assert draws[0].shape == parameter.shape