def test_family_and_response_type_checks(formula_str, non_real_cols, family, priors):
    """Check that an incompatible family/response pairing is rejected.

    Column metadata is derived from the parsed formula together with
    ``non_real_cols``. ``build_model_pre`` is then expected to raise an
    exception whose message matches ``'not compatible'``.

    ``priors`` is accepted but not used by this test; the final argument
    passed to ``build_model_pre`` is an empty dict.
    """
    parsed = parse(formula_str)
    metadata = metadata_from_cols(expand_columns(parsed, non_real_cols))
    with pytest.raises(Exception, match='not compatible'):
        build_model_pre(parsed, metadata, family, {})
def test_parameter_shapes(formula_str, non_real_cols, contrasts, family, priors, expected, fitargs):
    """Check the shapes of the parameter samples exposed by a fit.

    A model is defined from the formula and column metadata, generated for
    the backend named by ``fitargs['backend']``, and run with the ``'prior'``
    algorithm to draw a single sample. For every parameter listed by
    ``parameters(fit.model_desc)`` the sample array returned by
    ``fit.get_param`` must have the parameter's own shape, prefixed by:

    * ``(num_chains,)`` for the default call, and
    * ``(num_chains, 1)`` when ``True`` is passed as the second argument.

    ``num_chains`` is read from ``fitargs`` and defaults to 1. ``expected``
    is accepted but not used here.
    """
    # Build a small dummy data set.
    num_rows = 5
    parsed = parse(formula_str)
    columns = expand_columns(parsed, non_real_cols)
    frame = dummy_df(columns, num_rows, allow_non_exhaustive=True)

    # Define the model and draw one sample from it.
    meta = metadata_from_cols(columns)
    assembled = define_model(formula_str, meta, family, priors, contrasts)
    model = assembled.gen(fitargs['backend'])
    encoded = model.encode(frame)
    fit = model.run_algo('prior', encoded, num_samples=1, seed=None)
    chains = fitargs.get('num_chains', 1)

    # Compare sample shapes against each parameter's declared shape.
    for param in parameters(fit.model_desc):
        shape = param.shape
        # Each chain contributes exactly one sample, so without the extra
        # argument the leading dimension is just the number of chains.
        flat = fit.get_param(param.name)
        assert flat.shape == (chains, ) + shape
        by_chain = fit.get_param(param.name, True)
        assert by_chain.shape == (chains, 1) + shape
def test_numpyro_codegen(N, formula_str, non_real_cols, contrasts, family, priors, expected):
    """Check the sample sites of the model generated for the numpyro backend.

    The model description is built from metadata derived from the expanded
    columns, the generated model function is traced on dummy data of ``N``
    rows, and the trace is compared with ``expected``.

    ``expected`` is an iterable of ``(site, family_name, maybe_params)``
    triples. For each triple the test asserts that:

    * the set of non-observed sites in the trace equals the set of expected
      site names;
    * the class name of the distribution at ``site`` equals ``family_name``
      (with ``'LKJ'`` mapped to ``'LKJCholesky'``);
    * each distribution parameter equals the expected value, broadcast to
      the parameter's shape. When ``maybe_params`` is falsy the values come
      from ``default_params[family_name]``.
    """
    # Make dummy data.
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))

    # Generate model function and data.
    modelfn = numpyro_backend.gen(desc).fn
    df = dummy_df(cols, N)
    data = data_from_numpy(numpyro_backend,
                           makedata(formula, df, metadata, contrasts))

    # Check sample sites.
    rng = random.PRNGKey(0)
    trace = numpyro.trace(numpyro.seed(modelfn, rng)).get_trace(**data)
    expected_sites = [site for (site, _, _) in expected]
    sample_sites = [
        name for name, node in trace.items() if not node['is_observed']
    ]
    assert set(sample_sites) == set(expected_sites)

    # Family names whose numpyro class name differs from the expected name.
    numpyro_family_names = dict(LKJ='LKJCholesky')

    for (site, family_name, maybe_params) in expected:
        numpyro_family_name = numpyro_family_names.get(family_name, family_name)
        fn = trace[site]['fn']
        params = maybe_params or default_params[family_name]
        assert type(fn).__name__ == numpyro_family_name
        for (name, expected_val) in params.items():
            attr_name = name
            if family_name == 'LKJ':
                # The expected parameter is called 'eta', but the value is
                # read from the distribution's 'concentration' attribute.
                assert name == 'eta'
                attr_name = 'concentration'
            val = getattr(fn, attr_name)
            assert_equal(val._value, np.broadcast_to(expected_val, val.shape))
def test_pyro_codegen(N, formula_str, non_real_cols, contrasts, family, priors, expected):
    """Check the sample sites of the model generated for the pyro backend.

    The generated model function is traced on dummy data of ``N`` rows and
    the trace is compared with ``expected``, an iterable of
    ``(site, family_name, maybe_params)`` triples. The test asserts that:

    * the stochastic nodes of the trace, excluding ``'obs'``, equal the set
      of expected site names;
    * the class name of the (unwrapped) distribution at ``site`` equals
      ``family_name`` (with ``'LKJ'`` mapped to ``'LKJCorrCholesky'``);
    * each distribution parameter equals the expected value expanded to the
      parameter's shape. When ``maybe_params`` is falsy the values come from
      ``default_params[family_name]``.
    """
    # Make dummy data.
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)

    # Generate the model from the column information rather than from
    # the metadata extracted from `df`. Since N is small, the metadata
    # extracted from `df` might lose information compared to the full
    # metadata derived from `cols` (e.g. levels of a categorical
    # column) leading to unexpected results. e.g. Missing levels might
    # cause correlations not to be modelled, even though they ought
    # to be given the full metadata.
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))

    # Generate model function and data.
    modelfn = pyro_backend.gen(desc).fn
    df = dummy_df(cols, N)
    data = data_from_numpy(pyro_backend,
                           makedata(formula, df, metadata, contrasts))

    # Check sample sites.
    trace = poutine.trace(modelfn).get_trace(**data)
    expected_sites = [site for (site, _, _) in expected]
    assert set(trace.stochastic_nodes) - {'obs'} == set(expected_sites)

    # Family names whose pyro class name differs from the expected name.
    pyro_family_names = dict(LKJ='LKJCorrCholesky')

    for (site, family_name, maybe_params) in expected:
        pyro_family_name = pyro_family_names.get(family_name, family_name)
        fn = unwrapfn(trace.nodes[site]['fn'])
        params = maybe_params or default_params[family_name]
        assert type(fn).__name__ == pyro_family_name
        for (name, expected_val) in params.items():
            val = getattr(fn, name)
            assert_equal(val, torch.tensor(expected_val).expand(val.shape))
def test_designmatrix(formula_str, df, metadata_cols, contrasts, expected):
    """Check the arrays produced by ``makedata`` against ``expected``.

    Metadata comes from ``metadata_cols`` when it is given, and is otherwise
    extracted from ``df``. The result of ``makedata`` must have exactly the
    keys of ``expected``, and for every key both the dtype and the contents
    of the array must match.
    """
    if metadata_cols is None:
        metadata = metadata_from_df(df)
    else:
        metadata = metadata_from_cols(metadata_cols)

    actual = makedata(parse(formula_str), df, metadata, contrasts)

    assert set(actual.keys()) == set(expected.keys())
    for key, wanted in expected.items():
        got = actual[key]
        # Compare dtypes explicitly as well as the array contents.
        assert got.dtype == wanted.dtype
        assert_equal(got, wanted)
def test_prior_checks(formula_str, non_real_cols, family, priors, expected_error):
    """Check that invalid priors are rejected by ``build_prior_tree``.

    ``build_model_pre`` is run first, outside the ``pytest.raises`` block, so
    only ``build_prior_tree`` is expected to raise. The exception message
    must match ``expected_error``.
    """
    parsed = parse(formula_str)
    metadata = metadata_from_cols(expand_columns(parsed, non_real_cols))
    design_metadata = build_model_pre(parsed, metadata, family, {})
    with pytest.raises(Exception, match=expected_error):
        build_prior_tree(design_metadata, priors)
def test_scalar_parameter_names_smoke(formula_str, non_real_cols, contrasts, family, priors, expected):
    """Smoke test: ``scalar_parameter_names`` runs and returns a list.

    A model is defined from the formula and the metadata derived from the
    expanded columns. Only the type of the result of
    ``scalar_parameter_names(model.desc)`` is checked. ``expected`` is
    accepted but not used.
    """
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    metadata = metadata_from_cols(cols)
    model = define_model(formula_str, metadata, family, priors, contrasts)
    names = scalar_parameter_names(model.desc)
    assert isinstance(names, list)
def test_expected_response_codegen(response_meta, family, args, expected, backend):
    """Check the generated expected-response function for ``backend``.

    A description is built for the intercept-only formula ``'y ~ 1'`` with
    ``response_meta`` as the only column. Each value in ``args`` is converted
    with ``backend.from_numpy`` and passed to the generated
    ``expected_response_fn``. The result is converted back with
    ``backend.to_numpy`` and must be close to ``expected``.
    """
    formula = parse('y ~ 1')
    desc = makedesc(formula, metadata_from_cols([response_meta]), family, [], {})

    # Convert the inputs first, then generate the function.
    converted = [backend.from_numpy(value) for value in args]
    response_fn = backend.gen(desc).expected_response_fn
    actual = backend.to_numpy(response_fn(*converted))

    assert np.allclose(actual, expected)
def test_sampling_from_prior_smoke(N, backend, formula_str, non_real_cols, contrasts, family, priors, expected):
    """Smoke test: sampling from the prior returns a ``Samples`` instance.

    A model is generated for ``backend`` and ``backend.prior`` is asked for
    10 samples on dummy data of ``N`` rows. Only the type of the result is
    checked. ``expected`` is accepted but not used.
    """
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    # Use the full metadata derived from `cols` rather than metadata
    # extracted from the (small) dummy data frame, for the reason given in
    # the comment in test_pyro_codegen.
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))
    model = backend.gen(desc)
    df = dummy_df(cols, N, allow_non_exhaustive=True)
    data = data_from_numpy(backend, makedata(formula, df, metadata, contrasts))
    samples = backend.prior(data, model, num_samples=10, seed=None)
    assert isinstance(samples, Samples)
def test_scalar_param_map_consistency():
    """Check that ``scalar_parameter_map`` agrees with ``parameters``.

    For a fixed formula with two three-level categorical columns, the test
    verifies that:

    * every entry of the map points to a distinct (parameter, indices) pair;
    * the map has one entry per scalar element of every parameter whose
      name does not start with ``'L_'``;
    * each entry names exactly one existing parameter, and its indices have
      the same rank as, and are strictly below, that parameter's shape.
    """
    formula = parse('y ~ 1 + x1 + (1 + x2 + b | a) + (1 + x1 | a:b)')
    non_real_cols = [
        Categorical('a', ['a1', 'a2', 'a3']),
        Categorical('b', ['b1', 'b2', 'b3']),
    ]
    cols = expand_columns(formula, non_real_cols)
    desc = makedesc(formula, metadata_from_cols(cols), Normal, [], {})
    params = parameters(desc)
    spmap = scalar_parameter_map(desc)

    # Check that each entry in the map points to a unique parameter
    # position.
    param_and_indices_set = set(param_and_indices
                                for (_, param_and_indices) in spmap)
    assert len(param_and_indices_set) == len(spmap)

    # Ensure that we have enough entries in the map to cover all of
    # the scalar parameters. (The L_i parameters have a funny status.
    # We consider them to be parameters, but not scalar parameters.
    # This is not planned, rather things just evolved this way. It
    # does make some sense though, since we usually look at R_i
    # instead.)
    # `np.prod` is used because `np.product` has been removed from NumPy.
    num_scalar_params = sum(
        np.prod(shape) for name, shape in params
        if not name.startswith('L_'))
    assert num_scalar_params == len(spmap)

    # Check that all indices are valid. (i.e. Within the shape of the
    # parameter.)
    for scalar_param_name, (param_name, indices) in spmap:
        ss = [shape for (name, shape) in params if name == param_name]
        # The referenced parameter must exist and be unique.
        assert len(ss) == 1
        param_shape = ss[0]
        assert len(indices) == len(param_shape)
        assert all(i < s for (i, s) in zip(indices, param_shape))
def test_coef_names(formula_str, non_real_cols, expected_names):
    """Check the coefficient names computed for the terms of a formula.

    ``coef_names`` is called with the parsed formula's terms, the metadata
    derived from the expanded columns, and an empty dict as its third
    argument. The result must equal ``expected_names``.
    """
    parsed = parse(formula_str)
    metadata = metadata_from_cols(expand_columns(parsed, non_real_cols))
    actual_names = coef_names(parsed.terms, metadata, {})
    assert actual_names == expected_names
def test_coding(formula_str, non_real_cols, expected_coding):
    """Check the coding computed for the terms of a formula.

    ``code_terms`` is called with the parsed formula's terms and the metadata
    derived from the expanded columns. The result must equal
    ``expected_coding``.
    """
    parsed = parse(formula_str)
    metadata = metadata_from_cols(expand_columns(parsed, non_real_cols))
    actual_coding = code_terms(parsed.terms, metadata)
    assert actual_coding == expected_coding