def test_numpyro_codegen(N, formula_str, non_real_cols, contrasts, family,
                         priors, expected):
    """Check the sample sites of the model generated by the NumPyro backend.

    The model is generated from `formula_str`, traced once with a fixed
    PRNG key on dummy data of `N` rows, and compared against `expected`.

    `expected` is an iterable of `(site, family_name, maybe_params)`
    triples. For each triple the traced site must exist, its distribution
    class must match `family_name` (with 'LKJ' mapped to 'LKJCholesky'),
    and each parameter must equal the expected value broadcast to the
    parameter's shape. When `maybe_params` is falsy the parameters are
    looked up in `default_params[family_name]`.

    NOTE(review): arguments are presumably supplied by a parametrize
    decorator outside this block -- confirm.
    """
    # Describe the model using metadata built from the column information.
    parsed = parse(formula_str)
    columns = expand_columns(parsed, non_real_cols)
    meta = metadata_from_cols(columns)
    model_desc = makedesc(parsed, meta, family, priors,
                          code_lengths(contrasts))

    # Generate the model function and the dummy data to run it on.
    model = numpyro_backend.gen(model_desc).fn
    frame = dummy_df(columns, N)
    model_args = data_from_numpy(
        numpyro_backend, makedata(parsed, frame, meta, contrasts))

    # Trace a single seeded execution and keep only the unobserved sites.
    key = random.PRNGKey(0)
    trace = numpyro.trace(numpyro.seed(model, key)).get_trace(**model_args)
    wanted_sites = {site for (site, _, _) in expected}
    latent_sites = {
        name for name, node in trace.items() if not node['is_observed']
    }
    assert latent_sites == wanted_sites

    # Names under which NumPyro exposes families that differ from ours.
    family_aliases = {'LKJ': 'LKJCholesky'}
    for site, family_name, maybe_params in expected:
        dist = trace[site]['fn']
        wanted_params = maybe_params or default_params[family_name]
        assert type(dist).__name__ == family_aliases.get(
            family_name, family_name)
        for param_name, expected_val in wanted_params.items():
            if family_name == 'LKJ':
                # The only LKJ parameter is 'eta', which the NumPyro
                # distribution stores as 'concentration'.
                assert param_name == 'eta'
                param_name = 'concentration'
            actual = getattr(dist, param_name)
            assert_equal(actual._value,
                         np.broadcast_to(expected_val, actual.shape))
def test_pyro_codegen(N, formula_str, non_real_cols, contrasts, family,
                      priors, expected):
    """Check the sample sites of the model generated by the Pyro backend.

    The model is generated from `formula_str`, traced once on dummy data
    of `N` rows, and compared against `expected`.

    `expected` is an iterable of `(site, family_name, maybe_params)`
    triples. The stochastic nodes of the trace (ignoring 'obs') must be
    exactly the expected sites. For each triple the distribution class
    must match `family_name` (with 'LKJ' mapped to 'LKJCorrCholesky') and
    each parameter must equal the expected value expanded to the
    parameter's shape. When `maybe_params` is falsy the parameters are
    looked up in `default_params[family_name]`.

    NOTE(review): arguments are presumably supplied by a parametrize
    decorator outside this block -- confirm.
    """
    parsed = parse(formula_str)
    columns = expand_columns(parsed, non_real_cols)

    # Build the model from the column information, not from metadata
    # extracted from the dummy data frame. Because N is small, metadata
    # extracted from the frame might lose information relative to the
    # full metadata derived from the columns (e.g. levels of a
    # categorical column), leading to unexpected results. For example,
    # missing levels might cause correlations not to be modelled even
    # though they ought to be given the full metadata.
    meta = metadata_from_cols(columns)
    model_desc = makedesc(parsed, meta, family, priors,
                          code_lengths(contrasts))

    # Generate the model function and the dummy data to run it on.
    model = pyro_backend.gen(model_desc).fn
    frame = dummy_df(columns, N)
    model_args = data_from_numpy(
        pyro_backend, makedata(parsed, frame, meta, contrasts))

    # Trace one execution and compare the latent sites with expectations.
    trace = poutine.trace(model).get_trace(**model_args)
    wanted_sites = {site for (site, _, _) in expected}
    assert set(trace.stochastic_nodes) - {'obs'} == wanted_sites

    # Names under which Pyro exposes families that differ from ours.
    family_aliases = {'LKJ': 'LKJCorrCholesky'}
    for site, family_name, maybe_params in expected:
        dist = unwrapfn(trace.nodes[site]['fn'])
        wanted_params = maybe_params or default_params[family_name]
        assert type(dist).__name__ == family_aliases.get(
            family_name, family_name)
        for param_name, expected_val in wanted_params.items():
            actual = getattr(dist, param_name)
            assert_equal(actual,
                         torch.tensor(expected_val).expand(actual.shape))
def test_designmatrix(formula_str, df, metadata_cols, contrasts, expected):
    """Check the data produced by `makedata` for a formula and data frame.

    Metadata comes from `metadata_cols` when it is given, otherwise it is
    extracted from `df`. The result must have exactly the keys of
    `expected`, and every entry must match the expected entry in both
    dtype and value.
    """
    if metadata_cols is not None:
        meta = metadata_from_cols(metadata_cols)
    else:
        meta = metadata_from_df(df)

    actual = makedata(parse(formula_str), df, meta, contrasts)

    assert set(actual.keys()) == set(expected.keys())
    for key, wanted in expected.items():
        produced = actual[key]
        # Compare dtypes explicitly before comparing values.
        assert produced.dtype == wanted.dtype
        assert_equal(produced, wanted)
def test_sampling_from_prior_smoke(N, backend, formula_str, non_real_cols,
                                   contrasts, family, priors, expected):
    """Smoke test: drawing prior samples from a generated model succeeds.

    A model is generated with `backend` for `formula_str`, ten prior
    samples are requested on dummy data of `N` rows, and the result must
    be a `Samples` instance. `expected` is accepted but not used here.
    """
    parsed = parse(formula_str)
    columns = expand_columns(parsed, non_real_cols)

    # Use the full metadata derived from the columns, for the same reason
    # given in the comment in the codegen test.
    meta = metadata_from_cols(columns)
    model_desc = makedesc(parsed, meta, family, priors,
                          code_lengths(contrasts))
    generated = backend.gen(model_desc)

    frame = dummy_df(columns, N, allow_non_exhaustive=True)
    model_args = data_from_numpy(
        backend, makedata(parsed, frame, meta, contrasts))

    samples = backend.prior(model_args, generated, num_samples=10, seed=None)
    assert type(samples) == Samples
def defm(formula_str, df, family=None, priors=None, contrasts=None):
    """Define a model from a formula string and a data frame.

    Parameters
    ----------
    formula_str : str
        The model formula; it is handed to `parse`.
    df : pd.DataFrame
        The data. Metadata is extracted from it with `metadata_from_df`
        and the model data is built from it with `makedata`.
    family : Family, optional
        Response family. `Normal` is used when omitted.
    priors : list, optional
        Custom priors passed on to `makedesc`. Defaults to no priors.
    contrasts : dict, optional
        Custom contrasts. Every value must be a two-dimensional numpy
        array. Defaults to no contrasts.

    Returns
    -------
    DefmResult
        Built from the parsed formula, the metadata, the contrasts, the
        model description and the model data, in that order.

    Raises
    ------
    AssertionError
        If an argument has an unexpected type, or a contrast is not a
        two-dimensional numpy array.
    """
    # `isinstance` (rather than exact type equality) so that subclasses,
    # e.g. a DataFrame subclass, are accepted.
    assert isinstance(formula_str, str), \
        'formula_str must be a str'
    assert isinstance(df, pd.DataFrame), \
        'df must be a pandas DataFrame'
    assert family is None or isinstance(family, Family), \
        'family must be a Family or None'
    assert priors is None or isinstance(priors, list), \
        'priors must be a list or None'
    assert contrasts is None or isinstance(contrasts, dict), \
        'contrasts must be a dict or None'

    family = family or Normal
    priors = priors or []
    contrasts = contrasts or {}

    # TODO: Consider accepting nested arrays as well as numpy arrays.
    # (If we do, convert to numpy arrays here in `defm`?)
    assert all(
        isinstance(val, np.ndarray) and val.ndim == 2
        for val in contrasts.values()
    ), 'every contrast must be a two-dimensional numpy array'

    formula = parse(formula_str)
    # Perhaps design matrices ought to always have metadata (i.e.
    # column names) associated with them, as in Patsy.
    metadata = metadata_from_df(df)
    desc = makedesc(formula, metadata, family, priors,
                    code_lengths(contrasts))
    data = makedata(formula, df, metadata, contrasts)
    return DefmResult(formula, metadata, contrasts, desc, data)
def encode(self, df):
    """Build model data for `df` using this object's stored settings.

    Calls `makedata` with the formula, metadata and contrasts held on
    `self`, and returns its result unchanged.
    """
    formula, metadata, contrasts = self.formula, self.metadata, self.contrasts
    return makedata(formula, df, metadata, contrasts)