Esempio n. 1
0
def test_numpyro_codegen(N, formula_str, non_real_cols, contrasts, family,
                         priors, expected):
    """Check the sample sites of the model generated by the NumPyro backend.

    `expected` is an iterable of `(site, family_name, maybe_params)`
    triples. The unobserved sites found in a trace of the generated model
    must match the expected site names exactly, and the distribution
    object at each site must have the expected type name and parameter
    values. When `maybe_params` is falsy, `default_params[family_name]`
    supplies the parameter values to check.
    """
    # Build the model description from the column information.
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))

    # Generate model function and data.
    modelfn = numpyro_backend.gen(desc).fn

    df = dummy_df(cols, N)
    data = data_from_numpy(numpyro_backend,
                           makedata(formula, df, metadata, contrasts))

    # Trace one seeded execution of the model and collect the names of
    # every site that is not observed.
    rng = random.PRNGKey(0)
    trace = numpyro.trace(numpyro.seed(modelfn, rng)).get_trace(**data)
    expected_sites = {site for (site, _, _) in expected}
    sample_sites = {
        name
        for name, node in trace.items() if not node['is_observed']
    }
    assert sample_sites == expected_sites

    for (site, family_name, maybe_params) in expected:
        # The 'LKJ' family is expected to appear as `LKJCholesky` in the
        # trace; every other family keeps its own name.
        numpyro_family_name = dict(LKJ='LKJCholesky').get(
            family_name, family_name)
        fn = trace[site]['fn']
        params = maybe_params or default_params[family_name]
        assert type(fn).__name__ == numpyro_family_name
        for (name, expected_val) in params.items():
            if family_name == 'LKJ':
                # The only LKJ parameter checked is `eta`, which is read
                # from the distribution's `concentration` attribute.
                assert name == 'eta'
                name = 'concentration'
            # Use `getattr` rather than calling the dunder directly so
            # that normal attribute lookup (including any `__getattr__`
            # fallback) applies.
            val = getattr(fn, name)
            assert_equal(val._value, np.broadcast_to(expected_val, val.shape))
Esempio n. 2
0
def test_pyro_codegen(N, formula_str, non_real_cols, contrasts, family, priors,
                      expected):
    """Check the sample sites of the model generated by the Pyro backend.

    `expected` is an iterable of `(site, family_name, maybe_params)`
    triples. The stochastic nodes of a trace of the generated model
    (ignoring 'obs') must match the expected site names exactly, and the
    distribution at each site must have the expected type name and
    parameter values. When `maybe_params` is falsy,
    `default_params[family_name]` supplies the parameter values to check.
    """
    # Make dummy data.
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    # Generate the model from the column information rather than from
    # the metadata extracted from `df`. Since N is small, the metadata
    # extracted from `df` might lose information compared to the full
    # metadata derived from `cols` (e.g. levels of a categorical
    # column), leading to unexpected results. For example, missing
    # levels might cause correlations not to be modelled, even though
    # they ought to be given the full metadata.
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))

    # Generate model function and data.
    modelfn = pyro_backend.gen(desc).fn

    df = dummy_df(cols, N)
    data = data_from_numpy(pyro_backend,
                           makedata(formula, df, metadata, contrasts))

    # Check sample sites.
    trace = poutine.trace(modelfn).get_trace(**data)
    expected_sites = {site for (site, _, _) in expected}
    assert set(trace.stochastic_nodes) - {'obs'} == expected_sites

    for (site, family_name, maybe_params) in expected:
        # The 'LKJ' family is expected to appear as `LKJCorrCholesky` in
        # the trace; every other family keeps its own name.
        pyro_family_name = dict(LKJ='LKJCorrCholesky').get(
            family_name, family_name)
        fn = unwrapfn(trace.nodes[site]['fn'])
        params = maybe_params or default_params[family_name]
        assert type(fn).__name__ == pyro_family_name
        for (name, expected_val) in params.items():
            # Use `getattr` rather than calling the dunder directly so
            # that normal attribute lookup (including any `__getattr__`
            # fallback) applies.
            val = getattr(fn, name)
            assert_equal(val, torch.tensor(expected_val).expand(val.shape))
Esempio n. 3
0
def test_family_and_response_type_checks(formula_str, non_real_cols, family,
                                         priors):
    """Expect `build_model_pre` to reject the given family/response pairing.

    The call must raise an exception whose message matches
    'not compatible'. `priors` is accepted as a test parameter but is not
    used in the body; an empty dict is passed as the final argument of
    `build_model_pre`.
    """
    parsed = parse(formula_str)
    column_info = expand_columns(parsed, non_real_cols)
    meta = metadata_from_cols(column_info)
    with pytest.raises(Exception, match='not compatible'):
        build_model_pre(parsed, meta, family, {})
Esempio n. 4
0
def test_parameter_shapes(formula_str, non_real_cols, contrasts, family,
                          priors, expected, fitargs):
    """Check that sampled parameters have the shapes the model declares.

    One sample is drawn with the 'prior' algorithm on the backend named
    by `fitargs['backend']`. For every parameter of the fitted model
    description, the shape of the array returned by `fit.get_param` is
    compared with the declared parameter shape, both for the default call
    and for the call that passes `True` as second argument.
    """
    # Make dummy data.
    num_rows = 5
    columns = expand_columns(parse(formula_str), non_real_cols)
    frame = dummy_df(columns, num_rows, allow_non_exhaustive=True)

    # Define the model and draw a single sample.
    full_metadata = metadata_from_cols(columns)
    definition = define_model(formula_str, full_metadata, family, priors,
                              contrasts)
    model = definition.gen(fitargs['backend'])
    encoded = model.encode(frame)
    fit = model.run_algo('prior', encoded, num_samples=1, seed=None)

    num_chains = fitargs.get('num_chains', 1)
    # Each chain collects exactly one sample, so the leading dimensions
    # are (num_chains,) by default and (num_chains, 1) for the variant
    # requested with `True`.
    flat_prefix = (num_chains, )
    chained_prefix = (num_chains, 1)

    # Check parameter sizes.
    for parameter in parameters(fit.model_desc):
        declared_shape = parameter.shape
        flat = fit.get_param(parameter.name)
        assert flat.shape == flat_prefix + declared_shape
        chained = fit.get_param(parameter.name, True)
        assert chained.shape == chained_prefix + declared_shape
Esempio n. 5
0
def test_marginals_fitted_smoke(fitargs, formula_str, non_real_cols, family,
                                contrasts):
    """Smoke-test the output of `marginals` and `fitted` on a fitted model.

    `fitargs` is called with the requested number of samples and its
    result is passed as keyword arguments to `model.fit`.
    """
    num_rows = 10
    num_samples = 4
    columns = expand_columns(parse(formula_str), non_real_cols)
    frame = dummy_df(columns, num_rows)
    fit = brm(formula_str, frame, family, [], contrasts).fit(
        **fitargs(num_samples))

    # Sanity check output for `marginals`.
    summary = fit.marginals().array
    coef_count = len(scalar_parameter_names(fit.model_desc))
    assert summary.shape == (coef_count, 9)  # num coefs x num stats
    # The last two columns (n_eff and r_hat) are frequently nan with few
    # samples, so their finiteness is not checked.
    assert np.all(np.isfinite(summary[:, :-2]))

    # Sanity check output of `fitted`.
    fitted_shape = (num_samples, num_rows)

    def assert_finite_with_shape(values, shape):
        assert np.all(np.isfinite(values))
        assert values.shape == shape

    for positional in ((), ('linear', ), ('response', ), ('sample', )):
        assert_finite_with_shape(fit.fitted(*positional), fitted_shape)
    fresh_frame = dummy_df(columns, num_rows)
    assert_finite_with_shape(fit.fitted(data=fresh_frame), fitted_shape)
Esempio n. 6
0
def test_mu_correctness(formula_str, cols, backend, expected):
    """Compare the linear predictor from `fitted` with a reference value.

    `expected` is a callable taking the data frame and the fit's
    `get_scalar_param` accessor; it is assumed to return a data frame.
    """
    columns = expand_columns(parse(formula_str), cols)
    df = dummy_df(columns, 10)
    model = brm(formula_str, df)
    fit = model.prior(num_samples=1, backend=backend)
    # Exactly one sample was drawn, so take the first entry.
    linear = fit.fitted(what='linear')
    actual_mu = linear[0]
    reference = expected(df, fit.get_scalar_param)
    expected_mu = reference.to_numpy(np.float32)
    assert np.allclose(actual_mu, expected_mu)
Esempio n. 7
0
def test_scalar_parameter_names_smoke(formula_str, non_real_cols, contrasts,
                                      family, priors, expected):
    """Smoke test: `scalar_parameter_names` returns exactly a `list`."""
    columns = expand_columns(parse(formula_str), non_real_cols)
    model = define_model(formula_str, metadata_from_cols(columns), family,
                         priors, contrasts)
    result = scalar_parameter_names(model.desc)
    assert type(result) is list
Esempio n. 8
0
def test_prior_checks(formula_str, non_real_cols, family, priors,
                      expected_error):
    """Expect `build_prior_tree` to reject the given priors.

    The model is pre-built outside the `pytest.raises` block, so only an
    exception raised by `build_prior_tree` whose message matches
    `expected_error` makes the test pass.
    """
    parsed = parse(formula_str)
    meta = metadata_from_cols(expand_columns(parsed, non_real_cols))
    pre_built = build_model_pre(parsed, meta, family, {})
    with pytest.raises(Exception, match=expected_error):
        build_prior_tree(pre_built, priors)
Esempio n. 9
0
def test_designmatrix(formula_str, df, metadata_cols, contrasts, expected):
    """Check that `makedata` produces the expected keys, dtypes and values.

    Metadata is built from `metadata_cols` when it is given, and is
    otherwise extracted from `df`.
    """
    if metadata_cols is not None:
        metadata = metadata_from_cols(metadata_cols)
    else:
        metadata = metadata_from_df(df)
    actual = makedata(parse(formula_str), df, metadata, contrasts)
    assert set(actual.keys()) == set(expected.keys())
    for key in expected.keys():
        produced, wanted = actual[key], expected[key]
        assert produced.dtype == wanted.dtype
        assert_equal(produced, wanted)
Esempio n. 10
0
def test_expectation_correctness(cols, family, expected, backend):
    """Check that `fitted` computes the correct expectation.

    `expected` is a callable that maps the linear predictor of the single
    drawn sample to the expectation the test should see.
    """
    formula_str = 'y ~ 1 + x'
    columns = expand_columns(parse(formula_str), cols)
    df = dummy_df(columns, 10)
    model = brm(formula_str, df, family=family)
    fit = model.prior(num_samples=1, backend=backend)
    actual = fit.fitted(what='expectation')[0]
    # `mu` is assumed to be computed correctly by `fitted` (that is
    # tested elsewhere), so it serves as the input for the reference
    # expectation.
    mu = fit.fitted('linear')[0]
    wanted = expected(mu)
    assert np.allclose(actual, wanted)
Esempio n. 11
0
def test_expected_response_codegen(response_meta, family, args, expected,
                                   backend):
    """Check the backend's generated `expected_response_fn` on `args`.

    The arguments are converted with `backend.from_numpy`, passed to the
    generated function, and the result converted back with
    `backend.to_numpy` before being compared with `expected`.
    """
    metadata = metadata_from_cols([response_meta])
    desc = makedesc(parse('y ~ 1'), metadata, family, [], {})

    converted_args = [backend.from_numpy(arg) for arg in args]
    generated_fn = backend.gen(desc).expected_response_fn
    actual = backend.to_numpy(generated_fn(*converted_args))

    assert np.allclose(actual, expected)
Esempio n. 12
0
def test_sampling_from_prior_smoke(N, backend, formula_str, non_real_cols,
                                   contrasts, family, priors, expected):
    """Smoke test: `backend.prior` returns a `Samples` object."""
    parsed = parse(formula_str)
    columns = expand_columns(parsed, non_real_cols)
    # Use the full metadata derived from the columns rather than metadata
    # extracted from the data frame.
    full_metadata = metadata_from_cols(columns)
    desc = makedesc(parsed, full_metadata, family, priors,
                    code_lengths(contrasts))
    model = backend.gen(desc)
    frame = dummy_df(columns, N, allow_non_exhaustive=True)
    numpy_data = makedata(parsed, frame, full_metadata, contrasts)
    backend_data = data_from_numpy(backend, numpy_data)
    drawn = backend.prior(backend_data, model, num_samples=10, seed=None)
    assert type(drawn) is Samples
Esempio n. 13
0
def test_fitted_on_new_data(N2):
    """Check that `fitted` applies the model's contrasts to new data.

    The model is fit on one data frame and `fitted` is then evaluated on
    a second data frame with `N2` rows.
    """
    num_samples = 4
    num_rows = 10
    formula_str = 'y ~ 1 + a'
    factor = Categorical('a', ['a0', 'a1'])
    # With this contrast `a` is coded as two columns rather than (the
    # default) one, so `fitted` must use the contrast when coding the
    # *new data*; this test would fail otherwise.
    contrasts = {'a': np.array([[-1, -1], [1, 1]])}
    columns = expand_columns(parse(formula_str), [factor])
    training_frame = dummy_df(columns, num_rows)
    model = brm(formula_str, training_frame, Normal, contrasts=contrasts)
    fit = model.fit(iter=num_samples, backend=pyro_backend)
    new_frame = dummy_df(columns, N2, allow_non_exhaustive=True)
    predictions = fit.fitted(data=new_frame)
    assert np.all(np.isfinite(predictions))
    assert predictions.shape == (num_samples, N2)
Esempio n. 14
0
def define_model(formula_str, metadata, family=None, priors=None, contrasts=None):
    """Build a `Model` from a formula string and column metadata.

    Arguments are validated with `assert`, so an invalid argument raises
    `AssertionError`. Subclasses of the expected types are accepted.

    Args:
        formula_str: The model formula, as a `str`.
        metadata: A `Metadata` instance describing the data columns.
        family: A `Family` instance; `Normal` is used when omitted.
        priors: A list of priors; an empty list is used when omitted.
        contrasts: A dict whose values are 2-dimensional numpy arrays;
            an empty dict is used when omitted.

    Returns:
        `Model(formula, metadata, contrasts, desc)`, where `formula` is
        the parsed formula and `desc` is the result of `makedesc`.
    """
    # `isinstance` (rather than `type(x) == T`) so that subclasses such
    # as a `str` subclass or an `OrderedDict` are not rejected.
    assert isinstance(formula_str, str)
    assert isinstance(metadata, Metadata)
    assert family is None or isinstance(family, Family)
    assert priors is None or isinstance(priors, list)
    assert contrasts is None or isinstance(contrasts, dict)

    family = family or Normal
    priors = priors or []
    contrasts = contrasts or {}

    # TODO: Consider accepting nested arrays as well as numpy arrays.
    # (If we do, convert to numpy arrays here in `define_model`?)
    assert all(
        isinstance(val, np.ndarray) and len(val.shape) == 2
        for val in contrasts.values())

    formula = parse(formula_str)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))
    return Model(formula, metadata, contrasts, desc)
Esempio n. 15
0
def test_parameter_shapes(formula_str, non_real_cols, contrasts, family,
                          priors, expected, fitargs):
    """Check that each fitted parameter has the shape the model declares.

    The test specification is expected to produce exactly one sample per
    parameter; that is asserted before the shape comparison.
    """
    # Make dummy data.
    num_rows = 5
    columns = expand_columns(parse(formula_str), non_real_cols)
    frame = dummy_df(columns, num_rows)

    # Define the model and fit it with the supplied arguments.
    model = defm(formula_str, frame, family, priors, contrasts)
    fit = model.fit(**fitargs)

    # Check parameter sizes.
    for parameter in parameters(model.desc):
        drawn = get_param(fit, parameter.name)
        # The test spec. must have generated only one sample.
        assert drawn.shape[0] == 1
        only_sample = drawn[0]
        assert only_sample.shape == parameter.shape
Esempio n. 16
0
def test_marginals_fitted_smoke(fitargs, formula_str, non_real_cols, family,
                                contrasts, N2):
    """Smoke-test `marginals` and `fitted`, including `fitted` on new data.

    `fitargs` is called with the requested number of samples and its
    result is passed as keyword arguments to `fit`. Both generated data
    frames are printed.
    """
    num_rows = 10
    num_samples = 4
    columns = expand_columns(parse(formula_str), non_real_cols)
    frame = dummy_df(columns, num_rows)
    print(frame)
    model = defm(formula_str, frame, family, contrasts=contrasts)
    fit = model.fit(**fitargs(num_samples))

    def assert_finite_with_shape(values, shape):
        assert np.all(np.isfinite(values))
        assert values.shape == shape

    coef_count = len(scalar_parameter_names(fit.model_desc))
    # num coefs x num stats
    assert_finite_with_shape(marginals(fit).array, (coef_count, 7))

    in_sample_shape = (num_samples, num_rows)
    assert_finite_with_shape(fitted(fit), in_sample_shape)
    for what in ('linear', 'response', 'sample'):
        assert_finite_with_shape(fitted(fit, what), in_sample_shape)

    # Applying `fitted` to new data.
    new_frame = dummy_df(columns, N2)
    print(new_frame)
    assert_finite_with_shape(fitted(fit, data=new_frame), (num_samples, N2))
Esempio n. 17
0
def defm(formula_str, df, family=None, priors=None, contrasts=None):
    """Build a model description and coded data from a formula and data frame.

    Arguments are validated with `assert`, so an invalid argument raises
    `AssertionError`. Subclasses of the expected types are accepted.

    Args:
        formula_str: The model formula, as a `str`.
        df: A pandas `DataFrame`; metadata is extracted from it with
            `metadata_from_df`.
        family: A `Family` instance; `Normal` is used when omitted.
        priors: A list of priors; an empty list is used when omitted.
        contrasts: A dict whose values are 2-dimensional numpy arrays;
            an empty dict is used when omitted.

    Returns:
        `DefmResult(formula, metadata, contrasts, desc, data)`, where
        `desc` comes from `makedesc` and `data` from `makedata`.
    """
    # `isinstance` (rather than `type(x) == T`) so that subclasses such
    # as a `DataFrame` subclass or an `OrderedDict` are not rejected.
    assert isinstance(formula_str, str)
    assert isinstance(df, pd.DataFrame)
    assert family is None or isinstance(family, Family)
    assert priors is None or isinstance(priors, list)
    assert contrasts is None or isinstance(contrasts, dict)

    family = family or Normal
    priors = priors or []
    contrasts = contrasts or {}

    # TODO: Consider accepting nested arrays as well as numpy arrays.
    # (If we do, convert to numpy arrays here in `defm`?)
    assert all(
        isinstance(val, np.ndarray) and len(val.shape) == 2
        for val in contrasts.values())

    formula = parse(formula_str)
    # Perhaps design matrices ought to always have metadata (i.e.
    # column names) associated with them, as in Patsy.
    metadata = metadata_from_df(df)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))
    data = makedata(formula, df, metadata, contrasts)
    return DefmResult(formula, metadata, contrasts, desc, data)
Esempio n. 18
0
def test_scalar_param_map_consistency():
    """Check that the scalar parameter map is consistent with the parameters.

    For a fixed formula with two grouping terms, the map produced by
    `scalar_parameter_map` must point to unique parameter positions,
    cover every scalar parameter, and only contain indices that lie
    within the shape of the parameter they refer to.
    """
    formula = parse('y ~ 1 + x1 + (1 + x2 + b | a) + (1 + x1 | a:b)')
    non_real_cols = [
        Categorical('a', ['a1', 'a2', 'a3']),
        Categorical('b', ['b1', 'b2', 'b3']),
    ]
    cols = expand_columns(formula, non_real_cols)
    desc = makedesc(formula, metadata_from_cols(cols), Normal, [], {})
    params = parameters(desc)
    spmap = scalar_parameter_map(desc)

    # Check that each entry in the map points to a unique parameter
    # position.
    param_and_indices_set = {
        param_and_indices
        for (_, param_and_indices) in spmap
    }
    assert len(param_and_indices_set) == len(spmap)

    # Ensure that we have enough entries in the map to cover all of
    # the scalar parameters. (The L_i parameters have a funny status.
    # We consider them to be parameters, but not scalar parameters.
    # This is not planned, rather things just evolved this way. It
    # does make some sense though, since we usually look at R_i
    # instead.)
    # `np.prod` is used because the `np.product` alias was removed in
    # NumPy 2.0.
    num_scalar_params = sum(
        np.prod(shape) for name, shape in params
        if not name.startswith('L_'))
    assert num_scalar_params == len(spmap)

    # Check that all indices are valid. (i.e. Within the shape of the
    # parameter.)
    for scalar_param_name, (param_name, indices) in spmap:
        ss = [shape for (name, shape) in params if name == param_name]
        assert len(ss) == 1
        param_shape = ss[0]
        assert len(indices) == len(param_shape)
        assert all(i < s for (i, s) in zip(indices, param_shape))
Esempio n. 19
0
def test_coef_names(formula_str, non_real_cols, expected_names):
    """Check `coef_names` for the formula's terms, with no contrasts given."""
    parsed = parse(formula_str)
    meta = metadata_from_cols(expand_columns(parsed, non_real_cols))
    actual_names = coef_names(parsed.terms, meta, {})
    assert actual_names == expected_names
Esempio n. 20
0
def test_coding(formula_str, non_real_cols, expected_coding):
    """Check the result of `code_terms` for the formula's terms."""
    parsed = parse(formula_str)
    meta = metadata_from_cols(expand_columns(parsed, non_real_cols))
    actual_coding = code_terms(parsed.terms, meta)
    assert actual_coding == expected_coding
Esempio n. 21
0
def test_parser(formula_str, expected_formula):
    """Check that parsing `formula_str` yields `expected_formula`."""
    assert parse(formula_str) == expected_formula