Example 1
def test_family_and_response_type_checks(formula_str, non_real_cols, family,
                                         priors):
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    metadata = metadata_from_cols(cols)
    with pytest.raises(Exception, match='not compatible'):
        build_model_pre(formula, metadata, family, {})
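
For context, here is a minimal sketch of the call chain this check exercises, using only names that appear in these tests; the formula, column and family are invented, and it assumes a real-valued response paired with the Normal family passes the compatibility check (an incompatible pair is what triggers the 'not compatible' error asserted above).

# Hypothetical driver, reusing the helpers imported by these tests.
formula = parse('y ~ 1 + a')
cols = expand_columns(formula, [Categorical('a', ['a1', 'a2'])])
metadata = metadata_from_cols(cols)
# Assumed-compatible combination: real-valued response 'y' with Normal,
# so this call is expected not to raise.
design_metadata = build_model_pre(formula, metadata, Normal, {})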
Example 2
def test_parameter_shapes(formula_str, non_real_cols, contrasts, family,
                          priors, expected, fitargs):
    # Make dummy data.
    N = 5
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    df = dummy_df(cols, N, allow_non_exhaustive=True)

    # Define model, and generate a single posterior sample.
    metadata = metadata_from_cols(cols)
    model = define_model(formula_str, metadata, family, priors,
                         contrasts).gen(fitargs['backend'])
    data = model.encode(df)
    fit = model.run_algo('prior', data, num_samples=1, seed=None)

    num_chains = fitargs.get('num_chains', 1)

    # Check parameter sizes.
    for parameter in parameters(fit.model_desc):
        expected_param_shape = parameter.shape
        samples = fit.get_param(parameter.name)
        # A single sample is collected by each chain for all cases.
        assert samples.shape == (num_chains, ) + expected_param_shape
        samples_with_chain_dim = fit.get_param(parameter.name, True)
        assert samples_with_chain_dim.shape == (num_chains,
                                                1) + expected_param_shape
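
To make the shape convention being asserted concrete, here is a tiny self-contained check with invented numbers (not part of the test suite):

# A parameter of shape (3,) fitted with num_chains == 2, one sample per chain.
num_chains, expected_param_shape = 2, (3,)
assert (num_chains, ) + expected_param_shape == (2, 3)       # fit.get_param(name)
assert (num_chains, 1) + expected_param_shape == (2, 1, 3)   # fit.get_param(name, True)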
Example 3
def test_numpyro_codegen(N, formula_str, non_real_cols, contrasts, family,
                         priors, expected):
    # Make dummy data.
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))

    # Generate model function and data.
    modelfn = numpyro_backend.gen(desc).fn

    df = dummy_df(cols, N)
    data = data_from_numpy(numpyro_backend,
                           makedata(formula, df, metadata, contrasts))

    # Check sample sites.
    rng = random.PRNGKey(0)
    trace = numpyro.trace(numpyro.seed(modelfn, rng)).get_trace(**data)
    expected_sites = [site for (site, _, _) in expected]
    sample_sites = [
        name for name, node in trace.items() if not node['is_observed']
    ]
    assert set(sample_sites) == set(expected_sites)
    for (site, family_name, maybe_params) in expected:
        numpyro_family_name = dict(LKJ='LKJCholesky').get(
            family_name, family_name)
        fn = trace[site]['fn']
        params = maybe_params or default_params[family_name]
        assert type(fn).__name__ == numpyro_family_name
        for (name, expected_val) in params.items():
            if family_name == 'LKJ':
                assert name == 'eta'
                name = 'concentration'
            val = getattr(fn, name)
            assert_equal(val._value, np.broadcast_to(expected_val, val.shape))
Example 4
def test_pyro_codegen(N, formula_str, non_real_cols, contrasts, family, priors,
                      expected):
    # Make dummy data.
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    # Generate the model from the column information rather than from
    # the metadata extracted from `df`. Since N is small, the metadata
    # extracted from `df` might lose information compared to the full
    # metadata derived from `cols` (e.g. levels of a categorical
    # column), leading to unexpected results. For example, missing
    # levels might cause correlations not to be modelled, even though
    # they ought to be given the full metadata.
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))

    # Generate model function and data.
    modelfn = pyro_backend.gen(desc).fn

    df = dummy_df(cols, N)
    data = data_from_numpy(pyro_backend,
                           makedata(formula, df, metadata, contrasts))

    # Check sample sites.
    trace = poutine.trace(modelfn).get_trace(**data)
    expected_sites = [site for (site, _, _) in expected]
    assert set(trace.stochastic_nodes) - {'obs'} == set(expected_sites)
    for (site, family_name, maybe_params) in expected:
        pyro_family_name = dict(LKJ='LKJCorrCholesky').get(
            family_name, family_name)
        fn = unwrapfn(trace.nodes[site]['fn'])
        params = maybe_params or default_params[family_name]
        assert type(fn).__name__ == pyro_family_name
        for (name, expected_val) in params.items():
            val = getattr(fn, name)
            assert_equal(val, torch.tensor(expected_val).expand(val.shape))
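
The pitfall described in the comment inside test_pyro_codegen can be sketched directly with the same helpers; the formula and column names here are invented:

# With only a couple of rows, the dummy data frame may not contain every
# level of 'a', so metadata derived from the data frame can lose levels
# that the full column description retains.
formula = parse('y ~ 1 + a')
cols = expand_columns(formula, [Categorical('a', ['a1', 'a2', 'a3'])])
small_df = dummy_df(cols, 2, allow_non_exhaustive=True)
full_metadata = metadata_from_cols(cols)   # always knows all three levels of 'a'
df_metadata = metadata_from_df(small_df)   # sees only the levels present in small_df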
Example 5
def test_designmatrix(formula_str, df, metadata_cols, contrasts, expected):
    metadata = (metadata_from_cols(metadata_cols)
                if metadata_cols is not None else metadata_from_df(df))
    data = makedata(parse(formula_str), df, metadata, contrasts)
    assert set(data.keys()) == set(expected.keys())
    for k in expected.keys():
        assert data[k].dtype == expected[k].dtype
        assert_equal(data[k], expected[k])
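
A hypothetical direct call, with an invented formula and columns, assuming `df` is a pandas DataFrame as elsewhere in these tests; it only inspects the returned design-matrix dict rather than asserting particular keys:

import pandas as pd

# Build design data for a trivial formula and look at what comes back.
df = pd.DataFrame({'y': [0., 1., 2.], 'x': [1., 2., 3.]})
data = makedata(parse('y ~ 1 + x'), df, metadata_from_df(df), {})
for k in sorted(data.keys()):
    print(k, data[k].dtype, data[k].shape)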
Example 6
def test_prior_checks(formula_str, non_real_cols, family, priors,
                      expected_error):
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    metadata = metadata_from_cols(cols)
    design_metadata = build_model_pre(formula, metadata, family, {})
    with pytest.raises(Exception, match=expected_error):
        build_prior_tree(design_metadata, priors)
Example 7
def test_scalar_parameter_names_smoke(formula_str, non_real_cols, contrasts,
                                      family, priors, expected):
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    metadata = metadata_from_cols(cols)
    model = define_model(formula_str, metadata, family, priors, contrasts)
    names = scalar_parameter_names(model.desc)
    assert type(names) == list
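
A hypothetical concrete smoke call with invented formula and column names, borrowing the style of the scalar_parameter_map test below (empty priors and contrasts):

formula_str = 'y ~ 1 + x1 + (1 + x2 + b | a)'
cols = expand_columns(parse(formula_str), [
    Categorical('a', ['a1', 'a2']),
    Categorical('b', ['b1', 'b2']),
])
model = define_model(formula_str, metadata_from_cols(cols), Normal, [], {})
print(scalar_parameter_names(model.desc))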
Example 8
def test_expected_response_codegen(response_meta, family, args, expected,
                                   backend):
    formula = parse('y ~ 1')
    desc = makedesc(formula, metadata_from_cols([response_meta]), family, [],
                    {})

    def expected_response(*args):
        backend_args = [backend.from_numpy(arg) for arg in args]
        fn = backend.gen(desc).expected_response_fn
        return backend.to_numpy(fn(*backend_args))

    assert np.allclose(expected_response(*args), expected)
Example 9
def test_sampling_from_prior_smoke(N, backend, formula_str, non_real_cols,
                                   contrasts, family, priors, expected):
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    # Use full metadata for same reason given in comment in codegen test.
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))
    model = backend.gen(desc)
    df = dummy_df(cols, N, allow_non_exhaustive=True)
    data = data_from_numpy(backend, makedata(formula, df, metadata, contrasts))
    samples = backend.prior(data, model, num_samples=10, seed=None)
    assert type(samples) == Samples
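
A hypothetical concrete parametrization of the smoke test above, with an invented formula and column, the Normal family, and the Pyro backend:

formula = parse('y ~ 1 + a')
cols = expand_columns(formula, [Categorical('a', ['a1', 'a2'])])
metadata = metadata_from_cols(cols)
desc = makedesc(formula, metadata, Normal, [], {})
model = pyro_backend.gen(desc)
df = dummy_df(cols, 10, allow_non_exhaustive=True)
data = data_from_numpy(pyro_backend, makedata(formula, df, metadata, {}))
samples = pyro_backend.prior(data, model, num_samples=10, seed=None)
assert type(samples) == Samples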
Example 10
def test_scalar_param_map_consistency():
    formula = parse('y ~ 1 + x1 + (1 + x2 + b | a) + (1 + x1 | a:b)')
    non_real_cols = [
        Categorical('a', ['a1', 'a2', 'a3']),
        Categorical('b', ['b1', 'b2', 'b3']),
    ]
    cols = expand_columns(formula, non_real_cols)
    desc = makedesc(formula, metadata_from_cols(cols), Normal, [], {})
    params = parameters(desc)
    spmap = scalar_parameter_map(desc)

    # Check that each entry in the map points to a unique parameter
    # position.
    param_and_indices_set = set(param_and_indices
                                for (_, param_and_indices) in spmap)
    assert len(param_and_indices_set) == len(spmap)

    # Ensure that we have enough entries in the map to cover all of
    # the scalar parameters. (The L_i parameters have a funny status.
    # We consider them to be parameters, but not scalar parameters.
    # This was not planned; rather, things just evolved this way. It
    # does make some sense though, since we usually look at R_i
    # instead.)
    num_scalar_params = sum(
        np.prod(shape) for name, shape in params
        if not name.startswith('L_'))
    assert num_scalar_params == len(spmap)

    # Check that all indices are valid. (i.e. Within the shape of the
    # parameter.)
    for scalar_param_name, (param_name, indices) in spmap:
        ss = [shape for (name, shape) in params if name == param_name]
        assert len(ss) == 1
        param_shape = ss[0]
        assert len(indices) == len(param_shape)
        assert all(i < s for (i, s) in zip(indices, param_shape))
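
The index-validity check at the end of this test, restated with invented numbers:

# Indices (2, 0) are valid for a parameter of shape (3, 1): same rank, and
# each index lies strictly within the corresponding dimension.
indices, param_shape = (2, 0), (3, 1)
assert len(indices) == len(param_shape)
assert all(i < s for (i, s) in zip(indices, param_shape))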
Example 11
def test_coef_names(formula_str, non_real_cols, expected_names):
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    metadata = metadata_from_cols(cols)
    assert coef_names(formula.terms, metadata, {}) == expected_names
Example 12
def test_coding(formula_str, non_real_cols, expected_coding):
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    metadata = metadata_from_cols(cols)
    assert code_terms(formula.terms, metadata) == expected_coding
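
A hypothetical direct call exercising both helpers from the last two examples with an invented formula and column; it prints rather than asserting, since the exact naming and coding conventions are defined by coef_names and code_terms themselves:

formula = parse('y ~ 1 + a')
metadata = metadata_from_cols(expand_columns(formula, [Categorical('a', ['a1', 'a2'])]))
print(coef_names(formula.terms, metadata, {}))
print(code_terms(formula.terms, metadata))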