# Example 1
def test_marginals_fitted_smoke(fitargs, formula_str, non_real_cols, family,
                                contrasts):
    """Smoke test: `marginals` and `fitted` return finite arrays of the
    expected shape for a freshly fitted model."""
    num_rows = 10
    num_samples = 4
    columns = expand_columns(parse(formula_str), non_real_cols)
    df = dummy_df(columns, num_rows)
    fit = brm(formula_str, df, family, [],
              contrasts).fit(**fitargs(num_samples))

    # `marginals` gives one row per scalar coefficient, nine stats per row.
    marg = fit.marginals().array
    assert marg.shape == (len(scalar_parameter_names(fit.model_desc)), 9)
    # The final two stats (n_eff, r_hat) are frequently nan with so few
    # samples, so finiteness is only required of the others.
    assert np.all(np.isfinite(marg[:, :-2]))

    def check(a, shape):
        # All entries finite, and the array has the expected shape.
        assert np.all(np.isfinite(a))
        assert a.shape == shape

    expected_shape = (num_samples, num_rows)
    check(fit.fitted(), expected_shape)
    check(fit.fitted('linear'), expected_shape)
    check(fit.fitted('response'), expected_shape)
    check(fit.fitted('sample'), expected_shape)
    check(fit.fitted(data=dummy_df(columns, num_rows)), expected_shape)
# Example 2
def test_numpyro_codegen(N, formula_str, non_real_cols, contrasts, family,
                         priors, expected):
    """Check that the numpyro backend generates a model whose sample sites,
    distribution families, and parameter values all match `expected`."""
    # Make dummy data.
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))

    # Generate model function and data.
    modelfn = numpyro_backend.gen(desc).fn

    df = dummy_df(cols, N)
    data = data_from_numpy(numpyro_backend,
                           makedata(formula, df, metadata, contrasts))

    # Check sample sites: trace the model with a fixed seed and compare
    # the non-observed site names against the expected set.
    rng = random.PRNGKey(0)
    trace = numpyro.trace(numpyro.seed(modelfn, rng)).get_trace(**data)
    expected_sites = [site for (site, _, _) in expected]
    sample_sites = [
        name for name, node in trace.items() if not node['is_observed']
    ]
    assert set(sample_sites) == set(expected_sites)
    for (site, family_name, maybe_params) in expected:
        # numpyro names the LKJ prior differently.
        numpyro_family_name = dict(LKJ='LKJCholesky').get(
            family_name, family_name)
        fn = trace[site]['fn']
        params = maybe_params or default_params[family_name]
        assert type(fn).__name__ == numpyro_family_name
        for (name, expected_val) in params.items():
            if family_name == 'LKJ':
                # The LKJ parameter is called `eta` here but
                # `concentration` in numpyro.
                assert name == 'eta'
                name = 'concentration'
            # Idiomatic attribute access; `fn.__getattribute__(name)` did
            # the same thing but dunder methods shouldn't be called directly.
            val = getattr(fn, name)
            assert_equal(val._value, np.broadcast_to(expected_val, val.shape))
# Example 3
def test_parameter_shapes(formula_str, non_real_cols, contrasts, family,
                          priors, expected, fitargs):
    """Check that every parameter drawn from the prior has the shape the
    model description declares, with and without the chain dimension."""
    num_rows = 5
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    df = dummy_df(cols, num_rows, allow_non_exhaustive=True)

    # Define the model (from full column metadata) and draw a single
    # sample from the prior.
    metadata = metadata_from_cols(cols)
    model = define_model(formula_str, metadata, family, priors,
                         contrasts).gen(fitargs['backend'])
    encoded = model.encode(df)
    fit = model.run_algo('prior', encoded, num_samples=1, seed=None)

    num_chains = fitargs.get('num_chains', 1)

    # Check parameter sizes.
    for param in parameters(fit.model_desc):
        # Each chain collects exactly one sample in every case.
        flat = fit.get_param(param.name)
        assert flat.shape == (num_chains, ) + param.shape
        # Retaining the chain dimension adds an axis of length one
        # (one sample per chain).
        chained = fit.get_param(param.name, True)
        assert chained.shape == (num_chains, 1) + param.shape
# Example 4
def test_pyro_codegen(N, formula_str, non_real_cols, contrasts, family, priors,
                      expected):
    """Check that the pyro backend generates a model whose sample sites,
    distribution families, and parameter values all match `expected`."""
    # Make dummy data.
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    # Generate the model from the column information rather than from
    # the metadata extracted from `df`. Since N is small, the metadata
    # extracted from `df` might lose information compared to the full
    # metadata derived from `cols` (e.g. levels of a categorical
    # column) leading to unexpected results. e.g. Missing levels might
    # cause correlations not to be modelled, even though they ought
    # to be given the full metadata.
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))

    # Generate model function and data.
    modelfn = pyro_backend.gen(desc).fn

    df = dummy_df(cols, N)
    data = data_from_numpy(pyro_backend,
                           makedata(formula, df, metadata, contrasts))

    # Check sample sites: everything stochastic except the observation.
    trace = poutine.trace(modelfn).get_trace(**data)
    expected_sites = [site for (site, _, _) in expected]
    assert set(trace.stochastic_nodes) - {'obs'} == set(expected_sites)
    for (site, family_name, maybe_params) in expected:
        # pyro names the LKJ prior differently.
        pyro_family_name = dict(LKJ='LKJCorrCholesky').get(
            family_name, family_name)
        fn = unwrapfn(trace.nodes[site]['fn'])
        params = maybe_params or default_params[family_name]
        assert type(fn).__name__ == pyro_family_name
        for (name, expected_val) in params.items():
            # Idiomatic attribute access; `fn.__getattribute__(name)` did
            # the same thing but dunder methods shouldn't be called directly.
            val = getattr(fn, name)
            assert_equal(val, torch.tensor(expected_val).expand(val.shape))
# Example 5
def test_mu_correctness(formula_str, cols, backend, expected):
    """Compare the linear predictor computed by `fitted` against the
    reference computation supplied via `expected`."""
    df = dummy_df(expand_columns(parse(formula_str), cols), 10)
    fit = brm(formula_str, df).prior(num_samples=1, backend=backend)
    # Exactly one sample was drawn; pick it out.
    mu_actual = fit.fitted(what='linear')[0]
    # `expected` is assumed to return a data frame.
    mu_expected = expected(df, fit.get_scalar_param).to_numpy(np.float32)
    assert np.allclose(mu_actual, mu_expected)
# Example 6
def test_fitted_on_new_data(N2):
    """`fitted` must apply the stored contrast coding when encoding new
    data."""
    num_samples = 4
    num_rows = 10
    formula_str = 'y ~ 1 + a'
    # Using this contrast means `a` is coded as two columns rather
    # than (the default) one. Because of this, it's crucial that
    # `fitted` uses the contrast when coding *new data*. This test
    # would fail if that didn't happen.
    contrasts = {'a': np.array([[-1, -1], [1, 1]])}
    cols = expand_columns(parse(formula_str), [Categorical('a', ['a0', 'a1'])])
    df = dummy_df(cols, num_rows)
    fit = brm(formula_str, df, Normal,
              contrasts=contrasts).fit(iter=num_samples, backend=pyro_backend)
    new_data = dummy_df(cols, N2, allow_non_exhaustive=True)
    result = fit.fitted(data=new_data)
    assert np.all(np.isfinite(result))
    assert result.shape == (num_samples, N2)
# Example 7
def test_expectation_correctness(cols, family, expected, backend):
    """Check that `fitted` computes the correct expectation from `mu`."""
    formula_str = 'y ~ 1 + x'
    df = dummy_df(expand_columns(parse(formula_str), cols), 10)
    model = brm(formula_str, df, family=family)
    fit = model.prior(num_samples=1, backend=backend)
    actual = fit.fitted(what='expectation')[0]
    # We assume (since it's tested elsewhere) that `mu` is computed
    # correctly by `fitted`. So given that, we check that `fitted`
    # computes the correct expectation.
    reference = expected(fit.fitted('linear')[0])
    assert np.allclose(actual, reference)
# Example 8
def test_sampling_from_prior_smoke(N, backend, formula_str, non_real_cols,
                                   contrasts, family, priors, expected):
    """Smoke test: sampling from the prior runs and yields `Samples`."""
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    # Use full metadata for same reason given in comment in codegen test.
    metadata = metadata_from_cols(cols)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))
    model = backend.gen(desc)
    df = dummy_df(cols, N, allow_non_exhaustive=True)
    data = data_from_numpy(backend, makedata(formula, df, metadata, contrasts))
    samples = backend.prior(data, model, num_samples=10, seed=None)
    # `isinstance` is the idiomatic type check (was `type(...) == Samples`).
    assert isinstance(samples, Samples)
# Example 9
def test_marginals_fitted_smoke(fitargs, formula_str, non_real_cols, family,
                                contrasts, N2):
    """Smoke test `marginals` and `fitted`, including `fitted` on new
    data of a different length."""
    num_rows = 10
    num_samples = 4
    cols = expand_columns(parse(formula_str), non_real_cols)
    df = dummy_df(cols, num_rows)
    print(df)
    model = defm(formula_str, df, family, contrasts=contrasts)
    fit = model.fit(**fitargs(num_samples))

    def check(a, shape):
        # All entries finite, with the expected shape.
        assert np.all(np.isfinite(a))
        assert a.shape == shape

    n_coefs = len(scalar_parameter_names(fit.model_desc))
    check(marginals(fit).array, (n_coefs, 7))  # num coefs x num stats
    expected_shape = (num_samples, num_rows)
    check(fitted(fit), expected_shape)
    check(fitted(fit, 'linear'), expected_shape)
    check(fitted(fit, 'response'), expected_shape)
    check(fitted(fit, 'sample'), expected_shape)
    # Applying `fitted` to new data.
    df2 = dummy_df(cols, N2)
    print(df2)
    check(fitted(fit, data=df2), (num_samples, N2))
# Example 10
def test_parameter_shapes(formula_str, non_real_cols, contrasts, family,
                          priors, expected, fitargs):
    """Check that each sampled parameter has its declared shape."""
    # Make dummy data.
    num_rows = 5
    formula = parse(formula_str)
    cols = expand_columns(formula, non_real_cols)
    df = dummy_df(cols, num_rows)

    # Define model, and generate a single posterior sample.
    model = defm(formula_str, df, family, priors, contrasts)
    fit = model.fit(**fitargs)

    # Check parameter sizes.
    for parameter in parameters(model.desc):
        samples = get_param(fit, parameter.name)
        # The test spec. should only have generated one sample.
        assert samples.shape[0] == 1
        # The first (and only) sample must match the declared shape.
        first = samples[0]
        assert first.shape == parameter.shape