Exemple #1
0
def test_designmatrix(formula_str, df, metadata_cols, contrasts, expected):
    """Check that ``makedata`` produces the expected arrays for a formula.

    Metadata is built from ``metadata_cols`` when it is given and from
    ``df`` otherwise. The result must expose exactly the keys of
    ``expected``, and each entry must match in both dtype and value.
    """
    if metadata_cols is None:
        metadata = metadata_from_df(df)
    else:
        metadata = metadata_from_cols(metadata_cols)

    formula = parse(formula_str)
    data = makedata(formula, df, metadata, contrasts)

    # No missing and no surplus entries.
    assert set(data.keys()) == set(expected.keys())

    for name in expected.keys():
        # Compare dtypes explicitly, separately from the value comparison.
        assert data[name].dtype == expected[name].dtype
        assert_equal(data[name], expected[name])
Exemple #2
0
def brm(formula_str, df, family=None, priors=None, contrasts=None):
    """
    Defines a model and encodes data in design matrices.

    By default categorical columns are coded using dummy coding.

    :param formula_str: An lme4 formula. e.g. ``'y ~ 1 + x'``. See
                        :class:`~brmp.formula.Formula` for a description
                        of the supported syntax.
    :type formula_str: str
    :param df: A data frame containing columns for each of the variables in
               ``formula_str``.
    :type df: pandas.DataFrame
    :param family: The model's response family.
    :type family: brmp.family.Family
    :param priors: A list of :class:`~brmp.priors.Prior` instances describing the model's priors.
    :type priors: list
    :param contrasts: A dictionary that optionally maps variable names to contrast matrices describing
                      custom encodings of categorical variables. Each contrast matrix should be
                      a :class:`~numpy.ndarray` of shape ``(L, C)``, where ``L`` is the number of levels
                      present in the categorical variable and ``C`` is the length of the desired
                      encoding.
    :type contrasts: dict
    :return: A wrapper around the model description and the design matrices.
    :rtype: brmp.ModelAndData
    :raises AssertionError: If an argument is not an instance of its
                            documented type. These checks are ``assert``
                            statements, so they are skipped when Python runs
                            with ``-O``.

    Example::

      df = pd.DataFrame({'y': [1., 2.], 'x': [.5, 0.]})
      model = brm('y ~ 1 + x', df)

    """
    # Use ``isinstance`` rather than exact type equality so that subclasses
    # (e.g. a ``DataFrame`` subclass or an ``OrderedDict``) are accepted.
    assert isinstance(formula_str, str), 'formula_str must be a str'
    assert isinstance(df, pd.DataFrame), 'df must be a pandas.DataFrame'
    assert family is None or isinstance(family, Family), \
        'family must be a Family or None'
    assert priors is None or isinstance(priors, list), \
        'priors must be a list or None'
    assert contrasts is None or isinstance(contrasts, dict), \
        'contrasts must be a dict or None'

    metadata = metadata_from_df(df)
    model = define_model(formula_str, metadata, family, priors, contrasts)
    data = model.encode(df)
    return ModelAndData(model, df, data)
Exemple #3
0
def defm(formula_str, df, family=None, priors=None, contrasts=None):
    """
    Parses a formula and builds both a model description and its data.

    :param formula_str: The model formula, e.g. ``'y ~ 1 + x'``.
    :type formula_str: str
    :param df: The data frame from which metadata and data are derived.
    :type df: pandas.DataFrame
    :param family: The response family. Falls back to ``Normal`` when
                   omitted.
    :param priors: A list of priors. Falls back to an empty list when
                   omitted.
    :type priors: list
    :param contrasts: A dictionary whose values are two-dimensional
                      :class:`~numpy.ndarray` contrast matrices. Falls back
                      to an empty dictionary when omitted.
    :type contrasts: dict
    :return: A ``DefmResult`` holding the parsed formula, the metadata, the
             contrasts, the model description and the encoded data.
    :raises AssertionError: If an argument has an unexpected type, or a
                            contrast is not a 2-d ``numpy.ndarray``. These
                            checks are ``assert`` statements, so they are
                            skipped when Python runs with ``-O``.
    """
    # Use ``isinstance`` rather than exact type equality so that subclasses
    # (e.g. a ``DataFrame`` subclass or an ``OrderedDict``) are accepted.
    assert isinstance(formula_str, str), 'formula_str must be a str'
    assert isinstance(df, pd.DataFrame), 'df must be a pandas.DataFrame'
    assert family is None or isinstance(family, Family), \
        'family must be a Family or None'
    assert priors is None or isinstance(priors, list), \
        'priors must be a list or None'
    assert contrasts is None or isinstance(contrasts, dict), \
        'contrasts must be a dict or None'

    family = family or Normal
    priors = priors or []
    contrasts = contrasts or {}

    # TODO: Consider accepting nested arrays as well as numpy arrays.
    # (If we do, convert to numpy arrays here in `defm`?)
    # The match on ``np.ndarray`` is deliberately exact: subclasses such as
    # ``np.matrix`` have different indexing/arithmetic semantics, and the
    # code that consumes the contrasts is not visible from here.
    assert all(
        type(val) is np.ndarray and val.ndim == 2
        for val in contrasts.values()), \
        'each contrast must be a 2-d numpy.ndarray'

    formula = parse(formula_str)
    # Perhaps design matrices ought to always have metadata (i.e.
    # column names) associated with them, as in Patsy.
    metadata = metadata_from_df(df)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))
    data = makedata(formula, df, metadata, contrasts)
    return DefmResult(formula, metadata, contrasts, desc, data)