def test_designmatrix(formula_str, df, metadata_cols, contrasts, expected):
    """Check that ``makedata`` produces the expected entries for a formula.

    The formula is parsed with ``parse`` and encoded with ``makedata``. The
    result must have exactly the keys of ``expected``, and every entry must
    match the expected one in both ``dtype`` and value (as judged by
    ``assert_equal``).

    :param formula_str: The formula to parse.
    :param df: The data frame to encode.
    :param metadata_cols: Explicit column metadata, or ``None`` to derive the
                          metadata from ``df``.
    :param contrasts: Contrasts forwarded unchanged to ``makedata``.
    :param expected: Mapping from key to the expected encoded value.
    """
    # Explicit column metadata wins; otherwise fall back to inspecting `df`.
    if metadata_cols is None:
        metadata = metadata_from_df(df)
    else:
        metadata = metadata_from_cols(metadata_cols)

    formula = parse(formula_str)
    data = makedata(formula, df, metadata, contrasts)

    # Same key set first, so the per-key lookups below cannot miss.
    assert set(data.keys()) == set(expected.keys())
    for name in expected.keys():
        actual, wanted = data[name], expected[name]
        assert actual.dtype == wanted.dtype
        assert_equal(actual, wanted)
def brm(formula_str, df, family=None, priors=None, contrasts=None):
    """
    Defines a model and encodes data in design matrices.

    By default categorical columns are coded using dummy coding.

    :param formula_str: An lme4 formula. e.g. ``'y ~ 1 + x'``. See
                        :class:`~brmp.formula.Formula` for a description
                        of the supported syntax.
    :type formula_str: str
    :param df: A data frame containing columns for each of the variables in
               ``formula_str``.
    :type df: pandas.DataFrame
    :param family: The model's response family.
    :type family: brmp.family.Family
    :param priors: A list of :class:`~brmp.priors.Prior` instances describing
                   the model's priors.
    :type priors: list
    :param contrasts: A dictionary that optionally maps variable names to
                      contrast matrices describing custom encodings of
                      categorical variables. Each contrast matrix should be
                      a :class:`~numpy.ndarray` of shape ``(L, C)``, where
                      ``L`` is the number of levels present in the
                      categorical variable and ``C`` is the length of the
                      desired encoding.
    :type contrasts: dict
    :return: A wrapper around the model description and the design matrices.
    :rtype: brmp.ModelAndData
    :raises AssertionError: If an argument is not of the documented type.

    Example::

      df = pd.DataFrame({'y': [1., 2.], 'x': [.5, 0.]})
      model = brm('y ~ 1 + x', df)

    """
    # Validate with isinstance so that subclasses (e.g. a DataFrame subclass
    # or an OrderedDict of contrasts) are accepted. These stay as asserts,
    # rather than raising TypeError, so that callers continue to see
    # AssertionError on bad input.
    assert isinstance(formula_str, str), \
        'formula_str must be a str, got {}'.format(
            type(formula_str).__name__)
    assert isinstance(df, pd.DataFrame), \
        'df must be a pandas.DataFrame, got {}'.format(type(df).__name__)
    assert family is None or isinstance(family, Family), \
        'family must be a Family or None, got {}'.format(
            type(family).__name__)
    assert priors is None or isinstance(priors, list), \
        'priors must be a list or None, got {}'.format(
            type(priors).__name__)
    assert contrasts is None or isinstance(contrasts, dict), \
        'contrasts must be a dict or None, got {}'.format(
            type(contrasts).__name__)

    # The model is defined from column metadata only; the data frame itself
    # is encoded afterwards by the resulting model.
    metadata = metadata_from_df(df)
    model = define_model(formula_str, metadata, family, priors, contrasts)
    data = model.encode(df)
    return ModelAndData(model, df, data)
def defm(formula_str, df, family=None, priors=None, contrasts=None):
    """
    Parses a formula and builds both a model description and encoded data.

    The formula is parsed with ``parse``, column metadata is derived from
    ``df`` with ``metadata_from_df``, the description is built with
    ``makedesc`` and the data is encoded with ``makedata``.

    :param formula_str: The formula to parse.
    :type formula_str: str
    :param df: The data frame from which metadata is derived and which is
               encoded.
    :type df: pandas.DataFrame
    :param family: The response family. ``Normal`` is used when omitted.
    :param priors: A list of priors. An empty list is used when omitted.
    :type priors: list
    :param contrasts: A dictionary whose values are two dimensional
                      :class:`~numpy.ndarray` contrast matrices. An empty
                      dictionary is used when omitted.
    :type contrasts: dict
    :return: A ``DefmResult`` holding the parsed formula, the metadata, the
             contrasts, the model description and the encoded data.
    :raises AssertionError: If an argument is not of the expected type, or
                            a contrast matrix is not a 2-d numpy array.
    """
    # Validate with isinstance so that subclasses (e.g. a DataFrame subclass
    # or an OrderedDict of contrasts) are accepted. These stay as asserts,
    # rather than raising TypeError, so that callers continue to see
    # AssertionError on bad input.
    assert isinstance(formula_str, str), \
        'formula_str must be a str, got {}'.format(
            type(formula_str).__name__)
    assert isinstance(df, pd.DataFrame), \
        'df must be a pandas.DataFrame, got {}'.format(type(df).__name__)
    assert family is None or isinstance(family, Family), \
        'family must be a Family or None, got {}'.format(
            type(family).__name__)
    assert priors is None or isinstance(priors, list), \
        'priors must be a list or None, got {}'.format(
            type(priors).__name__)
    assert contrasts is None or isinstance(contrasts, dict), \
        'contrasts must be a dict or None, got {}'.format(
            type(contrasts).__name__)

    family = family or Normal
    priors = priors or []
    contrasts = contrasts or {}

    # TODO: Consider accepting nested arrays as well as numpy arrays.
    # (If we do, convert to numpy arrays here in `defm`?)
    # NOTE(review): this remains an exact type match on purpose. ndarray
    # subclasses such as np.matrix index differently from plain arrays, and
    # the encoding code is not visible from here -- confirm before relaxing.
    assert all(type(val) is np.ndarray and val.ndim == 2
               for val in contrasts.values()), \
        'each contrast matrix must be a 2-d numpy.ndarray'

    formula = parse(formula_str)

    # Perhaps design matrices ought to always have metadata (i.e.
    # column names) associated with them, as in Patsy.
    metadata = metadata_from_df(df)
    desc = makedesc(formula, metadata, family, priors,
                    code_lengths(contrasts))
    data = makedata(formula, df, metadata, contrasts)
    return DefmResult(formula, metadata, contrasts, desc, data)