Example #1
0
def test_term_init(diabetes_data):
    """Check the attributes of a ``Term`` built for the ``BMI`` column."""
    bmi_info = design_matrices("BMI", diabetes_data).common.terms_info["BMI"]
    bmi_term = Term("BMI", bmi_info, diabetes_data["BMI"])

    # The term keeps the name it was given.
    assert bmi_term.name == "BMI"

    # Neither flag may be set for this term.
    assert not bmi_term.categorical
    assert not bmi_term.group_specific

    # Levels are populated and the data is one-dimensional.
    # NOTE(review): 442 is presumably the row count of the fixture -- confirm.
    assert bmi_term.levels is not None
    assert bmi_term.data.shape == (442,)
Example #2
0
    def __init__(
        self,
        formula=None,
        data=None,
        family="gaussian",
        priors=None,
        link=None,
        categorical=None,
        dropna=False,
        auto_scale=True,
        automatic_priors="default",
        noncentered=True,
        priors_cor=None,
        taylor=None,
    ):
        """Set up the model from a formula and a data set.

        Parameters
        ----------
        formula : str
            Model formula. Required even though it defaults to ``None``.
        data : str or pandas.DataFrame
            Either a path readable by ``pandas.read_csv`` or a ``DataFrame``. A ``DataFrame``
            passed by the caller is never modified: when columns need to be converted to
            ``category`` the conversion is done on a copy.
        family : str or Family
            Forwarded to ``extract_family_prior`` and ``_add_response``.
        priors : dict
            Optional priors. Deep-copied, then forwarded to ``_add_common`` and
            ``_add_group_specific``.
        link : str
            Forwarded to ``_add_response``.
        categorical : str or list
            Column name(s) to convert to ``category`` in addition to the object columns,
            which are always converted.
        dropna : bool
            When ``True`` the design matrices are built with the ``"drop"`` action for missing
            values, otherwise with ``"error"``.
        auto_scale, automatic_priors, noncentered, taylor
            Stored on the instance under the same names; not otherwise used here.
        priors_cor : dict
            When truthy, forwarded to ``_add_priors_cor``.

        Raises
        ------
        ValueError
            If ``data`` is neither a string nor a ``DataFrame``, if ``formula`` is ``None``,
            if the formula has no response, or if the name of a family prior matches the name
            of a common term.
        """
        # attributes that are set later
        self.terms = {}
        self.built = False  # build()
        self._backend_name = None

        # build() will loop over this, calling _set_priors()
        self._added_priors = {}

        self._design = None
        self.formula = None
        self.response = None  # _add_response()
        self.family = None  # _add_response()
        self.backend = None  # _set_backend()
        self.priors_cor = {}  # _add_priors_cor()

        self.auto_scale = auto_scale
        self.dropna = dropna
        self.taylor = taylor
        self.noncentered = noncentered

        # Read and clean data
        loaded_from_file = isinstance(data, str)
        if loaded_from_file:
            data = pd.read_csv(data, sep=None, engine="python")
        elif not isinstance(data, pd.DataFrame):
            raise ValueError(
                "data must be a string with a path to a .csv or a pandas DataFrame."
            )

        # Object columns are converted to category by default; columns named in
        # `categorical` are converted on request (this can also be done within the
        # formula using C()).
        object_columns = data.select_dtypes(["object"]).columns
        has_object_columns = len(object_columns) > 0
        if has_object_columns or categorical is not None:
            # Work on a private copy so the caller's DataFrame keeps its dtypes. A fresh
            # copy also cannot trigger SettingWithCopyWarning, so no pandas-internal flag
            # has to be touched. A frame we just read from disk is already ours.
            if not loaded_from_file:
                data = data.copy()
            if has_object_columns:
                data[object_columns] = data[object_columns].apply(
                    lambda col: col.astype("category")
                )
            if categorical is not None:
                cats = listify(categorical)
                data[cats] = data[cats].apply(lambda col: col.astype("category"))

        self.data = data

        # Handle priors
        if priors is None:
            priors = {}
        else:
            # Copied so later changes never leak back into the caller's dictionary.
            priors = deepcopy(priors)

        self.automatic_priors = automatic_priors

        # Obtain design matrices and related objects.
        na_action = "drop" if dropna else "error"
        if formula is not None:
            self.formula = formula
            # NOTE(review): eval_env=1 looks frame-depth dependent, so this call is kept
            # directly inside __init__ rather than moved into a helper -- confirm.
            self._design = design_matrices(formula,
                                           data,
                                           na_action,
                                           eval_env=1)
        else:
            raise ValueError(
                "Can't instantiate a model without a model formula.")

        if self._design.response is not None:
            family_prior = extract_family_prior(family, priors)
            if family_prior and self._design.common:
                # A family prior must not share its name with a common term.
                conflict = [
                    name for name in family_prior
                    if name in self._design.common.terms_info
                ]
                if conflict:
                    raise ValueError(
                        f"The prior name for {', '.join(conflict)} conflicts with the name of a "
                        "parameter in the response distribution.\n"
                        "Please rename the term(s) to prevent an unexpected behaviour."
                    )
            self._add_response(self._design.response, family, link,
                               family_prior)
        else:
            raise ValueError(
                "No outcome variable is set! "
                "Please specify an outcome variable using the formula interface."
            )

        if self._design.common:
            self._add_common(self._design.common, priors)

        if self._design.group:
            self._add_group_specific(self._design.group, priors)

        if priors_cor:
            self._add_priors_cor(priors_cor)

        # Build priors
        self._build_priors()
Example #3
0
def test_parser_invalid_assignment_target():
    """A literal on the left of ``=`` inside a call must raise ``ParseError``."""

    def f(x):  # the formula string below refers to this name
        return x

    frame = pd.DataFrame({"y": [1, 2], "x": [1, 2]})
    with pytest.raises(ParseError, match="Invalid assignment target."):
        design_matrices("y ~ f(1=x)", frame)
Example #4
0
def test_unclosed_function_call():
    """A call left without its closing parenthesis must raise ``ParseError``."""

    def f(x):  # the formula string below refers to this name
        return x

    frame = pd.DataFrame({"y": [1, 2], "x": [1, 2]})
    with pytest.raises(ParseError, match="after arguments"):
        design_matrices("y ~ f(x", frame)
Example #5
0
    def fit(
        self,
        formula=None,
        priors=None,
        family="gaussian",
        link=None,
        run=True,
        categorical=None,
        omit_offsets=True,
        backend="pymc",
        **kwargs,
    ):
        """Fit the model using the specified backend.

        Parameters
        ----------
        formula : str
            A model description written in model formula language. When ``None``, the design
            already stored on the model is reused.
        priors : dict
            Optional specification of priors for one or more terms. A dictionary where the keys are
            the names of terms in the model, 'common' or 'group_specific' and the values are either
            instances of class ``Prior`` or ``int``, ``float``, or ``str`` that specify the
            width of the priors on a standardized scale. The dictionary is deep-copied.
        family : str or Family
            A specification of the model family (analogous to the family object in R). Either
            a string, or an instance of class ``priors.Family``. If a string is passed, a family
            with the corresponding name must be defined in the defaults loaded at ``Model``
            initialization. Valid pre-defined families are ``'gaussian'``, ``'bernoulli'``,
            ``'poisson'``, ``'gamma'``, ``'wald'``, and ``'negativebinomial'``.
            Defaults to ``'gaussian'``. When a ``Family`` instance is given, only its ``name``
            is forwarded.
        link : str
            The model link function to use. Can be either a string (must be one of the options
            defined in the current backend; typically this will include at least ``'identity'``,
            ``'logit'``, ``'inverse'``, and ``'log'``), or a callable that takes a 1D ndarray or
            theano tensor as the sole argument and returns one with the same shape.
        run : bool
            Whether or not to immediately begin fitting the model once any set up of passed
            arguments is complete. Defaults to ``True``.
        categorical : str or list
            The names of any variables to treat as categorical. Can be either a single variable
            name, or a list of names. If categorical is ``None``, the data type of the columns in
            the ``DataFrame`` will be used to infer handling. In cases where numeric columns are
            to be treated as categoricals (e.g., group specific factors coded as numerical IDs),
            explicitly passing variable names via this argument is recommended.
        omit_offsets: bool
            Omits offset terms in the ``InferenceData`` object when the model includes group
            specific effects. Defaults to ``True``.
        backend : str
            The name of the backend to use. Currently only ``'pymc'`` backend is supported.
            ``None`` selects the backend name stored on the model, or ``'pymc'`` if there is none.
        **kwargs
            Forwarded to the backend's ``run`` method.

        Returns
        -------
        The value returned by the backend's ``run`` method, or ``None`` when ``run`` is ``False``.

        Raises
        ------
        ValueError
            If no formula is passed and the model has no stored design.
        """
        # Never work on the caller's dictionary.
        priors = {} if priors is None else deepcopy(priors)

        frame = self.data
        # alter this pandas flag to avoid false positive SettingWithCopyWarnings
        frame._is_copy = False  # pylint: disable=protected-access

        # Explicit conversion to category happens on a local copy; the same can be
        # achieved within the formula using C().
        if categorical is not None:
            frame = frame.copy()
            cat_cols = listify(categorical)
            frame[cat_cols] = frame[cat_cols].apply(lambda col: col.astype("category"))

        missing_policy = "drop" if self.dropna else "error"
        if formula is None:
            if self._design is None:
                raise ValueError(
                    "Can't fit a model without a description of the model.")
        else:
            # Only reset self.terms and self.response (e.g., keep priors)
            self.terms = OrderedDict()
            self.response = None
            # NOTE(review): eval_env=1 looks frame-depth dependent, so this call is kept
            # directly inside fit() rather than moved into a helper -- confirm.
            self._design = design_matrices(formula, frame, missing_policy, eval_env=1)

        if self._design.response is not None:
            if isinstance(family, Family):
                family_name = family.name
            else:
                family_name = family
            self._add_response(self._design.response, family=family_name, link=link)

        if self._design.common:
            self._add_common(self._design.common, priors)

        if self._design.group:
            self._add_group_specific(self._design.group, priors)

        if backend is None:
            if self._backend_name is None:
                backend = "pymc"
            else:
                backend = self._backend_name

        if not run:
            # Only remember which backend was requested; nothing is built or sampled.
            self._backend_name = backend
            return None

        # (Re)build when nothing has been built yet or a different backend was requested.
        if not self.built or backend != self._backend_name:
            self._build(backend)
        return self.backend.run(omit_offsets=omit_offsets, **kwargs)