def test_term_init(diabetes_data):
    """Build a ``Term`` for the ``BMI`` column and verify its initial attributes."""
    column = "BMI"
    bmi_info = design_matrices(column, diabetes_data).common.terms_info[column]
    bmi_term = Term(column, bmi_info, diabetes_data[column])

    # The term keeps the name it was given.
    assert bmi_term.name == "BMI"
    # Both flags are expected to be falsy for this term.
    assert not bmi_term.categorical
    assert not bmi_term.group_specific
    # ``levels`` must be populated, and the data must be one-dimensional
    # with 442 entries.
    assert bmi_term.levels is not None
    assert bmi_term.data.shape == (442,)
def __init__(
    self,
    formula=None,
    data=None,
    family="gaussian",
    priors=None,
    link=None,
    categorical=None,
    dropna=False,
    auto_scale=True,
    automatic_priors="default",
    noncentered=True,
    priors_cor=None,
    taylor=None,
):
    """Set up the model from a formula and a data set.

    Parameters
    ----------
    formula : str
        Model formula handed to ``design_matrices``. Required in practice:
        a ``ValueError`` is raised when it is ``None``.
    data : pandas.DataFrame or str
        A data frame, or a path that is read with ``pandas.read_csv``
        (``sep=None``, ``engine="python"``). A data frame passed by the caller
        is never modified: any dtype conversion is done on a private copy.
    family : str or Family
        Forwarded to ``extract_family_prior`` and ``_add_response``.
    priors : dict
        Optional priors. Deep-copied before use, so the caller's dictionary is
        left untouched.
    link : str
        Forwarded to ``_add_response``.
    categorical : str or list
        Column name(s) to cast to ``category`` in addition to the columns with
        ``object`` dtype, which are always cast.
    dropna : bool
        When true, ``design_matrices`` is called with ``na_action="drop"``;
        otherwise with ``na_action="error"``.
    auto_scale, automatic_priors, noncentered, taylor
        Stored on the instance under the same names.
    priors_cor
        When truthy, forwarded to ``_add_priors_cor``.

    Raises
    ------
    ValueError
        If ``data`` is neither a string nor a ``DataFrame``, if ``formula`` is
        missing, if the formula has no response, or if a prior for the response
        distribution shares its name with a common term.
    """
    # Attributes that are set later
    self.terms = {}
    self.built = False  # build()
    self._backend_name = None

    # build() will loop over this, calling _set_priors()
    self._added_priors = {}

    self._design = None
    self.formula = None
    self.response = None  # _add_response()
    self.family = None  # _add_response()
    self.backend = None  # _set_backend()
    self.priors_cor = {}  # _add_priors_cor()

    self.auto_scale = auto_scale
    self.dropna = dropna
    self.taylor = taylor
    self.noncentered = noncentered

    # Read and clean data
    if isinstance(data, str):
        data = pd.read_csv(data, sep=None, engine="python")
    elif not isinstance(data, pd.DataFrame):
        raise ValueError(
            "data must be a string with a path to a .csv or a pandas DataFrame."
        )

    # Object columns are converted to category by default; columns listed in
    # `categorical` are converted explicitly (this can also be done within the
    # formula using C()).
    obj_cols = data.select_dtypes(["object"]).columns
    cats = None if categorical is None else listify(categorical)
    if len(obj_cols) > 0 or cats is not None:
        # Convert on a private copy so the caller's DataFrame keeps its dtypes.
        # Assigning into a fresh copy also makes the old `_is_copy` workaround
        # for SettingWithCopyWarning unnecessary.
        data = data.copy()
        if len(obj_cols) > 0:
            data[obj_cols] = data[obj_cols].apply(lambda x: x.astype("category"))
        if cats is not None:
            data[cats] = data[cats].apply(lambda x: x.astype("category"))

    self.data = data

    # Handle priors
    priors = {} if priors is None else deepcopy(priors)

    self.automatic_priors = automatic_priors

    # Obtain design matrices and related objects.
    if formula is None:
        raise ValueError("Can't instantiate a model without a model formula.")

    na_action = "drop" if dropna else "error"
    self.formula = formula
    # NOTE(review): `eval_env=1` presumably counts stack frames relative to this
    # call site -- keep this call directly in `__init__` rather than moving it
    # into a helper; confirm against the `design_matrices` documentation.
    self._design = design_matrices(formula, data, na_action, eval_env=1)

    if self._design.response is None:
        raise ValueError(
            "No outcome variable is set! "
            "Please specify an outcome variable using the formula interface."
        )

    family_prior = extract_family_prior(family, priors)
    if family_prior and self._design.common:
        # A prior meant for the response distribution must not share its name
        # with a common term of the model.
        conflict = [
            name for name in family_prior if name in self._design.common.terms_info
        ]
        if conflict:
            raise ValueError(
                f"The prior name for {', '.join(conflict)} conflicts with the name of a "
                "parameter in the response distribution.\n"
                "Please rename the term(s) to prevent an unexpected behaviour."
            )
    self._add_response(self._design.response, family, link, family_prior)

    if self._design.common:
        self._add_common(self._design.common, priors)

    if self._design.group:
        self._add_group_specific(self._design.group, priors)

    if priors_cor:
        self._add_priors_cor(priors_cor)

    # Build priors
    self._build_priors()
def test_parser_invalid_assignment_target():
    """A call argument of the form ``1=x`` must be rejected with a ``ParseError``."""

    # Kept under the name ``f`` because the formula below refers to it by name
    # (presumably looked up from this frame -- verify against design_matrices).
    def f(x):
        return x

    frame = pd.DataFrame({"y": [1, 2], "x": [1, 2]})
    bad_formula = "y ~ f(1=x)"

    with pytest.raises(ParseError, match="Invalid assignment target."):
        design_matrices(bad_formula, frame)
def test_unclosed_function_call():
    """A call that is never closed (``f(x``) must be rejected with a ``ParseError``."""

    # Kept under the name ``f`` because the formula below refers to it by name
    # (presumably looked up from this frame -- verify against design_matrices).
    def f(x):
        return x

    frame = pd.DataFrame({"y": [1, 2], "x": [1, 2]})
    unclosed_formula = "y ~ f(x"

    with pytest.raises(ParseError, match="after arguments"):
        design_matrices(unclosed_formula, frame)
def fit(
    self,
    formula=None,
    priors=None,
    family="gaussian",
    link=None,
    run=True,
    categorical=None,
    omit_offsets=True,
    backend="pymc",
    **kwargs,
):
    """Set up the model terms and, optionally, run the chosen backend.

    Parameters
    ----------
    formula : str
        A model description written in model formula language. When given,
        ``self.terms`` and ``self.response`` are reset and the design matrices
        are rebuilt from it; when omitted, the design already stored on the
        model is reused.
    priors : dict
        Optional specification of priors for one or more terms. A dictionary
        where the keys are the names of terms in the model, 'common' or
        'group_specific' and the values are either instances of class ``Prior``
        or ``int``, ``float``, or ``str`` that specify the width of the priors
        on a standardized scale. The dictionary is deep-copied before use.
    family : str or Family
        A specification of the model family (analogous to the family object in
        R). Either a string, or an instance of class ``priors.Family``; for an
        instance, only its ``name`` is forwarded. If a string is passed, a
        family with the corresponding name must be defined in the defaults
        loaded at ``Model`` initialization. Valid pre-defined families are
        ``'gaussian'``, ``'bernoulli'``, ``'poisson'``, ``'gamma'``,
        ``'wald'``, and ``'negativebinomial'``. Defaults to ``'gaussian'``.
    link : str
        The model link function to use. Can be either a string (must be one of
        the options defined in the current backend; typically this will include
        at least ``'identity'``, ``'logit'``, ``'inverse'``, and ``'log'``), or
        a callable that takes a 1D ndarray or theano tensor as the sole
        argument and returns one with the same shape.
    run : bool
        Whether or not to immediately begin fitting the model once any set up
        of passed arguments is complete. Defaults to ``True``.
    categorical : str or list
        The names of any variables to treat as categorical. Can be either a
        single variable name, or a list of names. If categorical is ``None``,
        the data type of the columns in the ``DataFrame`` will be used to infer
        handling. In cases where numeric columns are to be treated as
        categoricals (e.g., group specific factors coded as numerical IDs),
        explicitly passing variable names via this argument is recommended.
        The conversion is applied to a copy of ``self.data``.
    omit_offsets : bool
        Omits offset terms in the ``InferenceData`` object when the model
        includes group specific effects. Defaults to ``True``.
    backend : str
        The name of the backend to use. Currently only ``'pymc'`` backend is
        supported. If ``None``, the previously recorded backend name is used,
        or ``'pymc'`` when there is none.
    **kwargs
        Forwarded unchanged to ``self.backend.run``.

    Returns
    -------
    The value returned by ``self.backend.run`` when ``run`` is true, otherwise
    ``None``.

    Raises
    ------
    ValueError
        If no formula is passed and the model has no stored design.
    """
    priors = {} if priors is None else deepcopy(priors)

    frame = self.data
    # alter this pandas flag to avoid false positive SettingWithCopyWarnings
    frame._is_copy = False  # pylint: disable=protected-access

    # Explicit conversion to category; the same can be achieved inside the
    # formula with C().
    if categorical is not None:
        frame = frame.copy()
        cat_cols = listify(categorical)
        frame[cat_cols] = frame[cat_cols].apply(lambda col: col.astype("category"))

    if self.dropna:
        missing_policy = "drop"
    else:
        missing_policy = "error"

    if formula is not None:
        # Only reset self.terms and self.response (e.g., keep priors)
        self.terms = OrderedDict()
        self.response = None
        self._design = design_matrices(formula, frame, missing_policy, eval_env=1)
    elif self._design is None:
        raise ValueError("Can't fit a model without a description of the model.")

    if self._design.response is not None:
        family_name = family.name if isinstance(family, Family) else family
        self._add_response(self._design.response, family=family_name, link=link)

    if self._design.common:
        self._add_common(self._design.common, priors)

    if self._design.group:
        self._add_group_specific(self._design.group, priors)

    if backend is None:
        if self._backend_name is None:
            backend = "pymc"
        else:
            backend = self._backend_name

    if not run:
        # Nothing is sampled: just remember which backend was requested.
        self._backend_name = backend
        return None

    must_build = not self.built or backend != self._backend_name
    if must_build:
        self._build(backend)
    return self.backend.run(omit_offsets=omit_offsets, **kwargs)