Ejemplo n.º 1
0
    def _first_time_fit(
        self,
        use_cache: bool,
        absorb_options: Optional[Dict[str, Union[bool, str, ArrayLike, None,
                                                 Dict[str, Any]]]],
        method: str,
    ) -> None:
        """
        Remove the absorbed effects from the dependent and exog variables

        The residualized variables are stored on the instance as
        ``_absorbed_dependent`` and ``_absorbed_exog``.

        Parameters
        ----------
        use_cache : bool
            Forwarded to ``lsmr_annihilate`` when the LSMR path is used. It
            is not used on the HDFE path.
        absorb_options : {dict, None}
            Keyword options forwarded to ``pyhdfe.create`` (HDFE path) or to
            ``lsmr_annihilate`` (LSMR path). ``None`` is treated as an empty
            dictionary. The mapping is copied before it is used, so the
            caller's dictionary is never modified.
        method : str
            "hdfe" requires the HDFE path and raises if it cannot be used.
            "auto" uses the HDFE path when the model is eligible. Any other
            value selects the LSMR path.

        Raises
        ------
        RuntimeError
            If ``method`` is "hdfe" but the model is weighted, has continuous
            absorbed variables, or has interactions.
        ValueError
            If absorbed columns are being dropped and no column of exog
            survives.
        """
        weights = (cast(Float64Array, self.weights.ndarray)
                   if self._is_weighted else None)

        # HDFE is only eligible for unweighted models whose absorbed
        # variables have no continuous columns and no interactions.
        use_hdfe = (
            weights is None
            and method in ("auto", "hdfe")
            and not self._absorb_inter.cont.shape[1]
            and not self._interaction_list
        )

        if not use_hdfe and method == "hdfe":
            raise RuntimeError(
                "HDFE has been set as the method but the model cannot be estimated "
                "using HDFE. HDFE requires that the model is unweighted and that the "
                "absorbed regressors include only fixed effects (dummy variables)."
            )
        areg = AbsorbingRegressor(
            cat=self._absorb_inter.cat,
            cont=self._absorb_inter.cont,
            interactions=self._interaction_list,
            weights=weights,
        )
        areg_constant = areg.has_constant
        self._regressors = areg.regressors
        self._num_params += areg.approx_rank
        # Do not double count intercept-like terms
        self._has_constant = self._has_constant_exog or areg_constant
        self._num_params -= min(self._has_constant_exog, areg_constant)
        self._regressors_hash = areg.hash
        self._constant_absorbed = self._has_constant_exog and areg_constant

        dep = self._dependent.ndarray
        exog = cast(Float64Array, self._exog.ndarray)

        # Scale by the square root of the weights and compute the ratios
        # (root_w' v) / (root_w' root_w) of the scaled variables; these are
        # added back below when the constant has been absorbed.
        root_w = sqrt(self._weight_data.ndarray)
        dep = root_w * dep
        exog = root_w * exog
        denom = root_w.T @ root_w
        mu_dep = (root_w.T @ dep) / denom
        mu_exog = (root_w.T @ exog) / denom

        # Work on a private copy: the HDFE branch inserts a key, and that
        # must not leak into the dictionary owned by the caller.
        options: Dict[str, Any] = (
            {} if absorb_options is None else dict(absorb_options)
        )
        assert isinstance(self._regressors, sp.csc_matrix)
        if self._regressors.shape[1] > 0:
            # Residualize dep and exog jointly; column 0 is the dependent.
            dep_exog = column_stack((dep, exog))
            if use_hdfe:
                from pyhdfe import create

                # Always overridden here, whatever the caller supplied
                options["drop_singletons"] = False
                algo = create(self._absorb_inter.cat, **options)
                resids = algo.residualize(dep_exog)
            else:
                self._regressors = preconditioner(self._regressors)[0]
                resids = lsmr_annihilate(
                    self._regressors,
                    dep_exog,
                    use_cache,
                    self._regressors_hash,
                    **options,
                )
            dep_resid = resids[:, :1]
            exog_resid = resids[:, 1:]
        else:
            # Nothing to absorb
            dep_resid = dep
            exog_resid = exog

        if self._constant_absorbed:
            dep_resid += root_w * mu_dep
            exog_resid += root_w * mu_exog

        if not self._drop_absorbed:
            check_absorbed(exog_resid, self.exog.cols, exog)
        else:
            ncol = exog_resid.shape[1]
            retain = not_absorbed(exog_resid)
            if not retain:
                raise ValueError(
                    "All columns in exog have been fully absorbed by the "
                    "included effects. This model cannot be estimated.")
            elif len(retain) < ncol:
                # Sort so the warning lists variables in column order
                drop = sorted(set(range(ncol)).difference(retain))
                dropped = ", ".join(str(self.exog.cols[i]) for i in drop)
                warnings.warn(
                    absorbing_warn_msg.format(absorbed_variables=dropped),
                    AbsorbingEffectWarning,
                )

            exog_resid = exog_resid[:, retain]
            self._columns = [self._columns[i] for i in retain]

        self._absorbed_dependent = DataFrame(
            dep_resid,
            index=self._dependent.pandas.index,
            columns=self._dependent.pandas.columns,
        )
        self._absorbed_exog = DataFrame(exog_resid,
                                        index=self._exog.pandas.index,
                                        columns=self._columns)
Ejemplo n.º 2
0
    def fit(
        self,
        *,
        cov_type: str = "robust",
        debiased: bool = False,
        lsmr_options: Optional[Dict[str, Union[float, bool]]] = None,
        use_cache: bool = True,
        **cov_config: Any,
    ) -> AbsorbingLSResults:
        """
        Estimate model parameters

        Parameters
        ----------
        cov_type : str, optional
            Name of the covariance estimator to use. Supported covariance
            estimators are:

            * 'unadjusted', 'homoskedastic' - Classic homoskedastic inference
            * 'robust', 'heteroskedastic' - Heteroskedasticity robust inference
            * 'kernel' - Heteroskedasticity and autocorrelation robust
              inference
            * 'cluster' - One-way cluster dependent inference.
              Heteroskedasticity robust

        debiased : bool, optional
            Flag indicating whether to debias the covariance estimator using
            a degree of freedom adjustment.
        lsmr_options : dict
            Dictionary of options to pass to scipy.sparse.linalg.lsmr
        use_cache : bool
            Flag indicating whether the variables, once purged from the
            absorbed variables and interactions, should be stored in the cache,
            and retrieved if available. Cache can dramatically speed up
            re-fitting large models when the set of absorbed variables and
            interactions are identical.
        **cov_config
            Additional parameters to pass to the covariance estimator. The
            list of optional parameters differs according to ``cov_type``.
            See the documentation of the alternative covariance estimators
            for the complete list of available options. A ``center`` entry,
            if present, is removed before the estimator is constructed.

        Returns
        -------
        AbsorbingLSResults
            Results container

        Notes
        -----
        Additional covariance parameters depend on the specific covariance
        used. See the docstring of the specific covariance estimator for a
        list of supported options. Defaults are used if no covariance
        configuration is provided.

        If use_cache is True, then variables are hashed based on their
        contents using either a 64 bit value (if xxhash is installed) or
        a 256 bit value. This allows variables to be reused in different
        models if the set of absorbing variables and interactions is held
        constant.

        See also
        --------
        linearmodels.iv.covariance.HomoskedasticCovariance
        linearmodels.iv.covariance.HeteroskedasticCovariance
        linearmodels.iv.covariance.KernelCovariance
        linearmodels.iv.covariance.ClusteredCovariance
        """
        # The absorption step only runs when no absorbed data is stored yet
        if self._absorbed_dependent is None:
            self._first_time_fit(use_cache, lsmr_options)

        x_absorbed = to_numpy(self.absorbed_exog)
        self._x = x_absorbed
        y_absorbed = to_numpy(self.absorbed_dependent)

        if self._exog.shape[1] != 0:
            if x_absorbed.shape[1]:
                check_absorbed(x_absorbed, self.exog.cols)
            params = lstsq(x_absorbed, y_absorbed, rcond=None)[0]
            # NOTE(review): this increment runs on every call, so repeated
            # calls on one instance appear to accumulate -- confirm intended.
            self._num_params += x_absorbed.shape[1]
        else:
            # No exogenous regressors: empty parameter vector
            params = empty((0, 1))

        estimator_cls = COVARIANCE_ESTIMATORS[cov_type]
        cov_config["debiased"] = debiased
        cov_config["kappa"] = 0.0
        # "center" is not forwarded to the covariance estimator
        estimator_kwargs = dict(cov_config)
        estimator_kwargs.pop("center", None)
        cov_estimator_inst = estimator_cls(
            x_absorbed, y_absorbed, x_absorbed, params, **estimator_kwargs
        )

        post = self._post_estimation(params, cov_estimator_inst, cov_type)
        results = {"kappa": 0.0, "liml_kappa": 0.0}
        results.update(post)
        results["df_model"] = self._num_params

        return AbsorbingLSResults(results, self)
Ejemplo n.º 3
0
def test_all_absorbed_exception():
    """check_absorbed raises when all columns are tiny relative to the originals."""
    labels = ["a", "b", "c"]
    reference = np.random.standard_normal((200, 3))
    # Scale every column by 1e-32 relative to the reference values
    shrunk = reference * 1e-32
    expected_message = "All exog variables have been"
    with pytest.raises(AbsorbingEffectError, match=expected_message):
        check_absorbed(shrunk, labels, reference)
Ejemplo n.º 4
0
    def _first_time_fit(
            self, use_cache: bool,
            lsmr_options: Optional[Dict[str, Union[float, bool]]]) -> None:
        """
        Remove the absorbed effects from the dependent and exog variables

        The residualized variables are stored on the instance as
        ``_absorbed_dependent`` and ``_absorbed_exog``.

        Parameters
        ----------
        use_cache : bool
            Forwarded to ``lsmr_annihilate``.
        lsmr_options : {dict, None}
            Keyword options forwarded to ``lsmr_annihilate``. ``None`` is
            treated as an empty dictionary.

        Raises
        ------
        ValueError
            If absorbed columns are being dropped and no column of exog
            survives.
        """
        if self._is_weighted:
            weights = self.weights.ndarray
        else:
            weights = None

        absorber = AbsorbingRegressor(
            cat=self._absorb_inter.cat,
            cont=self._absorb_inter.cont,
            interactions=self._interaction_list,
            weights=weights,
        )
        absorbs_constant = absorber.has_constant
        # Only the first element returned by preconditioner is kept
        self._regressors = preconditioner(absorber.regressors)[0]
        self._num_params += absorber.approx_rank
        self._has_constant = self._has_constant_exog or absorbs_constant
        # Count an intercept-like term once when exog and the absorbed
        # regressors both contain one
        self._num_params -= min(self._has_constant_exog, absorbs_constant)
        self._regressors_hash = absorber.hash
        self._constant_absorbed = self._has_constant_exog and absorbs_constant

        raw_dep = self._dependent.ndarray
        raw_exog = self._exog.ndarray

        # Scale by the square root of the weights and compute the ratios
        # (root_w' v) / (root_w' root_w) of the scaled variables
        root_w = sqrt(self._weight_data.ndarray)
        dep = root_w * raw_dep
        exog = root_w * raw_exog
        root_w_t = root_w.T
        denom = root_w_t @ root_w
        mu_dep = (root_w_t @ dep) / denom
        mu_exog = (root_w_t @ exog) / denom

        options = lsmr_options if lsmr_options is not None else {}
        assert isinstance(self._regressors, sp.csc_matrix)
        if self._regressors.shape[1] > 0:
            # Dependent first, then exog, each annihilated separately
            dep_resid, exog_resid = [
                lsmr_annihilate(self._regressors, block, use_cache,
                                self._regressors_hash, **options)
                for block in (dep, exog)
            ]
        else:
            # Nothing to absorb
            dep_resid, exog_resid = dep, exog

        if self._constant_absorbed:
            dep_resid += root_w * mu_dep
            exog_resid += root_w * mu_exog

        if self._drop_absorbed:
            ncol = exog_resid.shape[1]
            retain = not_absorbed(exog_resid)
            if not retain:
                raise ValueError(
                    "All columns in exog have been fully absorbed by the "
                    "included effects. This model cannot be estimated.")
            if len(retain) < ncol:
                drop = set(range(ncol)).difference(retain)
                names = [str(self.exog.cols[i]) for i in drop]
                dropped = ", ".join(names)
                message = absorbing_warn_msg.format(absorbed_variables=dropped)
                warnings.warn(message, AbsorbingEffectWarning)

            exog_resid = exog_resid[:, retain]
            self._columns = [self._columns[i] for i in retain]
        else:
            check_absorbed(exog_resid, self.exog.cols, exog)

        dep_frame = self._dependent.pandas
        self._absorbed_dependent = DataFrame(
            dep_resid,
            index=dep_frame.index,
            columns=dep_frame.columns,
        )
        self._absorbed_exog = DataFrame(
            exog_resid,
            index=self._exog.pandas.index,
            columns=self._columns,
        )