コード例 #1
0
def test_preconditioner_subclass():
    """Check that ``preconditioner`` preserves an ``ndarray`` subclass.

    The conditioned output must have exactly the same type as the input,
    both when a copy is requested and when ``copy=False`` is used.
    """

    class SubArray(np.ndarray):
        pass

    rs = np.random.RandomState(0)
    values = rs.standard_normal((100, 10))
    values = values.view(SubArray)
    val_cond, cond = preconditioner(values, copy=True)
    # The returned conditioning values are the column 2-norms of the input
    assert_allclose(np.sqrt((values**2).sum(0)), cond)
    # Exact type match is intended, so compare type objects by identity
    assert type(val_cond) is type(values)
    # Test in-place
    val_cond, cond = preconditioner(values, copy=False)
    # With copy=False the input itself must now have unit column norms
    assert_allclose(np.sqrt((values**2).sum(0)), np.ones(10))
    assert type(val_cond) is type(values)
コード例 #2
0
def category_continuous_interaction(
        cat: AnyPandas,
        cont: AnyPandas,
        precondition: bool = True) -> sp.csc_matrix:
    """
    Interact a categorical variable with a continuous variable

    Parameters
    ----------
    cat : Series
        Categorical series to convert to dummy variables
    cont : {Series, DataFrame}
        Continuous variable values to use in the dummy interaction
    precondition : bool
        Flag whether dummies should be preconditioned

    Returns
    -------
    csc_matrix
        Sparse matrix of dummy interactions. When ``precondition`` is True
        the matrix is the first element returned by ``preconditioner``;
        otherwise the unscaled interactions are returned.
    """
    codes = category_product(cat).cat.codes
    # Observation i contributes its continuous value in column codes[i]
    data = cont.to_numpy().flat
    row_index = arange(codes.shape[0])
    interact = sp.csc_matrix((data, (row_index, codes)))
    if precondition:
        conditioned = preconditioner(interact)[0]
        assert isinstance(conditioned, sp.csc_matrix)
        return conditioned
    return interact
コード例 #3
0
def test_preconditioner_sparse():
    """Check ``preconditioner`` on a sparse CSC input with ``copy=True``."""
    rng = np.random.RandomState(0)
    values = scipy.sparse.csc_matrix(rng.standard_normal((100, 10)))
    snapshot = values.copy()
    val_cond, cond = preconditioner(values, copy=True)
    # Column 2-norms of the input must match the returned conditioning values
    col_norms = np.sqrt(values.multiply(values).sum(0).A1)
    assert_allclose(col_norms, cond)
    # A copy was requested: a distinct object is returned, input is unchanged
    assert val_cond is not values
    assert_array_equal(snapshot.toarray(), values.toarray())
コード例 #4
0
def test_preconditioner_copy():
    """Check that ``copy=True`` leaves a dense input array untouched."""
    rng = np.random.RandomState(0)
    values = rng.standard_normal((100, 10))
    snapshot = values.copy()
    val_cond, cond = preconditioner(values, copy=True)
    # Conditioning values are the column 2-norms of the original data
    expected_norms = np.sqrt((snapshot ** 2).sum(0))
    assert_allclose(expected_norms, cond)
    # The result is a different object and the input still equals its snapshot
    assert val_cond is not values
    assert_array_equal(snapshot, values)
コード例 #5
0
    def _first_time_fit(
            self, use_cache: bool,
            lsmr_options: Optional[Dict[str, Union[float, bool]]]) -> None:
        """
        Build the absorbing regressors and absorb them from the data

        Parameters
        ----------
        use_cache : bool
            Forwarded to ``lsmr_annihilate``
        lsmr_options : {dict, None}
            Keyword arguments forwarded to ``lsmr_annihilate``. None is
            treated as an empty dictionary.

        Notes
        -----
        Sets ``_regressors``, ``_regressors_hash``, ``_has_constant``,
        ``_constant_absorbed``, ``_absorbed_dependent`` and
        ``_absorbed_exog``, and adjusts ``_num_params``.
        """
        absorb_weights = self.weights.ndarray if self._is_weighted else None

        absorber = AbsorbingRegressor(
            cat=self._absorb_inter.cat,
            cont=self._absorb_inter.cont,
            interactions=self._interaction_list,
            weights=absorb_weights,
        )
        absorbs_constant = absorber.has_constant
        self._regressors = preconditioner(absorber.regressors)[0]
        self._num_params += absorber.approx_rank
        # Do not double count intercept-like terms
        self._has_constant = self._has_constant_exog or absorbs_constant
        self._num_params -= min(self._has_constant_exog, absorbs_constant)
        self._regressors_hash = absorber.hash
        self._constant_absorbed = (self._has_constant_exog
                                   and absorbs_constant)

        dep_raw = self._dependent.ndarray
        exog_raw = self._exog.ndarray

        # Scale both sides by the square root of the weights
        root_w = sqrt(self._weight_data.ndarray)
        w_dep = root_w * dep_raw
        w_exog = root_w * exog_raw
        # Coefficients from projecting the weighted data onto root_w
        root_w_t = root_w.T
        denom = root_w_t @ root_w
        mu_dep = (root_w_t @ w_dep) / denom
        mu_exog = (root_w_t @ w_exog) / denom

        options = {} if lsmr_options is None else lsmr_options
        assert isinstance(self._regressors, csc_matrix)
        if self._regressors.shape[1] > 0:
            dep_resid, exog_resid = [
                lsmr_annihilate(self._regressors, target, use_cache,
                                self._regressors_hash, **options)
                for target in (w_dep, w_exog)
            ]
        else:
            # Nothing to absorb
            dep_resid, exog_resid = w_dep, w_exog

        if self._constant_absorbed:
            # Both exog and the absorbed regressors contain a constant, so
            # the projection onto root_w is added back to the residuals
            dep_resid += root_w * mu_dep
            exog_resid += root_w * mu_exog

        dep_frame = self._dependent.pandas
        self._absorbed_dependent = DataFrame(
            dep_resid,
            index=dep_frame.index,
            columns=dep_frame.columns,
        )
        exog_frame = self._exog.pandas
        self._absorbed_exog = DataFrame(
            exog_resid,
            index=exog_frame.index,
            columns=exog_frame.columns,
        )
コード例 #6
0
    def _first_time_fit(
        self,
        use_cache: bool,
        absorb_options: Optional[Dict[str, Union[bool, str, ArrayLike, None,
                                                 Dict[str, Any]]]],
        method: str,
    ) -> None:
        """
        Build the absorbing regressors and absorb them from the data

        Parameters
        ----------
        use_cache : bool
            Forwarded to ``lsmr_annihilate`` when LSMR is used
        absorb_options : {dict, None}
            Keyword arguments forwarded to ``pyhdfe.create`` when HDFE is
            used, or to ``lsmr_annihilate`` otherwise. None is treated as an
            empty dictionary. The dictionary passed in is not modified.
        method : str
            Absorption method. HDFE is selected when this is "auto" or
            "hdfe" and the model is unweighted, has no continuous absorbed
            variables and has no interactions.

        Raises
        ------
        RuntimeError
            If ``method`` is "hdfe" but HDFE cannot be used
        ValueError
            If absorbed columns are being dropped and every column of exog
            has been absorbed

        Notes
        -----
        Sets ``_regressors``, ``_regressors_hash``, ``_has_constant``,
        ``_constant_absorbed``, ``_absorbed_dependent`` and
        ``_absorbed_exog``, adjusts ``_num_params``, and updates
        ``_columns`` when absorbed columns are dropped.
        """
        weights = (cast(Float64Array, self.weights.ndarray)
                   if self._is_weighted else None)

        # HDFE requires no weights, no continuous variables and no
        # interactions
        use_hdfe = weights is None and method in ("auto", "hdfe")
        use_hdfe = use_hdfe and not self._absorb_inter.cont.shape[1]
        use_hdfe = use_hdfe and not self._interaction_list

        if not use_hdfe and method == "hdfe":
            raise RuntimeError(
                "HDFE has been set as the method but the model cannot be estimated "
                "using HDFE. HDFE requires that the model is unweighted and that the "
                "absorbed regressors include only fixed effects (dummy variables)."
            )
        areg = AbsorbingRegressor(
            cat=self._absorb_inter.cat,
            cont=self._absorb_inter.cont,
            interactions=self._interaction_list,
            weights=weights,
        )
        areg_constant = areg.has_constant
        self._regressors = areg.regressors
        self._num_params += areg.approx_rank
        # Do not double count intercept-like terms
        self._has_constant = self._has_constant_exog or areg_constant
        self._num_params -= min(self._has_constant_exog, areg_constant)
        self._regressors_hash = areg.hash
        self._constant_absorbed = self._has_constant_exog and areg_constant

        dep = self._dependent.ndarray
        exog = cast(Float64Array, self._exog.ndarray)

        # Scale both sides by the square root of the weights
        root_w = sqrt(self._weight_data.ndarray)
        dep = root_w * dep
        exog = root_w * exog
        # Coefficients from projecting the weighted data onto root_w
        denom = root_w.T @ root_w
        mu_dep = (root_w.T @ dep) / denom
        mu_exog = (root_w.T @ exog) / denom

        # Work on a shallow copy so that setting "drop_singletons" below
        # does not leak into the dictionary owned by the caller
        absorb_options = (
            {} if absorb_options is None else dict(absorb_options)
        )
        assert isinstance(self._regressors, sp.csc_matrix)
        if self._regressors.shape[1] > 0:
            if use_hdfe:
                from pyhdfe import create

                absorb_options["drop_singletons"] = False
                algo = create(self._absorb_inter.cat, **absorb_options)
                # Residualize dep and exog together, then split the columns
                dep_exog = column_stack((dep, exog))
                resids = algo.residualize(dep_exog)
                dep_resid = resids[:, :1]
                exog_resid = resids[:, 1:]
            else:
                self._regressors = preconditioner(self._regressors)[0]
                dep_exog = column_stack((dep, exog))
                resid = lsmr_annihilate(
                    self._regressors,
                    dep_exog,
                    use_cache,
                    self._regressors_hash,
                    **absorb_options,
                )
                dep_resid = resid[:, :1]
                exog_resid = resid[:, 1:]
        else:
            # Nothing to absorb
            dep_resid = dep
            exog_resid = exog

        if self._constant_absorbed:
            # Both exog and the absorbed regressors contain a constant, so
            # the projection onto root_w is added back to the residuals
            dep_resid += root_w * mu_dep
            exog_resid += root_w * mu_exog

        if not self._drop_absorbed:
            check_absorbed(exog_resid, self.exog.cols, exog)
        else:
            ncol = exog_resid.shape[1]
            retain = not_absorbed(exog_resid)
            if not retain:
                raise ValueError(
                    "All columns in exog have been fully absorbed by the "
                    "included effects. This model cannot be estimated.")
            elif len(retain) < ncol:
                # Warn about the columns that are being removed
                drop = set(range(ncol)).difference(retain)
                dropped = ", ".join([str(self.exog.cols[i]) for i in drop])
                warnings.warn(
                    absorbing_warn_msg.format(absorbed_variables=dropped),
                    AbsorbingEffectWarning,
                )

            exog_resid = exog_resid[:, retain]
            self._columns = [self._columns[i] for i in retain]

        self._absorbed_dependent = DataFrame(
            dep_resid,
            index=self._dependent.pandas.index,
            columns=self._dependent.pandas.columns,
        )
        self._absorbed_exog = DataFrame(exog_resid,
                                        index=self._exog.pandas.index,
                                        columns=self._columns)
コード例 #7
0
    def _first_time_fit(
            self, use_cache: bool,
            lsmr_options: Optional[Dict[str, Union[float, bool]]]) -> None:
        """
        Build the absorbing regressors and absorb them from the data

        Parameters
        ----------
        use_cache : bool
            Forwarded to ``lsmr_annihilate``
        lsmr_options : {dict, None}
            Keyword arguments forwarded to ``lsmr_annihilate``. None is
            treated as an empty dictionary.

        Raises
        ------
        ValueError
            If absorbed columns are being dropped and every column of exog
            has been absorbed

        Notes
        -----
        Sets ``_regressors``, ``_regressors_hash``, ``_has_constant``,
        ``_constant_absorbed``, ``_absorbed_dependent`` and
        ``_absorbed_exog``, adjusts ``_num_params``, and updates
        ``_columns`` when absorbed columns are dropped.
        """
        absorb_weights = self.weights.ndarray if self._is_weighted else None

        absorber = AbsorbingRegressor(
            cat=self._absorb_inter.cat,
            cont=self._absorb_inter.cont,
            interactions=self._interaction_list,
            weights=absorb_weights,
        )
        absorbs_constant = absorber.has_constant
        self._regressors = preconditioner(absorber.regressors)[0]
        self._num_params += absorber.approx_rank
        # Do not double count intercept-like terms
        self._has_constant = self._has_constant_exog or absorbs_constant
        self._num_params -= min(self._has_constant_exog, absorbs_constant)
        self._regressors_hash = absorber.hash
        self._constant_absorbed = (self._has_constant_exog
                                   and absorbs_constant)

        dep_raw = self._dependent.ndarray
        exog_raw = self._exog.ndarray

        # Scale both sides by the square root of the weights
        root_w = sqrt(self._weight_data.ndarray)
        w_dep = root_w * dep_raw
        w_exog = root_w * exog_raw
        # Coefficients from projecting the weighted data onto root_w
        root_w_t = root_w.T
        denom = root_w_t @ root_w
        mu_dep = (root_w_t @ w_dep) / denom
        mu_exog = (root_w_t @ w_exog) / denom

        options = {} if lsmr_options is None else lsmr_options
        assert isinstance(self._regressors, sp.csc_matrix)
        if self._regressors.shape[1] > 0:
            dep_resid, exog_resid = [
                lsmr_annihilate(self._regressors, target, use_cache,
                                self._regressors_hash, **options)
                for target in (w_dep, w_exog)
            ]
        else:
            # Nothing to absorb
            dep_resid, exog_resid = w_dep, w_exog

        if self._constant_absorbed:
            # Both exog and the absorbed regressors contain a constant, so
            # the projection onto root_w is added back to the residuals
            dep_resid += root_w * mu_dep
            exog_resid += root_w * mu_exog

        if self._drop_absorbed:
            ncol = exog_resid.shape[1]
            retain = not_absorbed(exog_resid)
            if not retain:
                raise ValueError(
                    "All columns in exog have been fully absorbed by the "
                    "included effects. This model cannot be estimated.")
            if len(retain) < ncol:
                # Warn about the columns that are being removed
                drop = set(range(ncol)).difference(retain)
                dropped = ", ".join(str(self.exog.cols[i]) for i in drop)
                warnings.warn(
                    absorbing_warn_msg.format(absorbed_variables=dropped),
                    AbsorbingEffectWarning,
                )

            exog_resid = exog_resid[:, retain]
            self._columns = [self._columns[i] for i in retain]
        else:
            check_absorbed(exog_resid, self.exog.cols, w_exog)

        dep_frame = self._dependent.pandas
        self._absorbed_dependent = DataFrame(
            dep_resid,
            index=dep_frame.index,
            columns=dep_frame.columns,
        )
        self._absorbed_exog = DataFrame(
            exog_resid,
            index=self._exog.pandas.index,
            columns=self._columns,
        )