Example #1
0
 def hash(self) -> Tuple[Tuple[str, ...], ...]:
     """
     Return a sorted tuple of per-column digest tuples covering the
     categorical data, continuous data, any interaction terms, and the
     weights (when present).
     """
     digests: List[Tuple[str, ...]] = []
     hasher = hash_func()

     def _digest(values) -> Tuple[str, ...]:
         # Hash one column's buffer, record its digest, and clear the hasher.
         nonlocal hasher
         hasher.update(ascontiguousarray(values).data)
         result = (hasher.hexdigest(),)
         hasher = _reset(hasher)
         return result

     if self._cat is not None:
         for name in self._cat:
             digests.append(_digest(self._cat[name].cat.codes.to_numpy()))
     if self._cont is not None:
         for name in self._cont:
             digests.append(_digest(self._cont[name].to_numpy()))
     if self._interactions is not None:
         for term in self._interactions:
             digests.extend(term.hash)
     # Fold in a digest of the weights when they were provided
     if self._weights is not None:
         hasher = hash_func()
         hasher.update(ascontiguousarray(self._weights.data))
         digests.append((hasher.hexdigest(),))
     return tuple(sorted(digests))
Example #2
0
def lsmr_annihilate(x: csc_matrix, y: ndarray, use_cache: bool = True, x_hash=None,
                    **lsmr_options) -> ndarray:
    r"""
    Removes projection of x on y from y

    Parameters
    ----------
    x : csc_matrix
        Sparse array of regressors
    y : ndarray
        Array with shape (nobs, nvar)
    use_cache : bool
        Flag indicating whether results should be stored in the cache,
        and retrieved if available.
    x_hash : object
        Hashable object representing the values in x
    **lsmr_options
        Dictionary of options to pass to scipy.sparse.linalg.lsmr

    Returns
    -------
    resids : ndarray
        Returns the residuals from regressing y on x, (nobs, nvar)

    Notes
    -----
    Residuals are estimated column-by-column as

    .. math::

        \hat{\epsilon}_{j} = y_{j} - x^\prime \hat{\beta}

    where :math:`\hat{\beta}` is computed using lsmr.
    """

    # Caching requires a hash identifying x; without one results cannot be
    # keyed, so the cache is bypassed entirely.
    use_cache = use_cache and x_hash is not None
    regressor_hash = x_hash if x_hash is not None else ''
    default_opts = dict(atol=1e-8, btol=1e-8, show=False)
    default_opts.update(lsmr_options)
    resids = []
    for i in range(y.shape[1]):
        _y = y[:, i:i + 1]

        variable_digest = ''
        if use_cache:
            hasher = hash_func()
            hasher.update(ascontiguousarray(_y.data))
            variable_digest = hasher.hexdigest()

        if use_cache and variable_digest in _VARIABLE_CACHE[regressor_hash]:
            resid = _VARIABLE_CACHE[regressor_hash][variable_digest]
        else:
            beta = lsmr(x, _y, **default_opts)[0]
            resid = y[:, i:i + 1] - (x.dot(csc_matrix(beta[:, None]))).A
            # BUG FIX: only store when caching is enabled.  Previously the
            # result was written under the '' key even when use_cache was
            # False, polluting the module-level cache and keeping large
            # residual arrays alive indefinitely.
            if use_cache:
                _VARIABLE_CACHE[regressor_hash][variable_digest] = resid
        resids.append(resid)
    if resids:
        return column_stack(resids)
    else:
        # y has zero columns; return an array of matching (nobs, 0) shape
        return empty_like(y)
Example #3
0
    def hash(self) -> List[Tuple[str, ...]]:
        """
        Construct a hash that will be invariant for any permutation of
        inputs that produce the same fit when used as regressors"""
        # Sorted hashes of any categoricals
        hasher = hash_func()
        cat_hashes = []
        cat = self.cat
        for col in cat:
            # BUG FIX (consistency): take ``.data`` of the contiguous array,
            # as done for continuous columns below and elsewhere in this
            # module -- the original passed the memoryview into
            # ascontiguousarray, forcing an extra array round-trip.
            hasher.update(ascontiguousarray(cat[col].cat.codes.to_numpy()).data)
            cat_hashes.append(hasher.hexdigest())
            hasher = _reset(hasher)
        # Every continuous-column hash is prefixed with the (order-invariant)
        # categorical hashes so permuting inputs cannot change the result
        sorted_hashes = tuple(sorted(cat_hashes))

        hashes = []
        cont = self.cont
        for col in cont:
            hasher.update(ascontiguousarray(cont[col].to_numpy()).data)
            hashes.append(sorted_hashes + (hasher.hexdigest(),))
            hasher = _reset(hasher)

        return sorted(hashes)
Example #4
0
def _reset(hasher: Hasher) -> Hasher:
    """Return a cleared hasher, reusing *hasher* in place when it supports
    an in-place ``reset()``; otherwise construct a fresh one."""
    try:
        hasher.reset()
    except AttributeError:
        # No reset() available (e.g. hashlib objects) - start over
        return hash_func()
    return hasher