예제 #1
0
 def _get_sliceXarray(self, row: slice,
                      col: Sequence[int]) -> ss.csc_matrix:
     """Return the ``row`` slice of the columns selected by ``col``.

     Parameters
     ----------
     row : slice
         Row slice applied to the assembled column block.
     col : Sequence[int]
         Column selector.  A boolean sequence is treated as a mask and
         converted to the positions of its true entries; anything else is
         passed on as-is after ``np.asarray``.

     Returns
     -------
     ss.csc_matrix
         Matrix built from ``get_compressed_vectors(self, idxs)`` with
         ``self.shape[0]`` rows and one column per selected index,
         restricted to ``row``.
     """
     idxs = np.asarray(col)
     if idxs.dtype == bool:
         # np.where(mask) returns a *tuple* of arrays, so len() of it is the
         # number of dimensions (1), not the number of selected columns.
         # flatnonzero yields the flat index array, which keeps the shape
         # below correct and matches the ndarray passed in the integer case.
         idxs = np.flatnonzero(idxs)
     return ss.csc_matrix(get_compressed_vectors(self, idxs),
                          shape=(self.shape[0], len(idxs)))[row, :]
예제 #2
0
def sparse_matrix_from_args(type, arg1, *args, **kwargs):
    """Build a sparse matrix of the requested format through ``_sp``.

    Parameters
    ----------
    type : str
        Format tag: one of ``"coo"``, ``"csr"``, ``"csc"`` or ``"dia"``.
    arg1, *args, **kwargs
        Forwarded unchanged to the matching ``_sp.<type>_matrix``
        constructor.

    Returns
    -------
    The matrix produced by the selected constructor, or ``None`` when
    ``type`` matches none of the supported tags.
    """
    for tag in ("coo", "csr", "csc", "dia"):
        if type == tag:
            constructor = getattr(_sp, tag + "_matrix")
            return constructor(arg1, *args, **kwargs)
    # Unrecognised tags fall through without raising.
    return None
예제 #3
0
    def _get_missing_features_info(self, X):
        """Build the missing-value mask of ``X`` and pick the feature indices.

        Parameters
        ----------
        X : {ndarray or sparse matrix}, shape (n_samples, n_features)
            The input data with missing values. ``X`` is expected to have
            been validated by ``fit`` / ``transform`` before this method is
            called.

        Returns
        -------
        imputer_mask : {ndarray or sparse matrix}, shape \
        (n_samples, n_features)
            The mask computed by ``_get_mask`` for ``X``. Whether it is
            returned dense or as CSC depends on ``self.sparse`` and on
            whether ``X`` itself is sparse.

        features_indices : ndarray
            Every column index when ``self.features == 'all'``; otherwise
            the indices of the columns whose mask sum is non-zero.

        """
        if not sparse.issparse(X):
            imputer_mask = _get_mask(X, self.missing_values)

            if self.features == 'missing-only':
                n_missing = imputer_mask.sum(axis=0)

            # Only an explicit True forces a sparse result for dense input.
            if self.sparse is True:
                imputer_mask = sparse.csc_matrix(imputer_mask)
        else:
            # Only the explicitly stored entries of X are examined.
            stored_mask = _get_mask(X.data, self.missing_values)

            # The mask reuses X's sparsity structure: CSR input gives a CSR
            # mask, every other format is built as CSC.
            if X.format == 'csr':
                build_mask = sparse.csr_matrix
            else:
                build_mask = sparse.csc_matrix

            # float32 is used temporarily instead of bool because cupy
            # cannot operate with bool as of now.
            imputer_mask = build_mask(
                (stored_mask, X.indices.copy(), X.indptr.copy()),
                shape=X.shape,
                dtype=np.float32)

            if self.features == 'missing-only':
                n_missing = imputer_mask.sum(axis=0)

            # Only an explicit False densifies; any other setting keeps the
            # mask sparse and normalises it to CSC.
            if self.sparse is False:
                imputer_mask = imputer_mask.toarray()
            elif imputer_mask.format == 'csr':
                imputer_mask = imputer_mask.tocsc()

        if self.features == 'all':
            features_indices = np.arange(X.shape[1])
        else:
            # Columns with a non-zero mask sum contain missing values.
            features_indices = np.flatnonzero(n_missing)

        return imputer_mask, features_indices
예제 #4
0
 def _get_sliceXint(self, row: slice, col: int) -> ss.csc_matrix:
     """Return the ``row`` slice of the single column ``col``.

     The column data comes from ``get_compressed_vector(self, col)`` and is
     wrapped in a CSC matrix with ``self.shape[0]`` rows and one column
     before the row slice is applied.
     """
     packed_column = get_compressed_vector(self, col)
     full_column = ss.csc_matrix(packed_column, shape=(self.shape[0], 1))
     return full_column[row, :]