Ejemplo n.º 1
0
    def test_issequence(self):
        assert_equal(sputils.issequence((1, )), True)
        assert_equal(sputils.issequence((1, 2, 3)), True)
        assert_equal(sputils.issequence([1]), True)
        assert_equal(sputils.issequence([1, 2, 3]), True)
        assert_equal(sputils.issequence(np.array([1, 2, 3])), True)

        assert_equal(sputils.issequence(np.array([[1], [2], [3]])), False)
        assert_equal(sputils.issequence(3), False)
Ejemplo n.º 2
0
    def test_issequence(self):
        assert_equal(sputils.issequence( (1,) ),True)
        assert_equal(sputils.issequence( (1,2,3) ),True)
        assert_equal(sputils.issequence( [1] ),True)
        assert_equal(sputils.issequence( [1,2,3] ),True)
        assert_equal(sputils.issequence( np.array([1,2,3]) ),True)

        assert_equal(sputils.issequence( np.array([[1],[2],[3]]) ),False)
        assert_equal(sputils.issequence( 3 ),False)
Ejemplo n.º 3
0
    def __getitem__(self, index):
        """Return the ``(inputs, targets)`` pair selected by ``index``.

        ``index`` must satisfy ``sputils.issequence`` or
        ``sputils.isintlike``; this is enforced with an ``assert``.

        Returns:
            A 2-tuple. The first item is a ``UsersInteractions`` built from
            what ``self._extract`` returns for ``self.interactions_matrix``.
            The second item is ``None`` when
            ``self.target_interactions_matrix`` is ``None``; otherwise it is
            a ``UsersInteractions`` built the same way from the target
            matrix. Both items share the same ``users`` array.
        """
        assert sputils.issequence(index) or sputils.isintlike(index)

        # Flatten the index so a single integer also yields a 1-D array.
        user_ids = np.array(index).reshape(-1, )
        inputs_matrix = self._extract(self.interactions_matrix, index)

        targets_source = self.target_interactions_matrix
        if targets_source is None:
            inputs = UsersInteractions(users=user_ids,
                                       interactions_matrix=inputs_matrix)
            return inputs, None

        targets_matrix = self._extract(targets_source, index)
        inputs = UsersInteractions(users=user_ids,
                                   interactions_matrix=inputs_matrix)
        targets = UsersInteractions(users=user_ids,
                                    interactions_matrix=targets_matrix)
        return inputs, targets
Ejemplo n.º 4
0
    def _extract(self, sparse_matrix, index):

        if sputils.issequence(
                index) and len(index) > CSR_MATRIX_INDEX_SIZE_LIMIT:
            # It happens that scipy implements the indexing of a csr_matrix with a list using
            # matrix multiplication, which gets to be an issue if the size of the index list is
            # large and lead to memory issues
            # Reference: https://stackoverflow.com/questions/46034212/sparse-matrix-slicing-memory-error/46040827#46040827

            # In order to solve this issue, simply chunk the index into smaller indices of
            # size CSR_MATRIX_INDEX_SIZE_LIMIT and then stack the extracted chunks

            sparse_matrix_slices = []
            for offset in range(0, len(index), CSR_MATRIX_INDEX_SIZE_LIMIT):
                sparse_matrix_slices.append(
                    sparse_matrix[index[offset:offset +
                                        CSR_MATRIX_INDEX_SIZE_LIMIT]])

            extracted_sparse_matrix = sparse.vstack(sparse_matrix_slices)
        else:
            extracted_sparse_matrix = sparse_matrix[index]

        return extracted_sparse_matrix
Ejemplo n.º 5
0
    def __getitem__(self, key):
        """Return the part of the matrix selected by ``key``.

        ``key`` is split into ``row`` and ``col`` by ``self._unpack_index``.
        Each of the two is then treated as integer-like, a slice, or a
        sequence, and the lookup is dispatched accordingly:

        * integer row: ``_get_single_element`` (integer column),
          ``_get_row_slice`` (slice column), or a product with a column
          selector matrix (sequence column);
        * slice row: ``_get_submatrix`` when the column is an integer or a
          slice and every slice step involved is 1 or ``None``; a product
          with a column selector matrix when the column is a sequence;
        * sequence row with an integer or slice column: a product with a row
          selector matrix, followed by column indexing if needed;
        * anything else: element-wise sampling via ``csr_sample_values``.

        Returns:
            Whatever the delegated helper or product returns. On the
            element-wise path: an empty ``csr_matrix`` when there is nothing
            to sample, ``np.asmatrix(val)`` when the index arrays are 1-D,
            otherwise ``self.__class__`` built from the values reshaped to
            the shape of the index arrays.

        Raises:
            IndexError: If an index cannot be converted to an index array,
                is out of range, or the row and column index arrays have
                different shapes.
            AssertionError: If the row index array has more than two
                dimensions.
        """
        def asindices(x):
            """Return ``x`` as an ndarray using the dtype ``get_index_dtype`` picks.

            Any ordinary failure during the conversion is reported as
            ``IndexError('invalid index')``.
            """
            try:
                x = np.asarray(x)

                # Check index contents, to avoid creating 64-bit arrays needlessly
                idx_dtype = get_index_dtype((x, ), check_contents=True)
                if idx_dtype != x.dtype:
                    x = x.astype(idx_dtype)
            except Exception:
                # Deliberately not a bare ``except``: KeyboardInterrupt,
                # SystemExit and GeneratorExit must propagate unchanged
                # instead of being disguised as an invalid index.
                raise IndexError('invalid index')
            else:
                return x

        def check_bounds(indices, N):
            """Validate ``indices`` against an axis of length ``N``.

            Returns ``(min, max)`` of ``indices``, or ``(0, 0)`` when
            ``indices`` is empty. Raises ``IndexError`` when the largest
            index is ``>= N`` or the smallest is ``< -N``.
            """
            if indices.size == 0:
                return (0, 0)

            max_indx = indices.max()
            if max_indx >= N:
                raise IndexError('index (%d) out of range' % max_indx)

            min_indx = indices.min()
            if min_indx < -N:
                raise IndexError('index (%d) out of range' % (N + min_indx))

            return (min_indx, max_indx)

        def extractor(indices, N):
            """Return a sparse matrix P so that P*self implements
            slicing of the form self[[1,2,3],:]
            """
            indices = asindices(indices)

            (min_indx, max_indx) = check_bounds(indices, N)

            if min_indx < 0:
                # Copy before wrapping negative indices so the converted
                # array is not modified in place.
                indices = indices.copy()
                indices[indices < 0] += N

            # One stored entry per output row: row i holds a single 1 in
            # column indices[i].
            # NOTE(review): ``da`` is presumably dask.array; confirm against
            # the file's imports.
            indptr = da.arange(len(indices) + 1,
                               dtype=indices.dtype,
                               chunks=self.chunks)
            data = da.ones(len(indices), dtype=self.dtype, chunks=self.chunks)
            shape = (len(indices), N)

            return csr_matrix((data, indices, indptr), shape=shape)

        row, col = self._unpack_index(key)

        # First attempt to use original row optimized methods
        # row is a single integer: [i, ?]
        if isintlike(row):
            # [i, j]
            if isintlike(col):
                return self._get_single_element(row, col)
            # [i, 1:2]
            elif isinstance(col, slice):
                return self._get_row_slice(row, col)
            # [i, [1, 2]]
            elif issequence(col):
                P = extractor(col, self.shape[1]).T
                return self[row, :] * P
        elif isinstance(row, slice):
            # row is a slice: [1:2, ?]
            if ((isintlike(col) and row.step in (1, None))
                    or (isinstance(col, slice) and col.step in (1, None)
                        and row.step in (1, None))):
                # col is int or slice with step 1, row is slice with step 1.
                return self._get_submatrix(row, col)
            elif issequence(col):
                # row is slice, col is sequence.
                P = extractor(col, self.shape[1]).T  # [1:2,[1,2]]
                sliced = self
                # Skip the row slicing entirely for a full ``:`` slice.
                if row != slice(None, None, None):
                    sliced = sliced[row, :]
                return sliced * P

        elif issequence(row):
            # row is a sequence: [[1,2], ?]
            if isintlike(col) or isinstance(col, slice):
                P = extractor(row, self.shape[0])  # [[1,2],j] or [[1,2],1:2]
                extracted = P * self
                if col == slice(None, None, None):
                    return extracted
                else:
                    return extracted[:, col]

        # Every combination not returned above falls through to
        # element-wise sampling.
        if not (issequence(col) and issequence(row)):
            # Sample elementwise
            row, col = self._index_to_arrays(row, col)

        row = asindices(row)
        col = asindices(col)
        if row.shape != col.shape:
            raise IndexError('number of row and column indices differ')
        assert row.ndim <= 2

        num_samples = np.size(row)
        if num_samples == 0:
            return csr_matrix(np.atleast_2d(row).shape, dtype=self.dtype)
        check_bounds(row, self.shape[0])
        check_bounds(col, self.shape[1])

        # ``val`` is passed to csr_sample_values as the output buffer for the
        # sampled values.
        val = np.empty(num_samples, dtype=self.dtype)
        csr_sample_values(self.shape[0], self.shape[1], self.indptr,
                          self.indices, self.data, num_samples, row.ravel(),
                          col.ravel(), val)
        if row.ndim == 1:
            # row and col are 1d
            return np.asmatrix(val)
        return self.__class__(val.reshape(row.shape))