def test_issequence(self): assert_equal(sputils.issequence((1, )), True) assert_equal(sputils.issequence((1, 2, 3)), True) assert_equal(sputils.issequence([1]), True) assert_equal(sputils.issequence([1, 2, 3]), True) assert_equal(sputils.issequence(np.array([1, 2, 3])), True) assert_equal(sputils.issequence(np.array([[1], [2], [3]])), False) assert_equal(sputils.issequence(3), False)
def test_issequence(self): assert_equal(sputils.issequence( (1,) ),True) assert_equal(sputils.issequence( (1,2,3) ),True) assert_equal(sputils.issequence( [1] ),True) assert_equal(sputils.issequence( [1,2,3] ),True) assert_equal(sputils.issequence( np.array([1,2,3]) ),True) assert_equal(sputils.issequence( np.array([[1],[2],[3]]) ),False) assert_equal(sputils.issequence( 3 ),False)
def __getitem__(self, index):
    """Return the interactions selected by ``index`` as an ``(inputs, targets)`` pair.

    ``index`` must be a sequence or an int-like value according to
    ``sputils``. The first element of the returned tuple wraps the rows
    extracted from ``self.interactions_matrix``. The second element wraps
    the rows extracted from ``self.target_interactions_matrix``, or is
    ``None`` when no target matrix is set.
    """
    assert sputils.issequence(index) or sputils.isintlike(index)

    # Flatten the index so that a single int-like index also yields a 1-D array.
    user_ids = np.array(index).reshape(-1)
    input_rows = self._extract(self.interactions_matrix, index)

    if self.target_interactions_matrix is None:
        # No target matrix: only the input interactions are returned.
        return UsersInteractions(users=user_ids, interactions_matrix=input_rows), None

    target_rows = self._extract(self.target_interactions_matrix, index)
    input_batch = UsersInteractions(users=user_ids, interactions_matrix=input_rows)
    target_batch = UsersInteractions(users=user_ids, interactions_matrix=target_rows)
    return input_batch, target_batch
def _extract(self, sparse_matrix, index):
    """Return ``sparse_matrix[index]``, chunking the lookup for long index sequences.

    When ``index`` is a sequence longer than ``CSR_MATRIX_INDEX_SIZE_LIMIT``
    the rows are extracted in consecutive chunks of at most that size and
    the pieces are stacked vertically; otherwise the matrix is indexed
    directly.
    """
    needs_chunking = sputils.issequence(index) and len(index) > CSR_MATRIX_INDEX_SIZE_LIMIT
    if not needs_chunking:
        return sparse_matrix[index]

    # scipy implements indexing of a csr_matrix with a list through a matrix
    # multiplication, which leads to memory issues when the index list is
    # large.
    # Reference: https://stackoverflow.com/questions/46034212/sparse-matrix-slicing-memory-error/46040827#46040827
    # To work around this, the index is cut into pieces of size
    # CSR_MATRIX_INDEX_SIZE_LIMIT and the extracted pieces are stacked.
    chunk_starts = range(0, len(index), CSR_MATRIX_INDEX_SIZE_LIMIT)
    row_chunks = [
        sparse_matrix[index[start:start + CSR_MATRIX_INDEX_SIZE_LIMIT]]
        for start in chunk_starts
    ]
    return sparse.vstack(row_chunks)
def __getitem__(self, key):
    """Return the element(s) of the matrix selected by ``key``.

    ``key`` is split into a row index and a column index by
    ``self._unpack_index``. Combinations of int-like, slice and sequence
    indices are first dispatched to dedicated fast paths. Any combination
    not handled there falls through to element-wise sampling with
    ``csr_sample_values``.

    Raises
    ------
    IndexError
        If an index cannot be converted to an index array, if an index is
        out of range, or if the row and column index arrays differ in
        shape.
    """

    def asindices(x):
        """Convert ``x`` to an array using the smallest suitable index dtype."""
        try:
            x = np.asarray(x)

            # Check index contents, to avoid creating 64-bit arrays needlessly
            idx_dtype = get_index_dtype((x, ), check_contents=True)
            if idx_dtype != x.dtype:
                x = x.astype(idx_dtype)
        except Exception:
            # Only ordinary errors are reported as an invalid index;
            # KeyboardInterrupt and SystemExit must propagate unchanged.
            raise IndexError('invalid index')
        else:
            return x

    def check_bounds(indices, N):
        """Return ``(min, max)`` of ``indices``; raise IndexError if outside ``[-N, N)``."""
        if indices.size == 0:
            return (0, 0)

        max_indx = indices.max()
        if max_indx >= N:
            raise IndexError('index (%d) out of range' % max_indx)

        min_indx = indices.min()
        if min_indx < -N:
            raise IndexError('index (%d) out of range' % (N + min_indx))

        return (min_indx, max_indx)

    def extractor(indices, N):
        """Return a sparse matrix P so that P*self implements
        slicing of the form self[[1,2,3],:]
        """
        indices = asindices(indices)

        (min_indx, max_indx) = check_bounds(indices, N)

        if min_indx < 0:
            # Copy before wrapping negative indices so the caller's array
            # is not modified in place.
            indices = indices.copy()
            indices[indices < 0] += N

        # NOTE(review): `da` is presumably dask.array, given the `chunks`
        # keyword -- confirm against this file's imports.
        indptr = da.arange(len(indices) + 1, dtype=indices.dtype,
                           chunks=self.chunks)
        data = da.ones(len(indices), dtype=self.dtype, chunks=self.chunks)
        shape = (len(indices), N)

        return csr_matrix((data, indices, indptr), shape=shape)

    row, col = self._unpack_index(key)

    # First attempt to use original row optimized methods
    # [1, ?]
    if isintlike(row):
        # [i, j]
        if isintlike(col):
            return self._get_single_element(row, col)
        # [i, 1:2]
        elif isinstance(col, slice):
            return self._get_row_slice(row, col)
        # [i, [1, 2]]
        elif issequence(col):
            P = extractor(col, self.shape[1]).T
            return self[row, :] * P
    elif isinstance(row, slice):
        # [1:2,??]
        if ((isintlike(col) and row.step in (1, None)) or
                (isinstance(col, slice) and
                 col.step in (1, None) and
                 row.step in (1, None))):
            # col is int or slice with step 1, row is slice with step 1.
            return self._get_submatrix(row, col)
        elif issequence(col):
            # row is slice, col is sequence.
            P = extractor(col, self.shape[1]).T  # [1:2,[1,2]]
            sliced = self
            if row != slice(None, None, None):
                sliced = sliced[row, :]
            return sliced * P
    elif issequence(row):
        # [[1,2],??]
        if isintlike(col) or isinstance(col, slice):
            P = extractor(row, self.shape[0])  # [[1,2],j] or [[1,2],1:2]
            extracted = P * self
            if col == slice(None, None, None):
                return extracted
            else:
                return extracted[:, col]

    # None of the fast paths returned: fall back to element-wise sampling.
    if not (issequence(col) and issequence(row)):
        # Sample elementwise
        row, col = self._index_to_arrays(row, col)

    row = asindices(row)
    col = asindices(col)
    if row.shape != col.shape:
        raise IndexError('number of row and column indices differ')
    assert row.ndim <= 2

    num_samples = np.size(row)
    if num_samples == 0:
        return csr_matrix(np.atleast_2d(row).shape, dtype=self.dtype)
    check_bounds(row, self.shape[0])
    check_bounds(col, self.shape[1])

    # `val` is passed to csr_sample_values as the output buffer for the
    # sampled values.
    val = np.empty(num_samples, dtype=self.dtype)
    csr_sample_values(self.shape[0], self.shape[1],
                      self.indptr, self.indices, self.data,
                      num_samples, row.ravel(), col.ravel(), val)
    if row.ndim == 1:
        # row and col are 1d
        return np.asmatrix(val)
    return self.__class__(val.reshape(row.shape))