def test_csr_from_coo_novals(data, nrows, ncols):
    """
    Check a value-less CSR built from randomly drawn COO coordinates.

    ``data`` supplies the drawn number of nonzeros and the unique linear
    cell positions; ``nrows`` and ``ncols`` give the matrix shape.
    """
    n_cells = nrows * ncols
    nnz = data.draw(st.integers(0, int(n_cells * 0.75)))
    _log.info('testing %d×%d (%d nnz) with no values', nrows, ncols, nnz)

    # Draw unique linear positions, then split each one into a
    # (row, column) pair: row = pos mod nrows, column = pos div nrows.
    cell_strategy = st.integers(0, max(n_cells - 1, 0))
    cells = data.draw(
        nph.arrays(np.int32, nnz, elements=cell_strategy, unique=True)
    )
    rows = np.mod(cells, nrows, dtype=np.int32)
    cols = np.floor_divide(cells, nrows, dtype=np.int32)

    csr = CSR.from_coo(rows, cols, None, (nrows, ncols))
    rowinds = csr.rowinds()

    assert csr.nrows == nrows
    assert csr.ncols == ncols
    assert csr.nnz == nnz

    for r in range(nrows):
        start = csr.rowptrs[r]
        end = csr.rowptrs[r + 1]

        # the row's extent must hold exactly the entries drawn for it
        in_row = rows == r
        assert end - start == np.sum(in_row)
        members = np.nonzero(in_row)[0]
        assert len(members) == end - start

        # compare column indices order-independently
        members = members[np.argsort(cols[members])]
        expected_cols = cols[members]
        assert all(np.sort(csr.colinds[start:end]) == expected_cols)
        assert all(np.sort(csr.row_cs(r)) == expected_cols)
        assert all(rowinds[start:end] == r)

        # without values, the dense row must sum to its entry count
        dense = csr.row(r)
        assert np.sum(dense) == end - start
def test_csr_str():
    """The string form of a CSR built from fixed COO data contains '4x3'."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    assert '4x3' in str(csr)
def test_csr_rowinds():
    """``rowinds()`` must reproduce the (already row-sorted) input rows."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    ris = csr.rowinds()
    assert all(ris == rows)
def test_csr_row_fixed():
    """``row(i)`` must return each row densely, with zeros for empty cells."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed;
    # values are 1..4 so stored entries are distinguishable from empty cells
    vals = np.arange(4, dtype=np.float64) + 1

    csr = CSR.from_coo(rows, cols, vals)

    assert all(csr.row(0) == np.array([0, 1, 2], dtype=np.float64))
    assert all(csr.row(1) == np.array([3, 0, 0], dtype=np.float64))
    # row 2 has no entries at all
    assert all(csr.row(2) == np.array([0, 0, 0], dtype=np.float64))
    assert all(csr.row(3) == np.array([0, 4, 0], dtype=np.float64))
def test_csr_row_extent_fixed():
    """``row_extent(i)`` must return the (start, end) positions of each row."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed
    vals = np.arange(4, dtype=np.float64) + 1

    csr = CSR.from_coo(rows, cols, vals)

    assert csr.row_extent(0) == (0, 2)
    assert csr.row_extent(1) == (2, 3)
    # the empty row has a zero-width extent
    assert csr.row_extent(2) == (3, 3)
    assert csr.row_extent(3) == (3, 4)
def test_csr_set_values():
    """Assigning a same-length array to ``values`` must replace the values."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    v2 = 10 - vals
    csr.values = v2

    assert all(csr.values == v2)
def test_csr_from_coo_fixed():
    """
    Make a CSR from fixed COO data without an explicit shape.

    The expected 4x3 dimensions follow from the largest row and column
    indices in the input.
    """
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    assert csr.nrows == 4
    assert csr.ncols == 3
    assert csr.nnz == 4
    assert csr.values == approx(vals)
def test_csr_set_values_none():
    """
    Setting ``values`` to ``None`` must drop the stored values.

    Dense rows are then expected to hold 1 at each stored position and
    0 elsewhere.
    """
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)
    csr.values = None

    assert csr.values is None
    assert all(csr.row(0) == [0, 1, 1])
    assert all(csr.row(1) == [1, 0, 0])
    assert all(csr.row(3) == [0, 1, 0])
def test_csr_set_values_oversize():
    """
    Assigning more values than stored entries must be accepted.

    The resulting values are expected to equal the first four entries
    of the six-element array.
    """
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    # six values for a matrix with only four stored entries
    v2 = np.arange(6, dtype=np.float64) + 10
    csr.values = v2

    assert csr.values is not None
    assert all(csr.values == v2[:4])
def test_csr_set_values_undersize():
    """
    Assigning fewer values than stored entries must raise ``ValueError``.

    The original values must still be in place after the failed assignment.
    """
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    # three values for a matrix with four stored entries
    v2 = np.arange(3, dtype=np.float64) + 5

    with raises(ValueError):
        csr.values = v2

    # checked outside the ``raises`` block so that it actually runs
    assert all(csr.values == vals)
def test_csr_sparse_row():
    """
    ``row_cs`` and ``row_vs`` must return each row's stored columns and values.

    Row 2 has no entries, so both accessors must return empty arrays for it.
    """
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    # column indices per row
    assert all(csr.row_cs(0) == np.array([1, 2], dtype=np.int32))
    assert all(csr.row_cs(1) == np.array([0], dtype=np.int32))
    assert all(csr.row_cs(2) == np.array([], dtype=np.int32))
    assert all(csr.row_cs(3) == np.array([1], dtype=np.int32))

    # values per row
    assert all(csr.row_vs(0) == np.array([0, 1], dtype=np.float64))
    assert all(csr.row_vs(1) == np.array([2], dtype=np.float64))
    assert all(csr.row_vs(2) == np.array([], dtype=np.float64))
    assert all(csr.row_vs(3) == np.array([3], dtype=np.float64))
def test_csr_transpose_coords():
    """
    ``transpose(False)`` must swap the dimensions and carry no values.

    Every input (row, column) pair must appear as a 1 at position ``row``
    of the transposed matrix's row ``column``.
    """
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)
    csc = csr.transpose(False)

    assert csc.nrows == csr.ncols
    assert csc.ncols == csr.nrows

    assert all(csc.rowptrs == [0, 1, 3, 4])
    assert csc.colinds.max() == 3
    assert csc.values is None

    # the values are not part of the check, so iterate coordinates only
    for r, c in zip(rows, cols):
        row = csc.row(c)
        assert row[r] == 1
def sparse_ratings(ratings, scipy=False, *, users=None, items=None):
    """
    Convert a rating table to a sparse matrix of ratings.

    Args:
        ratings(pandas.DataFrame):
            a data table of (user, item, rating) triples.  The ``rating``
            column is optional; without it the matrix carries no values
            (or ones, for the SciPy COO format).
        scipy(bool or str):
            if ``True`` or ``'csr'``, return a SciPy csr matrix instead of
            :py:class:`CSR`. if ``'coo'``, return a SciPy coo matrix.
        users(pandas.Index):
            an index of user IDs; defaults to the unique users in ``ratings``.
        items(pandas.Index):
            an index of item IDs; defaults to the unique items in ``ratings``.

    Returns:
        RatingMatrix:
            a named tuple containing the sparse matrix, user index, and
            item index.

    Raises:
        ValueError:
            if a provided ``users`` or ``items`` index does not contain
            every user or item appearing in ``ratings``.
    """
    if users is None:
        users = pd.Index(np.unique(ratings.user), name='user')

    if items is None:
        items = pd.Index(np.unique(ratings.item), name='item')

    _log.debug('creating matrix with %d ratings for %d items by %d users',
               len(ratings), len(items), len(users))

    # get_indexer reports -1 for IDs missing from the index
    row_ind = users.get_indexer(ratings.user).astype(np.intc)
    if np.any(row_ind < 0):
        raise ValueError('provided user index does not cover all users')
    col_ind = items.get_indexer(ratings.item).astype(np.intc)
    if np.any(col_ind < 0):
        raise ValueError('provided item index does not cover all items')

    if 'rating' in ratings.columns:
        vals = np.require(ratings.rating.values, np.float64)
    else:
        vals = None

    shape = (len(users), len(items))

    if scipy == 'coo':
        # SciPy's COO constructor needs a real data array, so a table
        # without ratings is represented with a 1 for each observed pair.
        data = vals if vals is not None else np.ones(len(row_ind), dtype=np.float64)
        matrix = sps.coo_matrix((data, (row_ind, col_ind)), shape=shape)
    else:
        matrix = CSR.from_coo(row_ind, col_ind, vals, shape)
        # any other truthy value (True, 'csr') asks for a SciPy CSR matrix
        if scipy:
            matrix = matrix.to_scipy()

    return RatingMatrix(matrix, users, items)
def test_csr_from_coo(data, nrows, ncols, dtype):
    """
    Check a CSR built from randomly drawn COO coordinates and values.

    ``data`` supplies the drawn number of nonzeros, the unique linear
    cell positions, and finite values of ``dtype``; ``nrows`` and
    ``ncols`` give the matrix shape.
    """
    dtype = np.dtype(dtype)
    n_cells = nrows * ncols
    nnz = data.draw(st.integers(0, int(n_cells * 0.75)))
    _log.debug('testing %d×%d (%d nnz) of type %s', nrows, ncols, nnz, dtype)

    # Draw unique linear positions, then split each one into a
    # (row, column) pair: row = pos mod nrows, column = pos div nrows.
    cell_strategy = st.integers(0, max(n_cells - 1, 0))
    cells = data.draw(
        nph.arrays(np.int32, nnz, elements=cell_strategy, unique=True)
    )
    rows = np.mod(cells, nrows, dtype=np.int32)
    cols = np.floor_divide(cells, nrows, dtype=np.int32)

    # no NaN or infinity, so the equality comparisons below are meaningful
    finite = nph.from_dtype(dtype, allow_infinity=False, allow_nan=False)
    vals = data.draw(nph.arrays(dtype, nnz, elements=finite))

    csr = CSR.from_coo(rows, cols, vals, (nrows, ncols))
    rowinds = csr.rowinds()

    assert csr.nrows == nrows
    assert csr.ncols == ncols
    assert csr.nnz == nnz

    for r in range(nrows):
        start = csr.rowptrs[r]
        end = csr.rowptrs[r + 1]

        # the row's extent must hold exactly the entries drawn for it
        in_row = rows == r
        assert end - start == np.sum(in_row)
        members = np.nonzero(in_row)[0]
        assert len(members) == end - start

        # order the input entries by column so they can be compared with
        # the column-sorted contents of the CSR row
        members = members[np.argsort(cols[members])]
        expected_cols = cols[members]
        expected_vals = vals[members]

        stored_cols = csr.colinds[start:end]
        assert all(np.sort(stored_cols) == expected_cols)
        assert all(np.sort(csr.row_cs(r)) == expected_cols)

        # values, read in column order, must line up with the input
        by_column = np.argsort(stored_cols) + start
        assert all(csr.values[by_column] == expected_vals)
        assert all(rowinds[start:end] == r)

        # the dense row must match one assembled directly from the input
        dense = np.zeros(ncols, dtype)
        dense[expected_cols] = expected_vals
        assert all(csr.row(r) == dense)