def test_csr_save_load(tmp_path, prefix, values):
    """Round-trip a random CSR matrix through ``csr_save`` / ``csr_load``.

    The saved arrays are written to a compressed ``.npz`` file under
    ``tmp_path`` and read back.  Dimensions, row pointers and column indices
    must match afterwards; the values must match when ``values`` is truthy
    and must be ``None`` otherwise.
    """
    out_dir = lktu.norm_path(tmp_path)

    # 1000 distinct cells drawn from a 100x50 grid, as flat coordinates
    flat = np.random.choice(np.arange(50 * 100, dtype=np.int32), 1000, False)
    rows = np.mod(flat, 100, dtype=np.int32)
    cols = np.floor_divide(flat, 100, dtype=np.int32)
    vals = np.random.randn(1000) if values else None

    csr = lm.csr_from_coo(rows, cols, vals, (100, 50))
    assert csr.nrows == 100
    assert csr.ncols == 50
    assert csr.nnz == 1000

    saved = lm.csr_save(csr, prefix=prefix)
    npz_file = out_dir / 'matrix.npz'
    np.savez_compressed(npz_file, **saved)

    with np.load(npz_file) as npz:
        restored = lm.csr_load(npz, prefix=prefix)

        assert restored.nrows == csr.nrows
        assert restored.ncols == csr.ncols
        assert restored.nnz == csr.nnz
        assert all(restored.rowptrs == csr.rowptrs)
        assert all(restored.colinds == csr.colinds)
        if not values:
            assert restored.values is None
        else:
            assert all(restored.values == csr.values)
def test_csr_to_sps():
    """Check that ``csr_to_scipy`` yields a SciPy CSR matrix with the same layout."""
    # dense random matrix with every non-positive entry zeroed out
    dense = np.random.randn(10, 5)
    dense[dense <= 0] = 0

    # COO representation of the same data
    coo = sps.coo_matrix(dense)
    # only the strictly positive cells should be stored
    assert coo.nnz == np.sum(dense > 0)

    csr = lm.csr_from_coo(coo.row, coo.col, coo.data, shape=coo.shape)
    n_rows, n_cols = coo.shape
    assert csr.nnz == coo.nnz
    assert csr.nrows == n_rows
    assert csr.ncols == n_cols

    scipy_csr = lm.csr_to_scipy(csr)
    assert sps.isspmatrix(scipy_csr)
    assert sps.isspmatrix_csr(scipy_csr)

    # row pointers, column indices and data must agree row by row
    for r in range(csr.nrows):
        start = scipy_csr.indptr[r]
        stop = scipy_csr.indptr[r + 1]
        assert start == csr.rowptrs[r]
        assert stop == csr.rowptrs[r + 1]

        span = slice(start, stop)
        assert all(scipy_csr.indices[span] == csr.colinds[span])
        assert all(scipy_csr.data[span] == csr.values[span])
def test_csr_from_coo_rand():
    """Build CSR matrices from random COO data and verify every row.

    Repeats 100 times with 1000 distinct cells of a 100x50 grid, checking
    row extents, column indices, values and the dense row against the
    original COO triples.
    """
    for _trial in range(100):
        flat = np.random.choice(np.arange(50 * 100, dtype=np.int32), 1000, False)
        rows = np.mod(flat, 100, dtype=np.int32)
        cols = np.floor_divide(flat, 100, dtype=np.int32)
        vals = np.random.randn(1000)

        csr = lm.csr_from_coo(rows, cols, vals, (100, 50))
        assert csr.nrows == 100
        assert csr.ncols == 50
        assert csr.nnz == 1000

        for r in range(100):
            start = csr.rowptrs[r]
            stop = csr.rowptrs[r + 1]
            in_row = rows == r
            assert stop - start == np.sum(in_row)

            # COO positions belonging to this row, ordered by column
            idx, = np.nonzero(in_row)
            assert len(idx) == stop - start
            idx = idx[np.argsort(cols[idx])]
            want_cols = cols[idx]
            want_vals = vals[idx]

            assert all(np.sort(csr.colinds[start:stop]) == want_cols)
            assert all(np.sort(csr.row_cs(r)) == want_cols)
            # reorder the stored values by column before comparing
            col_order = np.argsort(csr.colinds[start:stop]) + start
            assert all(csr.values[col_order] == want_vals)

            expected = np.zeros(50)
            expected[want_cols] = want_vals
            assert np.sum(csr.row(r)) == approx(np.sum(want_vals))
            assert all(csr.row(r) == expected)
def test_csr_row():
    """Check the dense rows returned by ``CSR.row`` for a small fixed matrix.

    Row 2 has no stored entries and must come back as all zeros.
    """
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64) + 1

    csr = lm.csr_from_coo(rows, cols, vals)

    assert all(csr.row(0) == np.array([0, 1, 2], dtype=np.float64))
    assert all(csr.row(1) == np.array([3, 0, 0], dtype=np.float64))
    assert all(csr.row(2) == np.array([0, 0, 0], dtype=np.float64))
    assert all(csr.row(3) == np.array([0, 4, 0], dtype=np.float64))
def test_csr_from_coo():
    """Check the inferred shape, nnz and values of a CSR built from COO triples.

    No shape is passed to ``csr_from_coo``; the test expects 4 rows and
    3 columns for the given indices.
    """
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64)

    csr = lm.csr_from_coo(rows, cols, vals)

    assert csr.nrows == 4
    assert csr.ncols == 3
    assert csr.nnz == 4
    assert csr.values == approx(vals)
def test_csr_sparse_row():
    """Check per-row column indices (``row_cs``) and values (``row_vs``).

    Row 2 has no stored entries, so both accessors must return empty arrays.
    """
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64)

    csr = lm.csr_from_coo(rows, cols, vals)

    assert all(csr.row_cs(0) == np.array([1, 2], dtype=np.int32))
    assert all(csr.row_cs(1) == np.array([0], dtype=np.int32))
    assert all(csr.row_cs(2) == np.array([], dtype=np.int32))
    assert all(csr.row_cs(3) == np.array([1], dtype=np.int32))

    assert all(csr.row_vs(0) == np.array([0, 1], dtype=np.float64))
    assert all(csr.row_vs(1) == np.array([2], dtype=np.float64))
    assert all(csr.row_vs(2) == np.array([], dtype=np.float64))
    assert all(csr.row_vs(3) == np.array([3], dtype=np.float64))
def test_csr_transpose():
    """Check that ``CSR.transpose`` swaps dimensions and keeps every entry.

    Each original ``(row, col, value)`` triple must be found at
    ``(col, row)`` in the transposed matrix.
    """
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64)

    csr = lm.csr_from_coo(rows, cols, vals)
    csc = csr.transpose()

    assert csc.nrows == csr.ncols
    assert csc.ncols == csr.nrows

    # original columns 0, 1, 2 hold 1, 2 and 1 entries respectively
    assert all(csc.rowptrs == [0, 1, 3, 4])
    assert csc.colinds.max() == 3
    assert csc.values.sum() == approx(vals.sum())

    for r, c, v in zip(rows, cols, vals):
        row = csc.row(c)
        assert row[r] == v
def test_csr_from_coo_novals():
    """Build value-less CSR matrices from random COO indices and verify rows.

    Repeats 50 times with 1000 distinct cells of a 100x50 grid.  ``None`` is
    passed for the values, and each dense row is expected to sum to its
    number of stored entries.
    """
    for _trial in range(50):
        flat = np.random.choice(np.arange(50 * 100, dtype=np.int32), 1000, False)
        rows = np.mod(flat, 100, dtype=np.int32)
        cols = np.floor_divide(flat, 100, dtype=np.int32)

        csr = lm.csr_from_coo(rows, cols, None, (100, 50))
        assert csr.nrows == 100
        assert csr.ncols == 50
        assert csr.nnz == 1000

        for r in range(100):
            start = csr.rowptrs[r]
            stop = csr.rowptrs[r + 1]
            in_row = rows == r
            assert stop - start == np.sum(in_row)

            # COO positions belonging to this row, ordered by column
            idx, = np.nonzero(in_row)
            idx = idx[np.argsort(cols[idx])]

            assert all(np.sort(csr.colinds[start:stop]) == cols[idx])
            assert np.sum(csr.row(r)) == len(idx)
def _select_similarities(self, nitems, rows, cols, vals):
    """Assemble the similarity matrix and truncate each row to ``save_nbrs``.

    Builds an ``nitems`` x ``nitems`` CSR matrix from the COO triples and
    calls ``sort_values()`` on it.  When ``self.save_nbrs`` is ``None`` or
    not positive, that matrix is returned unchanged.  Otherwise a new CSR is
    returned that holds only the first ``min(row size, save_nbrs)`` stored
    entries of every row.

    NOTE(review): this relies on ``sort_values()`` placing the entries to
    keep at the front of each row -- presumably largest first; confirm
    against ``matrix.CSR``.
    """
    _logger.info('[%s] ordering similarities', self._timer)
    sims = matrix.csr_from_coo(rows, cols, vals, shape=(nitems, nitems))
    sims.sort_values()

    limit = self.save_nbrs
    if limit is None or limit <= 0:
        return sims

    _logger.info('[%s] picking %d top similarities', self._timer, limit)
    row_sizes = sims.row_nnzs()
    _logger.debug('have %d rows in size range [%d,%d]',
                  len(row_sizes), np.min(row_sizes), np.max(row_sizes))

    # number of entries each row keeps after truncation
    kept_sizes = np.fmin(row_sizes, limit)
    _logger.debug('will have %d rows in size range [%d,%d]',
                  len(kept_sizes), np.min(kept_sizes), np.max(kept_sizes))
    assert np.all(kept_sizes <= limit)
    assert np.all(kept_sizes >= 0)

    kept_nnz = np.sum(kept_sizes)
    new_ptrs = np.zeros_like(sims.rowptrs)
    new_ptrs[1:] = np.cumsum(kept_sizes)
    new_cols = np.zeros(kept_nnz, np.int32)
    new_vals = np.zeros(kept_nnz)

    # copy the leading entries of every row into the compacted arrays
    for r in range(nitems):
        n_keep = kept_sizes[r]
        src = sims.rowptrs[r]
        dst = new_ptrs[r]
        new_cols[dst:dst + n_keep] = sims.colinds[src:src + n_keep]
        new_vals[dst:dst + n_keep] = sims.values[src:src + n_keep]

    return matrix.CSR(sims.nrows, sims.ncols, kept_nnz, new_ptrs, new_cols, new_vals)