Ejemplo n.º 1
0
def test_large_mult_vec():
    """
    Multiply a vector by a matrix with more stored entries than a C int holds.

    The matrix has 10M rows with 250 stored entries each (2.5B entries in
    total).  The test is skipped when the index or value array cannot be
    allocated.
    """
    # 10M rows * 250 entries per row = 2.5B entries >= INT_MAX
    n_rows = 10000000
    n_cols = 500
    per_row = 250
    total = n_rows * per_row

    # every row holds exactly `per_row` entries, so pointers are evenly spaced
    rowptrs = np.arange(0, total + 1, per_row, dtype=np.int64)

    assert len(rowptrs) == n_rows + 1
    assert rowptrs[-1] == total

    try:
        _log.info('allocating indexes')
        colinds = np.empty(total, dtype=np.intc)
        _log.info('allocating values')
        values = np.zeros(total)
    except MemoryError:
        skip('insufficient memory')

    _log.info('randomizing array contents')
    fill_rows(values, colinds, n_rows, n_cols, per_row)

    matrix = CSR(n_rows, n_cols, total, rowptrs, colinds, values)

    vec = np.random.randn(n_cols)

    product = matrix.mult_vec(vec)

    # one finite-or-infinite (but never NaN) result per row
    assert product.shape == (n_rows, )
    assert not np.any(np.isnan(product))
Ejemplo n.º 2
0
Archivo: als.py Proyecto: hhhhzy/lkpy
def _train_matrix_cd(mat: CSR, this: np.ndarray, other: np.ndarray,
                     reg: float):
    """
    One half of an explicit ALS training round using coordinate descent.

    Each row of ``this`` that has at least one stored entry in ``mat`` is
    re-solved against ``other`` and written back in place; rows with no
    stored entries are left untouched.

    Args:
        mat: the :math:`m \\times n` matrix of ratings
        this: the :math:`m \\times k` matrix to train (updated in place)
        other: the :math:`n \\times k` matrix of sample features
        reg: the regularization term; it is multiplied by the number of
            stored entries in each row before being passed to the solver

    Returns:
        the square root of the summed squared differences between the old
        and new rows of ``this``.
    """
    nr = mat.nrows
    nf = other.shape[1]
    # the three matrices must agree on m, n, and k
    assert mat.ncols == other.shape[0]
    assert mat.nrows == this.shape[0]
    assert this.shape[1] == nf

    # running sum of squared row changes
    frob = 0.0

    # NOTE(review): prange is presumably numba's parallel range, making
    # `frob` a reduction variable -- confirm against the file's imports.
    for i in prange(nr):
        cols = mat.row_cs(i)
        if len(cols) == 0:
            # nothing observed for this row; keep its current features
            continue

        vals = mat.row_vs(i)

        # work on a copy so the old row is still available for the delta
        w = this[i, :].copy()
        # presumably updates `w` in place (the return value is unused); the
        # meaning of the trailing 2 is not visible here -- TODO confirm
        _rr_solve(other, cols, vals, w, reg * len(cols), 2)
        delta = this[i, :] - w
        frob += np.dot(delta, delta)
        this[i, :] = w

    return np.sqrt(frob)
Ejemplo n.º 3
0
def test_mult_ab_by_size(kernel, benchmark, size):
    """Benchmark the product of two random square matrices of side ``size``."""
    lhs = CSR.from_scipy(sps.random(size, size, 0.1, format='csr'))
    rhs = CSR.from_scipy(sps.random(size, size, 0.1, format='csr'))

    # warm-up call so that any compilation happens outside the timed region
    lhs.multiply(rhs)

    def run_product():
        lhs.multiply(rhs)

    benchmark(run_product)
Ejemplo n.º 4
0
def test_mult_abt_by_density(kernel, benchmark, density):
    """Benchmark a transposed product of two random 100x100 matrices."""
    lhs = CSR.from_scipy(sps.random(100, 100, density, format='csr'))
    rhs = CSR.from_scipy(sps.random(100, 100, density, format='csr'))

    # warm-up call so that any compilation happens outside the timed region
    lhs.multiply(rhs, transpose=True)

    def run_product():
        lhs.multiply(rhs, transpose=True)

    benchmark(run_product)
Ejemplo n.º 5
0
def test_mult_ab(kernel, benchmark):
    """Benchmark the product of a random 100x500 and a random 500x200 matrix."""
    lhs = CSR.from_scipy(sps.random(100, 500, 0.1, format='csr'))
    rhs = CSR.from_scipy(sps.random(500, 200, 0.2, format='csr'))

    # warm-up call so that any compilation happens outside the timed region
    lhs.multiply(rhs)

    def run_product():
        lhs.multiply(rhs)

    benchmark(run_product)
Ejemplo n.º 6
0
def test_csr_from_coo_novals(data, nrows, ncols):
    """
    Build a CSR without values from drawn coordinates and check each row.

    Unique flat cell numbers are drawn and split into row and column
    indices; the resulting matrix must report the right dimensions and
    store, for every row, exactly the columns that were drawn for it.
    """
    n_cells = nrows * ncols
    nnz = data.draw(st.integers(0, int(n_cells * 0.75)))
    _log.info('testing %d×%d (%d nnz) with no values', nrows, ncols, nnz)

    # flat cell number -> (cell % nrows, cell // nrows)
    cell_strategy = st.integers(0, max(n_cells - 1, 0))
    cells = data.draw(
        nph.arrays(np.int32, nnz, elements=cell_strategy, unique=True))
    rows = np.mod(cells, nrows, dtype=np.int32)
    cols = np.floor_divide(cells, nrows, dtype=np.int32)

    csr = CSR.from_coo(rows, cols, None, (nrows, ncols))

    rowinds = csr.rowinds()
    assert csr.nrows == nrows
    assert csr.ncols == ncols
    assert csr.nnz == nnz

    for i in range(nrows):
        start = csr.rowptrs[i]
        stop = csr.rowptrs[i + 1]
        in_row = rows == i
        assert stop - start == np.sum(in_row)
        members, = np.nonzero(in_row)
        assert len(members) == stop - start
        # the columns drawn for this row, in ascending order
        expected_cols = np.sort(cols[members])
        assert all(np.sort(csr.colinds[start:stop]) == expected_cols)
        assert all(np.sort(csr.row_cs(i)) == expected_cols)
        assert all(rowinds[start:stop] == i)

        # the dense row sums to the number of entries stored in it
        dense_row = csr.row(i)
        assert np.sum(dense_row) == stop - start
Ejemplo n.º 7
0
def test_shard(csr):
    """Row shards cover the whole matrix, respect the size limit, and reassemble."""
    limit = 1000

    shards = csr._shard_rows(limit)
    # the shards account for every stored entry
    assert sum(part.nnz for part in shards) == csr.nnz
    # no shard is larger than requested
    assert not any(part.nnz > limit for part in shards)

    # concatenating the shards reproduces the per-row counts ...
    joined_counts = np.concatenate([part.row_nnzs() for part in shards])
    assert np.all(joined_counts == csr.row_nnzs())
    # ... the column indices ...
    joined_cols = np.concatenate([part.colinds for part in shards])
    assert np.all(joined_cols == csr.colinds)
    # ... and the values
    joined_vals = np.concatenate([part.values for part in shards])
    assert np.all(joined_vals == csr.values)

    # reassembling the shards gives back an identical matrix
    rebuilt = CSR._assemble_shards(shards)
    assert rebuilt.nrows == csr.nrows
    assert rebuilt.ncols == csr.ncols
    assert rebuilt.nnz == csr.nnz
    assert np.all(rebuilt.rowptrs == csr.rowptrs)
    assert np.all(rebuilt.colinds == csr.colinds)
    assert np.all(rebuilt.values == csr.values)
Ejemplo n.º 8
0
def mult_ab(a_h, b_h):
    """
    Multiply matrices A and B.

    The product is computed in two passes: a symbolic pass that yields the
    column indices of the result, and a numeric pass that yields its values.

    Args:
        a_h: matrix A (the left operand)
        b_h: matrix B (the right operand); it must have as many rows as A
            has columns

    Returns:
        a CSR with ``a_h.nrows`` rows and ``b_h.ncols`` columns holding the
        product.
    """

    assert a_h.ncols == b_h.nrows

    out_rows = a_h.nrows
    out_ptrs = np.zeros(out_rows + 1, np.intc)

    # symbolic pass: column indices of the product
    # (presumably also fills out_ptrs, which is read right afterwards)
    out_cols = _sym_mm(a_h, b_h, out_ptrs)
    out_nnz = out_ptrs[out_rows]

    # numeric pass: values of the product
    out_vals = _num_mm(a_h, b_h, out_ptrs, out_cols)

    # assemble the result matrix
    return CSR(out_rows, b_h.ncols, out_nnz, out_ptrs, out_cols, out_vals)
Ejemplo n.º 9
0
def test_csr_str():
    """The string form of a CSR built from COO data mentions its 4x3 shape."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    assert '4x3' in str(csr)
Ejemplo n.º 10
0
    def mkh(csr):
        """Create a handle for ``csr`` after converting its values to float64."""
        values64 = csr._required_values().astype(np.float64)
        rebuilt = CSR(csr.nrows, csr.ncols, csr.nnz,
                      csr.rowptrs, csr.colinds, values64)

        if csr.nnz != 0:
            return _make_handle(rebuilt)

        # empty matrices get a handle built directly around the rebuilt matrix
        return mkl_h(0, csr.nrows, csr.ncols, rebuilt)
Ejemplo n.º 11
0
def test_csr_rowinds():
    """``rowinds`` returns the row index of every stored entry."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64)
    csr = CSR.from_coo(rows, cols, vals)

    ris = csr.rowinds()
    assert all(ris == rows)
Ejemplo n.º 12
0
def test_empty(nrows, ncols):
    """An empty CSR has the requested shape and stores nothing."""
    empty = CSR.empty(nrows, ncols)

    # dimensions
    assert empty.nrows == nrows
    assert empty.ncols == ncols

    # no stored entries: all-zero row pointers and no column indices
    assert empty.nnz == 0
    assert all(0 == empty.rowptrs)
    assert len(empty.rowptrs) == nrows + 1
    assert len(empty.colinds) == 0
Ejemplo n.º 13
0
def test_csr_row_extent_fixed():
    """``row_extent`` returns the (start, end) storage range of each row."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64) + 1
    csr = CSR.from_coo(rows, cols, vals)

    assert csr.row_extent(0) == (0, 2)
    assert csr.row_extent(1) == (2, 3)
    # row 2 has no entries, so its extent is empty
    assert csr.row_extent(2) == (3, 3)
    assert csr.row_extent(3) == (3, 4)
Ejemplo n.º 14
0
def test_csr_row_fixed():
    """``row`` returns each row as a dense vector, with zeros where nothing is stored."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64) + 1

    csr = CSR.from_coo(rows, cols, vals)
    assert all(csr.row(0) == np.array([0, 1, 2], dtype=np.float64))
    assert all(csr.row(1) == np.array([3, 0, 0], dtype=np.float64))
    assert all(csr.row(2) == np.array([0, 0, 0], dtype=np.float64))
    assert all(csr.row(3) == np.array([0, 4, 0], dtype=np.float64))
Ejemplo n.º 15
0
def test_csr_from_coo_fixed():
    """Make a CSR from COO data and check its dimensions and values."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)
    assert csr.nrows == 4
    assert csr.ncols == 3
    assert csr.nnz == 4
    assert csr.values == approx(vals)
Ejemplo n.º 16
0
def test_unit_norm(csr: CSR):
    """
    Unit-normalizing rows rescales the matrix and returns the original norms.

    A copy taken before the call supplies the original row values.  Rows
    with no stored entries must report a norm of zero; rows whose norm is
    zero must come out as all-NaN.
    """
    original = csr.copy()

    norms = csr.normalize_rows('unit')
    assert len(norms) == csr.nrows
    assert norms.dtype == csr.values.dtype

    for i in range(csr.nrows):
        scaled = csr.row_vs(i)
        before = original.row_vs(i)
        if len(scaled) == 0:
            assert norms[i] == 0.0
            continue

        assert norms[i] == approx(np.linalg.norm(before))
        if norms[i] > 0:
            # the row now has unit length and scales back to the original
            assert np.linalg.norm(scaled) == approx(1.0)
            assert scaled * norms[i] == approx(original.row_vs(i))
        else:
            assert all(np.isnan(scaled))
Ejemplo n.º 17
0
def test_csr_set_values():
    """Assigning a same-length array to ``values`` replaces the stored values."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    v2 = 10 - vals
    csr.values = v2

    assert all(csr.values == v2)
Ejemplo n.º 18
0
def to_handle(csr: CSR) -> mkl_h:
    """
    Wrap a CSR matrix in an ``mkl_h`` handle.

    Args:
        csr: the matrix to wrap.

    Returns:
        the handle; for a matrix without stored entries it is built directly
        with no underlying matrix attached.

    Raises:
        ValueError: if the matrix stores more entries than ``_hpkg.max_nnz``.
    """
    nnz = csr.nnz
    limit = _hpkg.max_nnz
    if nnz > limit:
        raise ValueError('CSR size {} exceeds max nnz {}'.format(nnz, limit))

    if nnz != 0:
        # convert to float64 values and C-int indices before wrapping
        return _make_handle(csr._normalize(np.float64, np.intc))

    # empty matrices don't really work, so they get a placeholder handle
    return mkl_h(0, csr.nrows, csr.ncols, None)
Ejemplo n.º 19
0
def mult_vec(h: CSR, v):
    """
    Multiply a CSR matrix by a dense vector.

    Args:
        h: the matrix; read through ``nrows``, ``nnz``, ``rowptrs``,
            ``colinds`` and ``_e_value``
        v: the vector, indexed by column number

    Returns:
        a new array of length ``h.nrows`` holding the product.
    """
    out = np.zeros(h.nrows)

    cur = 0
    for pos in range(h.nnz):
        # step past every row that ends at this storage position
        # (a loop, because empty rows share their end with the next row)
        while h.rowptrs[cur + 1] == pos:
            cur += 1
        out[cur] += v[h.colinds[pos]] * h._e_value(pos)

    return out
Ejemplo n.º 20
0
def test_csr_set_values_none():
    """Clearing ``values`` leaves a matrix whose stored entries read as 1."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)
    csr.values = None

    assert csr.values is None
    assert all(csr.row(0) == [0, 1, 1])
    assert all(csr.row(1) == [1, 0, 0])
    assert all(csr.row(3) == [0, 1, 0])
Ejemplo n.º 21
0
def test_csr_set_values_oversize():
    """Assigning a too-long array to ``values`` keeps only the first ``nnz`` items."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    v2 = np.arange(6, dtype=np.float64) + 10
    csr.values = v2

    assert csr.values is not None
    assert all(csr.values == v2[:4])
Ejemplo n.º 22
0
def test_mult_vec(kernel, benchmark):
    """Benchmark multiplying a random 100x100 matrix by a random vector."""
    matrix = CSR.from_scipy(sps.random(100, 100, 0.1, format='csr'))
    vec = np.random.randn(100)

    # warm-up call so that any compilation happens outside the timed region
    warmup = matrix.mult_vec(vec)
    assert len(warmup) == matrix.nrows

    def run_product():
        matrix.mult_vec(vec)

    benchmark(run_product)
Ejemplo n.º 23
0
def test_sps_to_csr(data, format):
    """A CSR built from a SciPy matrix of any format matches SciPy's own CSR."""
    mat = data.draw(sparse_matrices(format=format))
    n_rows, n_cols = mat.shape
    # SciPy's conversion is the reference for the expected layout
    reference = mat.tocsr()

    csr = CSR.from_scipy(mat)

    # dimensions and entry count
    assert csr.ncols == n_cols
    assert csr.nrows == n_rows
    assert csr.nnz == mat.nnz

    # storage arrays
    assert np.all(csr.rowptrs == reference.indptr)
    assert np.all(csr.colinds == reference.indices)
    assert np.all(csr.values == reference.data)
Ejemplo n.º 24
0
def test_csr_set_values_undersize():
    """Assigning a too-short array to ``values`` fails and changes nothing."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    v2 = np.arange(3, dtype=np.float64) + 5

    with raises(ValueError):
        csr.values = v2

    # the rejected assignment must leave the original values in place
    assert all(csr.values == vals)
Ejemplo n.º 25
0
def test_csr_from_sps_csr(smat, copy):
    """Test creating a CSR from a SciPy CSR matrix, with and without copying."""
    csr = CSR.from_scipy(smat, copy=copy)

    # entry count and dimensions
    n_rows, n_cols = smat.shape[0], smat.shape[1]
    assert csr.nnz == smat.nnz
    assert csr.nrows == n_rows
    assert csr.ncols == n_cols

    # storage arrays carry the same content as the SciPy matrix
    assert all(csr.rowptrs == smat.indptr)
    assert all(csr.colinds == smat.indices)
    assert all(csr.values == smat.data)

    # and are exposed as plain NumPy arrays
    for index_array in (csr.rowptrs, csr.colinds):
        assert isinstance(index_array, np.ndarray)
    if csr.nnz > 0:
        assert isinstance(csr.values, np.ndarray)
Ejemplo n.º 26
0
 def _normalize(self, rmat):
     """
     Scale every column of a rating matrix to unit L2 norm.

     Columns whose norm is zero are left as they are.

     Args:
         rmat: the rating matrix; it must provide ``to_scipy()``.

     Returns:
         a new CSR built from the column-scaled SciPy matrix.
     """
     sp_mat = rmat.to_scipy()
     # L2 norm of each column
     col_norms = spla.norm(sp_mat, 2, axis=0)
     # invert the non-zero norms; zero norms keep a scale factor of 0
     scale = col_norms.copy()
     nonzero = scale > 0
     scale[nonzero] = np.reciprocal(scale[nonzero])
     # right-multiplying by a diagonal matrix rescales the columns
     scaled = sp_mat @ sps.diags(scale)
     assert scaled.shape[1] == sp_mat.shape[1]
     # replace any NaN entries with zero
     scaled.data[np.isnan(scaled.data)] = 0
     _logger.info('[%s] normalized rating matrix columns', self._timer)
     return CSR.from_scipy(scaled, False)
Ejemplo n.º 27
0
def test_csr_sparse_row():
    """``row_cs`` and ``row_vs`` return each row's stored columns and values."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)
    # column indices per row (row 2 is empty)
    assert all(csr.row_cs(0) == np.array([1, 2], dtype=np.int32))
    assert all(csr.row_cs(1) == np.array([0], dtype=np.int32))
    assert all(csr.row_cs(2) == np.array([], dtype=np.int32))
    assert all(csr.row_cs(3) == np.array([1], dtype=np.int32))

    # values per row (row 2 is empty)
    assert all(csr.row_vs(0) == np.array([0, 1], dtype=np.float64))
    assert all(csr.row_vs(1) == np.array([2], dtype=np.float64))
    assert all(csr.row_vs(2) == np.array([], dtype=np.float64))
    assert all(csr.row_vs(3) == np.array([3], dtype=np.float64))
Ejemplo n.º 28
0
def test_csr_row_nnzs(mat):
    """``row_nnzs`` reports, for each row, how many positive cells it holds."""
    n_rows, _ = mat.shape

    # zero out the non-positive cells (in place) so the matrix is sparse
    mat[mat <= 0] = 0
    smat = sps.csr_matrix(mat)
    # only continue when SciPy stored exactly the positive cells
    assume(smat.nnz == np.sum(mat > 0))
    csr = CSR.from_scipy(smat)

    counts = csr.row_nnzs()
    assert counts.sum() == csr.nnz
    for i in range(n_rows):
        positives = np.sum(mat[i, :] > 0)
        assert counts[i] == positives
Ejemplo n.º 29
0
    def _compute_similarities(self, rmat):
        """
        Compute the item-item similarity matrix from a rating matrix.

        The transposed matrix (one row per item) is cut into blocks of rows,
        ``_sim_blocks`` computes the similarities block by block, and the
        per-block results are copied into one square CSR that is finally
        passed to ``_sort_nbrs``.

        Args:
            rmat: the rating matrix; presumably users x items, since its
                transpose is treated as one row per item -- TODO confirm.

        Returns:
            an ``nitems`` x ``nitems`` CSR holding the computed similarities.
        """
        trmat = rmat.transpose()
        nitems = trmat.nrows
        # a missing or negative neighbor limit is passed on as 0
        m_nbrs = self.save_nbrs
        if m_nbrs is None or m_nbrs < 0:
            m_nbrs = 0

        # (start, end) row ranges; presumably 1000 is the block size -- confirm
        bounds = _make_blocks(nitems, 1000)
        _logger.info('[%s] splitting %d items (%d ratings) into %d blocks',
                     self._timer, nitems, trmat.nnz, len(bounds))
        blocks = [trmat.subset_rows(sp, ep) for (sp, ep) in bounds]

        _logger.info('[%s] computing similarities', self._timer)
        ptrs = List(bounds)
        nbs = List(blocks)
        if not nbs:
            # oops, this is the bad place
            # in non-JIT mode, List doesn't actually make the list
            nbs = blocks
            ptrs = bounds
        s_blocks = _sim_blocks(trmat, nbs, ptrs, self.min_sim, m_nbrs)

        nnz = sum(b.nnz for b in s_blocks)
        tot_rows = sum(b.nrows for b in s_blocks)
        _logger.info('[%s] computed %d similarities for %d items in %d blocks',
                     self._timer, nnz, tot_rows, len(s_blocks))
        # per-row entry counts of the final matrix, in block order
        row_nnzs = np.concatenate([b.row_nnzs() for b in s_blocks])
        assert len(row_nnzs) == nitems, \
            'only have {} rows for {} items'.format(len(row_nnzs), nitems)

        # allocate the full matrix up front, then copy each block into place
        smat = CSR.empty(nitems, nitems, row_nnzs)
        start = 0
        for bi, b in enumerate(s_blocks):
            bnr = b.nrows
            end = start + bnr
            # storage range covered by rows start..end of the full matrix
            v_sp = smat.rowptrs[start]
            v_ep = smat.rowptrs[end]
            _logger.debug('block %d (%d:%d) has %d entries, storing in %d:%d',
                          bi, start, end, b.nnz, v_sp, v_ep)
            smat.colinds[v_sp:v_ep] = b.colinds
            smat.values[v_sp:v_ep] = b.values
            start = end

        _logger.info('[%s] sorting similarity matrix with %d entries',
                     self._timer, smat.nnz)
        _sort_nbrs(smat)

        return smat
Ejemplo n.º 30
0
def test_subset_rows(data):
    """A row subset has ``end - beg`` rows that match the source rows from ``beg`` on."""
    draw = data.draw
    nrows = draw(st.integers(5, 100))
    ncols = draw(st.integers(1, 100))
    dens = draw(st.floats(0, 1))
    beg = draw(st.integers(0, nrows - 1))
    end = draw(st.integers(beg, nrows - 1))

    source = CSR.from_scipy(sps.random(nrows, ncols, dens, format='csr'))

    subset = source.subset_rows(beg, end)
    assert subset.nrows == end - beg

    # row `offset` of the subset is row `beg + offset` of the source
    for offset in range(subset.nrows):
        assert all(subset.row_cs(offset) == source.row_cs(beg + offset))
        assert all(subset.row_vs(offset) == source.row_vs(beg + offset))