Esempio n. 1
0
def test_kvp_add_several():
    """Fill a 10-slot heap, then check rejection and replacement at capacity.

    Values come from ``np.random.randn``.  The replacement step inserts a
    value derived from the current minimum rather than a fixed constant, so
    the outcome does not depend on the sign of the random draws.
    """
    ks = np.full(10, -1, dtype=np.int32)
    vs = np.zeros(10)

    n = 0

    for k in range(10):
        v = np.random.randn()
        n = kvp_minheap_insert(0, n, 10, k, v, ks, vs)

    assert n == 10
    # every slot holds a real key (no -1 placeholders remain)
    assert all(ks >= 0)
    # the stored keys are exactly 0..9
    assert all(np.sort(ks) == list(range(10)))
    # the smallest value sits at the front
    assert vs[0] == np.min(vs)

    # it rejects a smaller value; a normal draw below -100 is extremely unlikely
    n2 = kvp_minheap_insert(0, n, 10, 50, -100.0, ks, vs)

    assert n2 == n
    assert all(ks != 50)
    assert all(vs > -100.0)

    # it inserts a larger value.  Inserting a fixed 0.0 here would be rejected
    # whenever all ten draws happen to be positive (about 1 run in 1024), so
    # the new value is offset from the current minimum instead.
    old_mk = ks[0]
    old_mv = vs[0]
    n2 = kvp_minheap_insert(0, n, 10, 50, old_mv + 1.0, ks, vs)

    assert n2 == n
    # the key that held the old minimum is gone
    assert all(ks != old_mk)
    # every remaining value is above the old minimum
    assert all(vs > old_mv)
    assert np.count_nonzero(ks == 50) == 1
Esempio n. 2
0
def test_kvp_add_smaller():
    """Inserting a smaller value after a larger one puts the smaller first."""
    keys = np.empty(10, dtype=np.int32)
    vals = np.empty(10)

    # add (5, 3.0) first, then the smaller (1, 1.0)
    end = kvp_minheap_insert(0, 0, 10, 5, 3.0, keys, vals)
    end = kvp_minheap_insert(0, end, 10, 1, 1.0, keys, vals)

    # the end pointer advanced once per insert
    assert end == 2

    # both pairs are stored, smallest value in front
    assert np.array_equal(keys[:2], [1, 5])
    assert np.array_equal(vals[:2], [1.0, 3.0])
Esempio n. 3
0
def _make_sim_block(nitems, bsp, bitems, r_sp, r_ep, r_cs, r_vs, min_sim, max_nbrs):
    """
    Build a thresholded, size-limited CSR block from raw similarity rows.

    For each input row ``c`` in ``range(nitems)``, the entries at positions
    ``r_sp[c]:r_ep[c]`` of ``r_cs`` / ``r_vs`` are scanned.  An entry with
    column index ``r`` and value ``v`` is kept when ``c != bsp + r`` (not a
    self-similarity) and ``v >= min_sim``; it is then stored in output row
    ``r`` with column ``c``.  When ``max_nbrs > 0`` each output row is capped
    at ``max_nbrs`` entries, with ``kvp_minheap_insert`` deciding which
    entries stay once a row is full.

    Returns the matrix allocated by ``_empty_csr(bitems, nitems, sizes)``.
    """
    # pass 1: count how many entries each output row will hold
    counts = np.zeros(bitems, np.int32)
    for src in range(nitems):
        for pos in range(r_sp[src], r_ep[src]):
            row = r_cs[pos]
            keep = src != bsp + row and r_vs[pos] >= min_sim
            if keep:
                counts[row] += 1

    # cap the row sizes when a neighbor limit is configured
    if max_nbrs > 0:
        for row in range(bitems):
            counts[row] = min(counts[row], max_nbrs)

    # allocate the output with the final row sizes
    out = _empty_csr(bitems, nitems, counts)

    # pass 2: push every accepted entry into its row's heap; fill[r] tracks
    # the current end position of row r's heap
    fill = out.rowptrs[:-1].copy()
    for col in range(nitems):
        for pos in range(r_sp[col], r_ep[col]):
            sim = r_vs[pos]
            row = r_cs[pos]
            start, stop = out.row_extent(row)
            capacity = stop - start
            if col != bsp + row and sim >= min_sim:
                fill[row] = kvp_minheap_insert(
                    start, fill[row], capacity, col, sim, out.colinds, out.values
                )

    return out
Esempio n. 4
0
def test_kvp_add_middle():
    """A heap stored at offsets 25..35 of a larger array leaves the rest alone."""
    keys = np.full(100, -1, dtype=np.int32)
    vals = np.full(100, np.nan)

    end = 25
    drawn = []

    # offer 25 random values to a heap limited to 10 slots
    for key in range(25):
        val = np.random.randn()
        drawn.append(val)
        end = kvp_minheap_insert(25, end, 10, key, val, keys, vals)

    assert end == 35

    heap_keys = keys[25:35]
    heap_vals = vals[25:35]
    # every heap slot holds a real key
    assert np.all(heap_keys >= 0)
    # the smallest retained value is at the front of the heap
    assert heap_vals[0] == heap_vals.min()
    # the heap retains exactly the 10 largest of the 25 offered values
    assert np.array_equal(np.sort(heap_vals), np.sort(drawn)[15:])

    # nothing before or after the heap region was written
    assert np.all(keys[:25] == -1)
    assert np.all(np.isnan(vals[:25]))
    assert np.all(keys[35:] == -1)
    assert np.all(np.isnan(vals[35:]))
Esempio n. 5
0
def test_kvp_add_middle(data):
    "Check that a heap placed in the middle of an array stays inside its region."
    keys = np.full(100, -1, dtype=np.int32)
    vals = np.full(100, np.nan)

    end = 25
    drawn = []

    # offer 25 drawn values to a heap limited to 10 slots
    value_strategy = st.floats(-100, 100)
    for key in range(25):
        val = data.draw(value_strategy)
        drawn.append(val)
        end = kvp_minheap_insert(25, end, 10, key, val, keys, vals)

    assert end == 35

    heap_keys = keys[25:35]
    heap_vals = vals[25:35]
    # every heap slot holds a real key
    assert np.all(heap_keys >= 0)
    # the smallest retained value is at the front of the heap
    assert heap_vals[0] == heap_vals.min()
    # the heap retains exactly the 10 largest of the 25 offered values
    assert np.array_equal(np.sort(heap_vals), np.sort(drawn)[15:])

    # nothing before or after the heap region was written
    assert np.all(keys[:25] == -1)
    assert np.all(np.isnan(vals[:25]))
    assert np.all(keys[35:] == -1)
    assert np.all(np.isnan(vals[35:]))
Esempio n. 6
0
def test_kvp_add_several(kvp_len, data):
    """Test filling up a KVP.

    ``kvp_len`` distinct values from [-100, 100] fill the heap completely.
    A value below that range must then be rejected, and a value above the
    current minimum must replace the minimum.
    """
    ks = np.full(kvp_len, -1, dtype=np.int32)
    vs = np.zeros(kvp_len)

    n = 0

    values = st.floats(-100, 100)

    for k in range(kvp_len):
        v = data.draw(values)
        assume(v not in vs[:n])  # we can't keep drawing the same value
        n = kvp_minheap_insert(0, n, kvp_len, k, v, ks, vs)

    assert n == kvp_len
    # all key slots are used
    assert all(ks >= 0)
    # all keys are there
    assert all(np.sort(ks) == list(range(kvp_len)))
    # value is the smallest
    assert vs[0] == np.min(vs)

    # it rejects a smaller value; -10000 is below our min value
    special_k = 500
    n2 = kvp_minheap_insert(0, n, kvp_len, special_k, -10000.0, ks, vs)

    assert n2 == n
    assert all(ks != special_k)
    # the draw range includes -100.0 itself, so the bound must be inclusive;
    # a strict comparison fails on a legitimately drawn -100.0
    assert all(vs >= -100.0)

    # it inserts a larger value somewhere
    old_mk = ks[0]
    old_mv = vs[0]
    assume(np.median(vs) < 40)
    nv = data.draw(st.floats(np.median(vs), 50))
    # the replacement has to be strictly above the old minimum for the
    # assertions below to be satisfiable (the median can equal the minimum,
    # e.g. for a single-slot heap)
    assume(nv > old_mv)
    n2 = kvp_minheap_insert(0, n, kvp_len, special_k, nv, ks, vs)

    assert n2 == n
    # the old value minimum key has been removed
    assert all(ks != old_mk)
    # the old minimum value has been removed
    assert all(vs > old_mv)
    assert np.count_nonzero(ks == special_k) == 1
Esempio n. 7
0
def test_kvp_add_to_empty():
    """A single insert into an empty heap lands in slot 0."""
    keys = np.empty(10, dtype=np.int32)
    vals = np.empty(10)

    # add the pair (5, 3.0) to a heap with start == end == 0
    end = kvp_minheap_insert(0, 0, 10, 5, 3.0, keys, vals)

    # the end pointer advanced by one
    assert end == 1

    # the pair is stored in the first slot
    stored = (keys[0], vals[0])
    assert stored == (5, 3.0)
Esempio n. 8
0
def test_kvp_add_several():
    """Fill a 50-slot heap and check its behavior once it is at capacity."""
    size = 50
    keys = np.full(size, -1, dtype=np.int32)
    vals = np.zeros(size)

    end = 0
    for key in range(size):
        end = kvp_minheap_insert(0, end, size, key, np.random.randn(), keys, vals)

    assert end == size
    # no placeholder keys remain
    assert np.all(keys >= 0)
    # the stored keys are exactly 0..size-1
    assert np.array_equal(np.sort(keys), np.arange(size))
    # the smallest value is in front
    assert vals[0] == vals.min()

    # a value far below anything a normal draw produces is rejected
    outsider = 500
    after = kvp_minheap_insert(0, end, size, outsider, -10000.0, keys, vals)

    assert after == end
    assert outsider not in keys
    assert np.all(vals > -100.0)

    # 0.0 displaces the current minimum; 50 all-positive draws are
    # extremely unlikely
    evicted_key = keys[0]
    evicted_val = vals[0]
    after = kvp_minheap_insert(0, end, size, outsider, 0.0, keys, vals)

    assert after == end
    # the key that held the old minimum is gone
    assert evicted_key not in keys
    # every remaining value is above the old minimum
    assert np.all(vals > evicted_val)
    assert np.count_nonzero(keys == outsider) == 1
Esempio n. 9
0
def test_kvp_insert_min():
    """Each insert made with an end pointer of 0 overwrites slot 0."""
    keys = np.full(10, -1, dtype=np.int32)
    vals = np.zeros(10)

    # Every call passes start == end == 0, so the new pair always lands in
    # slot 0, whatever was stored there before.
    cases = (
        (5, -3.0),  # less than the existing data
        (7, -3.0),  # equal to the existing data
        (9, 5.0),   # greater than the existing data
    )
    for key, val in cases:
        end = kvp_minheap_insert(0, 0, 10, key, val, keys, vals)
        assert end == 1
        assert keys[0] == key
        assert vals[0] == val
Esempio n. 10
0
def test_kvp_sort():
    """Sorting a full heap orders keys and values by decreasing value."""
    keys = np.full(10, -1, dtype=np.int32)
    vals = np.zeros(10)

    # offer 20 random values to a heap limited to 10 slots
    end = 0
    for key in range(20):
        end = kvp_minheap_insert(0, end, 10, key, np.random.randn(), keys, vals)

    assert end == 10

    # snapshot the heap contents and work out the expected descending order
    vals_before = vals.copy()
    keys_before = keys.copy()
    descending = np.argsort(vals_before)[::-1]

    kvp_minheap_sort(0, end, keys, vals)

    # largest first, smallest last
    assert vals[0] == vals_before.max()
    assert vals[-1] == vals_before.min()
    # keys travelled together with their values
    assert np.array_equal(keys, keys_before[descending])
    assert np.array_equal(vals, vals_before[descending])
Esempio n. 11
0
def test_kvp_sort(values):
    "Check that sorting a full heap orders it by decreasing value."
    keys = np.full(10, -1, dtype=np.int32)
    vals = np.zeros(10)

    # offer the first 20 supplied values to a heap limited to 10 slots
    end = 0
    for key in range(20):
        end = kvp_minheap_insert(0, end, 10, key, values[key], keys, vals)

    assert end == 10

    # snapshot the heap contents and work out the expected descending order
    vals_before = vals.copy()
    keys_before = keys.copy()
    descending = np.argsort(vals_before)[::-1]

    kvp_minheap_sort(0, end, keys, vals)

    # largest first, smallest last
    assert vals[0] == vals_before.max()
    assert vals[-1] == vals_before.min()
    # keys travelled together with their values
    assert np.array_equal(keys, keys_before[descending])
    assert np.array_equal(vals, vals_before[descending])
Esempio n. 12
0
def _insert(dst, used, limits, i, c, v):
    "Push the pair (c, v) onto row i's heap in dst and update used[i]."
    # the row's heap begins at its row pointer and currently holds used[i] entries
    start = dst.rowptrs[i]
    end = kvp_minheap_insert(
        start, start + used[i], limits[i], c, v, dst.colinds, dst.values
    )
    # record the new fill count as reported by the insert
    used[i] = end - start
Esempio n. 13
0
def _sim_block(inb, rmh, min_sim, max_nbrs, nitems):
    """
    Compute a single block of the similarity matrix.

    ``inb`` is a tuple ``(rmat, bsp, bep)``.  ``rmat`` is turned into a
    matrix handle and combined with the handle ``rmh`` through
    ``_lk_mkl_spmabt``; the product is exported and its entries are filtered
    and truncated into a new CSR matrix.  An exported entry in row ``c`` with
    column index ``r`` and value ``v`` is kept when ``c != bsp + r`` (not a
    self-similarity) and ``v >= min_sim``; it is stored in output row ``r``
    with column ``c``.  When ``max_nbrs > 0`` each output row holds at most
    ``max_nbrs`` entries, with ``kvp_minheap_insert`` deciding which entries
    stay once a row is full.

    Every exit path taken after the product has been created releases both
    the exported block and the product handle.

    Returns a CSR matrix built by ``_empty_csr``.
    """
    rmat, bsp, bep = inb
    # NOTE(review): rmat.nrows is presumably bep - bsp; the original check
    # was disabled -- confirm against the caller.

    with objmode():
        _logger.debug('processing block %d:%d (%d nnz)', bsp, bep, rmat.nnz)

    if rmat.nnz == 0:
        # nothing to multiply; no handles have been created yet
        return _empty_csr(rmat.nrows, nitems, np.zeros(rmat.nrows, np.int32))

    # create a matrix handle for the subset matrix
    amh = _mkl_ops._from_csr(rmat)
    _lk_mkl_spopt(amh)

    smh = _lk_mkl_spmabt(rmh, amh)

    # the subset handle is not needed once the product exists
    _lk_mkl_spfree(amh)

    _lk_mkl_sporder(smh)  # for reproducibility

    block = _lk_mkl_spexport_p(smh)
    bnr = _lk_mkl_spe_nrows(block)
    bnc = _lk_mkl_spe_ncols(block)
    # NOTE(review): bnc is presumably bep - bsp; the original check was
    # disabled -- confirm.

    r_sp = _lk_mkl_spe_row_sp(block)
    r_ep = _lk_mkl_spe_row_ep(block)
    r_cs = _lk_mkl_spe_colinds(block)
    r_vs = _lk_mkl_spe_values(block)

    # pass 1: compute the size of each row
    sizes = np.zeros(rmat.nrows, np.int32)
    for i in range(bnr):
        for j in range(r_sp[i], r_ep[i]):
            # we accept the neighbor if it passes threshold and isn't a self-similarity
            r = r_cs[j]
            if i != bsp + r and r_vs[j] >= min_sim:
                sizes[r] += 1

    if max_nbrs > 0:
        for i in range(rmat.nrows):
            if sizes[i] > max_nbrs:
                sizes[i] = max_nbrs

    if bnc == 0:
        # empty resulting matrix; release the export and the product handle
        # before leaving, exactly as the normal exit path does, so this early
        # return does not leak them
        _lk_mkl_spe_free(block)
        _lk_mkl_spfree(smh)
        return _empty_csr(rmat.nrows, nitems, np.zeros(rmat.nrows, np.int32))

    # allocate a matrix
    block_csr = _empty_csr(bnc, bnr, sizes)

    # pass 2: truncate each row into the matrix; eps[r] tracks the current
    # end position of output row r's heap
    eps = block_csr.rowptrs[:-1].copy()
    for c in range(bnr):
        for j in range(r_sp[c], r_ep[c]):
            v = r_vs[j]
            r = r_cs[j]
            sp, lep = block_csr.row_extent(r)
            lim = lep - sp
            if c != bsp + r and v >= min_sim:
                eps[r] = kvp_minheap_insert(sp, eps[r], lim, c, v,
                                            block_csr.colinds,
                                            block_csr.values)

    # all entries are copied out; release the export and the product handle
    _lk_mkl_spe_free(block)
    _lk_mkl_spfree(smh)
    return block_csr