def test_kvp_add_several():
    """Fill a 10-slot KVP min-heap, then check rejection and replacement."""
    ks = np.full(10, -1, dtype=np.int32)
    vs = np.zeros(10)

    n = 0
    for k in range(10):
        v = np.random.randn()
        n = kvp_minheap_insert(0, n, 10, k, v, ks, vs)

    assert n == 10
    # all the keys
    assert all(ks >= 0)
    assert all(np.sort(ks) == list(range(10)))
    # value is the smallest
    assert vs[0] == np.min(vs)

    # it rejects a smaller value; -100 is extremely unlikely
    n2 = kvp_minheap_insert(0, n, 10, 50, -100.0, ks, vs)
    assert n2 == n
    assert all(ks != 50)
    assert all(vs > -100.0)

    # it inserts a larger value.  The value is derived from the current
    # minimum instead of being a fixed 0.0: a fixed 0.0 only exceeds the
    # minimum when at least one of the ten standard-normal draws is negative,
    # which made the test fail spuriously about once per 1024 runs.
    old_mk = ks[0]
    old_mv = vs[0]
    n2 = kvp_minheap_insert(0, n, 10, 50, old_mv + 1.0, ks, vs)
    assert n2 == n
    # the old minimum key has been removed
    assert all(ks != old_mk)
    # the old minimum value has been removed
    assert all(vs > old_mv)
    assert np.count_nonzero(ks == 50) == 1
def test_kvp_add_smaller():
    """Insert a larger value, then a smaller one, and check the resulting order."""
    keys = np.empty(10, dtype=np.int32)
    vals = np.empty(10)

    # two inserts into an empty heap, larger value first
    end = kvp_minheap_insert(0, 0, 10, 5, 3.0, keys, vals)
    end = kvp_minheap_insert(0, end, 10, 1, 1.0, keys, vals)

    # the end pointer advanced once per insert
    assert end == 2
    # both items are stored, smaller value in front
    assert (keys[:2] == [1, 5]).all()
    assert (vals[:2] == [1.0, 3.0]).all()
def _make_sim_block(nitems, bsp, bitems, r_sp, r_ep, r_cs, r_vs, min_sim, max_nbrs):
    """Build a thresholded, size-limited CSR block from row-wise sparse input.

    The input is read as ``nitems`` rows: row ``i`` occupies positions
    ``r_sp[i]:r_ep[i]`` of ``r_cs`` (column indices) and ``r_vs`` (values).
    The output has ``bitems`` rows and ``nitems`` columns and is transposed
    relative to the input: an input column index becomes an output row.

    An entry is kept when its value is at least ``min_sim`` and it is not a
    self-similarity (input row ``i`` paired with column ``r`` where
    ``i == bsp + r``).  When ``max_nbrs > 0`` each output row is capped at
    ``max_nbrs`` entries, filled through ``kvp_minheap_insert``.
    """
    # pass 1: count the accepted entries for every output row
    counts = np.zeros(bitems, np.int32)
    for src in range(nitems):
        for pos in range(r_sp[src], r_ep[src]):
            row = r_cs[pos]
            # accept only above-threshold entries that are not self-similarities
            if r_vs[pos] >= min_sim and src != bsp + row:
                counts[row] += 1

    # apply the per-row cap, if one was requested
    if max_nbrs > 0:
        for b in range(bitems):
            if counts[b] > max_nbrs:
                counts[b] = max_nbrs

    # allocate the output with exactly the room each row needs
    out = _empty_csr(bitems, nitems, counts)

    # pass 2: feed every accepted entry into its row's bounded heap;
    # ends[row] tracks how far each output row has been filled
    ends = out.rowptrs[:-1].copy()
    for src in range(nitems):
        for pos in range(r_sp[src], r_ep[src]):
            sim = r_vs[pos]
            row = r_cs[pos]
            start, stop = out.row_extent(row)
            cap = stop - start
            if src != bsp + row and sim >= min_sim:
                ends[row] = kvp_minheap_insert(start, ends[row], cap, src, sim,
                                               out.colinds, out.values)

    return out
def test_kvp_add_middle():
    """Check that a heap living in slots 25..34 of a larger array stays in bounds."""
    keys = np.full(100, -1, dtype=np.int32)
    vals = np.full(100, np.nan)

    end = 25
    drawn = []
    for key in range(25):
        val = np.random.randn()
        drawn.append(val)
        end = kvp_minheap_insert(25, end, 10, key, val, keys, vals)

    assert end == 35

    heap_keys = keys[25:35]
    heap_vals = vals[25:35]
    # every heap slot holds a real key
    assert (heap_keys >= 0).all()
    # the front of the heap is the smallest retained value
    assert heap_vals[0] == np.min(heap_vals)
    # the heap retains the 10 largest of the 25 inserted values
    assert (np.sort(heap_vals) == np.sort(drawn)[15:]).all()

    # everything outside the heap window is untouched
    assert (keys[:25] == -1).all()
    assert np.isnan(vals[:25]).all()
    assert (keys[35:] == -1).all()
    assert np.isnan(vals[35:]).all()
def test_kvp_add_middle(data):
    "Test that KVP works in the middle of an array."
    keys = np.full(100, -1, dtype=np.int32)
    vals = np.full(100, np.nan)

    end = 25
    drawn = []
    values = st.floats(-100, 100)
    for key in range(25):
        val = data.draw(values)
        drawn.append(val)
        end = kvp_minheap_insert(25, end, 10, key, val, keys, vals)

    assert end == 35

    heap_keys = keys[25:35]
    heap_vals = vals[25:35]
    # every heap slot holds a real key
    assert (heap_keys >= 0).all()
    # the front of the heap is the smallest retained value
    assert heap_vals[0] == np.min(heap_vals)
    # the heap retains the 10 largest of the 25 inserted values
    assert (np.sort(heap_vals) == np.sort(drawn)[15:]).all()

    # everything outside the heap window is untouched
    assert (keys[:25] == -1).all()
    assert np.isnan(vals[:25]).all()
    assert (keys[35:] == -1).all()
    assert np.isnan(vals[35:]).all()
def test_kvp_add_several(kvp_len, data):
    "Test filling up a KVP."
    ks = np.full(kvp_len, -1, dtype=np.int32)
    vs = np.zeros(kvp_len)

    n = 0
    values = st.floats(-100, 100)

    for k in range(kvp_len):
        v = data.draw(values)
        assume(v not in vs[:n])  # we can't keep drawing the same value
        n = kvp_minheap_insert(0, n, kvp_len, k, v, ks, vs)

    assert n == kvp_len
    # all key slots are used
    assert all(ks >= 0)
    # all keys are there
    assert all(np.sort(ks) == list(range(kvp_len)))
    # value is the smallest
    assert vs[0] == np.min(vs)

    # it rejects a smaller value; -10000 is below our min value
    special_k = 500
    rejected_v = -10000.0
    n2 = kvp_minheap_insert(0, n, kvp_len, special_k, rejected_v, ks, vs)
    assert n2 == n
    assert all(ks != special_k)
    # Compare against the rejected value itself.  The previous bound of
    # -100.0 is a legal draw from st.floats(-100, 100), so a stored -100.0
    # failed the strict comparison even though the heap behaved correctly.
    assert all(vs > rejected_v)

    # it inserts a larger value somewhere
    old_mk = ks[0]
    old_mv = vs[0]
    median = np.median(vs)
    assume(median < 40)
    nv = data.draw(st.floats(median, 50))
    n2 = kvp_minheap_insert(0, n, kvp_len, special_k, nv, ks, vs)
    assert n2 == n
    # the old value minimum key has been removed
    assert all(ks != old_mk)
    # the old minimum value has been removed
    assert all(vs > old_mv)
    assert np.count_nonzero(ks == special_k) == 1
def test_kvp_add_to_empty():
    """Insert a single item into an empty heap and check it lands in slot 0."""
    keys = np.empty(10, dtype=np.int32)
    vals = np.empty(10)

    # one insert with start and end pointers both at 0
    end = kvp_minheap_insert(0, 0, 10, 5, 3.0, keys, vals)

    # the end pointer advanced by one
    assert end == 1
    # the key/value pair is stored at the front
    assert keys[0] == 5
    assert vals[0] == 3.0
def test_kvp_add_several():
    """Fill a 50-slot heap with random values, then test rejection and replacement."""
    kvp_len = 50
    keys = np.full(kvp_len, -1, dtype=np.int32)
    vals = np.zeros(kvp_len)

    end = 0
    for key in range(kvp_len):
        end = kvp_minheap_insert(0, end, kvp_len, key, np.random.randn(), keys, vals)

    assert end == kvp_len
    # every slot received a key
    assert (keys >= 0).all()
    # and the keys are exactly 0..kvp_len-1
    assert (np.sort(keys) == list(range(kvp_len))).all()
    # the front holds the minimum value
    assert vals[0] == np.min(vals)

    # a value below everything stored is rejected; -10000 is extremely unlikely
    special_k = 500
    after_reject = kvp_minheap_insert(0, end, kvp_len, special_k, -10000.0, keys, vals)
    assert after_reject == end
    assert (keys != special_k).all()
    assert (vals > -100.0).all()

    # a larger value displaces the minimum; all positive is extremely unlikely
    prev_min_key = keys[0]
    prev_min_val = vals[0]
    after_replace = kvp_minheap_insert(0, end, kvp_len, special_k, 0.0, keys, vals)
    assert after_replace == end
    # the key that held the old minimum is gone
    assert (keys != prev_min_key).all()
    # the old minimum value is gone
    assert (vals > prev_min_val).all()
    assert np.count_nonzero(keys == special_k) == 1
def test_kvp_insert_min():
    """Insert three times with the end pointer at 0; each item must land in slot 0."""
    keys = np.full(10, -1, dtype=np.int32)
    vals = np.zeros(10)

    # a value less than the existing (zero-filled) data
    end = kvp_minheap_insert(0, 0, 10, 5, -3.0, keys, vals)
    assert end == 1
    assert keys[0] == 5
    assert vals[0] == -3.0

    # a value equal to the existing data
    end = kvp_minheap_insert(0, 0, 10, 7, -3.0, keys, vals)
    assert end == 1
    assert keys[0] == 7
    assert vals[0] == -3.0

    # a value greater than the existing data
    end = kvp_minheap_insert(0, 0, 10, 9, 5.0, keys, vals)
    assert end == 1
    assert keys[0] == 9
    assert vals[0] == 5.0
def test_kvp_sort():
    """Fill a 10-slot heap from 20 random values and check the sort is descending."""
    keys = np.full(10, -1, dtype=np.int32)
    vals = np.zeros(10)

    end = 0
    for key in range(20):
        end = kvp_minheap_insert(0, end, 10, key, np.random.randn(), keys, vals)

    assert end == 10

    # snapshot the heap contents and work out the expected descending order
    vals_before = vals.copy()
    keys_before = keys.copy()
    order = np.argsort(vals_before)[::-1]

    kvp_minheap_sort(0, end, keys, vals)

    # largest value first, smallest last
    assert vals[0] == np.max(vals_before)
    assert vals[-1] == np.min(vals_before)
    # keys and values were permuted together
    assert (keys == keys_before[order]).all()
    assert (vals == vals_before[order]).all()
def test_kvp_sort(values):
    "Test that sorting logic works"
    keys = np.full(10, -1, dtype=np.int32)
    vals = np.zeros(10)

    end = 0
    for key in range(20):
        end = kvp_minheap_insert(0, end, 10, key, values[key], keys, vals)

    assert end == 10

    # snapshot the heap contents and work out the expected descending order
    vals_before = vals.copy()
    keys_before = keys.copy()
    order = np.argsort(vals_before)[::-1]

    kvp_minheap_sort(0, end, keys, vals)

    # largest value first, smallest last
    assert vals[0] == np.max(vals_before)
    assert vals[-1] == np.min(vals_before)
    # keys and values were permuted together
    assert (keys == keys_before[order]).all()
    assert (vals == vals_before[order]).all()
def _insert(dst, used, limits, i, c, v):
    "Insert one item into a heap"
    # row i's heap starts at its row pointer and currently holds used[i] items
    start = dst.rowptrs[i]
    new_end = kvp_minheap_insert(start, start + used[i], limits[i], c, v,
                                 dst.colinds, dst.values)
    # record the row's fill count from the returned end pointer
    used[i] = new_end - start
def _sim_block(inb, rmh, min_sim, max_nbrs, nitems):
    """Compute a single block of the similarity matrix.

    ``inb`` is a ``(rmat, bsp, bep)`` triple: a CSR sub-matrix and the start
    and end offsets of the block it covers.  ``rmh`` is a handle to the full
    matrix (presumably an MKL sparse handle -- confirm against the caller).
    Entries below ``min_sim`` and self-similarities are dropped, and when
    ``max_nbrs > 0`` each row keeps at most ``max_nbrs`` entries.

    Returns a CSR block built with ``_empty_csr``; the block is empty when
    the input has no stored values or the product has no columns.
    """
    rmat, bsp, bep = inb

    # expected invariant: rmat.nrows == bep - bsp

    with objmode():
        _logger.debug('processing block %d:%d (%d nnz)', bsp, bep, rmat.nnz)

    if rmat.nnz == 0:
        # nothing to multiply; no handles have been created yet
        return _empty_csr(rmat.nrows, nitems, np.zeros(rmat.nrows, np.int32))

    # create a matrix handle for the subset matrix
    amh = _mkl_ops._from_csr(rmat)
    _lk_mkl_spopt(amh)

    smh = _lk_mkl_spmabt(rmh, amh)

    # the subset handle is not used after the product
    _lk_mkl_spfree(amh)

    _lk_mkl_sporder(smh)  # for reproducibility

    block = _lk_mkl_spexport_p(smh)
    bnr = _lk_mkl_spe_nrows(block)
    bnc = _lk_mkl_spe_ncols(block)
    # bnr and bnc should be right; expected invariant: bnc == bep - bsp

    if bnc == 0:
        # empty resulting matrix, oops.  Release the export and the product
        # handle before bailing out; this path used to return without
        # freeing them, and only after a pointless counting pass.
        _lk_mkl_spe_free(block)
        _lk_mkl_spfree(smh)
        return _empty_csr(rmat.nrows, nitems, np.zeros(rmat.nrows, np.int32))

    r_sp = _lk_mkl_spe_row_sp(block)
    r_ep = _lk_mkl_spe_row_ep(block)
    r_cs = _lk_mkl_spe_colinds(block)
    r_vs = _lk_mkl_spe_values(block)

    # pass 1: compute the size of each row
    sizes = np.zeros(rmat.nrows, np.int32)
    for i in range(bnr):
        for j in range(r_sp[i], r_ep[i]):
            # we accept the neighbor if it passes threshold and isn't a self-similarity
            r = r_cs[j]
            if i != bsp + r and r_vs[j] >= min_sim:
                sizes[r] += 1

    if max_nbrs > 0:
        for i in range(rmat.nrows):
            if sizes[i] > max_nbrs:
                sizes[i] = max_nbrs

    # allocate a matrix; it is transposed relative to the export
    # (export columns become rows here)
    block_csr = _empty_csr(bnc, bnr, sizes)

    # pass 2: truncate each row into the matrix
    # eps[r] tracks how far row r has been filled
    eps = block_csr.rowptrs[:-1].copy()
    for c in range(bnr):
        for j in range(r_sp[c], r_ep[c]):
            v = r_vs[j]
            r = r_cs[j]
            sp, lep = block_csr.row_extent(r)
            lim = lep - sp
            if c != bsp + r and v >= min_sim:
                eps[r] = kvp_minheap_insert(sp, eps[r], lim, c, v,
                                            block_csr.colinds, block_csr.values)

    # we're done! release the export and the product handle
    _lk_mkl_spe_free(block)
    _lk_mkl_spfree(smh)

    return block_csr