Beispiel #1
0
def test_cosine_convertable_dtype():
    """Cosine similarity of ``a1`` cast to object dtype matches the reference.

    The input is ``a1.astype('object')``; the result is densified and
    compared against ``expected_cosine_sim`` with ``allclose``.
    """
    object_input = a1.astype('object')
    similarity = truncated_sparse_similarity(
        object_input,
        metric='cosine',
        thresh=0,
        diag_value=None,
        n_jobs=1,
    )
    assert allclose(similarity.todense(), expected_cosine_sim)
Beispiel #2
0
def test_hamming_no_thresh_dense():
    """Hamming similarity of ``a2`` with a zero threshold matches the reference.

    The diagonal is set through ``diag_value=0`` and the densified result is
    compared against ``expected_hamming_sim`` with ``allclose``.
    """
    similarity = truncated_sparse_similarity(
        a2,
        metric='hamming',
        thresh=0,
        diag_value=0,
        n_jobs=1,
    )
    assert allclose(similarity.todense(), expected_hamming_sim)
Beispiel #3
0
def test_cosine_no_thresh():
    """Cosine similarity of ``a1`` with a zero threshold matches the reference.

    The densified result is compared against ``expected_cosine_sim`` with
    ``allclose``.
    """
    similarity = truncated_sparse_similarity(
        a1,
        metric='cosine',
        thresh=0,
        diag_value=None,
        n_jobs=1,
    )
    assert allclose(similarity.todense(), expected_cosine_sim)
Beispiel #4
0
def test_sim_sparsity():
    """The similarity result for ``a1`` must pass the ``issparse`` check."""
    similarity = truncated_sparse_similarity(
        a1,
        metric='cosine',
        thresh=0.9,
        diag_value=0,
        n_jobs=1,
    )
    assert issparse(similarity)
Beispiel #5
0
def test_cosine_thresh():
    """Cosine similarity of ``a1`` with a 0.9 threshold matches the reference.

    The reference is a copy of ``expected_cosine_sim`` in which every entry
    below the threshold has been zeroed.
    """
    cutoff = 0.9

    # Work on a copy so the shared reference matrix is left untouched.
    reference = expected_cosine_sim.copy()
    below_cutoff = reference < cutoff
    reference[below_cutoff] = 0

    similarity = truncated_sparse_similarity(
        a1,
        metric='cosine',
        thresh=cutoff,
        diag_value=None,
        n_jobs=1,
    )
    assert allclose(similarity.todense(), reference)
Beispiel #6
0
def test_cosine_parallel():
    """Cosine similarity of ``a1`` computed with ``n_jobs=-1`` matches the reference.

    The test is reported as skipped when ``HAS_JOBLIB`` is false. Previously
    it only printed a message and was counted as a pass, which hid the fact
    that nothing had been verified.
    """
    if not HAS_JOBLIB:
        pytest.skip('Could not find a Joblib installation, skipping test')

    similarity = truncated_sparse_similarity(
        a1,
        metric='cosine',
        thresh=0,
        diag_value=None,
        n_jobs=-1,
    )
    assert allclose(similarity.todense(), expected_cosine_sim)
Beispiel #7
0
def test_hamming_thresh_dense():
    """Hamming similarity of ``a2`` with a 0.75 threshold matches the reference.

    The reference is a copy of ``expected_hamming_sim`` in which every entry
    below the threshold has been zeroed. ``n_jobs`` is -1 when
    ``HAS_JOBLIB`` is true and 1 otherwise.
    """
    cutoff = 0.75

    # Work on a copy so the shared reference matrix is left untouched.
    reference = expected_hamming_sim.copy()
    below_cutoff = reference < cutoff
    reference[below_cutoff] = 0

    job_count = -1 if HAS_JOBLIB else 1

    similarity = truncated_sparse_similarity(
        a2,
        metric='hamming',
        thresh=cutoff,
        diag_value=0,
        n_jobs=job_count,
    )
    assert allclose(similarity.todense(), reference)
# Benchmark: for every row count in ``n_rows`` time three ways of building a
# thresholded cosine-similarity matrix from the same random input. Each list
# collects one elapsed time (per ``timeit.default_timer``) per row count.
sk_time = []
pss_row_time = []
pss_block_time = []

for r in n_rows:
    # Random (r, n_features) input drawn via ``normal(0, 1, ...)``.
    # NOTE(review): presumably numpy.random.normal -- confirm against imports.
    m = normal(0, 1, (r, n_features))
    # --- Baseline: ``cosine_similarity`` followed by manual post-processing.
    start_time = timeit.default_timer()
    sk = cosine_similarity(m, dense_output=False)
    # Zero every similarity below the threshold, convert to CSR, clear the
    # diagonal and drop the explicit zeros so only kept entries are stored.
    sk[sk < sim_thresh] = 0
    sk = csr_matrix(sk)
    sk.setdiag(0)
    sk.eliminate_zeros()
    sk_time.append(timeit.default_timer() - start_time)
    # --- truncated_sparse_similarity with block_size=1.
    # NOTE(review): presumably processes one row per block -- confirm.
    start_time = timeit.default_timer()
    pss_row = truncated_sparse_similarity(m,
                                          metric='cosine',
                                          thresh=sim_thresh,
                                          block_size=1)
    pss_row_time.append(timeit.default_timer() - start_time)
    # --- truncated_sparse_similarity with block_size=1000.
    start_time = timeit.default_timer()
    pss_block = truncated_sparse_similarity(m,
                                            metric='cosine',
                                            thresh=sim_thresh,
                                            block_size=1000)
    pss_block_time.append(timeit.default_timer() - start_time)
    # Progress marker after all three measurements for this row count.
    print('END {} rows'.format(r))

plt.plot(n_rows,
         sk_time,
         color='black',
         linestyle='dashed',
         linewidth=2,
Beispiel #9
0
def test_cosine_bad_dtype():
    """An array whose elements are lambdas must be rejected with ``TypeError``."""
    with pytest.raises(TypeError):
        # Built inside the context so the set of statements covered by the
        # ``raises`` check is the same as in a single nested call.
        callables = array([lambda x: x, lambda y: y])
        truncated_sparse_similarity(callables)
Beispiel #10
0
def test_sim_wrong_metric():
    """Passing ``metric=None`` must raise ``ValueError``."""
    invalid_options = {'metric': None, 'n_jobs': 1}
    with pytest.raises(ValueError):
        truncated_sparse_similarity(a1, **invalid_options)