Example #1
0
def test_cos_distance_using_scipy_2():
    '''Compare each entry returned by cos_distance_using_sparse with scipy.

       Every (i, j, value) entry produced for the generated rows is checked
       against scipy.spatial.distance.cosine(rows[i], rows[j]).
       (scipy must be installed for this test)'''
    from scipy.spatial.distance import cosine as scipy_cosine

    vectors = generate_test_set(100, 1000, 1)
    entries = sparse_dot.cos_distance_using_sparse(vectors)
    for left, right, sparse_value in entries:
        reference = scipy_cosine(vectors[left], vectors[right])
        assert np.isclose(sparse_value, reference)
Example #2
0
def test_cos_distance_using_scipy_1():
    '''Compare the sparse cos distance of a single pair with scipy.

       The first 'sparse_result' entry computed for the two generated
       vectors must be close to scipy.spatial.distance.cosine of them.
       (scipy must be installed for this test)'''
    from scipy.spatial.distance import cosine as scipy_cosine

    first, second = generate_test_set(2, 1000, 1)
    reference = scipy_cosine(first, second)
    sparse_output = sparse_dot.cos_distance_using_sparse([first, second])
    assert np.isclose(reference, sparse_output['sparse_result'][0])
Example #3
0
def test_cos_similarity_using_scipy_1():
    '''Test the cos similarity calculation against the scipy-based versions
       (scipy must be installed for this test)

       A COO matrix is built from the 'i', 'j' and 'sparse_result' fields
       returned by cos_similarity_using_sparse. Its strict upper triangle
       is compared with the strict upper triangle of the results of
       sparse_cosine_similarity and sparse_cosine_similarity_b, both run
       on the CSR form of the same rows.'''
    import scipy.sparse
    n_rows = 100
    rows = generate_test_set(n_rows, 1000, 1)
    csr = scipy.sparse.csr_matrix(rows)
    res = sparse_dot.cos_similarity_using_sparse(rows)
    res_coo = scipy.sparse.coo_matrix(
        (res['sparse_result'], (res['i'], res['j'])), shape=(n_rows, n_rows))
    sparse_cos_sim = sparse_dot.sparse_cosine_similarity(csr)
    sparse_cos_sim_b = sparse_dot.sparse_cosine_similarity_b(csr)

    # Computed once and reused for both comparisons.
    result_upper = np.triu(res_coo.toarray(), 1)

    # The offset 1 is the `k` argument of np.triu (drop the diagonal).
    # It must not be handed to np.isclose, where it would be read as
    # rtol=1 and make the comparison accept almost any value.
    assert np.all(
        np.isclose(result_upper, np.triu(sparse_cos_sim.toarray(), 1)))
    assert np.all(
        np.isclose(result_upper, np.triu(sparse_cos_sim_b.toarray(), 1)))
Example #4
0
def test_sparse_dot_100_100_0p1():
    '''Assert is_naive_same holds for generate_test_set(100, 100, 0.1)'''
    test_rows = generate_test_set(100, 100, 0.1)
    assert is_naive_same(test_rows)
Example #5
0
def test_sparse_dot_basic_100_1():
    '''Assert dot_equal_basic holds for the items of
       generate_test_set(2, 100, 1), passed as positional arguments'''
    vectors = generate_test_set(2, 100, 1)
    assert dot_equal_basic(*vectors)
Example #6
0

if __name__ == '__main__':
    # Run the test functions directly when this file is executed as a script.
    test_saf_list_to_csr_matrix()
    test_cos_similarity_using_scipy_1()
    test_validate_saf_1()
    test_validate_saf_2()
    test_sparse_dot_full_validation_1()
    # Needs the call parentheses: a bare name is a no-op expression and
    # the test would silently be skipped.
    test_sparse_dot_full_validation_2()
    test_sparse_dot_simple()
    test_sparse_dot_basic_100()
    test_sparse_dot_basic_100_1()
    test_sparse_dot_10_100_1()
    test_sparse_dot_100_100_0p1()

    # Same comparison again with print_time=True; the results are not asserted.
    is_naive_same(generate_test_set(100, 100, 0.1), print_time=True)
    is_naive_same(generate_test_set(1000, 1000, 0.1), print_time=True)

    test_cos_distance_using_scipy_1()
    test_cos_distance_using_scipy_2()

    print(run_timing_test_v1_1000_1000_0p1())
    print(run_timing_test_1000_1000_100000())
    print(run_timing_test_vs_csr_1000_1000_100000())
    print(run_timing_test_vs_csr_and_coo_1000_1000_100000())

    # These are all run in the benchmarks instead:
    # print(run_timing_test_v1_10000_10000_0p1())  # ~100s
    # print(run_timing_test_10000_10000_10000000())  # ~100s
    # print(run_timing_test_1000_20000_10000000())  # 10s
    # print(run_timing_test_5000_20000_10000())  # only 0.1s to run but 8s to generate the initial data