Beispiel #1
0
def test_abund_similarity_zero():
    E1 = MinHash(n=5, ksize=20, track_abundance=True)
    E2 = MinHash(n=5, ksize=20, track_abundance=True)

    for i in [1]:
        E1.add_hash(i)

    assert E1.similarity(E2) == 0.0
Beispiel #2
0
def test_abund_similarity_zero():
    E1 = MinHash(n=5, ksize=20, track_abundance=True)
    E2 = MinHash(n=5, ksize=20, track_abundance=True)

    for i in [1]:
        E1.add_hash(i)

    assert E1.similarity(E2) == 0.0
Beispiel #3
0
def test_diff_seed(track_abundance):
    E1 = MinHash(n=5, ksize=20, track_abundance=track_abundance, seed=1)
    E2 = MinHash(n=5, ksize=20, track_abundance=track_abundance, seed=2)

    for i in [1, 2, 3, 4, 5]:
        E1.add_hash(i)
    for i in [1, 2, 3, 4, 6]:
        E2.add_hash(i)

    with pytest.raises(ValueError):
        E1.count_common(E2)
Beispiel #4
0
def test_common_1(track_abundance):
    E1 = MinHash(n=5, ksize=20, track_abundance=track_abundance)
    E2 = MinHash(n=5, ksize=20, track_abundance=track_abundance)

    for i in [1, 2, 3, 4, 5]:
        E1.add_hash(i)
    for i in [1, 2, 3, 4, 6]:
        E2.add_hash(i)

    assert E1.count_common(E2) == 4
    assert E2.count_common(E1) == 4
Beispiel #5
0
def test_diff_seed(track_abundance):
    E1 = MinHash(n=5, ksize=20, track_abundance=track_abundance, seed=1)
    E2 = MinHash(n=5, ksize=20, track_abundance=track_abundance, seed=2)

    for i in [1, 2, 3, 4, 5]:
        E1.add_hash(i)
    for i in [1, 2, 3, 4, 6]:
        E2.add_hash(i)

    with pytest.raises(ValueError):
        E1.count_common(E2)
Beispiel #6
0
def test_common_1(track_abundance):
    E1 = MinHash(n=5, ksize=20, track_abundance=track_abundance)
    E2 = MinHash(n=5, ksize=20, track_abundance=track_abundance)

    for i in [1, 2, 3, 4, 5]:
        E1.add_hash(i)
    for i in [1, 2, 3, 4, 6]:
        E2.add_hash(i)

    assert E1.count_common(E2) == 4
    assert E2.count_common(E1) == 4
Beispiel #7
0
def test_jaccard_2_difflen(track_abundance):
    E1 = MinHash(n=5, ksize=20, track_abundance=track_abundance)
    E2 = MinHash(n=5, ksize=20, track_abundance=track_abundance)

    for i in [1, 2, 3, 4, 5]:
        E1.add_hash(i)
    for i in [1, 2, 3, 4]:
        E2.add_hash(i)

    print(E1.jaccard(E2))
    assert round(E1.jaccard(E2), 2) == 4 / 5.0
    assert round(E2.jaccard(E1), 2) == 4 / 5.0
Beispiel #8
0
def test_jaccard_2_difflen(track_abundance):
    E1 = MinHash(n=5, ksize=20, track_abundance=track_abundance)
    E2 = MinHash(n=5, ksize=20, track_abundance=track_abundance)

    for i in [1, 2, 3, 4, 5]:
        E1.add_hash(i)
    for i in [1, 2, 3, 4]:
        E2.add_hash(i)

    print(E1.jaccard(E2))
    assert round(E1.jaccard(E2), 2) == 4 / 5.0
    assert round(E2.jaccard(E1), 2) == 4 / 5.0
Beispiel #9
0
def test_abund_similarity():
    E1 = MinHash(n=5, ksize=20, track_abundance=True)
    E2 = MinHash(n=5, ksize=20, track_abundance=True)

    for i in [1]:
        E1.add_hash(i)
    for i in [1, 2]:
        E2.add_hash(i)

    assert round(E1.similarity(E1)) == 1.0
    assert round(E1.similarity(E2), 2) == 0.5

    assert round(E1.similarity(E1, ignore_abundance=True)) == 1.0
    assert round(E1.similarity(E2, ignore_abundance=True), 2) == 0.5
Beispiel #10
0
def test_jaccard_1(track_abundance):
    E1 = MinHash(n=5, ksize=20, track_abundance=track_abundance)
    E2 = MinHash(n=5, ksize=20, track_abundance=track_abundance)

    for i in [1, 2, 3, 4, 5]:
        E1.add_hash(i)
    for i in [1, 2, 3, 4, 6]:
        E2.add_hash(i)

    # here the union is [1, 2, 3, 4, 5]
    # and the intesection is [1, 2, 3, 4] => 4/5.

    assert round(E1.jaccard(E2), 2) == round(4 / 5.0, 2)
    assert round(E2.jaccard(E1), 2) == round(4 / 5.0, 2)
Beispiel #11
0
def test_jaccard_1(track_abundance):
    E1 = MinHash(n=5, ksize=20, track_abundance=track_abundance)
    E2 = MinHash(n=5, ksize=20, track_abundance=track_abundance)

    for i in [1, 2, 3, 4, 5]:
        E1.add_hash(i)
    for i in [1, 2, 3, 4, 6]:
        E2.add_hash(i)

    # here the union is [1, 2, 3, 4, 5]
    # and the intesection is [1, 2, 3, 4] => 4/5.

    assert round(E1.jaccard(E2), 2) == round(4 / 5.0, 2)
    assert round(E2.jaccard(E1), 2) == round(4 / 5.0, 2)
Beispiel #12
0
def test_abund_similarity():
    E1 = MinHash(n=5, ksize=20, track_abundance=True)
    E2 = MinHash(n=5, ksize=20, track_abundance=True)

    for i in [1]:
        E1.add_hash(i)
    for i in [1, 2]:
        E2.add_hash(i)

    assert round(E1.similarity(E1)) == 1.0
    assert round(E1.similarity(E2), 2) == 0.5

    assert round(E1.similarity(E1, ignore_abundance=True)) == 1.0
    assert round(E1.similarity(E2, ignore_abundance=True), 2) == 0.5