def test_jaccard_2_difflen(track_abundance):
    """Check jaccard() between sketches holding different numbers of hashes.

    One sketch receives hashes 1..5 and the other only 1..4; the test
    expects the similarity, rounded to two decimals, to equal 4/5 no
    matter which sketch is the receiver of the call.
    """
    five_hashes = MinHash(n=5, ksize=20, track_abundance=track_abundance)
    four_hashes = MinHash(n=5, ksize=20, track_abundance=track_abundance)

    for hashval in range(1, 6):
        five_hashes.add_hash(hashval)
    for hashval in range(1, 5):
        four_hashes.add_hash(hashval)

    # Emit the raw similarity so it shows up in captured test output.
    print(five_hashes.jaccard(four_hashes))

    expected = 4 / 5.0

    forward = round(five_hashes.jaccard(four_hashes), 2)
    assert forward == expected

    backward = round(four_hashes.jaccard(five_hashes), 2)
    assert backward == expected
def test_jaccard_1(track_abundance):
    """Check jaccard() between two 5-hash sketches that differ in one hash.

    The sketches hold (1, 2, 3, 4, 5) and (1, 2, 3, 4, 6).  Per the
    original author's note, the union is taken to be [1, 2, 3, 4, 5] and
    the intersection [1, 2, 3, 4], so the expected similarity is 4/5 in
    both call directions.
    """
    left = MinHash(n=5, ksize=20, track_abundance=track_abundance)
    right = MinHash(n=5, ksize=20, track_abundance=track_abundance)

    for hashval in (1, 2, 3, 4, 5):
        left.add_hash(hashval)
    for hashval in (1, 2, 3, 4, 6):
        right.add_hash(hashval)

    # NOTE(review): presumably 6 is excluded from the union because the
    # sketches are capped at n=5 hashes -- confirm against MinHash.
    expected = round(4 / 5.0, 2)

    forward = round(left.jaccard(right), 2)
    assert forward == expected

    backward = round(right.jaccard(left), 2)
    assert backward == expected