def test_abundance_compare():
    """Compare an abundance-tracking sketch against a non-tracking one.

    Both sketches ingest the same sequence and must report a similarity
    of exactly 1.0 in every pairing, including after the non-tracking
    sketch sees that sequence a second time.  Once the non-tracking
    sketch also ingests a different sequence, the cross comparisons
    must still be at least 0.3 while self-comparisons remain 1.0.
    """
    shared_seq = 'TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA'
    extra_seq = 'GATTGGTGCACACTTAACTGGGTGCCGCGCTGGTGCTGATCCATGAAGTT'

    with_abund = MinHash(20, 10, track_abundance=True)
    without_abund = MinHash(20, 10, track_abundance=False)

    with_abund.add_sequence(shared_seq)
    without_abund.add_sequence(shared_seq)

    # Every ordered pairing that is expected to be a perfect match.
    pairings = (
        (with_abund, without_abund),
        (without_abund, without_abund),
        (without_abund, with_abund),
        (with_abund, with_abund),
    )
    for left, right in pairings:
        assert left.compare(right) == 1.0

    # add same sequence again
    without_abund.add_sequence(shared_seq)
    for left, right in pairings:
        assert left.compare(right) == 1.0

    # A second, different sequence goes into only one of the sketches.
    without_abund.add_sequence(extra_seq)

    forward = with_abund.compare(without_abund)
    assert forward >= 0.3, forward

    backward = without_abund.compare(with_abund)
    assert backward >= 0.3, backward

    assert with_abund.compare(with_abund) == 1.0
    assert without_abund.compare(without_abund) == 1.0
def test_consume_lowercase(track_abundance):
    """Check that a lowercased sequence sketches like the uppercase one.

    One sketch receives the sequence lowercased, the other receives it
    unchanged; all four pairwise comparisons must equal 1.0.

    ``track_abundance`` is passed straight through to ``MinHash``; it is
    presumably supplied by a pytest fixture defined outside this chunk.
    """
    sequence = 'TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA'

    lower_mh = MinHash(20, 10, track_abundance=track_abundance)
    upper_mh = MinHash(20, 10, track_abundance=track_abundance)

    lower_mh.add_sequence(sequence.lower())
    upper_mh.add_sequence(sequence)

    pairings = (
        (lower_mh, upper_mh),
        (upper_mh, upper_mh),
        (upper_mh, lower_mh),
        (lower_mh, lower_mh),
    )
    for left, right in pairings:
        assert left.compare(right) == 1.0
def test_mh_asymmetric(track_abundance):
    """Exercise comparisons between sketches built with different sizes.

    ``count_common`` must report 10 in both directions, ``compare`` must
    raise ``TypeError`` while the sizes differ, and after downsampling
    the larger sketch with ``downsample_n(10)`` both directions of
    ``compare`` must equal 0.5.

    ``track_abundance`` is passed straight through to ``MinHash``; it is
    presumably supplied by a pytest fixture defined outside this chunk.
    """
    # Built with first argument 20; fed the even numbers 0, 2, ..., 38.
    big = MinHash(20, 10, track_abundance=track_abundance)
    for value in range(0, 40, 2):
        big.add_hash(value)

    # different size: 10
    # Fed the multiples of four 0, 4, ..., 76.
    small = MinHash(10, 10, track_abundance=track_abundance)
    for value in range(0, 80, 4):
        small.add_hash(value)

    assert big.count_common(small) == 10
    assert small.count_common(big) == 10

    # Comparing before downsampling is expected to be rejected.
    with pytest.raises(TypeError):
        big.compare(small)

    shrunk = big.downsample_n(10)
    assert shrunk.compare(small) == 0.5
    assert small.compare(shrunk) == 0.5
def test_mh_compare_diff_max_hash(track_abundance):
    """Expect ``compare`` to raise ``ValueError`` when ``max_hash`` differs.

    ``track_abundance`` is passed straight through to ``MinHash``; it is
    presumably supplied by a pytest fixture defined outside this chunk.
    """
    low_cap = MinHash(0, 5, track_abundance=track_abundance, max_hash=5)
    high_cap = MinHash(0, 5, track_abundance=track_abundance, max_hash=10)

    with pytest.raises(ValueError):
        low_cap.compare(high_cap)
def test_mh_compare_diff_seed(track_abundance):
    """Expect ``compare`` to raise ``ValueError`` when ``seed`` differs.

    ``track_abundance`` is passed straight through to ``MinHash``; it is
    presumably supplied by a pytest fixture defined outside this chunk.
    """
    seed_one = MinHash(20, 5, track_abundance=track_abundance, seed=1)
    seed_two = MinHash(20, 5, track_abundance=track_abundance, seed=2)

    with pytest.raises(ValueError):
        seed_one.compare(seed_two)
def test_mh_compare_diff_ksize(track_abundance):
    """Expect ``compare`` to raise ``ValueError`` across differing second arguments.

    The two sketches are identical except for the second positional
    argument (5 versus 6), which the test name identifies as the ksize.

    ``track_abundance`` is passed straight through to ``MinHash``; it is
    presumably supplied by a pytest fixture defined outside this chunk.
    """
    k_five = MinHash(20, 5, track_abundance=track_abundance)
    k_six = MinHash(20, 6, track_abundance=track_abundance)

    with pytest.raises(ValueError):
        k_five.compare(k_six)
def test_mh_compare_diff_protein(track_abundance):
    """Expect ``compare`` to raise ``ValueError`` across differing third arguments.

    The two sketches are identical except for the third positional
    argument (``False`` versus ``True``); judging by the test name this
    is the protein flag -- confirm against the ``MinHash`` signature.

    ``track_abundance`` is passed straight through to ``MinHash``; it is
    presumably supplied by a pytest fixture defined outside this chunk.
    """
    flag_off = MinHash(20, 5, False, track_abundance=track_abundance)
    flag_on = MinHash(20, 5, True, track_abundance=track_abundance)

    with pytest.raises(ValueError):
        flag_off.compare(flag_on)