Beispiel #1
0
def test_mh_count_common(track_abundance):
    a = MinHash(20, 10, track_abundance=track_abundance)
    for i in range(0, 40, 2):
        a.add_hash(i)

    b = MinHash(20, 10, track_abundance=track_abundance)
    for i in range(0, 80, 4):
        b.add_hash(i)

    assert a.count_common(b) == 10
    assert b.count_common(a) == 10
Beispiel #2
0
def test_mh_count_common(track_abundance):
    a = MinHash(20, 10, track_abundance=track_abundance)
    for i in range(0, 40, 2):
        a.add_hash(i)

    b = MinHash(20, 10, track_abundance=track_abundance)
    for i in range(0, 80, 4):
        b.add_hash(i)

    assert a.count_common(b) == 10
    assert b.count_common(a) == 10
Beispiel #3
0
def test_abundance_count_common():
    a = MinHash(20, 5, False, track_abundance=True)
    b = MinHash(20, 5, False, track_abundance=False)

    a.add_sequence('AAAAA')
    a.add_sequence('AAAAA')
    assert a.get_mins() == [2110480117637990133]
    assert a.get_mins(with_abundance=True) == {2110480117637990133: 2}

    b.add_sequence('AAAAA')
    b.add_sequence('GGGGG')
    assert a.count_common(b) == 1
    assert a.count_common(b) == b.count_common(a)

    assert b.get_mins(with_abundance=True) == [2110480117637990133,
                                               10798773792509008305]
Beispiel #4
0
def test_mh_asymmetric(track_abundance):
    a = MinHash(20, 10, track_abundance=track_abundance)
    for i in range(0, 40, 2):
        a.add_hash(i)

    # different size: 10
    b = MinHash(10, 10, track_abundance=track_abundance)
    for i in range(0, 80, 4):
        b.add_hash(i)

    assert a.count_common(b) == 10
    assert b.count_common(a) == 10

    with pytest.raises(TypeError):
        a.compare(b)

    a = a.downsample_n(10)
    assert a.compare(b) == 0.5
    assert b.compare(a) == 0.5
Beispiel #5
0
def test_mh_asymmetric(track_abundance):
    a = MinHash(20, 10, track_abundance=track_abundance)
    for i in range(0, 40, 2):
        a.add_hash(i)

    # different size: 10
    b = MinHash(10, 10, track_abundance=track_abundance)
    for i in range(0, 80, 4):
        b.add_hash(i)

    assert a.count_common(b) == 10
    assert b.count_common(a) == 10

    with pytest.raises(TypeError):
        a.compare(b)

    a = a.downsample_n(10)
    assert a.compare(b) == 0.5
    assert b.compare(a) == 0.5
Beispiel #6
0
def test_mh_jaccard_asymmetric_num(track_abundance):
    a = MinHash(20, 10, track_abundance=track_abundance)
    for i in range(0, 40, 2):
        a.add_hash(i)

    # different size: 10
    b = MinHash(10, 10, track_abundance=track_abundance)
    for i in range(0, 80, 4):
        b.add_hash(i)

    assert a.count_common(b) == 10
    assert b.count_common(a) == 10

    # with 'jaccard', this will raise an error b/c different num
    with pytest.raises(TypeError):
        a.jaccard(b)

    a = a.downsample_n(10)
    # CTB note: this used to be 'compare', is now 'jaccard'
    assert a.jaccard(b) == 0.5
    assert b.jaccard(a) == 0.5
Beispiel #7
0
def test_abundance_simple_2():
    a = MinHash(20, 5, False, track_abundance=True)
    b = MinHash(20, 5, False, track_abundance=True)

    a.add_sequence('AAAAA')
    assert a.get_mins() == [2110480117637990133]
    assert a.get_mins(with_abundance=True) == {2110480117637990133: 1}

    a.add_sequence('AAAAA')
    assert a.get_mins() == [2110480117637990133]
    assert a.get_mins(with_abundance=True) == {2110480117637990133: 2}

    b.add_sequence('AAAAA')
    assert a.count_common(b) == 1
Beispiel #8
0
def test_abundance_simple_2():
    a = MinHash(20, 5, False, track_abundance=True)
    b = MinHash(20, 5, False, track_abundance=True)

    a.add_sequence('AAAAA')
    assert a.get_mins() == [2110480117637990133]
    assert a.get_mins(with_abundance=True) == {2110480117637990133: 1}

    a.add_sequence('AAAAA')
    assert a.get_mins() == [2110480117637990133]
    assert a.get_mins(with_abundance=True) == {2110480117637990133: 2}

    b.add_sequence('AAAAA')
    assert a.count_common(b) == 1
Beispiel #9
0
def test_mh_count_common_notmh(track_abundance):
    a = MinHash(20, 5, track_abundance=track_abundance)
    b = set()

    with pytest.raises(TypeError):
        a.count_common(b)
Beispiel #10
0
def test_mh_count_common_diff_ksize(track_abundance):
    a = MinHash(20, 5, track_abundance=track_abundance)
    b = MinHash(20, 6, track_abundance=track_abundance)

    with pytest.raises(ValueError):
        a.count_common(b)
Beispiel #11
0
def test_mh_count_common_diff_seed(track_abundance):
    a = MinHash(20, 5, False, track_abundance=track_abundance, seed=1)
    b = MinHash(20, 5, True, track_abundance=track_abundance, seed=2)

    with pytest.raises(ValueError):
        a.count_common(b)
Beispiel #12
0
def test_mh_count_common_diff_maxhash(track_abundance):
    a = MinHash(0, 5, False, track_abundance=track_abundance, max_hash=1)
    b = MinHash(0, 5, True, track_abundance=track_abundance, max_hash=2)

    with pytest.raises(ValueError):
        a.count_common(b)
Beispiel #13
0
def test_mh_count_common_diff_ksize(track_abundance):
    a = MinHash(20, 5, track_abundance=track_abundance)
    b = MinHash(20, 6, track_abundance=track_abundance)

    with pytest.raises(ValueError):
        a.count_common(b)
Beispiel #14
0
def test_mh_count_common_diff_seed(track_abundance):
    a = MinHash(20, 5, False, track_abundance=track_abundance, seed=1)
    b = MinHash(20, 5, True, track_abundance=track_abundance, seed=2)

    with pytest.raises(ValueError):
        a.count_common(b)
Beispiel #15
0
def test_mh_count_common_diff_maxhash(track_abundance):
    a = MinHash(0, 5, False, track_abundance=track_abundance, max_hash=1)
    b = MinHash(0, 5, True, track_abundance=track_abundance, max_hash=2)

    with pytest.raises(ValueError):
        a.count_common(b)