Beispiel #1
0
def test_similarity_downsample(track_abundance):
    e = sourmash.MinHash(n=0,
                         ksize=20,
                         track_abundance=track_abundance,
                         max_hash=2**63)
    f = sourmash.MinHash(n=0,
                         ksize=20,
                         track_abundance=track_abundance,
                         max_hash=2**2)

    e.add_hash(1)
    e.add_hash(5)
    assert len(e.get_mins()) == 2

    f.add_hash(1)
    f.add_hash(5)  # should be discarded due to max_hash
    assert len(f.get_mins()) == 1

    ee = SourmashSignature(e)
    ff = SourmashSignature(f)

    with pytest.raises(ValueError):  # mismatch in max_hash
        ee.similarity(ff)

    x = ee.similarity(ff, downsample=True)
    assert round(x, 1) == 1.0
Beispiel #2
0
def test_roundtrip(track_abundance):
    e = sourmash.MinHash(n=1, ksize=20, track_abundance=track_abundance)
    e.add("AT" * 10)
    sig = SourmashSignature(e)
    s = save_signatures([sig])
    siglist = list(load_signatures(s))
    sig2 = siglist[0]
    e2 = sig2.minhash

    assert sig.similarity(sig2) == 1.0
    assert sig2.similarity(sig) == 1.0
Beispiel #3
0
def test_roundtrip(track_abundance):
    e = sourmash.MinHash(n=1, ksize=20, track_abundance=track_abundance)
    e.add("AT" * 10)
    sig = SourmashSignature(e)
    s = save_signatures([sig])
    siglist = list(load_signatures(s))
    sig2 = siglist[0]
    e2 = sig2.minhash

    assert sig.similarity(sig2) == 1.0
    assert sig2.similarity(sig) == 1.0
Beispiel #4
0
def test_roundtrip_empty(track_abundance):
    # edge case, but: empty minhash? :)
    e = sourmash.MinHash(n=1, ksize=20, track_abundance=track_abundance)

    sig = SourmashSignature(e)
    s = save_signatures([sig])
    siglist = list(load_signatures(s))
    sig2 = siglist[0]
    e2 = sig2.minhash

    assert sig.similarity(sig2) == 0
    assert sig2.similarity(sig) == 0
Beispiel #5
0
def test_roundtrip_empty(track_abundance):
    # edge case, but: empty minhash? :)
    e = sourmash.MinHash(n=1, ksize=20, track_abundance=track_abundance)

    sig = SourmashSignature(e)
    s = save_signatures([sig])
    siglist = list(load_signatures(s))
    sig2 = siglist[0]
    e2 = sig2.minhash

    assert sig.similarity(sig2) == 0
    assert sig2.similarity(sig) == 0
Beispiel #6
0
def test_similarity_downsample(track_abundance):
    e = sourmash.MinHash(n=0, ksize=20, track_abundance=track_abundance,
                             max_hash=2**63)
    f = sourmash.MinHash(n=0, ksize=20, track_abundance=track_abundance,
                             max_hash=2**2)

    e.add_hash(1)
    e.add_hash(5)
    assert len(e.get_mins()) == 2

    f.add_hash(1)
    f.add_hash(5)                 # should be discarded due to max_hash
    assert len(f.get_mins()) == 1

    ee = SourmashSignature(e)
    ff = SourmashSignature(f)

    with pytest.raises(ValueError):       # mismatch in max_hash
        ee.similarity(ff)

    x = ee.similarity(ff, downsample=True)
    assert round(x, 1) == 1.0
Beispiel #7
0
def test_roundtrip_max_hash(track_abundance):
    e = sourmash.MinHash(n=0, ksize=20, track_abundance=track_abundance,
                             max_hash=10)
    e.add_hash(5)
    sig = SourmashSignature(e)
    s = save_signatures([sig])
    siglist = list(load_signatures(s))
    sig2 = siglist[0]
    e2 = sig2.minhash

    assert e.max_hash == e2.max_hash

    assert sig.similarity(sig2) == 1.0
    assert sig2.similarity(sig) == 1.0