Example #1
0
def test_random_kmers():
    """Smoke-test add() and get() with a batch of random k-mers.

    Nothing is asserted about the counts themselves; the test only checks
    that inserting and then looking up the k-mers completes without
    out-of-bounds errors or similar failures.
    """
    ksize = 20
    n_kmers = 400

    # A fixed seed makes the generated k-mers identical on every run.
    rng = random.Random(1)

    table = SmallCounttable(ksize, 1e2, 4)

    kmers = []
    for _ in range(n_kmers):
        bases = [rng.choice("ACGT") for _ in range(ksize)]
        kmers.append("".join(bases))

    # Insert everything first, then query everything.
    for seq in kmers:
        table.add(seq)

    for seq in kmers:
        table.get(seq)
def test_split_byte_murmur():
    """Check the byte is split correctly when k-mers are murmur-hashed.

    Counting one k-mer must leave the count of the other k-mer at zero.
    """
    table = SmallCounttable(4, 4, 1)

    # Per the original author, these k-mers were carefully chosen so that
    # their hash values produce consecutive indices in the count table.
    counted, neighbour = "AAAC", "AAAG"
    expected_hashes = (
        (counted, 11898086063751343884),
        (neighbour, 10548630838975263317),
    )

    # Pin the hash values first so a change of hash function is noticed.
    for seq, hashval in expected_hashes:
        assert table.get_kmer_hashes(seq) == [hashval]

    table.add(counted)

    assert table.get(counted) == 1
    assert table.get(neighbour) == 0
Example #3
0
def test_split_byte_murmur():
    """Check the byte is split correctly when k-mers are murmur-hashed.

    Only the first k-mer is added; the second one must still read zero.
    """
    table = SmallCounttable(4, 4, 1)

    # Per the original author, these k-mers were carefully chosen so that
    # their hash values produce consecutive indices in the count table.
    first = "AAAC"
    second = "AAAG"

    first_hashes = table.get_kmer_hashes(first)
    assert first_hashes == [11898086063751343884]
    second_hashes = table.get_kmer_hashes(second)
    assert second_hashes == [10548630838975263317]

    table.add(first)

    assert table.get(first) == 1
    assert table.get(second) == 0
Example #4
0
def test_read_write():
    """Round-trip a populated table through save() and load().

    The reloaded table must report the same k-mer size and the same count
    for every k-mer that was added to the original table.
    """
    ksize = 20
    n_kmers = 400

    # A fixed seed makes the generated k-mers identical on every run.
    rng = random.Random(1)

    original = SmallCounttable(ksize, 1e2, 4)

    kmers = []
    for _ in range(n_kmers):
        bases = [rng.choice("ACGT") for _ in range(ksize)]
        kmers.append("".join(bases))

    for seq in kmers:
        original.add(seq)

    path = utils.get_temp_filename('zzz')

    original.save(path)

    # load() is given only the filename; no table parameters are passed.
    restored = SmallCounttable.load(path)

    assert original.ksize() == restored.ksize()
    for seq in kmers:
        assert original.get(seq) == restored.get(seq)
def test_overflow():
    """Check that a saturated counter does not overflow into its neighbour.

    Each of the two k-mers is hammered in turn on a fresh table: its count
    must stop at 15 and the other k-mer's count must remain 0.
    """
    # 17 additions try to push the 4-bit counter past the asserted cap of 15.
    n_adds = 17
    kmer_pair = ("AAAA", "AAAT")

    # Second pass swaps the roles, so the k-mer that (per the original
    # author) hashes to the other half of the byte is covered too.
    for hammered, untouched in (kmer_pair, kmer_pair[::-1]):
        table = SmallCounttable(4, 1e6, 4)

        for _ in range(n_adds):
            table.add(hammered)

        assert table.get(hammered) == 15
        assert table.get(untouched) == 0
Example #6
0
def test_overflow():
    """Check that counting past the cap never touches the other k-mer.

    The scenario is run twice on fresh tables, once saturating "AAAA" and
    once saturating "AAAT"; the saturated k-mer must read 15 and the
    other one must read 0.
    """
    # (k-mer to saturate, k-mer that must stay at zero); per the original
    # author the second k-mer hashes to the other half of the byte.
    scenarios = [
        ("AAAA", "AAAT"),
        ("AAAT", "AAAA"),
    ]

    for target, bystander in scenarios:
        table = SmallCounttable(4, 1e6, 4)

        # Try to overflow the 4-bit counter: 17 adds against a cap of 15.
        remaining = 17
        while remaining > 0:
            table.add(target)
            remaining -= 1

        assert table.get(target) == 15
        assert table.get(bystander) == 0
def test_single_add():
    """Adding a k-mer once must give it a count of exactly one."""
    table = SmallCounttable(4, 1e6, 4)
    kmer = "AAAA"

    table.add(kmer)

    assert table.get(kmer) == 1
Example #8
0
def test_single_add():
    """A single add() followed by get() must report a count of 1."""
    table = SmallCounttable(4, 1e6, 4)
    table.add("AAAA")
    observed = table.get("AAAA")
    assert observed == 1