def test_save_load_gz():
    """Round-trip a countgraph through a gzipped save file.

    The restored graph must produce the same abundance distribution
    as the original.
    """
    infile = utils.get_test_data('random-20-a.fa')
    graphfile = utils.get_temp_filename('tempcountingsave2.ht.gz')

    table_sizes = list(PRIMES_1m)
    table_sizes.append(1000005)

    original = khmer._Countgraph(12, table_sizes)
    original.consume_fasta(infile)
    original.save(graphfile)

    restored = khmer._Countgraph(12, table_sizes)
    try:
        restored.load(graphfile)
    except OSError as err:
        assert 0, 'Should not produce an OSError: ' + str(err)

    tracking = khmer._Nodegraph(12, table_sizes)
    dist_before = original.abundance_distribution(infile, tracking)
    tracking = khmer._Nodegraph(12, table_sizes)
    dist_after = restored.abundance_distribution(infile, tracking)

    assert sum(dist_before) == 3966, sum(dist_before)
    assert dist_before == dist_after, (dist_before, dist_after)
def test_3_tables():
    """Check counting behaviour when k-mers collide in 1, 2, or all 3 tables."""
    sizes = list(PRIMES_1m)
    sizes.append(1000005)

    hi = khmer._Countgraph(12, sizes)

    GG = 'G' * 12                   # forward_hash: 11184810
    assert khmer.forward_hash(GG, 12) == 11184810

    collision_1 = 'AAACGTATGACT'
    assert khmer.forward_hash(collision_1, 12) == 184777

    collision_2 = 'AAATACCGAGCG'
    assert khmer.forward_hash(collision_2, 12) == 76603

    collision_3 = 'AAACGTATCGAG'
    assert khmer.forward_hash(collision_3, 12) == 184755

    # hash(GG) % 1000003 == hash(collision_1)
    # hash(GG) % 1009837 == hash(collision_2)
    # hash(GG) % 1000005 == hash(collision_3)
    hi.consume(GG)
    assert hi.get(GG) == 1

    # colliding in only one table does not raise GG's reported count
    hi.consume(collision_1)
    assert hi.get(GG) == 1

    hi.consume(collision_2)
    assert hi.get(GG) == 1

    # colliding in every table makes GG appear counted twice
    hi.consume(collision_3)
    assert hi.get(GG) == 2
def test_load_gz():
    """A countgraph saved uncompressed must load from a gzipped copy."""
    infile = utils.get_test_data('random-20-a.fa')
    rawpath = utils.get_temp_filename('tempcountingsave1.ht')
    gzpath = utils.get_temp_filename('tempcountingsave1.ht.gz')

    sizes = list(PRIMES_1m)
    sizes.append(1000005)

    # save uncompressed hashtable.
    hi = khmer._Countgraph(12, sizes)
    hi.consume_seqfile(infile)
    hi.save(rawpath)

    # compress the saved file.
    with open(rawpath, 'rb') as in_file:
        with gzip.open(gzpath, 'wb') as out_file:
            out_file.writelines(in_file)

    # load compressed hashtable.
    try:
        ht = khmer.load_countgraph(gzpath)
    except OSError as err:
        assert 0, "Should not produce an OSError: " + str(err)

    tracking = khmer._Nodegraph(12, sizes)
    x = hi.abundance_distribution(infile, tracking)
    tracking = khmer._Nodegraph(12, sizes)
    y = ht.abundance_distribution(infile, tracking)

    assert sum(x) == 3966, sum(x)
    assert x == y, (x, y)
def test_load_gz():
    """Saving uncompressed then gzipping must yield a loadable countgraph."""
    source_fa = utils.get_test_data('random-20-a.fa')
    plain_path = utils.get_temp_filename('tempcountingsave1.ht')
    gz_path = utils.get_temp_filename('tempcountingsave1.ht.gz')

    table_sizes = list(PRIMES_1m)
    table_sizes.append(1000005)

    # save uncompressed hashtable.
    graph = khmer._Countgraph(12, table_sizes)
    graph.consume_fasta(source_fa)
    graph.save(plain_path)

    # compress.
    with open(plain_path, 'rb') as raw, gzip.open(gz_path, 'wb') as packed:
        packed.writelines(raw)

    # load compressed hashtable.
    try:
        loaded = khmer.load_countgraph(gz_path)
    except OSError as err:
        assert 0, "Should not produce an OSError: " + str(err)

    tracking = khmer._Nodegraph(12, table_sizes)
    dist_saved = graph.abundance_distribution(source_fa, tracking)
    tracking = khmer._Nodegraph(12, table_sizes)
    dist_loaded = loaded.abundance_distribution(source_fa, tracking)

    assert sum(dist_saved) == 3966, sum(dist_saved)
    assert dist_saved == dist_loaded, (dist_saved, dist_loaded)
def test_complete_no_collision():
    """Fill a 4**4-slot table with every 4-mer and check occupancy counts."""
    kh = khmer._Countgraph(4, [4 ** 4])
    n_entries = kh.hashsizes()[0]

    for idx in range(n_entries):
        kh.count(khmer.reverse_hash(idx, 4))

    n_palindromes = 0
    n_rc_filled = 0
    n_fwd_filled = 0

    for idx in range(n_entries):
        kmer = khmer.reverse_hash(idx, 4)
        if kh.get(kmer):                # string hashing is rc aware
            n_rc_filled += 1
        if kh.get(kmer) == 1:           # palindromes are singular
            n_palindromes += 1
        if kh.get(idx):                 # int hashing is not rc aware
            n_fwd_filled += 1

    assert n_rc_filled == n_entries, n_rc_filled
    assert n_palindromes == 16, n_palindromes
    expected_fwd = n_entries // 2 + n_palindromes // 2
    assert n_fwd_filled == expected_fwd, (n_fwd_filled, expected_fwd)
def test_complete_no_collision():
    """Every 4-mer counted once: verify rc-aware vs raw-hash occupancy."""
    graph = khmer._Countgraph(4, [4**4])
    total_slots = graph.hashsizes()[0]

    for slot in range(total_slots):
        graph.count(khmer.reverse_hash(slot, 4))

    palindromes = 0
    rc_filled = 0
    fwd_filled = 0

    for slot in range(total_slots):
        seq = khmer.reverse_hash(slot, 4)
        count = graph.get(seq)
        if count:                       # string hashing is rc aware
            rc_filled += 1
        if count == 1:                  # palindromes are singular
            palindromes += 1
        if graph.get(slot):             # int hashing is not rc aware
            fwd_filled += 1

    assert rc_filled == total_slots, rc_filled
    assert palindromes == 16, palindromes
    assert fwd_filled == total_slots // 2 + palindromes // 2, \
        (fwd_filled, total_slots // 2 + palindromes // 2)
def test_collision():
    """AAAA and its reverse complement TTTT share a count."""
    kh = khmer._Countgraph(4, [5])

    kh.count('AAAA')
    assert kh.get('AAAA') == 1

    kh.count('TTTT')
    assert kh.get('TTTT') == 2
def test_collision():
    """Counting a k-mer and its reverse complement increments one bin."""
    graph = khmer._Countgraph(4, [5])

    graph.count("AAAA")
    assert graph.get("AAAA") == 1

    graph.count("TTTT")
    assert graph.get("TTTT") == 2
def test_maxcount_consume():
    """Consuming a long homopolymer must saturate, not overflow, the counter."""
    # hashtable should saturate at some point so as not to overflow counter
    kh = khmer._Countgraph(4, [5])
    kh.consume("A" * 10000)

    count = kh.get('AAAA')
    assert count == MAX_COUNT, count  # this will depend on HashcountType...
def test_consume_uniqify_first():
    """A consumed k-mer is retrievable via its reverse complement."""
    kh = khmer._Countgraph(4, [5])

    forward = "TTTT"
    revcomp = "AAAA"

    kh.consume(forward)
    assert kh.get(revcomp) == 1
def test_load_notexist_should_fail():
    """Loading from a path that does not exist must raise OSError."""
    missing_path = utils.get_temp_filename('tempnodegraphsave0.htable')

    hi = khmer._Countgraph(12, [1])
    try:
        hi.load(missing_path)
        assert 0, "load should fail"
    except OSError:
        pass
def test_maxcount_consume_with_bigcount():
    """With bigcount enabled, counts are not clamped at the byte maximum."""
    # use the bigcount hack to avoid saturating the hashtable.
    kh = khmer._Countgraph(4, [5])
    kh.set_use_bigcount(True)

    kh.consume("A" * 10000)

    # a 10000-base read contains 10000 - 3 overlapping 4-mers
    count = kh.get('AAAA')
    assert count == 10000 - 3, count
def test_nodegraph_file_type_check():
    """load_nodegraph must reject a file saved by a countgraph."""
    kh = khmer._Countgraph(12, [1])
    savepath = utils.get_temp_filename('tempcountingsave0.ct')
    kh.save(savepath)

    try:
        khmer.load_nodegraph(savepath)
        assert 0, "this should fail"
    except OSError as e:
        print(str(e))
def test_get_mincount():
    """get_min_count reports the smallest count across a sequence's k-mers."""
    kh = khmer._Countgraph(4, [5])
    seq = "AAAAACGT"

    kh.consume(seq)
    lowest = kh.get_min_count(seq)
    assert lowest == 1, lowest

    kh.consume(seq)
    lowest = kh.get_min_count(seq)
    assert lowest == 2, lowest
def test_badcount():
    """count() must validate both its argument count and the k-mer length."""
    countgraph = khmer._Countgraph(4, [5])

    # missing argument
    try:
        countgraph.count()
        assert 0, "count should require one argument"
    except TypeError as err:
        print(str(err))

    # wrong k-mer length (5 != 4)
    try:
        countgraph.count('ABCDE')
        assert 0, "count should require k-mer size to be equal"
    except ValueError as err:
        print(str(err))
def test_get_maxcount():
    """get_max_count reports the largest count across a sequence's k-mers."""
    kh = khmer._Countgraph(4, [7])
    seq = "AAAAACGT"

    kh.consume(seq)
    assert kh.get_max_count(seq) == 2

    kh.consume(seq)
    assert kh.get_max_count(seq) == 4
def test_get_maxcount_rc():
    """Consuming the reverse complement raises the forward sequence's max."""
    kh = khmer._Countgraph(4, [7])
    fwd = "AAAAACGT"
    rc = "ACGTTTTT"

    kh.consume(fwd)
    highest = kh.get_max_count(fwd)
    assert highest == 2, highest

    kh.consume(rc)
    highest = kh.get_max_count(fwd)
    assert highest == 4, highest
def test_count_2():
    """Counting via an integer hash is equivalent to counting the string."""
    hi = khmer._Countgraph(12, PRIMES_1m)
    kmer = 'G' * 12
    hashval = hi.hash('G' * 12)

    assert hi.get(kmer) == 0
    assert hi.get(hashval) == 0

    hi.count(kmer)
    assert hi.get(kmer) == 1
    assert hi.get(hashval) == 1

    hi.count(hashval)  # count hashes same as strings
    assert hi.get(kmer) == 2
    assert hi.get(hashval) == 2
def test_maxcount():
    """Without bigcount, repeated counting saturates at MAX_COUNT."""
    # hashtable should saturate at some point so as not to overflow counter
    kh = khmer._Countgraph(4, [5])

    last_count = None
    for _ in range(10000):
        kh.count('AAAA')
        c = kh.get('AAAA')
        print(last_count, c)
        if c == last_count:
            break  # count has stopped growing: saturated
        last_count = c

    assert c != 10000, "should not be able to count to 10000"
    assert c == MAX_COUNT  # this will depend on HashcountType...
def test_save_load_large(ctfile):
    """Save/load round trip for a countgraph with very large tables."""
    inpath = utils.get_test_data('random-20-a.fa')
    savepath = utils.get_temp_filename(ctfile)
    sizes = khmer.get_n_primes_near_x(1, 2 ** 31 + 1000)

    orig = khmer._Countgraph(12, sizes)
    orig.consume_seqfile(inpath)
    orig.save(savepath)

    loaded = khmer.load_countgraph(savepath)

    occupied_before = orig.n_occupied()
    occupied_after = loaded.n_occupied()

    assert occupied_before == 3966, occupied_before
    assert occupied_after == occupied_before, occupied_after
def do_test(ctfile):
    """Helper: save/load round trip into *ctfile*, checking occupancy."""
    inpath = utils.get_test_data('random-20-a.fa')
    savepath = utils.get_temp_filename(ctfile)
    sizes = khmer.get_n_primes_near_x(1, 2 ** 31 + 1000)

    orig = khmer._Countgraph(12, sizes)
    orig.consume_fasta(inpath)
    orig.save(savepath)

    loaded = khmer.load_countgraph(savepath)

    count_saved = orig.n_occupied()
    count_loaded = loaded.n_occupied()

    assert count_saved == 3966, count_saved
    assert count_loaded == count_saved, count_loaded
def test_maxcount_with_bigcount():
    """With bigcount enabled, counts keep growing past MAX_COUNT."""
    # hashtable should not saturate, if use_bigcount is set.
    kh = khmer._Countgraph(4, [5])
    kh.set_use_bigcount(True)

    last_count = None
    for _ in range(10000):
        kh.count('AAAA')
        c = kh.get('AAAA')
        print(last_count, c)
        if c == last_count:
            break  # unexpected saturation — bail out early
        last_count = c

    assert c == 10000, "should be able to count to 10000"
    assert c != MAX_COUNT
def test_fakelump_load_stop_tags_trunc():
    """Loading a truncated stop-tags file must raise OSError.

    Builds a single-partition graph from fakelump.fa, repartitions it to
    produce stop tags, truncates the saved tags file, then verifies that
    loading the truncated file fails.
    """
    fakelump_fa = utils.get_test_data('fakelump.fa')
    fakelump_fa_foo = utils.get_temp_filename('fakelump.fa.stopfoo')

    ht = khmer.Nodegraph(32, 1e5, 4)
    ht.consume_fasta_and_tag(fakelump_fa)

    subset = ht.do_subset_partition(0, 0)
    ht.merge_subset(subset)

    (n_partitions, _) = ht.count_partitions()  # singleton count unused
    assert n_partitions == 1, n_partitions

    # now, break partitions on any k-mer that you see more than once
    # on big excursions, where big excursions are excursions 40 out
    # that encounter more than 82 k-mers.  This should specifically
    # identify our connected sequences in fakelump...
    EXCURSION_DISTANCE = 40
    EXCURSION_KMER_THRESHOLD = 82
    EXCURSION_KMER_COUNT_THRESHOLD = 1
    counting = khmer._Countgraph(32, [5, 7, 11, 13])

    ht.repartition_largest_partition(None, counting, EXCURSION_DISTANCE,
                                     EXCURSION_KMER_THRESHOLD,
                                     EXCURSION_KMER_COUNT_THRESHOLD)

    ht.save_stop_tags(fakelump_fa_foo)

    # truncate the saved stop tags to the first 10 bytes; context managers
    # close the handles (the original left them to the garbage collector).
    with open(fakelump_fa_foo, 'rb') as fp:
        data = fp.read()
    with open(fakelump_fa_foo, 'wb') as fp:
        fp.write(data[:10])

    # ok, now try loading these stop tags; should fail.
    ht = khmer._Nodegraph(32, [5, 7, 11, 13])
    ht.consume_fasta_and_tag(fakelump_fa)

    try:
        ht.load_stop_tags(fakelump_fa_foo)
        assert 0, "this test should fail"
    except OSError:
        pass
def test_fakelump_load_stop_tags_trunc():
    """A truncated stop-tags file must fail to load with OSError.

    Partitions fakelump.fa into a single partition, repartitions to
    generate stop tags, truncates the tags file on disk, and checks
    that load_stop_tags rejects it.
    """
    fakelump_fa = utils.get_test_data('fakelump.fa')
    fakelump_fa_foo = utils.get_temp_filename('fakelump.fa.stopfoo')

    ht = khmer.Nodegraph(32, 1e5, 4)
    ht.consume_fasta_and_tag(fakelump_fa)

    subset = ht.do_subset_partition(0, 0)
    ht.merge_subset(subset)

    (n_partitions, _) = ht.count_partitions()
    assert n_partitions == 1, n_partitions

    # now, break partitions on any k-mer that you see more than once
    # on big excursions, where big excursions are excursions 40 out
    # that encounter more than 82 k-mers.  This should specifically
    # identify our connected sequences in fakelump...
    EXCURSION_DISTANCE = 40
    EXCURSION_KMER_THRESHOLD = 82
    EXCURSION_KMER_COUNT_THRESHOLD = 1
    counting = khmer._Countgraph(32, [5, 7, 11, 13])

    ht.repartition_largest_partition(None, counting, EXCURSION_DISTANCE,
                                     EXCURSION_KMER_THRESHOLD,
                                     EXCURSION_KMER_COUNT_THRESHOLD)

    ht.save_stop_tags(fakelump_fa_foo)

    # truncate to the first 10 bytes, closing handles deterministically
    # (the original's read handle was never closed).
    with open(fakelump_fa_foo, 'rb') as fp:
        data = fp.read()
    with open(fakelump_fa_foo, 'wb') as fp:
        fp.write(data[:10])

    # ok, now try loading these stop tags; should fail.
    ht = khmer._Nodegraph(32, [5, 7, 11, 13])
    ht.consume_fasta_and_tag(fakelump_fa)

    try:
        ht.load_stop_tags(fakelump_fa_foo)
        assert 0, "this test should fail"
    except OSError:
        pass
def test_complete_2_collision():
    """After filling a tiny table, the first 128 4-mers all read as present."""
    kh = khmer._Countgraph(4, [5])
    n_entries = kh.hashsizes()[0]

    for idx in range(n_entries):
        kh.count(khmer.reverse_hash(idx, 4))

    n_rc_filled = 0
    for idx in range(128):
        kmer = khmer.reverse_hash(idx, 4)
        if kh.get(kmer):  # string hashing is rc aware
            n_rc_filled += 1

    assert n_rc_filled == 128, n_rc_filled
def test_count_1():
    """Counting a 12-mer updates both string and integer-hash lookups."""
    hi = khmer._Countgraph(12, PRIMES_1m)
    kmer = 'G' * 12
    hashval = hi.hash('G' * 12)

    assert hi.get(kmer) == 0
    assert hi.get(hashval) == 0

    hi.count(kmer)
    assert hi.get(kmer) == 1
    assert hi.get(hashval) == 1

    hi.count(kmer)
    assert hi.get(kmer) == 2
    assert hi.get(hashval) == 2

    # hashing a k-mer of the wrong length must fail
    with pytest.raises(ValueError):
        hi.hash('G' * 11)
def test_load_gz_truncated_should_fail():
    """Loading a truncated gzipped countgraph file must raise OSError."""
    inpath = utils.get_test_data('random-20-a.fa')
    savepath = utils.get_temp_filename('tempcountingsave0.ht.gz')

    hi = khmer.Countgraph(12, 1000, 2)
    hi.consume_fasta(inpath)
    hi.save(savepath)

    # truncate the saved file to its first 1000 bytes
    with open(savepath, 'rb') as fp:
        data = fp.read()
    with open(savepath, 'wb') as fp:
        fp.write(data[:1000])

    hi = khmer._Countgraph(12, [1])
    try:
        hi.load(savepath)
        assert 0, "load should fail"
    except OSError as e:
        print(str(e))
def test_load_truncated():
    """Every strict prefix of a saved countgraph must fail to load.

    Saves a small countgraph, then for each truncation length writes
    the prefix to a scratch file and verifies load_countgraph raises
    OSError.
    """
    inpath = utils.get_test_data('random-20-a.fa')
    savepath = utils.get_temp_filename('save.ht')
    truncpath = utils.get_temp_filename('trunc.ht')
    sizes = khmer.get_n_primes_near_x(3, 200)

    hi = khmer._Countgraph(12, sizes)
    hi.consume_fasta(inpath)
    hi.save(savepath)

    # read the saved bytes; close the handle (original leaked it)
    with open(savepath, 'rb') as fp:
        data = fp.read()

    for i in range(len(data)):
        with open(truncpath, 'wb') as fp:
            fp.write(data[:i])

        try:
            khmer.load_countgraph(truncpath)
            assert 0, "this should not be reached!"
        except OSError as err:
            print(str(err))
def test_load_truncated():
    """Truncated countgraph files of every length must fail to load.

    Saves a small countgraph, then writes each strict prefix to a
    scratch file and verifies load_countgraph raises OSError.
    """
    inpath = utils.get_test_data('random-20-a.fa')
    savepath = utils.get_temp_filename('save.ht')
    truncpath = utils.get_temp_filename('trunc.ht')
    sizes = khmer.get_n_primes_near_x(3, 200)

    hi = khmer._Countgraph(12, sizes)
    hi.consume_seqfile(inpath)
    hi.save(savepath)

    # read the saved bytes; close the handle (original leaked it)
    with open(savepath, 'rb') as fp:
        data = fp.read()

    for i in range(len(data)):
        with open(truncpath, 'wb') as fp:
            fp.write(data[:i])

        try:
            khmer.load_countgraph(truncpath)
            assert 0, "this should not be reached!"
        except OSError as err:
            print(str(err))
def test_count_1():
    """String counting and integer-hash lookups stay in sync."""
    hi = khmer._Countgraph(12, PRIMES_1m)
    kmer = 'G' * 12
    hashval = hi.hash('G' * 12)

    assert hi.get(kmer) == 0
    assert hi.get(hashval) == 0

    hi.count(kmer)
    assert hi.get(kmer) == 1
    assert hi.get(hashval) == 1

    hi.count(kmer)
    assert hi.get(kmer) == 2
    assert hi.get(hashval) == 2

    # hashing a k-mer of the wrong length must fail
    try:
        hi.hash('G' * 11)
        assert 0, "incorrect kmer size should fail"
    except RuntimeError:
        pass
def setup(self):
    """Create a 4-mer countgraph with one 4**4-slot table (one per 4-mer)."""
    self.kh = khmer._Countgraph(4, [4 ** 4])
def setup(self):
    """Create a 4-mer countgraph with one 4**4-slot table (one per 4-mer)."""
    self.kh = khmer._Countgraph(4, [4**4])
def test_counting_bad_primes_list():
    """Non-numeric table sizes must be rejected with TypeError."""
    try:
        khmer._Countgraph(12, ["a", "b", "c"], 1)
        assert 0, "bad list of primes should fail"
    except TypeError as e:
        print(str(e))
def test_bad_create():
    """An empty tablesizes list must be rejected with ValueError.

    The original test silently passed when no exception was raised;
    the assert 0 after the constructor makes that case a failure.
    """
    try:
        khmer._Countgraph(5, [])
        assert 0, "empty tablesizes should fail"
    except ValueError as err:
        assert 'tablesizes needs to be one or more numbers' in str(err)
def setup(self):
    """Create a fresh 12-mer countgraph using the ~1m-scale prime sizes."""
    self.hi = khmer._Countgraph(12, PRIMES_1m)
def test_revhash_1():
    """reverse_hash inverts hash for a known 12-mer."""
    hi = khmer._Countgraph(12, [1])
    kmer = 'C' * 12

    hashed = hi.hash('C' * 12)
    assert hi.reverse_hash(hashed) == kmer
def setup(self):
    """Build a 4-mer countgraph pre-loaded with the all-A test sequence."""
    self.kh = khmer._Countgraph(4, [5])
    A_filename = utils.get_test_data('all-A.fa')
    self.kh.consume_fasta(A_filename)