Exemple #1
0
def test_very_short_read():
    """Check consume_fasta's counts on test-short.fa for k=9 and k=8.

    The file is expected to contain one read that produces no 9-mers and
    exactly one 8-mer (presumably an 8-base read; confirm against the data).
    """
    short_filename = utils.get_test_data('test-short.fa')

    # (k-mer size, expected number of k-mers consumed)
    for ksize, expected_kmers in ((9, 0), (8, 1)):
        table = khmer.new_hashtable(ksize, 4)
        n_reads, n_kmers = table.consume_fasta(short_filename)
        assert n_reads == 1
        assert n_kmers == expected_kmers
def test_very_short_read():
    """Check consume_fasta's counts on test-short.fa for k=9 and k=8.

    Uses a table of size 4**4+1 and loads the data file from ``thisdir``.
    The file is expected to contain one read that produces no 9-mers and
    exactly one 8-mer.
    """
    short_filename = os.path.join(thisdir, 'test-short.fa')
    table_size = 4**4+1

    # (k-mer size, expected number of k-mers consumed)
    for ksize, expected_kmers in ((9, 0), (8, 1)):
        table = khmer.new_hashtable(ksize, table_size)
        n_reads, n_kmers = table.consume_fasta(short_filename)
        assert n_reads == 1
        assert n_kmers == expected_kmers
def test_very_short_read():
    """Check consume_fasta's counts on test-short.fa for k=9 and k=8.

    Each assertion reports the offending value on failure.  The file is
    expected to contain one read that produces no 9-mers and one 8-mer.
    """
    short_filename = utils.get_test_data('test-short.fa')

    # (k-mer size, expected number of k-mers consumed)
    for ksize, expected_kmers in ((9, 0), (8, 1)):
        table = khmer.new_hashtable(ksize, 4)
        n_reads, n_kmers = table.consume_fasta(short_filename)
        assert n_reads == 1, n_reads
        assert n_kmers == expected_kmers, n_kmers
Exemple #4
0
def test_complete_no_collision():
    """Count every 4-mer once in a 4**4-slot table and check the fill tallies.

    Three tallies are kept per k-mer: non-zero count when looked up by
    string, count of exactly 1 when looked up by string, and non-zero
    count when looked up by integer hash.
    """
    kh = khmer.new_hashtable(4, 4**4)
    kt = khmer.new_ktable(4)

    # Load every possible 4-mer exactly once.
    for idx in range(kt.n_entries()):
        kh.count(kt.reverse_hash(idx))

    n_palindromes = n_rc_filled = n_fwd_filled = 0

    for idx in range(kt.n_entries()):
        kmer = kt.reverse_hash(idx)
        # string hashing is rc aware
        if kh.get(kmer):
            n_rc_filled += 1
        # palindromes are singular
        if kh.get(kmer) == 1:
            n_palindromes += 1
        # int hashing is not rc aware
        if kh.get(idx):
            n_fwd_filled += 1

    assert n_rc_filled == kt.n_entries(), n_rc_filled
    assert n_palindromes == 16, n_palindromes  # @CTB check this
    expected_fwd = kt.n_entries() / 2 + n_palindromes / 2
    assert n_fwd_filled == expected_fwd, n_fwd_filled
Exemple #5
0
    def test_abund(self):
        """Exercise consume_fasta and output_fasta_kmer_pos_freq.

        Checks that both methods raise TypeError when called without
        arguments, that consume_fasta raises IOError for the path
        "nonexistent", and that the generated output file holds a single
        line of 114 - 10 + 1 fields, each equal to '1'.
        """
        ht = khmer.new_hashtable(10, 4 ** 10)

        filename = utils.get_test_data('test-abund-read.fa')
        outname = utils.get_temp_filename('test_abund.out')

        ht.consume_fasta(filename)

        # Missing filename argument.
        try:
            ht.consume_fasta()
        except TypeError as err:
            print(str(err))
        else:
            assert 0, "should fail"

        # Unreadable input file.
        try:
            ht.consume_fasta("nonexistent")
        except IOError as err:
            print(str(err))
        else:
            assert 0, "should fail"

        ht.output_fasta_kmer_pos_freq(filename, outname)

        # Missing arguments.
        try:
            ht.output_fasta_kmer_pos_freq()
        except TypeError as err:
            print(str(err))
        else:
            assert 0, "should fail"

        # The context manager closes the file even when an assertion fails.
        with open(outname, "r") as fd:
            output = fd.readlines()

        assert len(output) == 1

        fields = output[0].strip().split()
        assert ['1'] * (114 - 10 + 1) == fields
def test_64bitshift():
    """Consume a sequence at k=25 and check a substring's minimum count.

    After consuming ``fullstr``, get_min_count on a substring of it must
    be greater than zero.
    """
    table = khmer.new_hashtable(25, 4)
    fullstr = "GTATGCCAGCTCCAACTGGGCCGGTACGAGCAGGCCATTGCCTCTTGCCGCGATGCGTCGGCG"
    substr = "ATGCCAGCTCCAACTGGGCCGGTACGAGCAGGCCATTGCCTCTTGC"

    table.consume(fullstr)

    # On failure the observed minimum count is reported as the message.
    assert table.get_min_count(substr) > 0, table.get_min_count(substr)
def test_hashtable_n_entries():
    """n_entries() must raise TypeError when handed an argument."""
    countingtable = khmer.new_hashtable(4, 4)
    try:
        countingtable.n_entries("nope")
    except TypeError as err:
        # print(...) with one argument is valid on Python 2 and Python 3.
        print(str(err))
    else:
        assert 0, "n_entries should accept no arguments"
Exemple #8
0
    def test_abund(self):
        """Exercise consume_fasta and output_fasta_kmer_pos_freq.

        Verifies the TypeError raised by argument-less calls, the IOError
        raised for the path "nonexistent", and that the written output is
        one line made of 114 - 10 + 1 fields that are all '1'.
        """
        ht = khmer.new_hashtable(10, 4**10)

        filename = utils.get_test_data('test-abund-read.fa')
        outname = utils.get_temp_filename('test_abund.out')

        ht.consume_fasta(filename)

        try:
            ht.consume_fasta()              # no filename given
        except TypeError as err:
            print(str(err))
        else:
            assert 0, "should fail"

        try:
            ht.consume_fasta("nonexistent")  # file cannot be opened
        except IOError as err:
            print(str(err))
        else:
            assert 0, "should fail"

        ht.output_fasta_kmer_pos_freq(filename, outname)

        try:
            ht.output_fasta_kmer_pos_freq()  # no arguments given
        except TypeError as err:
            print(str(err))
        else:
            assert 0, "should fail"

        # Use a context manager so the handle is released even if one of
        # the assertions below fails.
        with open(outname, "r") as fd:
            lines = fd.readlines()

        assert len(lines) == 1

        fields = lines[0].strip().split()
        assert ['1'] * (114 - 10 + 1) == fields
Exemple #9
0
    def test_filter_limit_n(self):
        """Run filter_fasta_file_limit_n twice on simple_3.fa.

        With the final argument set to 7 one sequence is expected to be
        kept; with 4, two sequences.  Both runs must report 2 total reads.
        """
        ht = khmer.new_hashtable(4, 4**4)

        filename = utils.get_test_data('simple_3.fa')
        outname = utils.get_temp_filename('test_filter.out')

        total_reads, _ = ht.consume_fasta(filename)
        assert total_reads == 2, total_reads

        # (final argument to the filter, expected number of kept sequences)
        for last_arg, expected_kept in ((7, 1), (4, 2)):
            total_reads, n_seq_kept = khmer.filter_fasta_file_limit_n(
                ht, filename, total_reads, outname, 2, last_arg)

            assert total_reads == 2
            assert n_seq_kept == expected_kept
Exemple #10
0
def test_64bitshift():
    """A substring of a consumed sequence must have a positive min count.

    The table is built with k=25; ``substr`` is a substring of ``fullstr``.
    """
    fullstr = "GTATGCCAGCTCCAACTGGGCCGGTACGAGCAGGCCATTGCCTCTTGCCGCGATGCGTCGGCG"
    substr = "ATGCCAGCTCCAACTGGGCCGGTACGAGCAGGCCATTGCCTCTTGC"

    table = khmer.new_hashtable(25, 4)
    table.consume(fullstr)

    # The second call only runs to build the failure message.
    assert table.get_min_count(substr) > 0, table.get_min_count(substr)
Exemple #11
0
    def test_filter_limit_n(self):
        """Run filter_fasta_file_limit_n twice on test-data/simple_3.fa.

        The input lives under ``thisdir`` and output goes to
        ``self.tempdir``.  With the final argument set to 7 one sequence is
        expected to be kept; with 4, two sequences.
        """
        ht = khmer.new_hashtable(4, 4**4)

        filename = os.path.join(thisdir, 'test-data/simple_3.fa')
        outname = os.path.join(self.tempdir, 'test_filter.out')

        total_reads, _ = ht.consume_fasta(filename)
        assert total_reads == 2, total_reads

        # (final argument to the filter, expected number of kept sequences)
        for last_arg, expected_kept in ((7, 1), (4, 2)):
            total_reads, n_seq_kept = khmer.filter_fasta_file_limit_n(
                ht, filename, total_reads, outname, 2, last_arg)

            assert total_reads == 2
            assert n_seq_kept == expected_kept
def test_badcount():
    """count() must raise TypeError when called without its argument."""
    countingtable = khmer.new_hashtable(4, 4)
    try:
        countingtable.count()
    except TypeError as err:
        # "as" and print(...) are accepted by Python 2.6+ and Python 3;
        # the comma form and the print statement are Python-2-only.
        print(str(err))
    else:
        assert 0, "count should require one argument"
Exemple #13
0
def test_badcount():
    """Calling count() with no argument must raise TypeError."""
    countingtable = khmer.new_hashtable(4, 4)
    try:
        countingtable.count()
    except TypeError as err:
        # Written so that it parses on both Python 2.6+ and Python 3.
        print(str(err))
    else:
        assert 0, "count should require one argument"
    def test_filter_limit_n(self):
        """Run filter_fasta_file_limit_n twice on simple_3.fa.

        Both runs must report 2 total reads; the first (final argument 7)
        is expected to keep one sequence and the second (final argument 4)
        to keep two.
        """
        ht = khmer.new_hashtable(4, 4**4)

        filename = utils.get_test_data('simple_3.fa')
        outname = utils.get_temp_filename('test_filter.out')

        total_reads, _ = ht.consume_fasta(filename)
        assert total_reads == 2, total_reads

        # (final argument to the filter, expected number of kept sequences)
        for last_arg, expected_kept in ((7, 1), (4, 2)):
            total_reads, n_seq_kept = khmer.filter_fasta_file_limit_n(
                ht, filename, total_reads, outname, 2, last_arg)

            assert total_reads == 2
            assert n_seq_kept == expected_kept
Exemple #15
0
def test_hashtable_n_entries():
    """n_entries() must raise TypeError when given an argument."""
    countingtable = khmer.new_hashtable(4, 4)
    try:
        countingtable.n_entries("nope")
    except TypeError as err:
        # "as" and print(...) parse on Python 2.6+ and Python 3 alike.
        print(str(err))
    else:
        assert 0, "n_entries should accept no arguments"
def test_no_collision():
    """Count 'AAAA' then its reverse complement 'TTTT'; tally goes 1, 2."""
    table = khmer.new_hashtable(4, 4)

    # 'TTTT' is the reverse complement of 'AAAA'.
    for kmer, expected in (('AAAA', 1), ('TTTT', 2)):
        table.count(kmer)
        assert table.get(kmer) == expected
Exemple #17
0
def test_collision():
    """Count "AAAA" then "TTTT" in a size-4 table; lookups give 1 then 2."""
    table = khmer.new_hashtable(4, 4)

    for kmer, expected in (("AAAA", 1), ("TTTT", 2)):
        table.count(kmer)
        assert table.get(kmer) == expected
Exemple #18
0
def test_no_collision():
    """Counting a k-mer and its reverse complement yields counts 1 and 2."""
    table = khmer.new_hashtable(4, 4)

    forward, revcomp = "AAAA", "TTTT"

    table.count(forward)
    assert table.get(forward) == 1

    table.count(revcomp)
    assert table.get(revcomp) == 2
Exemple #19
0
def test_no_collision():
    """Count 'AAAA' and then 'TTTT'; the looked-up counts must be 1 and 2."""
    table = khmer.new_hashtable(4, 4)

    # The second entry is the reverse complement of the first.
    steps = (('AAAA', 1), ('TTTT', 2))
    for kmer, expected in steps:
        table.count(kmer)
        assert table.get(kmer) == expected
def test_collision():
    """Count 'AAAA' then 'TTTT' in a size-4 table; lookups give 1 then 2."""
    table = khmer.new_hashtable(4, 4)

    first, second = 'AAAA', 'TTTT'

    table.count(first)
    assert table.get(first) == 1

    table.count(second)
    assert table.get(second) == 2
Exemple #21
0
def test_collision():
    """After counting 'AAAA' and 'TTTT', get() returns 1 and then 2."""
    table = khmer.new_hashtable(4, 4)

    for kmer, expected in (('AAAA', 1), ('TTTT', 2)):
        table.count(kmer)
        assert table.get(kmer) == expected
def test_64bitshift_2():
    """Every 25-mer window of a consumed sequence must have a count > 0."""
    ksize = 25
    table = khmer.new_hashtable(ksize, 4)
    fullstr = "GTATGCCAGCTCCAACTGGGCCGGTACGAGCAGGCCATTGCCTCTTGCCGCGATGCGTCGGCG"

    table.consume(fullstr)

    # Slide a k-wide window across the whole sequence.
    for start in range(len(fullstr) - ksize + 1):
        window = fullstr[start:start + ksize]
        assert table.get(window) > 0
Exemple #23
0
def test_64bitshift_2():
    """Consume a sequence at k=25, then look up each of its 25-mers."""
    fullstr = "GTATGCCAGCTCCAACTGGGCCGGTACGAGCAGGCCATTGCCTCTTGCCGCGATGCGTCGGCG"
    ksize = 25

    table = khmer.new_hashtable(ksize, 4)
    table.consume(fullstr)

    n_windows = len(fullstr) - ksize + 1
    for offset in range(n_windows):
        assert table.get(fullstr[offset:offset + ksize]) > 0
def test_consume_uniqify_first():
    """Consuming "TTTT" must make a lookup of "AAAA" return 1."""
    table = khmer.new_hashtable(4, 4)

    seq, seq_rc = "TTTT", "AAAA"

    table.consume(seq)
    assert table.get(seq_rc) == 1
Exemple #25
0
def test_consume_uniqify_first():
    """After consume("TTTT"), get("AAAA") is expected to be 1."""
    table = khmer.new_hashtable(4, 4)

    consumed = "TTTT"
    queried = "AAAA"        # named s_rc (reverse complement) originally

    table.consume(consumed)
    count = table.get(queried)
    assert count == 1
def test_maxcount_consume():
    """Consuming 10000 'A's must leave the 'AAAA' count at MAX_COUNT.

    The hashtable should saturate at some point so as not to overflow its
    counter; the exact ceiling depends on HashcountType.
    """
    table = khmer.new_hashtable(4, 4)

    table.consume("A" * 10000)

    count = table.get('AAAA')
    assert count == MAX_COUNT, count
Exemple #27
0
def consume_fasta_if_intersect(ht, filename, total_reads, orig_readmask):
    """Build a readmask with filter_fasta_file_run and load the masked reads.

    ``ht.filter_fasta_file_run(filename, total_reads, 1, 5)`` yields a new
    readmask; its n_kept() value is printed and the mask is saved to the
    file 'the_readmask'.  A fresh hashtable created from the module-level
    ``K`` and ``HTSIZE`` then consumes ``filename`` through that mask.

    ``orig_readmask`` is accepted but not used by this implementation.

    Returns a ``(new_hashtable, n)`` tuple where ``n`` is the second value
    returned by ``consume_fasta``.
    """
    # An earlier variant built the mask from fasta_file_to_minmax(...)
    # followed by filter_fasta_file_any(mmt, 2).
    new_readmask = ht.filter_fasta_file_run(filename, total_reads, 1, 5)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('XXX %s' % (new_readmask.n_kept(),))
    new_readmask.save('the_readmask')

    # Use a separate name instead of rebinding the ``ht`` parameter.
    new_ht = khmer.new_hashtable(K, HTSIZE)
    _, n = new_ht.consume_fasta(filename, 0, 0, new_readmask, False)
    return new_ht, n
Exemple #28
0
def test_maxcount_consume():
    """A run of 10000 'A's must saturate the 'AAAA' counter at MAX_COUNT.

    Saturation keeps the counter from overflowing; the ceiling itself
    depends on HashcountType.
    """
    table = khmer.new_hashtable(4, 4)

    long_run = "A" * 10000
    table.consume(long_run)

    observed = table.get('AAAA')
    assert observed == MAX_COUNT, observed
Exemple #29
0
def consume_fasta_if_intersect(ht, filename, total_reads, orig_readmask):
    """Filter ``filename`` into a readmask, then load it into a new table.

    The readmask comes from
    ``ht.filter_fasta_file_run(filename, total_reads, 1, 5)``; the number
    of kept reads is printed and the mask is written to 'the_readmask'.
    A new hashtable built from the module-level ``K`` and ``HTSIZE``
    consumes ``filename`` using that mask.

    ``orig_readmask`` is part of the signature but is not used here.

    Returns ``(new_hashtable, n)``, ``n`` being the second element of the
    tuple returned by ``consume_fasta``.
    """
    # Previously the mask was derived via fasta_file_to_minmax(...) and
    # filter_fasta_file_any(mmt, 2); filter_fasta_file_run replaced that.
    new_readmask = ht.filter_fasta_file_run(filename, total_reads, 1, 5)

    # Formatted so the output matches on Python 2 and Python 3.
    print('XXX %s' % (new_readmask.n_kept(),))
    new_readmask.save('the_readmask')

    # A distinct local avoids shadowing the caller-supplied ``ht``.
    fresh_table = khmer.new_hashtable(K, HTSIZE)
    _, n = fresh_table.consume_fasta(filename, 0, 0, new_readmask, False)
    return fresh_table, n
Exemple #30
0
def test_maxcount_consume_with_bigcount():
    """With bigcount enabled, 10000 'A's give an 'AAAA' count of 9997.

    The bigcount hack is used to avoid saturating the hashtable.
    """
    table = khmer.new_hashtable(4, 4)
    table.set_use_bigcount(True)

    table.consume("A" * 10000)

    count = table.get('AAAA')
    assert count == 10000 - 3, count
Exemple #31
0
def test_nonbool_in_consume_fasta():
    """consume_fasta must raise TypeError when "hi" is the 4th argument.

    NOTE(review): judging by the call in consume_fasta_if_intersect, the
    4th positional argument is the readmask and the 5th a boolean, so this
    test's name may be swapped with test_bad_readmask_in_consume_fasta --
    confirm.
    """
    table = khmer.new_hashtable(4, 4**4)

    try:
        table.consume_fasta(reads_filename, 0, 0, "hi", False,
                            callback_raise)
    except TypeError:
        pass                            # expected
    else:
        assert 0
Exemple #32
0
def test_raise_in_consume_fasta_build_readmask():
    """GoodException must propagate out of consume_fasta_build_readmask.

    The call is given ``callback_raise`` as its callback; presumably that
    callback raises GoodException (defined outside this block).
    """
    table = khmer.new_hashtable(4, 4**4)

    try:
        table.consume_fasta_build_readmask(reads_filename, 0, 0,
                                           callback_raise)
    except GoodException:
        pass                            # expected
    else:
        assert 0
Exemple #33
0
def test_bad_mmt_in_filter_fasta_file_max():
    """filter_fasta_file_any must raise TypeError for a string minmax arg."""
    table = khmer.new_hashtable(4, 4**4)

    try:
        table.filter_fasta_file_any("hi", 2)
    except TypeError:
        pass  # expected
    else:
        assert 0
def test_maxcount_consume_with_bigcount():
    """Bigcount mode must count all 10000 - 3 'AAAA' windows unsaturated.

    The bigcount hack is switched on to avoid saturating the hashtable.
    """
    table = khmer.new_hashtable(4, 4)
    table.set_use_bigcount(True)

    long_run = "A" * 10000
    table.consume(long_run)

    observed = table.get('AAAA')
    assert observed == 10000 - 3, observed
Exemple #35
0
def test_bad_readmask_in_consume_fasta():
    """consume_fasta must raise TypeError when "hi" is the 5th argument.

    NOTE(review): judging by the call in consume_fasta_if_intersect, the
    4th positional argument is the readmask and the 5th a boolean, so this
    test's name may be swapped with test_nonbool_in_consume_fasta --
    confirm.
    """
    table = khmer.new_hashtable(4, 4**4)

    try:
        table.consume_fasta(reads_filename, 0, 0, None, "hi",
                            callback_raise)
    except TypeError:
        pass                            # expected
    else:
        assert 0
Exemple #36
0
def test_raise_in_fasta_file_to_minmax():
    """GoodException must propagate out of fasta_file_to_minmax.

    ``callback_raise`` is supplied as the callback; presumably it raises
    GoodException (both are defined outside this block).
    """
    table = khmer.new_hashtable(4, 4**4)

    try:
        table.fasta_file_to_minmax(reads_filename, N_READS, None,
                                   callback_raise)
    except GoodException:
        pass                            # expected
    else:
        assert 0
Exemple #37
0
def test_bad_mmt_in_filter_fasta_file_max():
    """Passing the string "hi" to filter_fasta_file_any raises TypeError."""
    table = khmer.new_hashtable(4, 4**4)

    try:
        table.filter_fasta_file_any("hi", 2)
    except TypeError:
        pass                            # expected
    else:
        assert 0
Exemple #38
0
def test_raise_in_consume_fasta_build_readmask():
    """Disabled test: returns immediately (marked @CTB).

    The unreachable body would check that GoodException propagates out of
    consume_fasta_build_readmask when ``callback_raise`` is the callback.
    """
    return  ## @CTB

    table = khmer.new_hashtable(4, 4**4)

    try:
        table.consume_fasta_build_readmask(reads_filename, 0, 0,
                                           callback_raise)
    except GoodException:
        pass
    else:
        assert 0
Exemple #39
0
def test_raise_in_consume_fasta():
    """GoodException must propagate out of consume_fasta.

    ``callback_raise`` is supplied as the callback; presumably it raises
    GoodException (both are defined outside this block).  If the call
    returns instead, the first returned value is printed and the test
    fails.
    """
    kh = khmer.new_hashtable(4, 4**4)

    try:
        n, _ = kh.consume_fasta(reads_filename, 0, 0, None, False,
                                callback_raise)
    except GoodException:
        pass                            # expected
    else:
        # print(...) with one argument parses on Python 2 and Python 3.
        print(n)
        assert 0
Exemple #40
0
def test_bad_readmask_in_filter_fasta_file_limit_n():
    """filter_fasta_file_limit_n must raise TypeError for a string 4th arg.

    A minmax table is first built from ``reads_filename``; that step runs
    outside the guarded call, so any error it raises fails the test.
    """
    table = khmer.new_hashtable(4, 4**4)
    minmax = table.fasta_file_to_minmax(reads_filename, N_READS)

    try:
        table.filter_fasta_file_limit_n(minmax, 2, 2, "hi")
    except TypeError:
        pass                            # expected
    else:
        assert 0
Exemple #41
0
def test_raise_in_fasta_file_to_minmax():
    """Disabled test: returns immediately (marked "@@CTB fix").

    The unreachable body would check that GoodException propagates out of
    fasta_file_to_minmax when ``callback_raise`` is the callback.
    """
    return  # @@CTB fix

    table = khmer.new_hashtable(4, 4**4)

    try:
        table.fasta_file_to_minmax(reads_filename, N_READS, None,
                                   callback_raise)
    except GoodException:
        pass
    else:
        assert 0
def test_raise_in_filter_fasta_file_max():
    """Disabled test: returns immediately (marked @CTB).

    The unreachable body would build a minmax table and check that
    GoodException propagates out of filter_fasta_file_any when
    ``callback_raise`` is the callback.
    """
    return ## @CTB

    table = khmer.new_hashtable(4, 4**4)
    minmax = table.fasta_file_to_minmax(reads_filename, N_READS)

    try:
        table.filter_fasta_file_any(minmax, 2, None, callback_raise)
    except GoodException:
        pass
    else:
        assert 0
Exemple #43
0
def test_raise_in_consume_fasta():
    """Disabled test: returns immediately (marked @CTB).

    The unreachable body would check that GoodException propagates out of
    consume_fasta when ``callback_raise`` is passed as the 4th argument.
    """
    return  # @CTB

    kh = khmer.new_hashtable(4, 4**4)

    try:
        n, _ = kh.consume_fasta(reads_filename, 0, 0, callback_raise)
    except GoodException:
        pass
    else:
        # Even in dead code a print statement is a SyntaxError on
        # Python 3; print(...) parses on both major versions.
        print(n)
        assert 0
Exemple #44
0
    def test_abund(self):
        """consume_fasta must accept a filename and reject a missing one.

        After consuming test-abund-read.fa, calling consume_fasta with no
        arguments must raise TypeError.
        """
        ht = khmer.new_hashtable(10, 4 ** 10)

        filename = utils.get_test_data('test-abund-read.fa')
        # Not used in the visible part of this test; kept as in the original.
        outname = utils.get_temp_filename('test_abund.out')

        ht.consume_fasta(filename)
        try:
            ht.consume_fasta()
        except TypeError as err:
            # "as" and print(...) parse on Python 2.6+ and Python 3; the
            # comma form and the print statement are Python-2-only.
            print(str(err))
        else:
            assert 0, "should fail"
def test_get_maxcount():
    """get_max_count of "AAAAACGT" is 2 after one consume and 4 after two."""
    table = khmer.new_hashtable(4, 4)
    seq = "AAAAACGT"

    # Expected maximum count after each successive consume of ``seq``.
    for expected in (2, 4):
        table.consume(seq)
        assert table.get_max_count(seq) == expected
Exemple #46
0
    def test_abund(self):
        """Consume test-abund-read.fa, then require TypeError on a bare call.

        Calling consume_fasta with no arguments must raise TypeError.
        """
        ht = khmer.new_hashtable(10, 4**10)

        filename = utils.get_test_data('test-abund-read.fa')
        # Not used in the visible part of this test; kept as in the original.
        outname = utils.get_temp_filename('test_abund.out')

        ht.consume_fasta(filename)
        try:
            ht.consume_fasta()
        except TypeError as err:
            # Written so that it parses on both Python 2.6+ and Python 3.
            print(str(err))
        else:
            assert 0, "should fail"
Exemple #47
0
def test_get_maxcount():
    """Consuming "AAAAACGT" twice takes its max count from 2 to 4."""
    table = khmer.new_hashtable(4, 4)
    seq = "AAAAACGT"

    table.consume(seq)
    first = table.get_max_count(seq)
    assert first == 2

    table.consume(seq)
    second = table.get_max_count(seq)
    assert second == 4
Exemple #48
0
def test_badcount():
    """count() must reject a missing argument and a wrong-length k-mer.

    With no argument a TypeError is expected; with the 5-character string
    'ABCDE' on a k=4 table a ValueError is expected.
    """
    countingtable = khmer.new_hashtable(4, 4)

    try:
        countingtable.count()
    except TypeError as err:
        # print(...) with one argument parses on Python 2 and Python 3.
        print(str(err))
    else:
        assert 0, "count should require one argument"

    try:
        countingtable.count('ABCDE')
    except ValueError as err:
        print(str(err))
    else:
        assert 0, "count should require k-mer size to be equal"
Exemple #49
0
def test_get_mincount():
    """get_min_count of "AAAAACGT" is 1 after one consume and 2 after two."""
    table = khmer.new_hashtable(4, 4**4)
    seq = "AAAAACGT"

    # Expected minimum count after each successive consume of ``seq``.
    for expected in (1, 2):
        table.consume(seq)
        assert table.get_min_count(seq) == expected
Exemple #50
0
def test_nonbool_in_consume_fasta():
    """Disabled test: returns immediately (marked @CTB).

    The unreachable body would check that consume_fasta raises TypeError
    when "hi" is passed as its 4th argument.
    """
    return  ## @CTB

    table = khmer.new_hashtable(4, 4**4)

    try:
        table.consume_fasta(reads_filename, 0, 0, "hi", False,
                            callback_raise)
    except TypeError:
        pass
    else:
        assert 0
Exemple #51
0
def test_get_mincount_rc():
    """Consuming the reverse complement must raise the forward min count.

    After consuming "AAAAACGT" its min count is 1; after also consuming
    "ACGTTTTT" the min count of "AAAAACGT" is 2.
    """
    table = khmer.new_hashtable(4, 4)

    forward = "AAAAACGT"
    revcomp = "ACGTTTTT"

    # (sequence to consume, expected min count of ``forward`` afterwards)
    for seq, expected in ((forward, 1), (revcomp, 2)):
        table.consume(seq)
        assert table.get_min_count(forward) == expected
Exemple #52
0
def test_bad_readmask_in_filter_fasta_file_max():
    """filter_fasta_file_any must raise TypeError for a string 3rd argument.

    The reporting callback is reset and a minmax table is built before the
    guarded call; errors in those steps fail the test.
    """
    table = khmer.new_hashtable(4, 4**4)

    khmer.reset_reporting_callback()

    minmax = table.fasta_file_to_minmax(reads_filename, N_READS)

    try:
        table.filter_fasta_file_any(minmax, 2, "hi")
    except TypeError:
        pass  # expected
    else:
        assert 0
Exemple #53
0
def test_raise_in_filter_fasta_file_max():
    """Disabled test: returns immediately (marked @CTB).

    The unreachable body would reset the reporting callback, build a
    minmax table, and check that GoodException propagates out of
    filter_fasta_file_any when ``callback_raise`` is the callback.
    """
    return  ## @CTB

    table = khmer.new_hashtable(4, 4**4)

    khmer.reset_reporting_callback()

    minmax = table.fasta_file_to_minmax(reads_filename, N_READS)

    try:
        table.filter_fasta_file_any(minmax, 2, None, callback_raise)
    except GoodException:
        pass
    else:
        assert 0
Exemple #54
0
def test_badget():
    """get() must count known 6-mers and reject a wrong-length k-mer.

    After consuming ``DNA`` on a k=6 table, "AGCTTT" has count 1 and
    "GATGAG" has count 0; the 5-character string "AGCTT" must raise
    ValueError.
    """
    kh = khmer.new_hashtable(6, 4**10)

    DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAG"

    kh.consume(DNA)

    assert kh.get("AGCTTT") == 1

    assert kh.get("GATGAG") == 0

    try:
        kh.get("AGCTT")
    except ValueError as err:
        # "as" and print(...) parse on Python 2.6+ and Python 3; the
        # comma form and the print statement are Python-2-only.
        print(str(err))
    else:
        assert 0, "this should fail"
Exemple #55
0
    def test_consume_build_readmask(self):
        """Check the counts and readmask from consume_fasta_build_readmask.

        For simple_2.fa the call must report 4 total reads and 63 consumed
        k-mers.  Sequence #4 (index 3) is bad, so the returned readmask
        must be set for indexes 0-2 and clear for index 3.
        """
        ht = khmer.new_hashtable(10, 4**10)

        filename = utils.get_test_data('simple_2.fa')
        # Not used below; kept as in the original.
        outname = utils.get_temp_filename('test_filter.out')

        total_reads, n_consumed, readmask = \
            ht.consume_fasta_build_readmask(filename)

        assert total_reads == 4, total_reads
        assert n_consumed == 63, n_consumed

        for good_index in (0, 1, 2):
            assert readmask.get(good_index)
        assert not readmask.get(3)
Exemple #56
0
def test_maxcount():
    """Repeated count('AAAA') must stop increasing at MAX_COUNT.

    The hashtable should saturate at some point so as not to overflow its
    counter.  The loop counts 'AAAA' up to 10000 times and stops as soon
    as the looked-up count no longer changes; the final count must equal
    MAX_COUNT (which depends on HashcountType) and must not be 10000.
    """
    kh = khmer.new_hashtable(4, 4)

    last_count = None
    for _ in range(0, 10000):
        kh.count('AAAA')
        c = kh.get('AAAA')

        # Formatted so the output matches on Python 2 and Python 3.
        print('%s %s' % (last_count, c))
        if c == last_count:
            break
        last_count = c

    assert c != 10000, "should not be able to count to 10000"
    assert c == MAX_COUNT  # this will depend on HashcountType...
Exemple #57
0
def test_maxcount_with_bigcount():
    """With bigcount enabled, count('AAAA') must reach 10000.

    The hashtable should not saturate if use_bigcount is set.  The loop
    counts 'AAAA' up to 10000 times and stops early only if the looked-up
    count stops changing; the final count must be 10000 and must differ
    from MAX_COUNT.
    """
    kh = khmer.new_hashtable(4, 4)
    kh.set_use_bigcount(True)

    last_count = None
    for _ in range(0, 10000):
        kh.count('AAAA')
        c = kh.get('AAAA')

        # Formatted so the output matches on Python 2 and Python 3.
        print('%s %s' % (last_count, c))
        if c == last_count:
            break
        last_count = c

    assert c == 10000, "should be able to count to 10000"
    assert c != MAX_COUNT