def test_load_gz():
    """Check that a gzip-compressed copy of a saved table loads correctly.

    A counting table is saved uncompressed, the file is copied through
    ``gzip``, and a second table loaded from the compressed copy must give
    the same abundance distribution as the original table.
    """
    inpath = utils.get_test_data('random-20-a.fa')

    savepath = utils.get_temp_filename('tempcountingsave1.ht')
    loadpath = utils.get_temp_filename('tempcountingsave1.ht.gz')

    sizes = list(PRIMES_1m)
    sizes.append(1000005)

    # save uncompressed hashtable.
    hi = khmer.CountingHash(12, sizes)
    hi.consume_fasta(inpath)
    hi.save(savepath)

    # compress.  Context managers close both handles even if the copy
    # raises part-way through (the gzip handle is closed first, as before).
    with open(savepath, 'rb') as in_file:
        with gzip.open(loadpath, 'wb') as out_file:
            out_file.writelines(in_file)

    # load compressed hashtable.
    ht = khmer.CountingHash(12, sizes)
    ht.load(loadpath)

    # each distribution gets its own fresh tracking table
    tracking = khmer._Hashbits(12, sizes)
    x = hi.abundance_distribution(inpath, tracking)

    tracking = khmer._Hashbits(12, sizes)
    y = ht.abundance_distribution(inpath, tracking)

    assert sum(x) == 3966, sum(x)
    assert x == y, (x, y)
def test_bloom_c_2():  # simple one
    """Count four 4-mers and check n_unique_kmers() with one and two tables."""
    ksize = 4

    # a single table of size 11, i.e. no bloom filter
    single = khmer._Hashbits(ksize, [11])
    single.count('AAAA')  # 00 00 00 00 = 0
    # ACTG: 00 10 01 11
    # AACG: 00 00 10 11 = 11, collision with 1st kmer
    # AGAC: 00 11 00 10, collision with 2nd kmer
    for kmer in ('ACTG', 'AACG', 'AGAC'):
        single.count(kmer)
        assert single.n_unique_kmers() == 2

    # two tables, of sizes 11 and 13
    double = khmer._Hashbits(ksize, [11, 13])
    double.count('AAAA')  # 00 00 00 00 = 0
    expected_unique = (
        ('ACTG', 2),  # 00 10 01 11 = 2*16 +4 +3 = 39
        ('AACG', 3),  # 00 00 10 11 = 11; collision with only 1st kmer
        ('AGAC', 3),  # 00 11 00 10 3*16 +2 = 50; collides with 2nd and 3rd
    )
    for kmer, n_unique in expected_unique:
        double.count(kmer)
        assert double.n_unique_kmers() == n_unique
def test_bloom_c_2():  # simple one
    """Check unique k-mer counts for a one-table and a two-table Hashbits."""
    K = 4

    # one hashtable only, so no bloom filter
    one_table = khmer._Hashbits(K, [11])
    one_table.count('AAAA')  # 00 00 00 00 = 0
    # every later k-mer leaves the unique count at 2:
    #   ACTG  00 10 01 11
    #   AACG  00 00 10 11 = 11  (collision with 1st kmer)
    #   AGAC  00 11 00 10       (collision with 2nd kmer)
    for kmer in ['ACTG', 'AACG', 'AGAC']:
        one_table.count(kmer)
        assert one_table.n_unique_kmers() == 2

    # two hashtables, sizes 11 and 13
    two_tables = khmer._Hashbits(K, [11, 13])
    two_tables.count('AAAA')  # 00 00 00 00 = 0
    steps = [
        ('ACTG', 2),  # 00 10 01 11 = 2*16 +4 +3 = 39
        ('AACG', 3),  # 00 00 10 11 = 11; collision with only 1st kmer
        ('AGAC', 3),  # 00 11 00 10 3*16 +2 = 50; collides with 2nd and 3rd
    ]
    for kmer, want in steps:
        two_tables.count(kmer)
        assert two_tables.n_unique_kmers() == want
def test_save_load_gz():
    """Save a counting table to a '.gz'-named file and reload it.

    The reloaded table must yield the same abundance distribution as the
    table that was saved.
    """
    inpath = utils.get_test_data('random-20-a.fa')
    savepath = utils.get_temp_filename('tempcountingsave2.ht.gz')

    sizes = list(PRIMES_1m) + [1000005]

    saved = khmer.CountingHash(12, sizes)
    saved.consume_fasta(inpath)
    saved.save(savepath)

    loaded = khmer.CountingHash(12, sizes)
    try:
        loaded.load(savepath)
    except IOError as exc:
        assert 0, 'Should not produce an IOError: ' + str(exc)

    # a fresh tracking table is built for each distribution
    dist_saved = saved.abundance_distribution(
        inpath, khmer._Hashbits(12, sizes))
    dist_loaded = loaded.abundance_distribution(
        inpath, khmer._Hashbits(12, sizes))

    assert sum(dist_saved) == 3966, sum(dist_saved)
    assert dist_saved == dist_loaded, (dist_saved, dist_loaded)
def test_save_load_gz():
    """Round-trip a _CountingHash through a '.gz'-named save file.

    Loading must not raise OSError, and the loaded table must reproduce the
    abundance distribution of the table that was saved.
    """
    inpath = utils.get_test_data('random-20-a.fa')
    savepath = utils.get_temp_filename('tempcountingsave2.ht.gz')

    sizes = list(PRIMES_1m) + [1000005]

    original = khmer._CountingHash(12, sizes)
    original.consume_fasta(inpath)
    original.save(savepath)

    restored = khmer._CountingHash(12, sizes)
    try:
        restored.load(savepath)
    except OSError as exc:
        assert 0, 'Should not produce an OSError: ' + str(exc)

    # a fresh tracking table is built for each distribution
    expected = original.abundance_distribution(
        inpath, khmer._Hashbits(12, sizes))
    actual = restored.abundance_distribution(
        inpath, khmer._Hashbits(12, sizes))

    assert sum(expected) == 3966, sum(expected)
    assert expected == actual, (expected, actual)
def test_bloom_c_2():  # simple one
    """Verify n_unique_kmers() after each count, for one and two tables."""
    K = 4

    # only 1 hashtable, so no bloom filter
    lone = khmer._Hashbits(K, [11])
    lone.count("AAAA")  # 00 00 00 00 = 0
    # ACTG = 00 10 01 11
    # AACG = 00 00 10 11 = 11 -- collision with 1st kmer
    # AGAC = 00 11 00 10      -- collision with 2nd kmer
    for kmer in ("ACTG", "AACG", "AGAC"):
        lone.count(kmer)
        assert lone.n_unique_kmers() == 2

    # two hashtables with 11,13
    pair = khmer._Hashbits(K, [11, 13])
    pair.count("AAAA")  # 00 00 00 00 = 0
    kmers = ("ACTG", "AACG", "AGAC")
    # ACTG = 39 (2*16 +4 +3); AACG = 11, collision with only 1st kmer;
    # AGAC = 50 (3*16 +2), collision with both 2nd and 3rd kmers
    uniques = (2, 3, 3)
    for kmer, n_unique in zip(kmers, uniques):
        pair.count(kmer)
        assert pair.n_unique_kmers() == n_unique
def test_extract_unique_paths_2():
    """After consuming the leading 10-mer, the path omits the first base."""
    seq = 'ATGGAGAGACACAGATAGACAGGAGTGGCGATG'
    expected = ['TGGAGAGACACAGATAGACAGGAGTGGCGATG']  # all but the 1st k-mer

    kh = khmer._Hashbits(10, [5, 7, 11, 13])
    kh.consume('ATGGAGAGAC')

    paths = kh.extract_unique_paths(seq, 10, 1)
    print(paths)
    assert paths == expected
def test_abund_dist_gz_bigcount():
    """Check that a gzipped bigcount table loads with counts above 255.

    A counting table is built by running ``load-into-counting.py``, gzipped
    by hand, and reloaded; the abundance distribution of the reloaded table
    must contain a non-zero entry at an index greater than 255.
    """
    infile = utils.get_temp_filename('test.fa')
    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
    outfile = utils.get_temp_filename('test_ct.gz')
    script = scriptpath('load-into-counting.py')
    htfile = utils.get_temp_filename('test_ct')
    args = ['-x', str(1e7), '-N', str(2), '-k', str(2), htfile, infile]
    utils.runscript(script, args)  # create a bigcount table
    assert os.path.exists(htfile)

    # read the table back; `with` closes the handle even if read() raises
    with open(htfile, 'rb') as f_in:
        data = f_in.read()

    # compress the created bigcount table
    with gzip.open(outfile, 'wb') as f_out:
        f_out.write(data)

    # load the compressed bigcount table
    counting_hash = khmer.load_counting_hash(outfile)
    hashsizes = counting_hash.hashsizes()
    kmer_size = counting_hash.ksize()
    tracking = khmer._Hashbits(kmer_size, hashsizes)

    # calculate abundance distribution for compressed bigcount table
    abundances = counting_hash.abundance_distribution(infile, tracking)

    # check if abundance is > 255
    # if ok gzipped bigcount was loaded correctly
    flag = False
    for abundance, count in enumerate(abundances):
        # formatted string prints "index value" on Python 2 and Python 3
        print('%s %s' % (abundance, count))
        if abundance > 255 and count > 0:
            flag = True
            break
    assert flag
def test_save_load_tagset_trunc():
    """Check that loading a truncated tagset file raises OSError.

    A tagset with two tags is saved, then the file is rewritten with every
    proper prefix of its contents; each load attempt is expected to fail.
    """
    htable = khmer._Hashbits(32, [1])

    outfile = utils.get_temp_filename('tagset')

    htable.add_tag('A' * 32)
    htable.add_tag('G' * 32)
    htable.save_tagset(outfile)

    # read the complete tagset file; `with` closes it even if read() raises
    with open(outfile, 'rb') as fp:
        data = fp.read()

    # truncate tagset file to each prefix length in turn...
    for i in range(len(data)):
        with open(outfile, 'wb') as fp:
            fp.write(data[:i])

        # try loading it...
        try:
            htable.load_tagset(outfile)
            assert 0, "this test should fail"
        except OSError as err:
            print(str(err), i)

    # try loading once more; after the loop the file still holds the last
    # content written to it
    try:
        htable.load_tagset(outfile)
        assert 0, "this test should fail"
    except OSError:
        pass
def main():
    """Write a CSV row (k-mer, count) for k-mers found in the input files.

    Counts come from the counting table named on the command line. A
    separate tracking table records k-mers already written, so a k-mer the
    tracking table reports as present is skipped.
    """
    info('count-kmers.py', ['counting'])
    args = get_parser().parse_args()

    print('hashtable from', args.input_counting_table_filename,
          file=sys.stderr)
    counting_hash = khmer.load_counting_hash(
        args.input_counting_table_filename)

    ksize = counting_hash.ksize()
    tracking = khmer._Hashbits(  # pylint: disable=protected-access
        ksize, counting_hash.hashsizes())

    # default to standard output when no output file was given
    if args.output_file is None:
        args.output_file = sys.stdout
    writer = csv.writer(args.output_file)

    for filename in args.input_sequence_filenames:
        for record in screed.open(filename):
            sequence = record.sequence.replace('N', 'A')
            for start in range(len(sequence) - ksize + 1):
                kmer = sequence[start:start + ksize]
                if tracking.get(kmer):
                    continue  # already reported
                tracking.count(kmer)
                writer.writerow([kmer, str(counting_hash.get(kmer))])

    print('Total number of unique k-mers: {0}'.format(
        counting_hash.n_unique_kmers()), file=sys.stderr)
def test_save_load_merge_nexist(self):
    """Loading a subset partitionmap from a missing path raises IOError."""
    ht = khmer._Hashbits(20, [1])
    try:
        # the return value is irrelevant; only the exception matters
        ht.load_subset_partitionmap('this does not exist')
    except IOError as e:
        print(str(e))
    else:
        assert 0, "this should not succeed"
def test_abund_dist_gz_bigcount():
    """Check that a gzipped bigcount table loads with counts above 255.

    A counting table is built by running ``load-into-counting.py``, gzipped
    by hand, and reloaded without raising IOError; the abundance
    distribution of the reloaded table must contain a non-zero entry at an
    index greater than 255.
    """
    infile = utils.get_temp_filename('test.fa')
    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
    outfile = utils.get_temp_filename('test_ct.gz')
    script = 'load-into-counting.py'
    htfile = utils.get_temp_filename('test_ct')
    args = ['-x', str(1e7), '-N', str(2), '-k', str(2), htfile, infile]
    utils.runscript(script, args)  # create a bigcount table
    assert os.path.exists(htfile)

    # read the table back; `with` closes the handle even if read() raises
    with open(htfile, 'rb') as f_in:
        data = f_in.read()

    # compress the created bigcount table
    with gzip.open(outfile, 'wb') as f_out:
        f_out.write(data)

    # load the compressed bigcount table
    try:
        counting_hash = khmer.load_counting_hash(outfile)
    except IOError as err:
        assert 0, 'Should not produce IOError: ' + str(err)
    hashsizes = counting_hash.hashsizes()
    kmer_size = counting_hash.ksize()
    tracking = khmer._Hashbits(kmer_size, hashsizes)

    # calculate abundance distribution for compressed bigcount table
    abundances = counting_hash.abundance_distribution(infile, tracking)

    # check if abundance is > 255
    # if ok gzipped bigcount was loaded correctly
    flag = False
    for abundance, count in enumerate(abundances):
        print(abundance, count)
        if abundance > 255 and count > 0:
            flag = True
            break
    assert flag
def test__get_set_tag_density():
    """Setting the tag density to 2 is reflected by the getter."""
    new_density = 2
    table = khmer._Hashbits(32, [1])

    # the starting density must differ, or the check below proves nothing
    assert table._get_tag_density() != new_density

    table._set_tag_density(new_density)
    assert table._get_tag_density() == new_density
def test_save_load_tagset_trunc():
    """Check that loading a truncated tagset file raises OSError.

    A tagset with two tags is saved, then the file is rewritten with every
    proper prefix of its contents; each load attempt is expected to fail.
    """
    ht = khmer._Hashbits(32, [1])

    outfile = utils.get_temp_filename('tagset')

    ht.add_tag('A' * 32)
    ht.add_tag('G' * 32)
    ht.save_tagset(outfile)

    # read the complete tagset file; `with` closes it even if read() raises
    with open(outfile, 'rb') as fp:
        data = fp.read()

    # truncate tagset file to each prefix length in turn...
    for i in range(len(data)):
        with open(outfile, 'wb') as fp:
            fp.write(data[:i])

        # try loading it...
        try:
            ht.load_tagset(outfile)
            assert 0, "this test should fail"
        except OSError as err:
            print(str(err), i)

    # try loading once more; after the loop the file still holds the last
    # content written to it
    try:
        ht.load_tagset(outfile)
        assert 0, "this test should fail"
    except OSError:
        pass
def test_save_load_merge_nexist(self):
    """Loading a subset partitionmap from a missing path raises OSError."""
    ht = khmer._Hashbits(20, [1])
    try:
        # the return value is irrelevant; only the exception matters
        ht.load_subset_partitionmap('this does not exist')
    except OSError as e:
        print(str(e))
    else:
        assert 0, "this should not succeed"
def main():
    """Report k-mer counts from a saved counting table as CSV.

    Every k-mer window of every input sequence is looked up; a row
    (k-mer, count) is written for each k-mer the tracking table has not
    yet recorded.
    """
    info('count-kmers.py', ['counting'])
    args = get_parser().parse_args()

    table_path = args.input_counting_table_filename
    print('hashtable from', table_path, file=sys.stderr)
    counting_hash = khmer.load_counting_hash(table_path)

    kmer_size = counting_hash.ksize()
    hashsizes = counting_hash.hashsizes()
    seen = khmer._Hashbits(  # pylint: disable=protected-access
        kmer_size, hashsizes)

    # write to standard output unless an output file was supplied
    if args.output_file is None:
        args.output_file = sys.stdout
    writer = csv.writer(args.output_file)

    def windows(sequence):
        """Yield each kmer_size-long substring of *sequence*, in order."""
        for offset in range(len(sequence) - kmer_size + 1):
            yield sequence[offset:offset + kmer_size]

    for filename in args.input_sequence_filenames:
        for record in screed.open(filename):
            for kmer in windows(record.sequence.replace('N', 'A')):
                if not seen.get(kmer):
                    seen.count(kmer)
                    writer.writerow([kmer, str(counting_hash.get(kmer))])

    total = counting_hash.n_unique_kmers()
    print('Total number of unique k-mers: {0}'.format(total),
          file=sys.stderr)
def test_extract_unique_paths_1():
    """After consuming the trailing 10-mer, the path omits the last base."""
    seq = "ATGGAGAGACACAGATAGACAGGAGTGGCGATG"
    expected = ["ATGGAGAGACACAGATAGACAGGAGTGGCGAT"]  # all but the last k-mer

    kh = khmer._Hashbits(10, [5, 7, 11, 13])
    kh.consume("AGTGGCGATG")

    paths = kh.extract_unique_paths(seq, 10, 1)
    print(paths)
    assert paths == expected
def test__get_set_tag_density():
    """_set_tag_density(2) changes the value seen by _get_tag_density()."""
    wanted = 2
    hashbits = khmer._Hashbits(32, [1])

    before = hashbits._get_tag_density()
    assert before != wanted  # otherwise the setter would not be exercised

    hashbits._set_tag_density(wanted)
    after = hashbits._get_tag_density()
    assert after == wanted
def test_find_stoptags():
    """Stop tag 'AAAAA' is reported at each position in A- and T-runs."""
    table = khmer._Hashbits(5, [1])
    table.add_stop_tag("AAAAA")

    # (sequence, expected stop-tag positions)
    cases = (
        ("AAAAA", [0]),
        ("AAAAAA", [0, 1]),
        ("TTTTT", [0]),
        ("TTTTTT", [0, 1]),
    )
    for seq, positions in cases:
        assert table.identify_stoptags_by_position(seq) == positions
def test_count_A(self):
    """The all-A file gives one distribution entry, at index 10."""
    fasta_path = utils.get_test_data('all-A.fa')

    distribution = self.kh.abundance_distribution(
        fasta_path, khmer._Hashbits(4, [5]))

    assert sum(distribution) == 1
    assert distribution[10] == 1
def test_find_stoptags():
    """identify_stoptags_by_position() finds 'AAAAA' in A- and T-runs."""
    hashbits = khmer._Hashbits(5, [1])
    hashbits.add_stop_tag("AAAAA")

    find = hashbits.identify_stoptags_by_position
    # runs of length 5 hold one k-mer, runs of length 6 hold two
    for seq in ("AAAAA", "TTTTT"):
        assert find(seq) == [0] if seq == "AAAAA" else True
    assert find("AAAAAA") == [0, 1]
    assert find("TTTTT") == [0]
    assert find("TTTTTT") == [0, 1]
def test_count_kmer_degree():
    """Check kmer_degree() for four 4-mers after consuming all-A.fa."""
    table = khmer._Hashbits(4, [3, 5])
    table.consume_fasta(utils.get_test_data('all-A.fa'))

    # (k-mer, expected degree)
    expectations = (
        ('AAAA', 2),
        ('AAAT', 1),
        ('AATA', 0),
        ('TAAA', 1),
    )
    for kmer, degree in expectations:
        assert table.kmer_degree(kmer) == degree
def test_tagset_ksize_check():
    """Loading the k32 tagset into a k=31 table must raise OSError."""
    table = khmer._Hashbits(31, [1])
    tagset_path = utils.get_test_data('goodversion-k32.tagset')

    try:
        table.load_tagset(tagset_path)
    except OSError as exc:
        print(str(exc))
    else:
        assert 0, "this should fail"
def test_tagset_ksize_check():
    """A k=31 Hashbits must refuse the 'goodversion-k32.tagset' file."""
    hashbits = khmer._Hashbits(31, [1])
    path = utils.get_test_data("goodversion-k32.tagset")

    try:
        hashbits.load_tagset(path)
    except OSError as error:
        print(str(error))
    else:
        # reaching here means the load succeeded
        assert 0, "this should fail"
def test_count_kmer_degree():
    """kmer_degree() returns the expected degree for each probe 4-mer."""
    hashbits = khmer._Hashbits(4, [3, 5])
    hashbits.consume_fasta(utils.get_test_data("all-A.fa"))

    probes = ["AAAA", "AAAT", "AATA", "TAAA"]
    degrees = [2, 1, 0, 1]
    for kmer, expected in zip(probes, degrees):
        assert hashbits.kmer_degree(kmer) == expected
def test_extract_unique_paths_0():
    """The whole sequence is unique before consume(); nothing is after."""
    seq = 'ATGGAGAGACACAGATAGACAGGAGTGGCGATG'
    kh = khmer._Hashbits(10, [5, 7, 11, 13])

    # empty table: the full sequence comes back as a single path
    before = kh.extract_unique_paths(seq, 10, 1)
    assert before == [seq]

    # once the sequence is consumed no path is returned
    kh.consume(seq)
    after = kh.extract_unique_paths(seq, 10, 1)
    assert not after
def test_stop_tags_filetype_check():
    """load_stop_tags() on a '.tagset' file must raise IOError."""
    table = khmer._Hashbits(31, [1])
    tagset_path = utils.get_test_data('goodversion-k32.tagset')

    try:
        table.load_stop_tags(tagset_path)
    except IOError as exc:
        print(str(exc))
    else:
        assert 0, "this should fail"
def test_count_kmer_degree():
    """Check the degree of four 4-mers once all-A.fa has been consumed."""
    fasta_path = utils.get_test_data('all-A.fa')
    presence = khmer._Hashbits(4, [3, 5])
    presence.consume_fasta(fasta_path)

    degree_of = presence.kmer_degree
    assert degree_of('AAAA') == 2
    assert degree_of('AAAT') == 1
    assert degree_of('AATA') == 0
    assert degree_of('TAAA') == 1
def test_tagset_filetype_check():
    """load_tagset() on a '.stoptags' file must raise OSError."""
    table = khmer._Hashbits(31, [1])
    stoptags_path = utils.get_test_data('goodversion-k32.stoptags')

    try:
        table.load_tagset(stoptags_path)
    except OSError as exc:
        print(str(exc))
    else:
        assert 0, "this should fail"
def test_save_load_tagset_notexist():
    """load_tagset() on a temp path never written to must raise OSError."""
    table = khmer._Hashbits(32, [1])
    missing_path = utils.get_temp_filename('tagset')

    try:
        table.load_tagset(missing_path)
    except OSError as exc:
        print(str(exc))
    else:
        assert 0, "this test should fail"
def test_tagset_filetype_check():
    """A stoptags file handed to load_tagset() is rejected with OSError."""
    hashbits = khmer._Hashbits(31, [1])
    wrong_type_file = utils.get_test_data('goodversion-k32.stoptags')

    try:
        hashbits.load_tagset(wrong_type_file)
    except OSError as error:
        print(str(error))
    else:
        # reaching here means the load succeeded
        assert 0, "this should fail"
def test_save_load_tagset_notexist():
    """Loading a tagset from a path nothing was saved to raises OSError."""
    hashbits = khmer._Hashbits(32, [1])
    path = utils.get_temp_filename('tagset')

    try:
        hashbits.load_tagset(path)
    except OSError as error:
        print(str(error))
    else:
        # reaching here means the load succeeded
        assert 0, "this test should fail"
def test_count_within_radius_simple():
    """Exactly one k-mer is within radius 1 and radius 10 of 'AAAA'."""
    table = khmer._Hashbits(4, [3, 5])
    print(table.consume_fasta(utils.get_test_data('all-A.fa')))

    for radius in (1, 10):
        found = table.count_kmers_within_radius('AAAA', radius)
        assert found == 1
def test_stoptags_file_version_check():
    """Loading 'badversion-k32.stoptags' must raise OSError."""
    table = khmer._Hashbits(32, [1])
    bad_file = utils.get_test_data('badversion-k32.stoptags')

    try:
        table.load_stop_tags(bad_file)
    except OSError as exc:
        print(str(exc))
    else:
        assert 0, "this should fail"
def test_hashbits_file_version_check():
    """Loading 'badversion-k12.htable' must raise OSError."""
    table = khmer._Hashbits(12, [1])
    bad_file = utils.get_test_data('badversion-k12.htable')

    try:
        table.load(bad_file)
    except OSError as exc:
        print(str(exc))
    else:
        assert 0, "this should fail"
def test_tagset_file_version_check():
    """Loading 'badversion-k32.tagset' must raise OSError."""
    table = khmer._Hashbits(32, [1])
    bad_file = utils.get_test_data('badversion-k32.tagset')

    try:
        table.load_tagset(bad_file)
    except OSError as exc:
        print(str(exc))
    else:
        assert 0, "this should fail"
def test_count_within_radius_simple():
    """count_kmers_within_radius('AAAA', r) is 1 for r = 1 and r = 10."""
    fasta_path = utils.get_test_data('all-A.fa')
    hashbits = khmer._Hashbits(4, [3, 5])

    consumed = hashbits.consume_fasta(fasta_path)
    print(consumed)

    small_radius = hashbits.count_kmers_within_radius('AAAA', 1)
    assert small_radius == 1

    large_radius = hashbits.count_kmers_within_radius('AAAA', 10)
    assert large_radius == 1
def test_tagset_file_version_check():
    """Loading 'badversion-k32.tagset' must raise IOError."""
    hashbits = khmer._Hashbits(32, [1])
    path = utils.get_test_data('badversion-k32.tagset')

    try:
        hashbits.load_tagset(path)
    except IOError as error:
        print(str(error))
    else:
        # reaching here means the load succeeded
        assert 0, "this should fail"
def test_fakelump_load_stop_tags_notexist():
    """load_stop_tags() on a temp path never written to raises OSError."""
    missing_path = utils.get_temp_filename('fakelump.fa.stopfoo')

    # ok, now try loading these stop tags; should fail.
    table = khmer._Hashbits(32, [5, 7, 11, 13])
    try:
        table.load_stop_tags(missing_path)
    except OSError:
        pass
    else:
        assert 0, "this test should fail"
def test_fakelump_load_stop_tags_notexist():
    """Stop tags cannot be loaded from a file that was never created."""
    stoptags_path = utils.get_temp_filename("fakelump.fa.stopfoo")
    hashbits = khmer._Hashbits(32, [5, 7, 11, 13])

    # the load is expected to raise; a clean return is the failure case
    try:
        hashbits.load_stop_tags(stoptags_path)
    except OSError:
        return
    assert 0, "this test should fail"
def test_hashbits_file_type_check():
    """A file saved by _CountingHash must not load into a _Hashbits."""
    savepath = utils.get_temp_filename('tempcountingsave0.ct')
    khmer._CountingHash(12, [1]).save(savepath)

    table = khmer._Hashbits(12, [1])
    try:
        table.load(savepath)
    except OSError as exc:
        print(str(exc))
    else:
        assert 0, "this should fail"
def test_hashbits_file_type_check():
    """_Hashbits.load() rejects a save file written by _CountingHash."""
    counting = khmer._CountingHash(12, [1])
    counting_file = utils.get_temp_filename('tempcountingsave0.ct')
    counting.save(counting_file)

    hashbits = khmer._Hashbits(12, [1])
    try:
        hashbits.load(counting_file)
    except OSError as error:
        print(str(error))
    else:
        # reaching here means the wrong file type was accepted
        assert 0, "this should fail"
def test_filter_if_present():
    """filter_if_present() leaves exactly one record, named "3".

    The table consumes the mask file, then filters the input file into a
    temporary output file whose records are read back and checked.
    """
    ht = khmer._Hashbits(32, [3, 5])

    maskfile = utils.get_test_data("filter-test-A.fa")
    inputfile = utils.get_test_data("filter-test-B.fa")
    outfile = utils.get_temp_filename("filter")

    ht.consume_fasta(maskfile)
    ht.filter_if_present(inputfile, outfile)

    # materialise the records inside the `with` so the handle is closed
    # only after the iterator has been fully consumed
    with open(outfile) as fp:
        records = list(fasta_iter(fp))

    assert len(records) == 1
    assert records[0]["name"] == "3"
def test_save_load():
    """Save a counting table, load it into a new one, and compare them.

    Both tables must produce the same abundance distribution for the input
    file, and that distribution must sum to 3966.
    """
    inpath = utils.get_test_data('random-20-a.fa')
    savepath = utils.get_temp_filename('tempcountingsave0.ht')

    sizes = list(PRIMES_1m) + [1000005]

    saved = khmer.CountingHash(12, sizes)
    saved.consume_fasta(inpath)
    saved.save(savepath)

    loaded = khmer.CountingHash(12, sizes)
    loaded.load(savepath)

    # a fresh tracking table is built for each distribution
    dist_saved = saved.abundance_distribution(
        inpath, khmer._Hashbits(12, sizes))
    dist_loaded = loaded.abundance_distribution(
        inpath, khmer._Hashbits(12, sizes))

    assert sum(dist_saved) == 3966, sum(dist_saved)
    assert dist_saved == dist_loaded, (dist_saved, dist_loaded)
def test_filter_if_present():
    """filter_if_present() leaves exactly one record, named '3'.

    The table consumes the mask file, then filters the input file into a
    temporary output file whose records are read back and checked.
    """
    ht = khmer._Hashbits(32, [3, 5])

    maskfile = utils.get_test_data('filter-test-A.fa')
    inputfile = utils.get_test_data('filter-test-B.fa')
    outfile = utils.get_temp_filename('filter')

    ht.consume_fasta(maskfile)
    ht.filter_if_present(inputfile, outfile)

    # materialise the records inside the `with` so the handle is closed
    # only after the iterator has been fully consumed
    with open(outfile) as fp:
        records = list(fasta_iter(fp))

    assert len(records) == 1
    assert records[0]['name'] == '3'
def test_filter_if_present():
    """filter_if_present() leaves exactly one record, named '3'.

    The table consumes the mask file, then filters the input file into a
    temporary output file whose records are read back and checked.
    """
    htable = khmer._Hashbits(32, [3, 5])

    maskfile = utils.get_test_data('filter-test-A.fa')
    inputfile = utils.get_test_data('filter-test-B.fa')
    outfile = utils.get_temp_filename('filter')

    htable.consume_fasta(maskfile)
    htable.filter_if_present(inputfile, outfile)

    # materialise the records inside the `with` so the handle is closed
    # only after the iterator has been fully consumed
    with open(outfile) as fp:
        records = list(fasta_iter(fp))

    assert len(records) == 1
    assert records[0]['name'] == '3'
def main():
    """Count the k-mers of one sequence file and write them as CSV.

    A new counting table is filled from the input file by ``args.threads``
    worker threads sharing one read parser. The file is then re-read and a
    row (k-mer, count) is written for each k-mer the tracking table has not
    yet recorded.
    """
    info('count-kmers-single.py', ['counting'])
    args = get_parser().parse_args()

    check_input_files(args.input_sequence_filename, False)

    print('making k-mer counting table', file=sys.stderr)
    counting_hash = khmer.CountingHash(args.ksize, args.max_tablesize,
                                       args.n_tables)
    # @CTB counting_hash.set_use_bigcount(args.bigcount)

    ksize = counting_hash.ksize()
    seen = khmer._Hashbits(  # pylint: disable=protected-access
        ksize, counting_hash.hashsizes())

    print('kmer_size: %s' % counting_hash.ksize(), file=sys.stderr)
    print('k-mer counting table sizes: %s' % (counting_hash.hashsizes(),),
          file=sys.stderr)

    # write to standard output unless an output file was supplied
    if args.output_file is None:
        args.output_file = sys.stdout
    writer = csv.writer(args.output_file)

    # start loading
    rparser = khmer.ReadParser(args.input_sequence_filename)
    workers = []
    print('consuming input, round 1 -- %s' % (args.input_sequence_filename),
          file=sys.stderr)
    for _ in range(args.threads):
        worker = threading.Thread(
            target=counting_hash.consume_fasta_with_reads_parser,
            args=(rparser, ))
        workers.append(worker)
        worker.start()

    # wait until every worker has finished before reading counts
    for worker in workers:
        worker.join()

    for record in screed.open(args.input_sequence_filename):
        sequence = record.sequence.replace('N', 'A')
        for start in range(len(sequence) - ksize + 1):
            kmer = sequence[start:start + ksize]
            if seen.get(kmer):
                continue  # already reported
            seen.count(kmer)
            writer.writerow([kmer, str(counting_hash.get(kmer))])

    print('Total number of unique k-mers: {0}'.format(
        counting_hash.n_unique_kmers()), file=sys.stderr)
def test_load_partitioned():
    """After consume_partitioned_fasta(), three known 32-mers are present."""
    table = khmer._Hashbits(32, [1])
    table.consume_partitioned_fasta(utils.get_test_data('combine_parts_1.fa'))
    assert table.count_partitions() == (2, 0)

    probes = (
        "CATGCAGAAGTTCCGCAACCATACCGTTCAGT",
        "CAAATGTACATGCACTTAAAATCATCCAGCCG",
        # last 32 bases of a longer sequence
        "CATGCAGAAGTTCCGCAACCATACCGTTCAGTTCCTGGTGGCTA"[-32:],
    )
    for kmer in probes:
        assert table.get(kmer)
def test_consume_absentfasta_with_reads_parser():
    """consume_fasta_with_reads_parser() fails without a usable parser.

    Calling it with no argument must raise TypeError; building a parser on
    the 'empty-file' test data and consuming it must raise OSError or
    ValueError.
    """
    table = khmer._Hashbits(31, [1])

    # no parser argument at all
    try:
        table.consume_fasta_with_reads_parser()
    except TypeError as exc:
        print(str(exc))
    else:
        assert 0, "this should fail"

    # parser built on an empty file; either step below may raise
    try:
        parser = ReadParser(utils.get_test_data('empty-file'))
        table.consume_fasta_with_reads_parser(parser)
    except (OSError, ValueError) as exc:
        print(str(exc))
    else:
        assert 0, "this should fail"
def test_load_partitioned():
    """consume_partitioned_fasta() loads two partitions and known k-mers."""
    fasta_path = utils.get_test_data('combine_parts_1.fa')

    hashbits = khmer._Hashbits(32, [1])
    hashbits.consume_partitioned_fasta(fasta_path)
    assert hashbits.count_partitions() == (2, 0)

    long_seq = "CATGCAGAAGTTCCGCAACCATACCGTTCAGTTCCTGGTGGCTA"
    expected_present = [
        "CATGCAGAAGTTCCGCAACCATACCGTTCAGT",
        "CAAATGTACATGCACTTAAAATCATCCAGCCG",
        long_seq[-32:],  # trailing 32-mer of the longer sequence
    ]
    for kmer in expected_present:
        assert hashbits.get(kmer)
def test_not_output_unassigned(self):
    """output_partitions(..., False) writes no records for this input."""
    import screed

    source = utils.get_test_data('random-20-a.fa')
    destination = utils.get_temp_filename('parttest')

    table = khmer._Hashbits(21, [5, 7, 11, 13])
    table.consume_fasta_and_tag(source)
    table.output_partitions(source, destination, False)

    n_input = len(list(screed.open(source)))
    n_output = len(list(screed.open(destination)))

    # the input must be non-empty for the empty output to be meaningful
    assert n_input > 0
    assert n_output == 0, n_output