def test_count_first_pass():
    """Check that first-pass counting reproduces the stored reference table.

    Runs ``count_first_pass`` on the minitrio proband reads with the stored
    mask, saves the resulting table to a temporary ``.ct`` file, and compares
    that file against the reference counts file with ``filecmp.cmp``.
    """
    mask = Nodetable.load(data_file('minitrio/mask.nt'))
    counts = Counttable(31, 1e4, 4)
    reads = data_file('minitrio/trio-proband.fq.gz')
    count_first_pass([reads], counts, mask)

    # The comparison has to happen inside the ``with`` block: the temporary
    # file is removed as soon as the context manager exits.
    with NamedTemporaryFile(suffix='.ct') as scratch:
        counts.save(scratch.name)
        reference = data_file('minitrio/trio-proband-mask-counts.ct')
        files_match = filecmp.cmp(reference, scratch.name)
        assert files_match is True
def get_unique_seeds(recordstream, seedsize):
    """Yield every distinct seed found in the sequences of `recordstream`.

    Each k-mer obtained from ``record.sequence`` is reduced with
    ``kevlar.revcommin`` and de-duplicated on that reduced form. The value
    yielded is the k-mer as first encountered, not the reduced form.

    `recordstream` is any iterable of objects with a ``sequence`` attribute;
    `seedsize` is passed as the first argument of the ``Counttable`` used to
    split sequences into k-mers.
    """
    # Only get_kmers() is called on this table; it is never added to.
    splitter = Counttable(seedsize, 1, 1)
    seen = set()
    for record in recordstream:
        for seed in splitter.get_kmers(record.sequence):
            canonical = kevlar.revcommin(seed)
            if canonical in seen:
                continue
            seen.add(canonical)
            yield seed
def test_save_load(Tabletype):
    """Round-trip a table through ``save`` and the loader matching its type.

    A single k-mer is added, the table is saved to a temporary file, and the
    file is loaded back with the loader that corresponds to the concrete type
    of the table. The reloaded table must report the same count.
    """
    table = Tabletype(5)
    table_type = type(table)
    path = utils.get_temp_filename('tablesave.out')

    # test add(dna)
    table.add("ATGGC")
    assert table.get("ATGGC") == 1

    table.save(path)

    # should we provide a single load function here? yes, probably. @CTB
    # Ordered (type, loader) pairs; the first pair whose type equals the
    # table's concrete type is used.
    loaders = (
        (_Countgraph, khmer.load_countgraph),
        (Counttable, Counttable.load),
        (_SmallCountgraph, lambda name: khmer.load_countgraph(name, small=True)),
        (SmallCounttable, SmallCounttable.load),
        (_Nodegraph, khmer.load_nodegraph),
        (Nodetable, Nodetable.load),
    )
    for candidate, loader in loaders:
        if table_type == candidate:
            loaded = loader(path)
            break
    else:
        raise Exception("unknown tabletype")

    assert loaded.get('ATGGC') == 1
def test_count_second_pass():
    """Check the abundance histogram returned by ``count_second_pass``.

    Loads the stored first-pass counts for the minitrio proband, runs
    ``count_second_pass`` over the proband reads, and compares the returned
    mapping against the expected key/value pairs.
    """
    # NOTE(review): the mask is loaded but never passed to count_second_pass;
    # the load is kept so the test still fails if the mask file is unreadable.
    mask = Nodetable.load(data_file('minitrio/mask.nt'))
    counts = Counttable.load(
        data_file('minitrio/trio-proband-mask-counts.ct')
    )
    reads = data_file('minitrio/trio-proband.fq.gz')

    expected = {
        10: 6,
        11: 10,
        12: 12,
        13: 18,
        14: 16,
        15: 11,
        16: 9,
        17: 9,
        18: 11,
        19: 8,
        20: 9,
        21: 7,
        22: 3,
    }
    observed = count_second_pass([reads], counts)
    assert observed == expected
def test_assumptions(kmer):
    """Check that a k-mer and ``kevlar.revcom(kmer)`` hash identically.

    Both ``Counttable.hash`` and the first element of
    ``Counttable.get_kmer_hashes`` must agree for the two strings.
    """
    table = Counttable(27, 1e5, 2)
    flipped = kevlar.revcom(kmer)

    assert table.hash(kmer) == table.hash(flipped)

    first_forward = table.get_kmer_hashes(kmer)[0]
    first_flipped = table.get_kmer_hashes(flipped)[0]
    assert first_forward == first_flipped