Esempio n. 1
0
def test_save_load(Tabletype):
    """Check that a stored k-mer value survives a save/load round trip.

    ``Tabletype`` is called with the single argument 5 (presumably the k-mer
    size; the test k-mer is five bases long -- confirm against the fixture).
    One k-mer is added, the table is saved to a temporary file, and the file
    is read back with the loader matching the table's exact type.

    Raises:
        Exception: if the constructed table is not one of the six table
            types this test knows how to reload.
    """
    table = Tabletype(5)
    table_type = type(table)
    savefile = utils.get_temp_filename('tablesave.out')

    # test add(dna); the return value of add() is not checked, so it is
    # not bound to a name.
    table.add("ATGGC")
    assert table.get("ATGGC") == 1

    table.save(savefile)

    # should we provide a single load function here? yes, probably. @CTB
    # An exact type match is wanted here (not isinstance), so compare the
    # type objects by identity.
    if table_type is _Countgraph:
        loaded = khmer.load_countgraph(savefile)
    elif table_type is Counttable:
        loaded = Counttable.load(savefile)
    elif table_type is _SmallCountgraph:
        loaded = khmer.load_countgraph(savefile, small=True)
    elif table_type is SmallCounttable:
        loaded = SmallCounttable.load(savefile)
    elif table_type is _Nodegraph:
        loaded = khmer.load_nodegraph(savefile)
    elif table_type is Nodetable:
        loaded = Nodetable.load(savefile)
    else:
        raise Exception("unknown tabletype")

    # The reloaded table must report the same value as the original did.
    assert loaded.get('ATGGC') == 1
Esempio n. 2
0
def test_load_sample_seqfile_withmask(count, smallcount, count_masked,
                                      kpresent, kabsent):
    """Load a sample sketch with a k-mer mask and check selected k-mers.

    A ``Nodetable`` mask is filled from one fixed sequence and handed to
    ``kevlar.count.load_sample_seqfile`` together with the ``count``,
    ``smallcount`` and ``count_masked`` settings supplied by the caller.
    The resulting sketch must report a positive value for ``kpresent`` and
    zero for both ``kabsent`` and one fixed k-mer.
    """
    masked_sequence = (
        'CACCAATCCGTACGGAGAGCCGTATATATAGACTGCTATACTATTGGATCGTACGGGGC'
    )
    kmer_mask = Nodetable(21, 1e4, 4)
    kmer_mask.consume(masked_sequence)

    seqfiles = [data_file('bogus-genome/refr.fa')]
    sketch = kevlar.count.load_sample_seqfile(
        seqfiles, 21, 1e6,
        mask=kmer_mask,
        consume_masked=count_masked,
        count=count,
        smallcount=smallcount,
    )

    present_value = sketch.get(kpresent)
    assert present_value > 0
    absent_value = sketch.get(kabsent)
    assert absent_value == 0
    fixed_value = sketch.get('GATTACAGATTACAGATTACA')
    assert fixed_value == 0
Esempio n. 3
0
def test_count_second_pass():
    """Compare the result of ``count_second_pass`` with a fixed histogram.

    The proband read file and a pre-computed ``Counttable`` from the
    ``minitrio`` data set are passed to ``count_second_pass``; the returned
    mapping must equal the expected dictionary below exactly.
    """
    # NOTE(review): the mask is loaded but never passed to
    # count_second_pass -- confirm whether it is still needed here.
    mask_table = Nodetable.load(data_file('minitrio/mask.nt'))
    count_table = Counttable.load(
        data_file('minitrio/trio-proband-mask-counts.ct')
    )
    reads = data_file('minitrio/trio-proband.fq.gz')

    expected = {
        10: 6,
        11: 10,
        12: 12,
        13: 18,
        14: 16,
        15: 11,
        16: 9,
        17: 9,
        18: 11,
        19: 8,
        20: 9,
        21: 7,
        22: 3,
    }
    observed = count_second_pass([reads], count_table)
    assert observed == expected
Esempio n. 4
0
def test_count_cli_with_mask(capsys):
    """Run the ``count`` command with ``--mask`` and check its stderr report.

    A ``Nodetable`` mask is filled from one fixed sequence and saved to a
    temporary ``.nt`` file. The ``count`` arguments are parsed with
    ``kevlar.cli.parser()`` and passed to ``kevlar.count.main``, writing to
    a temporary ``.sct`` file. The captured stderr must contain the
    expected distinct k-mer message.
    """
    masked_sequence = (
        'CACCAATCCGTACGGAGAGCCGTATATATAGACTGCTATACTATTGGATCGTACGGGGC'
    )
    mask = Nodetable(21, 1e4, 4)
    mask.consume(masked_sequence)

    # Both temporary files exist only for the duration of the CLI run.
    with NamedTemporaryFile(suffix='.nt') as maskfile:
        with NamedTemporaryFile(suffix='.sct') as countfile:
            mask.save(maskfile.name)
            arglist = [
                'count',
                '--ksize', '21',
                '--mask', maskfile.name,
                '--memory', '1M',
                countfile.name,
                data_file('bogus-genome/refr.fa'),
            ]
            args = kevlar.cli.parser().parse_args(arglist)
            kevlar.count.main(args)

    # Only stderr is inspected; stdout is discarded.
    _, stderr = capsys.readouterr()
    assert '36898 distinct k-mers stored' in stderr