Beispiel #1
0
def test_iter_read_multi_file():
    """Check that iterating over the globbed input files yields 4 records."""
    fasta_paths = kevlar.tests.data_glob('bogus-genome/mask-chr[1,2].fa')
    print(fasta_paths)
    # Materialize the iterator so the total record count can be checked.
    all_records = list(kevlar.multi_file_iter_khmer(fasta_paths))
    assert len(all_records) == 4
Beispiel #2
0
def main(args):
    """Run the novel workflow using the options carried by `args`.

    Control and case samples are loaded with `load_samples`, the case reads
    are streamed through `novel`, and every read it yields is written to the
    stream opened from `args.out` via `kevlar.print_augmented_fastx`. The
    elapsed time of each stage is reported through `kevlar.plog`.

    Raises:
        ValueError: if exactly one of `args.num_bands` and `args.band` is
            truthy.
    """
    timer = kevlar.Timer()
    timer.start()

    # The two banding options must be given together or not at all.
    if bool(args.num_bands) != bool(args.band):
        raise ValueError('Must specify --num-bands and --band together')
    # NOTE(review): presumably converts a 1-based band to 0-based -- confirm.
    if args.band:
        myband = args.band - 1
    else:
        myband = None

    # Stage 1: load control samples, then case samples.
    timer.start('loadall')
    kevlar.plog('[kevlar::novel] Loading control samples')
    timer.start('loadctrl')
    controls = load_samples(
        args.control_counts, args.control, args.ksize, args.memory,
        args.max_fpr, args.num_bands, myband, args.threads,
        args.save_ctrl_counts,
    )
    ctrl_secs = timer.stop('loadctrl')
    ctrl_msg = 'Control samples loaded in {:.2f} sec'.format(ctrl_secs)
    kevlar.plog('[kevlar::novel]', ctrl_msg)

    kevlar.plog('[kevlar::novel] Loading case samples')
    timer.start('loadcases')
    cases = load_samples(
        args.case_counts, args.case, args.ksize, args.memory,
        args.max_fpr, args.num_bands, myband, args.threads,
        args.save_case_counts,
    )
    case_secs = timer.stop('loadcases')
    case_msg = '[kevlar::novel] Case samples loaded in {:.2f} sec'.format(
        case_secs)
    kevlar.plog(case_msg)
    load_secs = timer.stop('loadall')
    load_msg = '[kevlar::novel] All samples loaded in {:.2f} sec'.format(
        load_secs)
    kevlar.plog(load_msg)

    # Stage 2: stream the case reads through `novel` and write the output.
    timer.start('iter')
    ncases = len(args.case)
    iter_msg = 'Iterating over reads from {:d} case sample(s)'.format(ncases)
    kevlar.plog('[kevlar::novel]', iter_msg)
    outstream = kevlar.open(args.out, 'w')
    # args.case is a collection of file lists; flatten it into one list.
    infiles = []
    for filelist in args.case:
        infiles.extend(filelist)
    caserecords = kevlar.multi_file_iter_khmer(infiles)
    readstream = novel(
        caserecords, cases, controls,
        ksize=args.ksize,
        abundscreen=args.abund_screen,
        casemin=args.case_min,
        ctrlmax=args.ctrl_max,
        numbands=args.num_bands,
        band=myband,
        skipuntil=args.skip_until,
    )
    for augmented_read in readstream:
        kevlar.print_augmented_fastx(augmented_read, outstream)

    iter_secs = timer.stop('iter')
    done_msg = 'Iterated over all case reads in {:.2f} seconds'.format(
        iter_secs)
    kevlar.plog('[kevlar::novel]', done_msg)

    # Stopping the timer without a key ends the one started at the top.
    total_secs = timer.stop()
    total_msg = 'Total time: {:.2f} seconds'.format(total_secs)
    kevlar.plog('[kevlar::novel]', total_msg)