def test_iter_read_multi_file():
    """Check that reading several input files yields the expected records.

    The test resolves a glob pattern against the test data directory, feeds
    the resulting file list to `kevlar.multi_file_iter_khmer`, and asserts
    that exactly 4 records are produced in total.
    """
    # NOTE(review): if data_glob uses shell-style globbing, `[1,2]` is a
    # character class that also matches a literal comma -- confirm intent.
    fasta_paths = kevlar.tests.data_glob('bogus-genome/mask-chr[1,2].fa')
    print(fasta_paths)  # shown by pytest only when the test fails / with -s
    all_records = list(kevlar.multi_file_iter_khmer(fasta_paths))
    assert len(all_records) == 4
def main(args):
    """Entry point: load control and case samples, then emit novel reads.

    Steps, each timed and logged through `kevlar.plog`:

    1. Load the control samples with `load_samples`.
    2. Load the case samples with `load_samples`.
    3. Stream every case read through `novel` and write each resulting
       augmented read to the output with `kevlar.print_augmented_fastx`.

    Args:
        args: Parsed command-line options. Attributes read here:
            `num_bands`, `band`, `control_counts`, `control`, `case_counts`,
            `case`, `ksize`, `memory`, `max_fpr`, `threads`,
            `save_ctrl_counts`, `save_case_counts`, `out`, `abund_screen`,
            `case_min`, `ctrl_max`, `skip_until`.

    Raises:
        ValueError: If exactly one of `num_bands` / `band` is truthy.
    """
    timer = kevlar.Timer()
    timer.start()

    # The two banding options are only valid as a pair: either both are
    # set (truthy) or neither is.
    if bool(args.num_bands) != bool(args.band):
        raise ValueError('Must specify --num-bands and --band together')

    # The band passed downstream is one less than the value given on the
    # command line (presumably 1-based -> 0-based; confirm with callee).
    if args.band:
        myband = args.band - 1
    else:
        myband = None

    # ---- Load k-mer counts for all samples --------------------------------
    timer.start('loadall')

    kevlar.plog('[kevlar::novel] Loading control samples')
    timer.start('loadctrl')
    controls = load_samples(
        args.control_counts, args.control, args.ksize, args.memory,
        args.max_fpr, args.num_bands, myband, args.threads,
        args.save_ctrl_counts,
    )
    ctrl_secs = timer.stop('loadctrl')
    ctrl_msg = 'Control samples loaded in {:.2f} sec'.format(ctrl_secs)
    kevlar.plog('[kevlar::novel]', ctrl_msg)

    kevlar.plog('[kevlar::novel] Loading case samples')
    timer.start('loadcases')
    cases = load_samples(
        args.case_counts, args.case, args.ksize, args.memory,
        args.max_fpr, args.num_bands, myband, args.threads,
        args.save_case_counts,
    )
    case_secs = timer.stop('loadcases')
    kevlar.plog(
        '[kevlar::novel] Case samples loaded in {:.2f} sec'.format(case_secs))

    load_secs = timer.stop('loadall')
    kevlar.plog(
        '[kevlar::novel] All samples loaded in {:.2f} sec'.format(load_secs))

    # ---- Scan case reads ----------------------------------------------------
    timer.start('iter')
    ncases = len(args.case)
    iter_msg = 'Iterating over reads from {:d} case sample(s)'.format(ncases)
    kevlar.plog('[kevlar::novel]', iter_msg)

    # NOTE(review): this handle is never explicitly closed in this function;
    # confirm whether kevlar.open may return a shared stream before adding
    # a close().
    outstream = kevlar.open(args.out, 'w')

    # args.case is a list of file lists (one per case sample); flatten it
    # into a single sequence of input files.
    infiles = []
    for filelist in args.case:
        infiles.extend(filelist)

    caserecords = kevlar.multi_file_iter_khmer(infiles)
    readstream = novel(
        caserecords, cases, controls,
        ksize=args.ksize,
        abundscreen=args.abund_screen,
        casemin=args.case_min,
        ctrlmax=args.ctrl_max,
        numbands=args.num_bands,
        band=myband,
        skipuntil=args.skip_until,
    )
    for augmented_read in readstream:
        kevlar.print_augmented_fastx(augmented_read, outstream)

    iter_secs = timer.stop('iter')
    done_msg = 'Iterated over all case reads in {:.2f} seconds'.format(
        iter_secs)
    kevlar.plog('[kevlar::novel]', done_msg)

    # ---- Overall timing -----------------------------------------------------
    total_secs = timer.stop()
    total_msg = 'Total time: {:.2f} seconds'.format(total_secs)
    kevlar.plog('[kevlar::novel]', total_msg)