def main():
    """Command-line entry point for 2D basecalling.

    Parses arguments, iterates over (or watches for) fast5 files, runs
    :func:`process_read_2d` across a worker pool, and writes template,
    complement and 2D basecalls to fasta/fastq outputs. Timing and
    throughput statistics are reported on stderr.
    """
    # With no arguments, show help rather than silently doing nothing.
    if len(sys.argv) == 1:
        sys.argv.append("-h")
    args = get_parser().parse_args()

    logging.basicConfig(
        format='[%(asctime)s - %(name)s] %(message)s',
        datefmt='%H:%M:%S', level=logging.DEBUG
    )
    if not args.debug:
        # BUG FIX: logging.disable() takes a numeric level, not a logger
        # name. The previous logging.disable('root') stored a string in the
        # manager, breaking later level comparisons. Disabling everything up
        # to CRITICAL silences all logging as intended.
        logging.disable(logging.CRITICAL)
    logging.info('Starting 2D basecalling.')

    modelfiles = {
        'template': os.path.abspath(args.template_model),
        'complement': os.path.abspath(args.complement_model)
    }

    #TODO: handle case where there are pre-existing files.
    if args.watch is not None:
        # An optional component: watch a directory for newly arriving reads.
        from nanonet.watcher import Fast5Watcher
        fast5_files = Fast5Watcher(args.input, timeout=args.watch)
    else:
        sort_by_size = None
        fast5_files = iterate_fast5(
            args.input, paths=True, strand_list=args.strand_list,
            limit=args.limit, sort_by_size=sort_by_size
        )

    # Fixed positional/keyword arguments forwarded to every worker call.
    fix_args = [modelfiles]
    fix_kwargs = {a: getattr(args, a) for a in (
        'min_len', 'max_len', 'section', 'event_detect', 'fast_decode',
        'write_events', 'opencl_2d', 'ed_params', 'sloika_model'
    )}

    # Define worker functions
    mapper = tang_imap(
        process_read_2d, fast5_files,
        fix_args=fix_args, fix_kwargs=fix_kwargs,
        threads=args.jobs, unordered=True
    )

    # Off we go
    n_reads = 0
    n_bases = 0
    n_events = 0
    n_bases_2d = 0
    # timings accumulates [network, decoding, 2d-calling] wall seconds.
    timings = [0.0, 0.0, 0.0]
    t0 = now()
    sections = ('template', 'complement', '2d')
    if args.output_prefix is not None:
        ext = 'fastq' if args.fastq else 'fasta'
        filenames = ['{}_{}.{}'.format(args.output_prefix, x, ext)
                     for x in sections]
    else:
        # '-' conventionally means stdout for FastaWrite.
        filenames = ['-'] * 3
    with FastaWrite(filenames[0], args.fastq) as fasta_temp, \
            FastaWrite(filenames[1], args.fastq) as fasta_comp, \
            FastaWrite(filenames[2], args.fastq) as fasta_2d:
        for result in mapper:
            # A read with no template call contributes nothing at all.
            if result['template'] is None:
                continue
            data, times = result['template']
            fname, basecall, _, n_ev = data
            basecall, quality = basecall
            name, _ = short_names(fname)
            if args.fastq:
                fasta_temp.write(name, basecall, quality)
            else:
                fasta_temp.write(name, basecall)
            n_reads += 1
            n_bases += len(basecall)
            n_events += n_ev
            # Template timings are (network, decoding); pad with 0.0 for
            # the 2D slot so the element-wise sum lines up.
            timings = [x + y for x, y in zip(timings, times + (0.0,))]

            # Complement (and hence 2D) only exists if the template did.
            if result['complement'] is None:
                continue
            data, times = result['complement']
            _, basecall, _, _ = data
            basecall, quality = basecall
            if args.fastq:
                fasta_comp.write(name, basecall, quality)
            else:
                fasta_comp.write(name, basecall)

            if result['2d'] is None:
                continue
            basecall, time_2d = result['2d']
            basecall, quality = basecall
            if args.fastq:
                fasta_2d.write(name, basecall, quality)
            else:
                fasta_2d.write(name, basecall)
            n_bases_2d += len(basecall)
            timings[2] += time_2d
    t1 = now()
    sys.stderr.write(
        'Processed {} reads in {}s (wall time)\n'.format(n_reads, t1 - t0))
    if n_reads > 0:
        network, decoding, call_2d = timings
        # Guard against dividing by a zero 2D call time when no 2D
        # basecalls were produced.
        time_2d = 0 if n_bases_2d == 0 else n_bases_2d/1000.0/call_2d
        sys.stderr.write(
            '1D Run network: {:6.2f} ({:6.3f} kb/s, {:6.3f} kev/s)\n'
            '1D Decoding: {:6.2f} ({:6.3f} kb/s, {:6.3f} kev/s)\n'
            '2D calling: {:6.2f} ({:6.3f} kb/s)\n'.format(
                network, n_bases/1000.0/network, n_events/1000.0/network,
                decoding, n_bases/1000.0/decoding, n_events/1000.0/decoding,
                call_2d, time_2d
            )
        )
def main():
    """Command-line entry point for 1D basecalling.

    Parses arguments, resolves the model file, builds CPU and/or OpenCL
    worker functions, maps :func:`process_read` (or its OpenCL variant)
    over the input fast5 files and writes basecalls to a single
    fasta/fastq output, reporting throughput on stderr.
    """
    # With no arguments, show help rather than silently doing nothing.
    if len(sys.argv) == 1:
        sys.argv.append("-h")
    args = get_parser().parse_args()

    if args.list_platforms:
        list_opencl_platforms()
        sys.exit(0)

    modelfile = os.path.abspath(args.model)
    if args.section is None:
        # Fall back to the section recorded in the model's metadata.
        try:
            args.section = np.load(modelfile).item().meta['section']
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallows
            # KeyboardInterrupt/SystemExit. Any failure to read the model
            # metadata still produces the same user-facing error.
            sys.stderr.write(
                "No 'section' found in modelfile, try specifying --section.\n")
            sys.exit(1)

    #TODO: handle case where there are pre-existing files.
    if args.watch is not None:
        # An optional component: watch a directory for newly arriving reads,
        # seeding the queue with any files already present.
        from nanonet.watcher import Fast5Watcher
        initial_jobs = iterate_fast5(args.input, paths=True)
        fast5_files = Fast5Watcher(
            args.input, timeout=args.watch, initial_jobs=initial_jobs)
    else:
        # Largest-first ordering helps balance load across OpenCL devices.
        sort_by_size = 'desc' if args.platforms is not None else None
        fast5_files = iterate_fast5(
            args.input, paths=True, strand_list=args.strand_list,
            limit=args.limit, sort_by_size=sort_by_size)

    # Fixed positional/keyword arguments forwarded to every worker call.
    fix_args = [modelfile]
    fix_kwargs = {a: getattr(args, a) for a in (
        'min_len', 'max_len', 'section', 'event_detect', 'fast_decode',
        'write_events', 'ed_params', 'sloika_model'
    )}

    # Define worker functions: (callable, batch_size-or-None) pairs.
    workers = []
    if not args.exc_opencl:
        cpu_function = partial(process_read, *fix_args, **fix_kwargs)
        workers.extend([(cpu_function, None)] * args.jobs)
    if args.platforms is not None:
        if cl is None:
            raise ImportError('pyopencl is not installed, install with pip.')
        for platform in args.platforms:
            # Each platform spec is "vendor:device_id:n_files".
            vendor, device_id, n_files = platform.split(':')
            pa = ProcessAttr(
                use_opencl=True, vendor=vendor, device_id=int(device_id))
            fargs = fix_args + [pa]
            opencl_function = partial(
                process_read_opencl, *fargs, **fix_kwargs)
            workers.append((opencl_function, int(n_files)))

    # Select how to spread load
    if args.platforms is None:
        # just CPU
        worker, n_files = workers[0]
        mapper = tang_imap(
            worker, fast5_files, threads=args.jobs, unordered=True)
    elif len(workers) == 1:
        # single opencl device: wrap files into batches of n_files and
        # flatten the per-batch result lists back into a single stream.
        worker, n_files = workers[0]
        fast5_files = group_by_list(fast5_files, [n_files])
        mapper = itertools.chain.from_iterable(
            itertools.imap(worker, fast5_files))
    else:
        # Heterogeneous compute
        mapper = JobQueue(fast5_files, workers)

    # Off we go
    n_reads = 0
    n_bases = 0
    n_events = 0
    # timings accumulates [network, decoding] wall seconds.
    timings = [0.0, 0.0]
    t0 = now()
    with FastaWrite(args.output, args.fastq) as fasta:
        for result in mapper:
            # Failed reads yield None; skip them.
            if result is None:
                continue
            data, times = result
            fname, call_data, _, n_ev = data
            name, _ = short_names(fname)
            basecall, quality = call_data
            if args.fastq:
                fasta.write(name, basecall, quality)
            else:
                fasta.write(name, basecall)
            n_reads += 1
            n_bases += len(basecall)
            n_events += n_ev
            timings = [x + y for x, y in zip(timings, times)]
    t1 = now()
    sys.stderr.write(
        'Basecalled {} reads ({} bases, {} events) in {}s (wall time)\n'.
        format(n_reads, n_bases, n_events, t1 - t0))
    if n_reads > 0:
        network, decoding = timings
        sys.stderr.write(
            'Run network: {:6.2f} ({:6.3f} kb/s, {:6.3f} kev/s)\n'
            'Decoding: {:6.2f} ({:6.3f} kb/s, {:6.3f} kev/s)\n'.format(
                network, n_bases / 1000.0 / network,
                n_events / 1000.0 / network,
                decoding, n_bases / 1000.0 / decoding,
                n_events / 1000.0 / decoding,
            ))