def setup_runner(pipeline: Pipeline, infiles, outfiles, progress, cores, buffer_size):
    """
    Create the runner object that will execute *pipeline*.

    A ParallelPipelineRunner (with ``n_workers=cores`` and the given
    ``buffer_size``) is created when ``cores > 1``; otherwise a
    SerialPipelineRunner is created. ``pipeline``, ``infiles``, ``outfiles``
    and ``progress`` are passed through to the runner constructor unchanged.

    Raises CommandLineError if more than one core was requested but
    ParallelPipelineRunner.can_output_to() rejects ``outfiles``, or if
    constructing the runner fails with dnaio.UnknownFileFormat,
    dnaio.FileFormatError or OSError.
    """
    if cores > 1:
        if not ParallelPipelineRunner.can_output_to(outfiles):
            raise CommandLineError("Running in parallel is currently not supported "
                "when using --format or when demultiplexing.\n"
                "Omit --cores/-j to continue.")
        runner_class = ParallelPipelineRunner  # type: Type[PipelineRunner]
        runner_kwargs = dict(n_workers=cores, buffer_size=buffer_size)
    else:
        runner_class = SerialPipelineRunner
        runner_kwargs = dict()

    try:
        return runner_class(pipeline, infiles, outfiles, progress, **runner_kwargs)
    except (dnaio.UnknownFileFormat, dnaio.FileFormatError, OSError) as e:
        # Chain explicitly so that the underlying error is recorded as the
        # direct cause of the CommandLineError.
        raise CommandLineError(e) from e
def main(cmdlineargs=None, default_outfile=sys.stdout):
    """
    Parse the command line, build the processing pipeline, run it and
    print the report.

    cmdlineargs is the list of command-line arguments; ``sys.argv[1:]`` is
    used when it is None.

    default_outfile is the file to which trimmed reads are sent if the ``-o``
    parameter is not used.
    """
    started = time.time()
    parser = get_option_parser()
    if cmdlineargs is None:
        cmdlineargs = sys.argv[1:]
    options, args = parser.parse_args(args=cmdlineargs)

    # Logging is configured only when no handlers are installed yet; they may
    # already exist if this function is called externally (e.g. unit tests).
    if not logging.root.handlers:
        log_to_stdout = bool(options.output)
        suppress_log = options.quiet or options.report == 'minimal'
        setup_logging(stdout=log_to_stdout, quiet=suppress_log)

    if options.quiet and options.report:
        parser.error("Options --quiet and --report cannot be used at the same time")

    paired = determine_paired_mode(options)
    assert paired in (False, 'first', 'both')

    if paired == 'first':
        # Legacy mode: no pair filter may have been given
        assert options.pair_filter is None
        pair_filter_mode = 'first'
    else:
        # 'any' is the default unless the user chose something else
        pair_filter_mode = 'any' if options.pair_filter is None else options.pair_filter

    try:
        is_interleaved_input, is_interleaved_output = determine_interleaved(options, args)
        input_filename, input_paired_filename, quality_filename = \
            input_files_from_parsed_args(args, paired, is_interleaved_input)
        pipeline = pipeline_from_parsed_args(
            options, paired, pair_filter_mode, quality_filename, is_interleaved_output)
        outfiles = open_output_files(options, default_outfile, is_interleaved_output)
    except CommandLineError as e:
        parser.error(e)
        return  # avoid IDE warnings below

    if options.cores < 0:
        parser.error('Value for --cores cannot be negative')
    if options.cores == 0:
        cores = available_cpu_count()
    else:
        cores = options.cores

    if cores <= 1:
        # Without parallelism, the pipeline itself acts as the runner
        runner = pipeline
    elif (
        not ParallelPipelineRunner.can_output_to(outfiles)
        or quality_filename is not None
        or options.colorspace
        or options.format is not None
    ):
        logger.error('Running in parallel is currently not supported for '
            'the given combination of command-line parameters.\nThese '
            'options are not supported: --info-file, --rest-file, '
            '--wildcard-file, --untrimmed-output, '
            '--untrimmed-paired-output, --too-short-output, '
            '--too-short-paired-output, --too-long-output, '
            '--too-long-paired-output, --format, --colorspace')
        sys.exit(1)
    else:
        runner = ParallelPipelineRunner(pipeline, cores, options.buffer_size)

    try:
        runner.set_input(
            input_filename,
            file2=input_paired_filename,
            qualfile=quality_filename,
            colorspace=options.colorspace,
            fileformat=options.format,
            interleaved=is_interleaved_input,
        )
        runner.set_output(outfiles)
    except (seqio.UnknownFileType, IOError) as e:
        parser.error(e)

    # Mention the Python implementation only when it is not CPython
    implementation = platform.python_implementation()
    impl_suffix = ''
    if implementation != 'CPython':
        impl_suffix = ' (' + implementation + ')'
    logger.info("This is cutadapt %s with Python %s%s", __version__,
        platform.python_version(), impl_suffix)
    logger.info("Command line parameters: %s", " ".join(cmdlineargs))

    mode_names = {
        False: 'single-end',
        'first': 'paired-end legacy',
        'both': 'paired-end',
    }
    plural = 's' if cores > 1 else ''
    logger.info("Processing reads on %d core%s in %s mode ...",
        cores, plural, mode_names[pipeline.paired])

    if pipeline.should_warn_legacy:
        legacy_notice = (
            'Legacy mode is '
            'enabled. Read modification and filtering options *ignore* '
            'the second read. To switch to regular paired-end mode, '
            'provide the --pair-filter=any option or use any of the '
            '-A/-B/-G/-U/--interleaved options.'
        )
        logger.warning('\n'.join(textwrap.wrap(legacy_notice)))

    try:
        stats = runner.run()
        runner.close()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except IOError as e:
        # A broken pipe ends the program with status 1; any other I/O error
        # is propagated to the caller.
        if e.errno != errno.EPIPE:
            raise
        sys.exit(1)
    except (seqio.FormatError, seqio.UnknownFileType, EOFError) as e:
        sys.exit("cutadapt: error: {0}".format(e))

    elapsed = time.time() - started
    if options.quiet:
        return

    # The statistics go to stderr when no output file was given (the reads
    # then went to default_outfile instead).
    if options.output is None:
        stat_file = sys.stderr
    else:
        stat_file = None
    with redirect_standard_output(stat_file):
        gc_fraction = options.gc_content / 100
        if options.report == 'minimal':
            print_minimal_report(stats, elapsed, gc_fraction)
        else:
            print_report(stats, elapsed, gc_fraction)
def main(cmdlineargs=None, default_outfile=sys.stdout.buffer):
    """
    Main function that sets up a processing pipeline and runs it.

    cmdlineargs is the list of command-line arguments; ``sys.argv[1:]`` is
    used when it is None.

    default_outfile is the file to which trimmed reads are sent if the ``-o``
    parameter is not used.

    Error handling: invalid option combinations and CommandLineError are
    reported through ``parser.error()``. The process exits with status 1 if
    parallel execution was requested but is not possible for the chosen
    outputs, or if a broken pipe (EPIPE) occurs while running; with status 130
    on KeyboardInterrupt; and with a "cutadapt: error: ..." message on
    dnaio.FileFormatError, dnaio.UnknownFileFormat or EOFError.
    """
    start_time = time.time()
    parser = get_argument_parser()
    if cmdlineargs is None:
        cmdlineargs = sys.argv[1:]
    args = parser.parse_args(args=cmdlineargs)

    # log to stderr if results are to be sent to stdout
    log_to_stdout = (
        args.output is not None
        and args.output != "-"
        and args.paired_output != "-"
    )
    # Setup logging only if there are not already any handlers (can happen when
    # this function is being called externally such as from unit tests)
    if not logging.root.handlers:
        setup_logging(stdout=log_to_stdout, quiet=args.quiet,
            minimal=args.report == 'minimal', debug=args.debug)

    if args.profile:
        import cProfile
        profiler = cProfile.Profile()
        profiler.enable()

    if args.quiet and args.report:
        parser.error(
            "Options --quiet and --report cannot be used at the same time")

    if args.colorspace:
        parser.error(
            "These colorspace-specific options are no longer supported: "
            "--colorspace, -c, -d, --double-encode, -t, --trim-primer, "
            "--strip-f3, --maq, --bwa, --no-zero-cap. "
            "Use Cutadapt 1.18 or earlier to work with colorspace data.")

    paired = determine_paired_mode(args)
    assert paired in (False, True)

    # Print the header now because some of the functions below create logging output
    log_header(cmdlineargs)
    try:
        is_interleaved_input, is_interleaved_output = determine_interleaved(
            args)
        input_filename, input_paired_filename = input_files_from_parsed_args(
            args.inputs, paired, is_interleaved_input)
        pipeline = pipeline_from_parsed_args(args, paired,
            is_interleaved_output)
        outfiles = open_output_files(args, default_outfile,
            is_interleaved_output)
    except CommandLineError as e:
        parser.error(e)
        return  # avoid IDE warnings below

    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')
    # --cores=0 means: use as many cores as available_cpu_count() reports
    cores = available_cpu_count() if args.cores == 0 else args.cores
    if cores > 1:
        if ParallelPipelineRunner.can_output_to(outfiles):
            runner_class = ParallelPipelineRunner
            runner_kwargs = dict(n_workers=cores, buffer_size=args.buffer_size)
        else:
            logger.error(
                'Running in parallel is currently not supported for '
                'the given combination of command-line parameters.\nThese '
                'options are not supported: --info-file, --rest-file, '
                '--wildcard-file, --untrimmed-output, '
                '--untrimmed-paired-output, --too-short-output, '
                '--too-short-paired-output, --too-long-output, '
                '--too-long-paired-output, --format\n'
                'Also, demultiplexing is not supported.\n'
                'Omit --cores/-j to continue.')
            # logger.error() only logs and then returns, so terminate
            # explicitly with a non-zero status. A plain ``return`` here would
            # let the process finish with exit status 0 although nothing was
            # processed.
            sys.exit(1)
    else:
        runner_class = SerialPipelineRunner
        runner_kwargs = dict()

    infiles = InputFiles(input_filename, file2=input_paired_filename,
        interleaved=is_interleaved_input)
    try:
        runner = runner_class(pipeline, infiles, outfiles, **runner_kwargs)
    except (dnaio.UnknownFileFormat, IOError) as e:
        parser.error(e)
        return  # avoid IDE warnings below

    logger.info("Processing reads on %d core%s in %s mode ...",
        cores, 's' if cores > 1 else '',
        {False: 'single-end', True: 'paired-end'}[pipeline.paired])

    try:
        stats = runner.run()
        runner.close()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except IOError as e:
        # A broken pipe ends the program with status 1; any other I/O error
        # is propagated to the caller.
        if e.errno == errno.EPIPE:
            sys.exit(1)
        raise
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as e:
        sys.exit("cutadapt: error: {}".format(e))

    elapsed = time.time() - start_time
    if args.report == 'minimal':
        report = minimal_report
    else:
        report = full_report
    # The report is emitted through the logger at the REPORT level
    logger.log(REPORT, '%s', report(stats, elapsed, args.gc_content / 100))

    if args.profile:
        import pstats
        profiler.disable()
        pstats.Stats(profiler).sort_stats('time').print_stats(20)