예제 #1
0
def setup_runner(
    pipeline: Pipeline,
    inpaths: InputPaths,
    outfiles: OutputFiles,
    progress: Progress,
    cores: int,
    buffer_size: int,
    file_opener: FileOpener,
) -> PipelineRunner:
    """
    Create the runner that will execute *pipeline*.

    When ``cores > 1``, a ParallelPipelineRunner is constructed from the input
    paths, with ``cores`` passed as ``n_workers``. Otherwise the inputs are
    opened here via ``inpaths.open(file_opener)`` and a SerialPipelineRunner
    is returned. ``buffer_size`` is only forwarded to the parallel runner.

    Raises:
        CommandLineError: if constructing the runner or opening the input
            raises dnaio.UnknownFileFormat, dnaio.FileFormatError or OSError.
            The original exception is attached as ``__cause__``.
    """
    try:
        if cores > 1:
            return ParallelPipelineRunner(
                pipeline,
                inpaths,
                outfiles,
                file_opener,
                progress,
                n_workers=cores,
                buffer_size=buffer_size,
            )
        infiles = inpaths.open(file_opener)
        return SerialPipelineRunner(pipeline, infiles, outfiles, progress)
    except (dnaio.UnknownFileFormat, dnaio.FileFormatError, OSError) as e:
        # Chain explicitly so the traceback reports the underlying error as
        # the direct cause instead of "during handling ... another exception".
        raise CommandLineError(e) from e
예제 #2
0
def main(cmdlineargs=None, default_outfile=sys.stdout):
	"""
	Main function that sets up a processing pipeline and runs it.

	cmdlineargs is the list of command-line arguments to parse. If it is
	None, ``sys.argv[1:]`` is used.

	default_outfile is the file to which trimmed reads are sent if the ``-o``
	parameter is not used. Note that the default value is bound to
	``sys.stdout`` once, at function definition time.

	The function does not return a value. It terminates the process in
	several situations:

	- invalid or conflicting options are reported through ``parser.error``;
	- an unsupported option combination for multi-core runs logs an error
	  and calls ``sys.exit(1)``;
	- ``KeyboardInterrupt`` during the run exits with status 130;
	- an ``IOError`` with ``errno.EPIPE`` during the run exits with status 1
	  (any other ``IOError`` is re-raised);
	- ``seqio.FormatError``, ``seqio.UnknownFileType`` and ``EOFError``
	  during the run exit with a "cutadapt: error: ..." message.
	"""
	# Taken first so that the reported elapsed time covers setup as well.
	start_time = time.time()
	parser = get_option_parser()
	if cmdlineargs is None:
		cmdlineargs = sys.argv[1:]
	options, args = parser.parse_args(args=cmdlineargs)
	# Setup logging only if there are not already any handlers (can happen when
	# this function is being called externally such as from unit tests)
	if not logging.root.handlers:
		setup_logging(stdout=bool(options.output), quiet=options.quiet or options.report == 'minimal')
	if options.quiet and options.report:
		parser.error("Options --quiet and --report cannot be used at the same time")

	paired = determine_paired_mode(options)
	# Internal invariant on the helper's return value, not user input validation.
	assert paired in (False, 'first', 'both')

	# Decide the pair filter mode: forced in legacy mode, otherwise the
	# user's --pair-filter value with 'any' as the fallback.
	if paired == 'first':
		# legacy mode
		assert options.pair_filter is None
		pair_filter_mode = 'first'
	elif options.pair_filter is None:
		# default
		pair_filter_mode = 'any'
	else:
		# user-provided behavior
		pair_filter_mode = options.pair_filter

	# Everything in this block signals bad command-line usage by raising
	# CommandLineError, which is turned into a parser error message.
	try:
		is_interleaved_input, is_interleaved_output = determine_interleaved(options, args)
		input_filename, input_paired_filename, quality_filename = input_files_from_parsed_args(args,
			paired, is_interleaved_input)
		pipeline = pipeline_from_parsed_args(options, paired, pair_filter_mode, quality_filename, is_interleaved_output)
		outfiles = open_output_files(options, default_outfile, is_interleaved_output)
	except CommandLineError as e:
		parser.error(e)
		return  # avoid IDE warnings below

	# NOTE(review): --cores is validated only after the output files have
	# been opened above; confirm whether that ordering is intentional.
	if options.cores < 0:
		parser.error('Value for --cores cannot be negative')
	# A value of 0 means "use as many cores as available_cpu_count() reports".
	cores = available_cpu_count() if options.cores == 0 else options.cores
	# With more than one core, the pipeline is wrapped in a
	# ParallelPipelineRunner, but only if the output files, input format and
	# options allow it. Otherwise the pipeline itself acts as the runner.
	if cores > 1:
		if (
			ParallelPipelineRunner.can_output_to(outfiles)
			and quality_filename is None
			and not options.colorspace
			and options.format is None
		):
			runner = ParallelPipelineRunner(pipeline, cores, options.buffer_size)
		else:
			logger.error('Running in parallel is currently not supported for '
				'the given combination of command-line parameters.\nThese '
				'options are not supported: --info-file, --rest-file, '
				'--wildcard-file, --untrimmed-output, '
				'--untrimmed-paired-output, --too-short-output, '
				'--too-short-paired-output, --too-long-output, '
				'--too-long-paired-output, --format, --colorspace')
			sys.exit(1)
	else:
		runner = pipeline
	# Attach input and output to the runner; problems with the input files
	# are reported as command-line errors.
	try:
		runner.set_input(input_filename, file2=input_paired_filename,
			qualfile=quality_filename, colorspace=options.colorspace,
			fileformat=options.format, interleaved=is_interleaved_input)
		runner.set_output(outfiles)
	except (seqio.UnknownFileType, IOError) as e:
		parser.error(e)

	# Log a short banner: version, interpreter (the implementation name is
	# only mentioned when it is not CPython), command line and run mode.
	implementation = platform.python_implementation()
	opt = ' (' + implementation + ')' if implementation != 'CPython' else ''
	logger.info("This is cutadapt %s with Python %s%s", __version__,
		platform.python_version(), opt)
	logger.info("Command line parameters: %s", " ".join(cmdlineargs))
	# pipeline.paired is mapped to a human-readable mode name for the message.
	logger.info("Processing reads on %d core%s in %s mode ...",
		cores, 's' if cores > 1 else '',
		{False: 'single-end', 'first': 'paired-end legacy', 'both': 'paired-end'}[pipeline.paired])

	if pipeline.should_warn_legacy:
		logger.warning('\n'.join(textwrap.wrap('Legacy mode is '
			'enabled. Read modification and filtering options *ignore* '
			'the second read. To switch to regular paired-end mode, '
			'provide the --pair-filter=any option or use any of the '
			'-A/-B/-G/-U/--interleaved options.')))

	# Run the pipeline. Note that runner.close() is only reached when
	# run() succeeds; every handled error path below ends the process.
	try:
		stats = runner.run()
		# cProfile.runctx('stats=runner.run()', globals(), locals(), 'profile_main.prof')
		runner.close()
	except KeyboardInterrupt:
		print("Interrupted", file=sys.stderr)
		# 130 = 128 + SIGINT(2), the conventional exit status after Ctrl-C.
		sys.exit(130)
	except IOError as e:
		# A broken pipe is treated as a quiet failure (presumably the reader
		# of our output went away); any other I/O error propagates.
		if e.errno == errno.EPIPE:
			sys.exit(1)
		raise
	except (seqio.FormatError, seqio.UnknownFileType, EOFError) as e:
		# sys.exit() with a string prints it to stderr and exits with status 1.
		sys.exit("cutadapt: error: {0}".format(e))

	elapsed = time.time() - start_time
	if not options.quiet:
		# send statistics to stderr if result was sent to stdout
		stat_file = sys.stderr if options.output is None else None
		# gc_content is divided by 100 before being handed to the report
		# functions (presumably percent -> fraction; verify against the option).
		with redirect_standard_output(stat_file):
			if options.report == 'minimal':
				print_minimal_report(stats, elapsed, options.gc_content / 100)
			else:
				print_report(stats, elapsed, options.gc_content / 100)