예제 #1
0
def main(cmdlineargs=None, default_outfile=sys.stdout.buffer):
    """
    Build a read-processing pipeline from command-line arguments and run it.

    cmdlineargs is the list of command-line arguments; when it is None,
    ``sys.argv[1:]`` is used instead.

    default_outfile is the file to which trimmed reads are sent if the ``-o``
    parameter is not used (it is handed to open_output_files()).

    Invalid arguments are reported through ``parser.error()``. A
    KeyboardInterrupt during the run exits with status 130, a BrokenPipeError
    with status 1, and dnaio format errors or an EOFError exit with a
    "cutadapt: error: ..." message. After a successful run, the report is
    logged at the REPORT level.
    """
    started_at = time.time()
    parser = get_argument_parser()
    if cmdlineargs is None:
        cmdlineargs = sys.argv[1:]
    args, unrecognized = parser.parse_known_args(args=cmdlineargs)

    # When the results go to stdout, the log must not be written there as well
    results_on_stdout = (
        args.output is None or args.output == "-" or args.paired_output == "-"
    )
    # Handlers can already exist when this function is called externally
    # (such as from unit tests); the logging setup is left alone in that case
    if not logging.root.handlers:
        setup_logging(
            logger,
            stdout=not results_on_stdout,
            quiet=args.quiet,
            minimal=args.report == 'minimal',
            debug=args.debug,
        )
    profiler = setup_profiler_if_requested(args.profile)

    if args.quiet and args.report:
        parser.error(
            "Options --quiet and --report cannot be used at the same time")

    if args.colorspace:
        parser.error(
            "These colorspace-specific options are no longer supported: "
            "--colorspace, -c, -d, --double-encode, -t, --trim-primer, "
            "--strip-f3, --maq, --bwa, --no-zero-cap. "
            "Use Cutadapt 1.18 or earlier to work with colorspace data.")

    paired = determine_paired(args)
    assert paired in (False, True)

    # The header is logged at this point because some of the steps below
    # produce log output themselves
    log_header(cmdlineargs)
    if unrecognized:
        warn_if_en_dashes(cmdlineargs)
        parser.error("unrecognized arguments: " + " ".join(unrecognized))

    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')

    # --cores=0 means "use as many cores as are available"
    if args.cores == 0:
        cores = available_cpu_count()
    else:
        cores = args.cores
    opener_threads = 0 if cores == 1 else None
    file_opener = FileOpener(compression_level=args.compression_level,
                             threads=opener_threads)
    show_progress = sys.stderr.isatty() and not args.quiet
    progress = Progress() if show_progress else DummyProgress()

    try:
        interleaved_input = args.interleaved and len(args.inputs) == 1
        path1, path2 = setup_input_files(args.inputs, paired, interleaved_input)
        check_arguments(args, paired)
        pipeline = pipeline_from_parsed_args(args, paired, file_opener)
        outfiles = open_output_files(args, default_outfile, file_opener)
        infiles = InputFiles(path1, file2=path2, interleaved=interleaved_input)
        runner = setup_runner(
            pipeline, infiles, outfiles, progress, cores, args.buffer_size)
    except CommandLineError as err:
        parser.error(str(err))
        # parser.error() is not expected to return; the explicit return only
        # keeps IDEs from warning about possibly unbound names below
        return

    mode_names = {False: 'single-end', True: 'paired-end'}
    logger.info("Processing reads on %d core%s in %s mode ...",
                cores, 's' if cores > 1 else '', mode_names[pipeline.paired])
    try:
        with runner as r:
            stats = r.run()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except BrokenPipeError:
        sys.exit(1)
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as err:
        sys.exit("cutadapt: error: {}".format(err))

    elapsed = time.time() - started_at
    make_report = minimal_report if args.report == 'minimal' else full_report
    logger.log(REPORT, '%s', make_report(stats, elapsed, args.gc_content / 100))
    if profiler is not None:
        import pstats
        profiler.disable()
        pstats.Stats(profiler).sort_stats('time').print_stats(20)
예제 #2
0
def main(cmdlineargs, default_outfile=sys.stdout.buffer) -> Statistics:
    """
    Set up a processing pipeline from the command-line arguments, run it and return
    a Statistics object.

    cmdlineargs is the list of command-line arguments that is handed to the
    argument parser.

    default_outfile is the file to which trimmed reads are sent if the ``-o``
    parameter is not used.

    Invalid arguments are reported through ``parser.error()``. A
    KeyboardInterrupt during the run exits with status 130, a BrokenPipeError
    with status 1, and dnaio format errors or an EOFError exit with a
    "cutadapt: error: ..." message. For the latter and for command-line
    errors, the traceback is additionally logged at debug level.
    """
    start_time = time.time()
    parser = get_argument_parser()
    args, leftover_args = parser.parse_known_args(args=cmdlineargs)
    # log to stderr if results are to be sent to stdout
    log_to_stdout = args.output is not None and args.output != "-" and args.paired_output != "-"
    # Setup logging only if there are not already any handlers (can happen when
    # this function is being called externally such as from unit tests)
    if not logging.root.handlers:
        setup_logging(logger,
                      stdout=log_to_stdout,
                      quiet=args.quiet,
                      minimal=args.report == 'minimal',
                      debug=args.debug)
    log_header(cmdlineargs)
    profiler = setup_profiler_if_requested(args.profile)

    if args.quiet and args.report:
        parser.error(
            "Options --quiet and --report cannot be used at the same time")

    if leftover_args:
        warn_if_en_dashes(cmdlineargs)
        parser.error("unrecognized arguments: " + " ".join(leftover_args))

    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')

    # --cores=0 means "use as many cores as are available"
    cores = available_cpu_count() if args.cores == 0 else args.cores
    file_opener = FileOpener(compression_level=args.compression_level,
                             threads=0 if cores == 1 else None)
    # Only show progress when stderr is a terminal and --quiet was not given
    if sys.stderr.isatty() and not args.quiet:
        progress = Progress()
    else:
        progress = DummyProgress()
    paired = determine_paired(args)
    assert paired in (False, True)

    try:
        is_interleaved_input = args.interleaved and len(args.inputs) == 1
        input_filename, input_paired_filename = setup_input_files(
            args.inputs, paired, is_interleaved_input)
        check_arguments(args, paired)
        adapters, adapters2 = adapters_from_args(args)
        pipeline = pipeline_from_parsed_args(args, paired, file_opener,
                                             adapters, adapters2)
        adapter_names = [a.name for a in adapters]  # type: List[str]
        adapter_names2 = [a.name for a in adapters2]  # type: List[str]
        outfiles = open_output_files(args, default_outfile, file_opener,
                                     adapter_names, adapter_names2)
        inpaths = InputPaths(input_filename,
                             path2=input_paired_filename,
                             interleaved=is_interleaved_input)
        runner = setup_runner(pipeline, inpaths, outfiles, progress, cores,
                              args.buffer_size, file_opener)
    except CommandLineError as e:
        logger.debug("Command line error. Traceback:", exc_info=True)
        parser.error(str(e))
        # parser.error() is not expected to return; the explicit return only
        # keeps IDEs from warning about possibly unbound names below
        return

    logger.info("Processing reads on %d core%s in %s mode ...", cores,
                's' if cores > 1 else '', {
                    False: 'single-end',
                    True: 'paired-end'
                }[pipeline.paired])
    try:
        with runner as r:
            stats = r.run()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except BrokenPipeError:
        sys.exit(1)
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as e:
        # These errors are raised while the pipeline runs, not while the
        # command line is interpreted, so they get their own debug label
        logger.debug("Error while processing input. Traceback:", exc_info=True)
        sys.exit("cutadapt: error: {}".format(e))

    elapsed = time.time() - start_time
    if args.report == 'minimal':
        report = minimal_report
    else:
        report = full_report
    logger.log(REPORT, '%s', report(stats, elapsed, args.gc_content / 100))
    if profiler is not None:
        import pstats
        profiler.disable()
        pstats.Stats(profiler).sort_stats('time').print_stats(20)
    return stats
예제 #3
0
def main(cmdlineargs=None, default_outfile=sys.stdout.buffer):
    """
    Main function that sets up a processing pipeline and runs it.

    cmdlineargs is the list of command-line arguments; when it is None,
    ``sys.argv[1:]`` is used instead.

    default_outfile is the file to which trimmed reads are sent if the ``-o``
    parameter is not used.

    Invalid arguments (including a negative ``--cores`` value and parameter
    combinations that the parallel runner cannot handle) are reported through
    ``parser.error()``. A KeyboardInterrupt during the run exits with status
    130, a BrokenPipeError with status 1, and dnaio format errors or an
    EOFError exit with a "cutadapt: error: ..." message. The runner is closed
    whether or not the run succeeded.
    """
    start_time = time.time()
    parser = get_argument_parser()
    if cmdlineargs is None:
        cmdlineargs = sys.argv[1:]
    args = parser.parse_args(args=cmdlineargs)
    # log to stderr if results are to be sent to stdout
    log_to_stdout = args.output is not None and args.output != "-" and args.paired_output != "-"
    # Setup logging only if there are not already any handlers (can happen when
    # this function is being called externally such as from unit tests)
    if not logging.root.handlers:
        setup_logging(logger,
                      stdout=log_to_stdout,
                      quiet=args.quiet,
                      minimal=args.report == 'minimal',
                      debug=args.debug)
    # Always bind the name so that the check at the end of the function does
    # not depend on a conditionally defined variable
    profiler = None
    if args.profile:
        import cProfile
        profiler = cProfile.Profile()
        profiler.enable()

    if args.quiet and args.report:
        parser.error(
            "Options --quiet and --report cannot be used at the same time")

    if args.colorspace:
        parser.error(
            "These colorspace-specific options are no longer supported: "
            "--colorspace, -c, -d, --double-encode, -t, --trim-primer, "
            "--strip-f3, --maq, --bwa, --no-zero-cap. "
            "Use Cutadapt 1.18 or earlier to work with colorspace data.")

    paired = determine_paired_mode(args)
    assert paired in (False, True)

    # Print the header now because some of the functions below create logging output
    log_header(cmdlineargs)

    # Reject an invalid --cores value before open_output_files() is called,
    # so that a bad command line is refused before any output is set up
    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')
    # --cores=0 means "use as many cores as are available"
    cores = available_cpu_count() if args.cores == 0 else args.cores

    try:
        is_interleaved_input, is_interleaved_output = determine_interleaved(
            args)
        input_filename, input_paired_filename = input_files_from_parsed_args(
            args.inputs, paired, is_interleaved_input)
        pipeline = pipeline_from_parsed_args(args, paired,
                                             is_interleaved_output)
        outfiles = open_output_files(args, default_outfile,
                                     is_interleaved_output)
    except CommandLineError as e:
        parser.error(str(e))
        return  # avoid IDE warnings below

    if cores > 1:
        if ParallelPipelineRunner.can_output_to(outfiles):
            runner_class = ParallelPipelineRunner
            runner_kwargs = dict(n_workers=cores, buffer_size=args.buffer_size)
        else:
            parser.error(
                'Running in parallel is currently not supported for '
                'the given combination of command-line parameters.\nThese '
                'options are not supported: --info-file, --rest-file, '
                '--wildcard-file, --format\n'
                'Also, demultiplexing is not supported.\n'
                'Omit --cores/-j to continue.')
            return  # avoid IDE warnings below
    else:
        runner_class = SerialPipelineRunner
        runner_kwargs = dict()
    infiles = InputFiles(input_filename,
                         file2=input_paired_filename,
                         interleaved=is_interleaved_input)
    # Only show progress when stderr is a terminal and --quiet was not given
    if sys.stderr.isatty() and not args.quiet:
        progress = Progress()
    else:
        progress = DummyProgress()
    try:
        runner = runner_class(pipeline, infiles, outfiles, progress,
                              **runner_kwargs)
    except (dnaio.UnknownFileFormat, IOError) as e:
        # Pass the message as a string, as done for CommandLineError above
        parser.error(str(e))
        return  # avoid IDE warnings below

    logger.info("Processing reads on %d core%s in %s mode ...", cores,
                's' if cores > 1 else '', {
                    False: 'single-end',
                    True: 'paired-end'
                }[pipeline.paired])
    try:
        try:
            stats = runner.run()
        finally:
            # Close the runner even if run() raised; an exception raised by
            # close() itself is still handled by the except clauses below
            runner.close()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except BrokenPipeError:
        sys.exit(1)
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as e:
        sys.exit("cutadapt: error: {}".format(e))

    elapsed = time.time() - start_time
    if args.report == 'minimal':
        report = minimal_report
    else:
        report = full_report
    logger.log(REPORT, '%s', report(stats, elapsed, args.gc_content / 100))
    if profiler is not None:
        import pstats
        profiler.disable()
        pstats.Stats(profiler).sort_stats('time').print_stats(20)