Ejemplo n.º 1
0
def test_paired_adapter_cutter_actions(action, expected_trimmed1,
                                       expected_trimmed2):
    a1 = BackAdapter("GGTTAA")
    a2 = BackAdapter("AACCGG")
    s1 = Sequence("name", "CCCCGGTTAACCCC")
    s2 = Sequence("name", "TTTTAACCGGTTTT")
    pac = PairedAdapterCutter([a1], [a2], action=action)
    info1 = ModificationInfo(s1)
    info2 = ModificationInfo(s2)
    trimmed1, trimmed2 = pac(s1, s2, info1, info2)
    assert expected_trimmed1 == trimmed1.sequence
    assert expected_trimmed2 == trimmed2.sequence
Ejemplo n.º 2
0
def test_paired_adapter_cutter_actions(action, expected_trimmed1,
                                       expected_trimmed2):
    from cutadapt.adapters import SingleAdapter, Where
    a1 = SingleAdapter("GGTTAA", where=Where.BACK)
    a2 = SingleAdapter("AACCGG", where=Where.BACK)
    s1 = Sequence("name", "CCCCGGTTAACCCC")
    s2 = Sequence("name", "TTTTAACCGGTTTT")
    pac = PairedAdapterCutter([a1], [a2], action=action)
    info1 = ModificationInfo()
    info2 = ModificationInfo()
    trimmed1, trimmed2 = pac(s1, s2, info1, info2)
    assert expected_trimmed1 == trimmed1.sequence
    assert expected_trimmed2 == trimmed2.sequence
Ejemplo n.º 3
0
def add_adapter_cutter(
    pipeline,
    adapters,
    adapters2,
    paired: bool,
    pair_adapters: bool,
    action: Optional[str],
    times: int,
    reverse_complement: bool,
    add_rc_suffix: bool,
    allow_index: bool,
):
    if pair_adapters:
        if reverse_complement:
            raise CommandLineError("Cannot use --revcomp with --pair-adapters")
        try:
            cutter = PairedAdapterCutter(adapters, adapters2, action)
        except PairedAdapterCutterError as e:
            raise CommandLineError("--pair-adapters: " + str(e))
        pipeline.add_paired_modifier(cutter)
    else:
        adapter_cutter, adapter_cutter2 = None, None
        try:
            if adapters:
                adapter_cutter = AdapterCutter(adapters, times, action, allow_index)
            if adapters2:
                adapter_cutter2 = AdapterCutter(adapters2, times, action, allow_index)
        except ValueError as e:
            raise CommandLineError(e)
        if paired:
            if reverse_complement:
                raise CommandLineError("--revcomp not implemented for paired-end reads")
            if adapter_cutter or adapter_cutter2:
                pipeline.add(adapter_cutter, adapter_cutter2)
        elif adapter_cutter:
            if reverse_complement:
                modifier = ReverseComplementer(
                    adapter_cutter,
                    rc_suffix=" rc" if add_rc_suffix else None,
                )  # type: Union[AdapterCutter,ReverseComplementer]
            else:
                modifier = adapter_cutter
            pipeline.add(modifier)
Ejemplo n.º 4
0
def pipeline_from_parsed_args(args, paired, is_interleaved_output):
    """
    Setup a processing pipeline from parsed command-line arguments.

    If there are any problems parsing the arguments, a CommandLineError is thrown.

    Return an instance of Pipeline (SingleEndPipeline or PairedEndPipeline)
    """

    if not paired:
        if args.untrimmed_paired_output:
            raise CommandLineError(
                "Option --untrimmed-paired-output can only be used when "
                "trimming paired-end reads (with option -p).")

    if paired:
        if not is_interleaved_output:
            if not args.paired_output:
                raise CommandLineError(
                    "When a paired-end trimming option such as -A/-G/-B/-U, "
                    "is used, a second output file needs to be specified via -p (--paired-output)."
                )
            if not args.output:
                raise CommandLineError(
                    "When you use -p or --paired-output, you must also "
                    "use the -o option.")

        if bool(args.untrimmed_output) != bool(args.untrimmed_paired_output):
            raise CommandLineError(
                "When trimming paired-end reads, you must use either none "
                "or both of the --untrimmed-output/--untrimmed-paired-output options."
            )
        if args.too_short_output and not args.too_short_paired_output:
            raise CommandLineError(
                "When using --too-short-output with paired-end "
                "reads, you also need to use --too-short-paired-output")
        if args.too_long_output and not args.too_long_paired_output:
            raise CommandLineError(
                "When using --too-long-output with paired-end "
                "reads, you also need to use --too-long-paired-output")

    if args.format is not None:
        logger.warning(
            "Option --format is deprecated and ignored because the input file format is "
            "always auto-detected")

    if not (0 <= args.error_rate < 1.):
        raise CommandLineError(
            "The maximum error rate must be at least 0 and less than 1.")
    if args.overlap < 1:
        raise CommandLineError("The overlap must be at least 1.")
    if not (0 <= args.gc_content <= 100):
        raise CommandLineError(
            "GC content must be given as percentage between 0 and 100")
    if args.action == 'none':
        args.action = None

    adapter_parser = AdapterParser(
        max_error_rate=args.error_rate,
        min_overlap=args.overlap,
        read_wildcards=args.match_read_wildcards,
        adapter_wildcards=args.match_adapter_wildcards,
        indels=args.indels,
    )
    try:
        adapters = adapter_parser.parse_multi(args.adapters, args.anywhere,
                                              args.front)
        adapters2 = adapter_parser.parse_multi(args.adapters2, args.anywhere2,
                                               args.front2)
    except IOError as e:
        if e.errno == errno.ENOENT:
            raise CommandLineError(e)
        raise
    except ValueError as e:
        raise CommandLineError(e)
    if args.debug:
        for adapter in adapters + adapters2:
            adapter.enable_debug()

    # Create the processing pipeline
    if paired:
        pair_filter_mode = 'any' if args.pair_filter is None else args.pair_filter
        pipeline = PairedEndPipeline(pair_filter_mode)
    else:
        pipeline = SingleEndPipeline()

    # When adapters are being trimmed only in R1 or R2, override the pair filter mode
    # as using the default of 'any' would regard all read pairs as untrimmed.
    if paired and (not adapters2
                   or not adapters) and (args.discard_untrimmed
                                         or args.untrimmed_output
                                         or args.untrimmed_paired_output):
        pipeline.override_untrimmed_pair_filter = True

    for i, cut_arg in enumerate([args.cut, args.cut2]):
        # cut_arg is a list
        if not cut_arg:
            continue
        if len(cut_arg) > 2:
            raise CommandLineError(
                "You cannot remove bases from more than two ends.")
        if len(cut_arg) == 2 and cut_arg[0] * cut_arg[1] > 0:
            raise CommandLineError(
                "You cannot remove bases from the same end twice.")
        for c in cut_arg:
            if c == 0:
                continue
            if i == 0:  # R1
                if paired:
                    pipeline.add(UnconditionalCutter(c), None)
                else:
                    pipeline.add(UnconditionalCutter(c))
            else:
                # R2
                assert isinstance(pipeline, PairedEndPipeline)
                pipeline.add(None, UnconditionalCutter(c))

    pipeline_add = pipeline.add_both if paired else pipeline.add

    if args.nextseq_trim is not None:
        pipeline_add(
            NextseqQualityTrimmer(args.nextseq_trim, args.quality_base))
    if args.quality_cutoff is not None:
        cutoffs = parse_cutoffs(args.quality_cutoff)
        pipeline_add(QualityTrimmer(cutoffs[0], cutoffs[1], args.quality_base))

    if args.pair_adapters:
        if not paired:
            raise CommandLineError(
                "Option --pair-adapters can only be used when trimming "
                "paired-end reads")
        if args.times != 1:
            raise CommandLineError(
                "--pair-adapters cannot be used with --times")
        try:
            cutter = PairedAdapterCutter(adapters, adapters2, args.action)
        except PairedAdapterCutterError as e:
            raise CommandLineError("--pair-adapters: " + str(e))
        pipeline.add_paired_modifier(cutter)
    else:
        adapter_cutter, adapter_cutter2 = None, None
        if adapters:
            adapter_cutter = AdapterCutter(adapters, args.times, args.action)
        if adapters2:
            adapter_cutter2 = AdapterCutter(adapters2, args.times, args.action)
        if paired:
            if adapter_cutter or adapter_cutter2:
                pipeline.add(adapter_cutter, adapter_cutter2)
        else:
            if adapter_cutter:
                pipeline.add(adapter_cutter)

    # Remaining modifiers that apply to both reads of paired-end reads
    if args.length is not None:
        pipeline_add(Shortener(args.length))
    if args.trim_n:
        pipeline_add(NEndTrimmer())
    if args.length_tag:
        pipeline_add(LengthTagModifier(args.length_tag))
    for suffix in args.strip_suffix:
        pipeline_add(SuffixRemover(suffix))
    if args.prefix or args.suffix:
        pipeline_add(PrefixSuffixAdder(args.prefix, args.suffix))
    if args.zero_cap:
        pipeline_add(ZeroCapper(quality_base=args.quality_base))

    # Set filtering parameters
    # Minimum/maximum length
    for attr in 'minimum_length', 'maximum_length':
        param = getattr(args, attr)
        if param is not None:
            lengths = parse_lengths(param)
            if not paired and len(lengths) == 2:
                raise CommandLineError(
                    'Two minimum or maximum lengths given for single-end data')
            if paired and len(lengths) == 1:
                lengths = (lengths[0], lengths[0])
            setattr(pipeline, attr, lengths)
    pipeline.max_n = args.max_n
    pipeline.discard_casava = args.discard_casava
    pipeline.discard_trimmed = args.discard_trimmed
    pipeline.discard_untrimmed = args.discard_untrimmed

    return pipeline
Ejemplo n.º 5
0
def pipeline_from_parsed_args(args, paired, is_interleaved_output):
    """
    Setup a processing pipeline from parsed command-line arguments.

    If there are any problems parsing the arguments, a CommandLineError is raised.

    Return an instance of Pipeline (SingleEndPipeline or PairedEndPipeline)
    """
    check_arguments(args, paired, is_interleaved_output)
    if args.action == 'none':
        args.action = None

    adapter_parser = AdapterParser(
        max_error_rate=args.error_rate,
        min_overlap=args.overlap,
        read_wildcards=args.match_read_wildcards,
        adapter_wildcards=args.match_adapter_wildcards,
        indels=args.indels,
    )
    try:
        adapters = adapter_parser.parse_multi(args.adapters)
        adapters2 = adapter_parser.parse_multi(args.adapters2)
    except (FileNotFoundError, ValueError) as e:
        raise CommandLineError(e)
    warn_duplicate_adapters(adapters)
    warn_duplicate_adapters(adapters2)
    if args.debug:
        for adapter in adapters + adapters2:
            adapter.enable_debug()

    # Create the processing pipeline
    if paired:
        pair_filter_mode = 'any' if args.pair_filter is None else args.pair_filter
        pipeline = PairedEndPipeline(pair_filter_mode)
    else:
        pipeline = SingleEndPipeline()

    # When adapters are being trimmed only in R1 or R2, override the pair filter mode
    # as using the default of 'any' would regard all read pairs as untrimmed.
    if paired and (not adapters2
                   or not adapters) and (args.discard_untrimmed
                                         or args.untrimmed_output
                                         or args.untrimmed_paired_output):
        pipeline.override_untrimmed_pair_filter = True

    add_unconditional_cutters(pipeline, args.cut, args.cut2, paired)

    pipeline_add = pipeline.add_both if paired else pipeline.add

    if args.nextseq_trim is not None:
        pipeline_add(
            NextseqQualityTrimmer(args.nextseq_trim, args.quality_base))
    if args.quality_cutoff is not None:
        cutoffs = parse_cutoffs(args.quality_cutoff)
        pipeline_add(QualityTrimmer(cutoffs[0], cutoffs[1], args.quality_base))

    if args.pair_adapters:
        try:
            cutter = PairedAdapterCutter(adapters, adapters2, args.action)
        except PairedAdapterCutterError as e:
            raise CommandLineError("--pair-adapters: " + str(e))
        pipeline.add_paired_modifier(cutter)
    else:
        adapter_cutter, adapter_cutter2 = None, None
        if adapters:
            adapter_cutter = AdapterCutter(adapters, args.times, args.action)
        if adapters2:
            adapter_cutter2 = AdapterCutter(adapters2, args.times, args.action)
        if paired:
            if adapter_cutter or adapter_cutter2:
                pipeline.add(adapter_cutter, adapter_cutter2)
        else:
            if adapter_cutter:
                pipeline.add(adapter_cutter)

    for modifier in modifiers_applying_to_both_ends_if_paired(args):
        pipeline_add(modifier)

    # Set filtering parameters
    # Minimum/maximum length
    for attr in 'minimum_length', 'maximum_length':
        param = getattr(args, attr)
        if param is not None:
            lengths = parse_lengths(param)
            if not paired and len(lengths) == 2:
                raise CommandLineError(
                    'Two minimum or maximum lengths given for single-end data')
            if paired and len(lengths) == 1:
                lengths = (lengths[0], lengths[0])
            setattr(pipeline, attr, lengths)
    pipeline.max_n = args.max_n
    pipeline.discard_casava = args.discard_casava
    pipeline.discard_trimmed = args.discard_trimmed
    pipeline.discard_untrimmed = args.discard_untrimmed

    return pipeline
Ejemplo n.º 6
0
def pipeline_from_parsed_args(args, paired, is_interleaved_output):
    """
    Setup a processing pipeline from parsed command-line arguments.

    If there are any problems parsing the arguments, a CommandLineError is thrown.

    Return an instance of Pipeline (SingleEndPipeline or PairedEndPipeline)
    """
    check_arguments(args, paired, is_interleaved_output)
    if args.action == 'none':
        args.action = None

    adapter_parser = AdapterParser(
        max_error_rate=args.error_rate,
        min_overlap=args.overlap,
        read_wildcards=args.match_read_wildcards,
        adapter_wildcards=args.match_adapter_wildcards,
        indels=args.indels,
    )
    try:
        adapters = adapter_parser.parse_multi(args.adapters, args.anywhere, args.front)
        adapters2 = adapter_parser.parse_multi(args.adapters2, args.anywhere2, args.front2)
    except IOError as e:
        if e.errno == errno.ENOENT:
            raise CommandLineError(e)
        raise
    except ValueError as e:
        raise CommandLineError(e)
    if args.debug:
        for adapter in adapters + adapters2:
            adapter.enable_debug()

    # Create the processing pipeline
    if paired:
        pair_filter_mode = 'any' if args.pair_filter is None else args.pair_filter
        pipeline = PairedEndPipeline(pair_filter_mode)
    else:
        pipeline = SingleEndPipeline()

    # When adapters are being trimmed only in R1 or R2, override the pair filter mode
    # as using the default of 'any' would regard all read pairs as untrimmed.
    if paired and (not adapters2 or not adapters) and (
            args.discard_untrimmed or args.untrimmed_output or args.untrimmed_paired_output):
        pipeline.override_untrimmed_pair_filter = True

    for i, cut_arg in enumerate([args.cut, args.cut2]):
        # cut_arg is a list
        if not cut_arg:
            continue
        if len(cut_arg) > 2:
            raise CommandLineError("You cannot remove bases from more than two ends.")
        if len(cut_arg) == 2 and cut_arg[0] * cut_arg[1] > 0:
            raise CommandLineError("You cannot remove bases from the same end twice.")
        for c in cut_arg:
            if c == 0:
                continue
            if i == 0:  # R1
                if paired:
                    pipeline.add(UnconditionalCutter(c), None)
                else:
                    pipeline.add(UnconditionalCutter(c))
            else:
                # R2
                assert isinstance(pipeline, PairedEndPipeline)
                pipeline.add(None, UnconditionalCutter(c))

    pipeline_add = pipeline.add_both if paired else pipeline.add

    if args.nextseq_trim is not None:
        pipeline_add(NextseqQualityTrimmer(args.nextseq_trim, args.quality_base))
    if args.quality_cutoff is not None:
        cutoffs = parse_cutoffs(args.quality_cutoff)
        pipeline_add(QualityTrimmer(cutoffs[0], cutoffs[1], args.quality_base))

    if args.pair_adapters:
        if not paired:
            raise CommandLineError("Option --pair-adapters can only be used when trimming "
                "paired-end reads")
        if args.times != 1:
            raise CommandLineError("--pair-adapters cannot be used with --times")
        try:
            cutter = PairedAdapterCutter(adapters, adapters2, args.action)
        except PairedAdapterCutterError as e:
            raise CommandLineError("--pair-adapters: " + str(e))
        pipeline.add_paired_modifier(cutter)
    else:
        adapter_cutter, adapter_cutter2 = None, None
        if adapters:
            adapter_cutter = AdapterCutter(adapters, args.times, args.action)
        if adapters2:
            adapter_cutter2 = AdapterCutter(adapters2, args.times, args.action)
        if paired:
            if adapter_cutter or adapter_cutter2:
                pipeline.add(adapter_cutter, adapter_cutter2)
        else:
            if adapter_cutter:
                pipeline.add(adapter_cutter)

    # Remaining modifiers that apply to both reads of paired-end reads
    if args.length is not None:
        pipeline_add(Shortener(args.length))
    if args.trim_n:
        pipeline_add(NEndTrimmer())
    if args.length_tag:
        pipeline_add(LengthTagModifier(args.length_tag))
    for suffix in args.strip_suffix:
        pipeline_add(SuffixRemover(suffix))
    if args.prefix or args.suffix:
        pipeline_add(PrefixSuffixAdder(args.prefix, args.suffix))
    if args.zero_cap:
        pipeline_add(ZeroCapper(quality_base=args.quality_base))

    # Set filtering parameters
    # Minimum/maximum length
    for attr in 'minimum_length', 'maximum_length':
        param = getattr(args, attr)
        if param is not None:
            lengths = parse_lengths(param)
            if not paired and len(lengths) == 2:
                raise CommandLineError('Two minimum or maximum lengths given for single-end data')
            if paired and len(lengths) == 1:
                lengths = (lengths[0], lengths[0])
            setattr(pipeline, attr, lengths)
    pipeline.max_n = args.max_n
    pipeline.discard_casava = args.discard_casava
    pipeline.discard_trimmed = args.discard_trimmed
    pipeline.discard_untrimmed = args.discard_untrimmed

    return pipeline