def test_paired_adapter_cutter_actions(action, expected_trimmed1, expected_trimmed2):
    """Run a PairedAdapterCutter with the given action and check both reads.

    One back adapter is used per read. After the cutter has processed the
    read pair, the remaining sequence of R1 must equal ``expected_trimmed1``
    and that of R2 must equal ``expected_trimmed2``.

    NOTE(review): the three arguments are presumably supplied by a
    parametrize decorator that is not visible here -- confirm.
    """
    adapter_r1 = BackAdapter("GGTTAA")
    adapter_r2 = BackAdapter("AACCGG")
    read_r1 = Sequence("name", "CCCCGGTTAACCCC")
    read_r2 = Sequence("name", "TTTTAACCGGTTTT")
    cutter = PairedAdapterCutter([adapter_r1], [adapter_r2], action=action)

    # Each read gets its own ModificationInfo built from that read
    infos = (ModificationInfo(read_r1), ModificationInfo(read_r2))
    result_r1, result_r2 = cutter(read_r1, read_r2, *infos)

    assert expected_trimmed1 == result_r1.sequence
    assert expected_trimmed2 == result_r2.sequence
def test_paired_adapter_cutter_actions(action, expected_trimmed1, expected_trimmed2):
    """Run a PairedAdapterCutter with the given action and check both reads.

    One ``SingleAdapter`` with ``where=Where.BACK`` is used per read. After
    the cutter has processed the read pair, the remaining sequence of R1 must
    equal ``expected_trimmed1`` and that of R2 must equal
    ``expected_trimmed2``.

    NOTE(review): the three arguments are presumably supplied by a
    parametrize decorator that is not visible here -- confirm.
    """
    from cutadapt.adapters import SingleAdapter, Where

    adapter_r1 = SingleAdapter("GGTTAA", where=Where.BACK)
    adapter_r2 = SingleAdapter("AACCGG", where=Where.BACK)
    read_r1 = Sequence("name", "CCCCGGTTAACCCC")
    read_r2 = Sequence("name", "TTTTAACCGGTTTT")
    cutter = PairedAdapterCutter([adapter_r1], [adapter_r2], action=action)

    # Each read gets its own, argument-less ModificationInfo
    infos = (ModificationInfo(), ModificationInfo())
    result_r1, result_r2 = cutter(read_r1, read_r2, *infos)

    assert expected_trimmed1 == result_r1.sequence
    assert expected_trimmed2 == result_r2.sequence
def add_adapter_cutter(
    pipeline,
    adapters,
    adapters2,
    paired: bool,
    pair_adapters: bool,
    action: Optional[str],
    times: int,
    reverse_complement: bool,
    add_rc_suffix: bool,
    allow_index: bool,
):
    """
    Register the adapter-trimming modifier(s) on the given pipeline.

    Three cases are distinguished:

    * pair_adapters: a single PairedAdapterCutter built from both adapter
      lists is added with pipeline.add_paired_modifier().
    * paired: an AdapterCutter is created for each non-empty adapter list
      and both (either may be None) are passed to pipeline.add(), unless
      both are falsy.
    * single-end: the AdapterCutter for `adapters` is added, wrapped in a
      ReverseComplementer if reverse_complement is set. add_rc_suffix
      selects whether rc_suffix is " rc" or None.

    Nothing is added when there is no applicable adapter cutter.

    Raises CommandLineError if reverse_complement is combined with
    pair_adapters or paired, if PairedAdapterCutter raises
    PairedAdapterCutterError, or if AdapterCutter raises ValueError.
    """
    if pair_adapters:
        if reverse_complement:
            raise CommandLineError("Cannot use --revcomp with --pair-adapters")
        try:
            paired_cutter = PairedAdapterCutter(adapters, adapters2, action)
        except PairedAdapterCutterError as err:
            raise CommandLineError("--pair-adapters: " + str(err))
        pipeline.add_paired_modifier(paired_cutter)
        return

    # Independent cutters for R1 and R2; None means "nothing to trim"
    try:
        cutter_r1 = (
            AdapterCutter(adapters, times, action, allow_index) if adapters else None
        )
        cutter_r2 = (
            AdapterCutter(adapters2, times, action, allow_index) if adapters2 else None
        )
    except ValueError as err:
        raise CommandLineError(err)

    if paired:
        if reverse_complement:
            raise CommandLineError("--revcomp not implemented for paired-end reads")
        if cutter_r1 or cutter_r2:
            pipeline.add(cutter_r1, cutter_r2)
        return

    if not cutter_r1:
        return

    modifier = cutter_r1  # type: Union[AdapterCutter,ReverseComplementer]
    if reverse_complement:
        rc_suffix = " rc" if add_rc_suffix else None
        modifier = ReverseComplementer(cutter_r1, rc_suffix=rc_suffix)
    pipeline.add(modifier)
def pipeline_from_parsed_args(args, paired, is_interleaved_output):
    """
    Build the processing pipeline described by the parsed command line.

    The option combinations are validated first and a CommandLineError is
    raised for any inconsistency. Modifiers are then registered in this
    order: unconditional cutters, NextSeq quality trimming, quality
    trimming, adapter trimming, and finally the modifiers that apply to
    both reads of a pair. At the end, the filtering settings are copied
    from args onto the pipeline.

    Side effect: args.action is rewritten in place ('none' becomes None).

    Return a PairedEndPipeline if paired is true, else a SingleEndPipeline.
    """
    # Validation of output-file options
    if not paired and args.untrimmed_paired_output:
        raise CommandLineError(
            "Option --untrimmed-paired-output can only be used when "
            "trimming paired-end reads (with option -p).")

    if paired:
        if not is_interleaved_output:
            if not args.paired_output:
                raise CommandLineError(
                    "When a paired-end trimming option such as -A/-G/-B/-U, "
                    "is used, a second output file needs to be specified via -p (--paired-output).")
            if not args.output:
                raise CommandLineError(
                    "When you use -p or --paired-output, you must also "
                    "use the -o option.")

        if bool(args.untrimmed_output) != bool(args.untrimmed_paired_output):
            raise CommandLineError(
                "When trimming paired-end reads, you must use either none "
                "or both of the --untrimmed-output/--untrimmed-paired-output options.")
        if args.too_short_output and not args.too_short_paired_output:
            raise CommandLineError(
                "When using --too-short-output with paired-end "
                "reads, you also need to use --too-short-paired-output")
        if args.too_long_output and not args.too_long_paired_output:
            raise CommandLineError(
                "When using --too-long-output with paired-end "
                "reads, you also need to use --too-long-paired-output")

    # Validation of scalar options
    if args.format is not None:
        logger.warning(
            "Option --format is deprecated and ignored because the input file format is "
            "always auto-detected")
    if not (0 <= args.error_rate < 1.):
        raise CommandLineError(
            "The maximum error rate must be at least 0 and less than 1.")
    if args.overlap < 1:
        raise CommandLineError("The overlap must be at least 1.")
    if not (0 <= args.gc_content <= 100):
        raise CommandLineError(
            "GC content must be given as percentage between 0 and 100")

    if args.action == 'none':
        args.action = None

    # Parse the adapter specifications for R1 and R2
    parser = AdapterParser(
        max_error_rate=args.error_rate,
        min_overlap=args.overlap,
        read_wildcards=args.match_read_wildcards,
        adapter_wildcards=args.match_adapter_wildcards,
        indels=args.indels,
    )
    try:
        adapters = parser.parse_multi(args.adapters, args.anywhere, args.front)
        adapters2 = parser.parse_multi(args.adapters2, args.anywhere2, args.front2)
    except IOError as err:
        # Only a missing file is reported as a command-line problem
        if err.errno != errno.ENOENT:
            raise
        raise CommandLineError(err)
    except ValueError as err:
        raise CommandLineError(err)

    if args.debug:
        for adapter in adapters + adapters2:
            adapter.enable_debug()

    # Create the processing pipeline
    if paired:
        if args.pair_filter is None:
            filter_mode = 'any'
        else:
            filter_mode = args.pair_filter
        pipeline = PairedEndPipeline(filter_mode)
    else:
        pipeline = SingleEndPipeline()

    # When adapters are being trimmed only in R1 or R2, override the pair filter mode
    # as using the default of 'any' would regard all read pairs as untrimmed.
    if paired and (not adapters2 or not adapters):
        if args.discard_untrimmed or args.untrimmed_output or args.untrimmed_paired_output:
            pipeline.override_untrimmed_pair_filter = True

    # Unconditional cutting: index 0 is R1 (args.cut), index 1 is R2 (args.cut2)
    for read_index, cut_lengths in enumerate((args.cut, args.cut2)):
        if not cut_lengths:
            continue
        if len(cut_lengths) > 2:
            raise CommandLineError(
                "You cannot remove bases from more than two ends.")
        if len(cut_lengths) == 2 and cut_lengths[0] * cut_lengths[1] > 0:
            # Same sign on both values means the same end was given twice
            raise CommandLineError(
                "You cannot remove bases from the same end twice.")
        for length in cut_lengths:
            if length == 0:
                continue
            if read_index != 0:
                # R2
                assert isinstance(pipeline, PairedEndPipeline)
                pipeline.add(None, UnconditionalCutter(length))
            elif paired:
                pipeline.add(UnconditionalCutter(length), None)
            else:
                pipeline.add(UnconditionalCutter(length))

    if paired:
        pipeline_add = pipeline.add_both
    else:
        pipeline_add = pipeline.add

    # Quality trimming
    if args.nextseq_trim is not None:
        pipeline_add(
            NextseqQualityTrimmer(args.nextseq_trim, args.quality_base))
    if args.quality_cutoff is not None:
        quality_cutoffs = parse_cutoffs(args.quality_cutoff)
        pipeline_add(
            QualityTrimmer(quality_cutoffs[0], quality_cutoffs[1], args.quality_base))

    # Adapter trimming
    if args.pair_adapters:
        if not paired:
            raise CommandLineError(
                "Option --pair-adapters can only be used when trimming "
                "paired-end reads")
        if args.times != 1:
            raise CommandLineError(
                "--pair-adapters cannot be used with --times")
        try:
            paired_cutter = PairedAdapterCutter(adapters, adapters2, args.action)
        except PairedAdapterCutterError as err:
            raise CommandLineError("--pair-adapters: " + str(err))
        pipeline.add_paired_modifier(paired_cutter)
    else:
        cutter_r1 = AdapterCutter(adapters, args.times, args.action) if adapters else None
        cutter_r2 = AdapterCutter(adapters2, args.times, args.action) if adapters2 else None
        if paired:
            if cutter_r1 or cutter_r2:
                pipeline.add(cutter_r1, cutter_r2)
        elif cutter_r1:
            pipeline.add(cutter_r1)

    # Remaining modifiers that apply to both reads of paired-end reads
    if args.length is not None:
        pipeline_add(Shortener(args.length))
    if args.trim_n:
        pipeline_add(NEndTrimmer())
    if args.length_tag:
        pipeline_add(LengthTagModifier(args.length_tag))
    for suffix in args.strip_suffix:
        pipeline_add(SuffixRemover(suffix))
    if args.prefix or args.suffix:
        pipeline_add(PrefixSuffixAdder(args.prefix, args.suffix))
    if args.zero_cap:
        pipeline_add(ZeroCapper(quality_base=args.quality_base))

    # Set filtering parameters
    # Minimum/maximum length
    for attr_name in ('minimum_length', 'maximum_length'):
        raw_value = getattr(args, attr_name)
        if raw_value is None:
            continue
        lengths = parse_lengths(raw_value)
        if not paired and len(lengths) == 2:
            raise CommandLineError(
                'Two minimum or maximum lengths given for single-end data')
        if paired and len(lengths) == 1:
            # A single value applies to both reads
            lengths = (lengths[0], lengths[0])
        setattr(pipeline, attr_name, lengths)

    # Filter settings that are copied over unchanged
    for option in ('max_n', 'discard_casava', 'discard_trimmed', 'discard_untrimmed'):
        setattr(pipeline, option, getattr(args, option))

    return pipeline
def pipeline_from_parsed_args(args, paired, is_interleaved_output):
    """
    Build the processing pipeline described by the parsed command line.

    check_arguments() is called first. Adapter specifications are then
    parsed; a FileNotFoundError or ValueError from parsing is re-raised as
    CommandLineError. Modifiers are registered in this order: unconditional
    cutters, NextSeq quality trimming, quality trimming, adapter trimming,
    and the modifiers returned by modifiers_applying_to_both_ends_if_paired().
    At the end, the filtering settings are copied from args onto the pipeline.

    Side effect: args.action is rewritten in place ('none' becomes None).

    Return a PairedEndPipeline if paired is true, else a SingleEndPipeline.
    """
    check_arguments(args, paired, is_interleaved_output)

    if args.action == 'none':
        args.action = None

    # Parse the adapter specifications for R1 and R2
    parser = AdapterParser(
        max_error_rate=args.error_rate,
        min_overlap=args.overlap,
        read_wildcards=args.match_read_wildcards,
        adapter_wildcards=args.match_adapter_wildcards,
        indels=args.indels,
    )
    try:
        adapters = parser.parse_multi(args.adapters)
        adapters2 = parser.parse_multi(args.adapters2)
    except (FileNotFoundError, ValueError) as err:
        raise CommandLineError(err)

    for adapter_list in (adapters, adapters2):
        warn_duplicate_adapters(adapter_list)

    if args.debug:
        for adapter in adapters + adapters2:
            adapter.enable_debug()

    # Create the processing pipeline
    if paired:
        if args.pair_filter is None:
            filter_mode = 'any'
        else:
            filter_mode = args.pair_filter
        pipeline = PairedEndPipeline(filter_mode)
    else:
        pipeline = SingleEndPipeline()

    # When adapters are being trimmed only in R1 or R2, override the pair filter mode
    # as using the default of 'any' would regard all read pairs as untrimmed.
    if paired and (not adapters2 or not adapters):
        if args.discard_untrimmed or args.untrimmed_output or args.untrimmed_paired_output:
            pipeline.override_untrimmed_pair_filter = True

    add_unconditional_cutters(pipeline, args.cut, args.cut2, paired)

    if paired:
        pipeline_add = pipeline.add_both
    else:
        pipeline_add = pipeline.add

    # Quality trimming
    if args.nextseq_trim is not None:
        pipeline_add(
            NextseqQualityTrimmer(args.nextseq_trim, args.quality_base))
    if args.quality_cutoff is not None:
        quality_cutoffs = parse_cutoffs(args.quality_cutoff)
        pipeline_add(
            QualityTrimmer(quality_cutoffs[0], quality_cutoffs[1], args.quality_base))

    # Adapter trimming
    if args.pair_adapters:
        try:
            paired_cutter = PairedAdapterCutter(adapters, adapters2, args.action)
        except PairedAdapterCutterError as err:
            raise CommandLineError("--pair-adapters: " + str(err))
        pipeline.add_paired_modifier(paired_cutter)
    else:
        cutter_r1 = AdapterCutter(adapters, args.times, args.action) if adapters else None
        cutter_r2 = AdapterCutter(adapters2, args.times, args.action) if adapters2 else None
        if paired:
            if cutter_r1 or cutter_r2:
                pipeline.add(cutter_r1, cutter_r2)
        elif cutter_r1:
            pipeline.add(cutter_r1)

    for modifier in modifiers_applying_to_both_ends_if_paired(args):
        pipeline_add(modifier)

    # Set filtering parameters
    # Minimum/maximum length
    for attr_name in ('minimum_length', 'maximum_length'):
        raw_value = getattr(args, attr_name)
        if raw_value is None:
            continue
        lengths = parse_lengths(raw_value)
        if not paired and len(lengths) == 2:
            raise CommandLineError(
                'Two minimum or maximum lengths given for single-end data')
        if paired and len(lengths) == 1:
            # A single value applies to both reads
            lengths = (lengths[0], lengths[0])
        setattr(pipeline, attr_name, lengths)

    # Filter settings that are copied over unchanged
    for option in ('max_n', 'discard_casava', 'discard_trimmed', 'discard_untrimmed'):
        setattr(pipeline, option, getattr(args, option))

    return pipeline
def pipeline_from_parsed_args(args, paired, is_interleaved_output):
    """
    Build the processing pipeline described by the parsed command line.

    check_arguments() is called first. Adapter specifications are then
    parsed; a ValueError, or an IOError whose errno is ENOENT, is re-raised
    as CommandLineError. Modifiers are registered in this order:
    unconditional cutters, NextSeq quality trimming, quality trimming,
    adapter trimming, and finally the modifiers that apply to both reads of
    a pair. At the end, the filtering settings are copied from args onto
    the pipeline.

    Side effect: args.action is rewritten in place ('none' becomes None).

    Return a PairedEndPipeline if paired is true, else a SingleEndPipeline.
    """
    check_arguments(args, paired, is_interleaved_output)

    if args.action == 'none':
        args.action = None

    # Parse the adapter specifications for R1 and R2
    parser = AdapterParser(
        max_error_rate=args.error_rate,
        min_overlap=args.overlap,
        read_wildcards=args.match_read_wildcards,
        adapter_wildcards=args.match_adapter_wildcards,
        indels=args.indels,
    )
    try:
        adapters = parser.parse_multi(args.adapters, args.anywhere, args.front)
        adapters2 = parser.parse_multi(args.adapters2, args.anywhere2, args.front2)
    except IOError as err:
        # Only a missing file is reported as a command-line problem
        if err.errno != errno.ENOENT:
            raise
        raise CommandLineError(err)
    except ValueError as err:
        raise CommandLineError(err)

    if args.debug:
        for adapter in adapters + adapters2:
            adapter.enable_debug()

    # Create the processing pipeline
    if paired:
        if args.pair_filter is None:
            filter_mode = 'any'
        else:
            filter_mode = args.pair_filter
        pipeline = PairedEndPipeline(filter_mode)
    else:
        pipeline = SingleEndPipeline()

    # When adapters are being trimmed only in R1 or R2, override the pair filter mode
    # as using the default of 'any' would regard all read pairs as untrimmed.
    if paired and (not adapters2 or not adapters):
        if args.discard_untrimmed or args.untrimmed_output or args.untrimmed_paired_output:
            pipeline.override_untrimmed_pair_filter = True

    # Unconditional cutting: index 0 is R1 (args.cut), index 1 is R2 (args.cut2)
    for read_index, cut_lengths in enumerate((args.cut, args.cut2)):
        if not cut_lengths:
            continue
        if len(cut_lengths) > 2:
            raise CommandLineError("You cannot remove bases from more than two ends.")
        if len(cut_lengths) == 2 and cut_lengths[0] * cut_lengths[1] > 0:
            # Same sign on both values means the same end was given twice
            raise CommandLineError("You cannot remove bases from the same end twice.")
        for length in cut_lengths:
            if length == 0:
                continue
            if read_index != 0:
                # R2
                assert isinstance(pipeline, PairedEndPipeline)
                pipeline.add(None, UnconditionalCutter(length))
            elif paired:
                pipeline.add(UnconditionalCutter(length), None)
            else:
                pipeline.add(UnconditionalCutter(length))

    if paired:
        pipeline_add = pipeline.add_both
    else:
        pipeline_add = pipeline.add

    # Quality trimming
    if args.nextseq_trim is not None:
        pipeline_add(NextseqQualityTrimmer(args.nextseq_trim, args.quality_base))
    if args.quality_cutoff is not None:
        quality_cutoffs = parse_cutoffs(args.quality_cutoff)
        pipeline_add(
            QualityTrimmer(quality_cutoffs[0], quality_cutoffs[1], args.quality_base))

    # Adapter trimming
    if args.pair_adapters:
        if not paired:
            raise CommandLineError("Option --pair-adapters can only be used when trimming "
                "paired-end reads")
        if args.times != 1:
            raise CommandLineError("--pair-adapters cannot be used with --times")
        try:
            paired_cutter = PairedAdapterCutter(adapters, adapters2, args.action)
        except PairedAdapterCutterError as err:
            raise CommandLineError("--pair-adapters: " + str(err))
        pipeline.add_paired_modifier(paired_cutter)
    else:
        cutter_r1 = AdapterCutter(adapters, args.times, args.action) if adapters else None
        cutter_r2 = AdapterCutter(adapters2, args.times, args.action) if adapters2 else None
        if paired:
            if cutter_r1 or cutter_r2:
                pipeline.add(cutter_r1, cutter_r2)
        elif cutter_r1:
            pipeline.add(cutter_r1)

    # Remaining modifiers that apply to both reads of paired-end reads
    if args.length is not None:
        pipeline_add(Shortener(args.length))
    if args.trim_n:
        pipeline_add(NEndTrimmer())
    if args.length_tag:
        pipeline_add(LengthTagModifier(args.length_tag))
    for suffix in args.strip_suffix:
        pipeline_add(SuffixRemover(suffix))
    if args.prefix or args.suffix:
        pipeline_add(PrefixSuffixAdder(args.prefix, args.suffix))
    if args.zero_cap:
        pipeline_add(ZeroCapper(quality_base=args.quality_base))

    # Set filtering parameters
    # Minimum/maximum length
    for attr_name in ('minimum_length', 'maximum_length'):
        raw_value = getattr(args, attr_name)
        if raw_value is None:
            continue
        lengths = parse_lengths(raw_value)
        if not paired and len(lengths) == 2:
            raise CommandLineError('Two minimum or maximum lengths given for single-end data')
        if paired and len(lengths) == 1:
            # A single value applies to both reads
            lengths = (lengths[0], lengths[0])
        setattr(pipeline, attr_name, lengths)

    # Filter settings that are copied over unchanged
    for option in ('max_n', 'discard_casava', 'discard_trimmed', 'discard_untrimmed'):
        setattr(pipeline, option, getattr(args, option))

    return pipeline