def main():
    """Command-line entry point: drop replicate read pairs and write the rest.

    Read pairs are pulled from ``seq_io.get_iterator``. Each pair is keyed by
    the MD5 digest of its forward + reverse sequence (only the first 20 bases
    of each when ``--prefix`` is set) and handed to ``replicate_status`` to
    decide whether it duplicates a previously seen pair. With ``--rev-comp``
    the reverse-complemented pair is checked as well. Replicates are recorded
    in the optional log; surviving pairs are written in input order once all
    input has been consumed, and a summary is printed to stderr.
    """
    parser = argparse.ArgumentParser(description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('f_file', metavar='in1.fast<q|a>',
        help="input forward or interleaved reads [required]")
    input_arg = parser.add_mutually_exclusive_group(required=True)
    input_arg.add_argument('--interleaved',
        action='store_true',
        help="input is interleaved paired-end reads")
    input_arg.add_argument('r_file', metavar='in2.fast<q|a>', nargs='?',
        help="input reverse reads")
    parser.add_argument('-o', '--out', dest='out_f', metavar='FILE',
        type=seq_io.open_output, default=sys.stdout,
        help="output reads")
    output_arg = parser.add_mutually_exclusive_group(required=False)
    output_arg.add_argument('-v', '--out-reverse', metavar='FILE', dest='out_r',
        type=seq_io.open_output,
        help="output reverse reads")
    output_arg.add_argument('--out-interleaved', dest='out_interleaved',
        action='store_true',
        help="output interleaved paired-end reads, even if input is split")
    parser.add_argument('-f', '--out-format', metavar='FORMAT',
        dest='out_format',
        default='fastq',
        choices=['fasta', 'fastq'],
        help="output file format. Can be fasta or fastq. [default: fastq]")
    parser.add_argument('-l', '--log', metavar='LOG',
        type=seq_io.open_output,
        help="output log file to keep track of replicates")
    dup_args = parser.add_argument_group('replicate types')
    dup_args.add_argument('--prefix',
        action='store_true',
        help="replicate can be a 5' prefix of another read")
    dup_args.add_argument('--rev-comp', dest='rev_comp',
        action='store_true',
        help="replicate can be the reverse-complement of another read")
    parser.add_argument('--reduce-memory', dest='mem_use',
        action='store_true',
        help="reduce the amount of memory that the program uses. This could "
        "result in a drastic increase in run time.")
    parser.add_argument('--version',
        action='version',
        version='%(prog)s ' + __version__)
    args = parser.parse_args()
    all_args = sys.argv[1:]

    seq_io.program_info('filter_replicates', all_args, __version__)

    # Split input needs somewhere to put the reverse reads.
    if args.r_file and not (args.out_r or args.out_interleaved):
        parser.error("one of -v/--out-reverse or --out-interleaved is required "
            "when a reverse reads file (in2.fast<q|a>) is given")

    f_file = sys.stdin if args.f_file == '-' else args.f_file
    out_f = args.out_f
    iterator = seq_io.get_iterator(f_file, args.r_file, args.interleaved)

    seq_io.logger(args.log, "Replicate\tTemplate\tType\n")

    # NOTE(review): `self` is not defined in this function; presumably it is
    # a module-level pass-through callable defined elsewhere in the file --
    # confirm.
    compress = zlib.compress if args.mem_use else self
    decompress = zlib.decompress if args.mem_use else self

    writer = seq_io.fasta_writer if args.out_format == 'fasta' else \
        seq_io.fastq_writer

    seq_db = {}        # sequence digest -> indices of records with that key
    uniques = {}       # record index -> (sequence, forward length, quality, id)
    descriptions = {}  # record index -> (forward desc, reverse desc)
    i = -1             # stays at -1 when the iterator yields nothing
    for i, (forward, reverse) in enumerate(iterator):
        ident = forward['identifier']
        fdesc, rdesc = (forward['description'], reverse['description'])
        fseq, rseq = (forward['sequence'], reverse['sequence'])
        fqual, rqual = (forward['quality'], reverse['quality'])

        flen, rlen = len(fseq), len(rseq)

        uniques[i] = (fseq + rseq, flen, compress(fqual + rqual), ident)
        # Kept outside the `uniques` tuple so the layout that
        # replicate_status() reads is left untouched.
        descriptions[i] = (fdesc, rdesc)

        fsubsize, rsubsize = ((20, 20) if args.prefix else (flen, rlen))
        key = hashlib.md5(fseq[:fsubsize] + rseq[:rsubsize]).digest()

        dup_pos, temp_pos, dup_type = replicate_status(i, key, uniques, seq_db)

        # match to database found, so delete id from database of uniques
        # NOTE(review): a truthiness test skips a replicate at index 0 if
        # replicate_status() can return 0 as dup_pos -- confirm its contract.
        if dup_pos:
            seq_io.logger(args.log, "{}\t{}\t{}\n".format(uniques[dup_pos][3],
                uniques[temp_pos][3], dup_type))
            try:
                del uniques[dup_pos]
            except KeyError:
                seq_io.print_error("error: input file has more than one "
                    "sequence with the same identifier")
                sys.exit(1)
            descriptions.pop(dup_pos, None)
            continue

        # sequence is unique, so check reverse-complement if set
        if args.rev_comp:
            f_rc, r_rc = pairs.reverse_complement_paired(fseq, rseq)
            rckey = hashlib.md5(f_rc[:fsubsize] + r_rc[:rsubsize]).digest()
            dup_pos, temp_pos, dup_type = replicate_status(i, rckey, uniques,
                seq_db)
            if dup_pos:
                dup_type = 'rev-comp ' + dup_type
                seq_io.logger(args.log, "{}\t{}\t{}\n".format(
                    uniques[dup_pos][3], uniques[temp_pos][3], dup_type))
                try:
                    del uniques[dup_pos]
                except KeyError:
                    seq_io.print_error("error: input file has more than one "
                        "sequence with the same identifier")
                    sys.exit(1)
                descriptions.pop(dup_pos, None)
                continue

        # record is definitely not a duplicate, so add to database of ids to
        # check a match for
        seq_db.setdefault(key, []).append(i)

    num_pairs = i + 1
    if num_pairs == 0:
        seq_io.print_error("error: no sequences were found to process.")
        sys.exit(1)

    # Interleaved output shares the forward handle unless -v was given.
    out_r = out_f if ((args.interleaved or args.out_interleaved) and not \
        args.out_r) else args.out_r

    for index in sorted(uniques):
        record = uniques[index]
        ident = record[3]
        # Use the descriptions stored for this record, not whatever the last
        # input iteration happened to leave behind.
        fdesc, rdesc = descriptions[index]
        fseq, rseq = split_by_length(record[0], record[1])
        fqual, rqual = split_by_length(decompress(record[2]), record[1])
        writer(out_f, {'identifier': ident, 'description': fdesc,
            'sequence': fseq, 'quality': fqual})
        writer(out_r, {'identifier': ident, 'description': rdesc,
            'sequence': rseq, 'quality': rqual})

    num_reps = num_pairs - len(uniques)
    print("\nRead Pairs processed:\t{!s}\nReplicates found:\t{!s} "
        "({:.2%})\n".format(num_pairs, num_reps, num_reps / num_pairs),
        file=sys.stderr)
Example #2
0
def main():
    """Command-line entry point: partition reads into files by header barcode.

    The barcode is taken as the text after the last ':' in the forward
    record's description. Each record is written to an output file named
    after the sample mapped to that barcode in the optional barcodes file, or
    after the barcode itself when no mapping exists. Output files are opened
    lazily the first time a partition is seen. A summary is printed to stderr.
    """
    parser = argparse.ArgumentParser(description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('f_file', metavar='in1.fast<q|a>',
        help="input reads in fastq or fasta format. Can be a file containing "
        "either single-end or forward/interleaved reads if reads are "
        "paired-end [required]")
    input_arg = parser.add_mutually_exclusive_group(required=False)
    input_arg.add_argument('--interleaved',
        action='store_true',
        help="input is interleaved paired-end reads")
    input_arg.add_argument('r_file', metavar='in2.fast<q|a>', nargs='?',
        help="input reverse reads")
    parser.add_argument('-b', '--barcodes', metavar='FILE',
        type=seq_io.open_input,
        help="file containing sample names mapped to the appropriate barcode "
        "sequences, in tab-separated format, with sample names in the first "
        "column. If this argument is unused, the output files will be named "
        "for each barcode sequence found in the fasta/q file.")
    parser.add_argument('-s', '--suffix', metavar='STR',
        type=str,
        help="string to append to the end of the file name. The default is to "
        "append the file format (fastq or fasta) and the strand for PE data "
        "(forward, reverse, interleaved).")
    parser.add_argument('-f', '--out-format', metavar='FORMAT',
        dest='out_format',
        default='fastq',
        choices=['fasta', 'fastq'],
        help="output file format. Can be fasta or fastq. [default: fastq]")
    compress_arg = parser.add_mutually_exclusive_group(required=False)
    compress_arg.add_argument('--gzip',
        action='store_true',
        help="output files should be compressed using the gzip algorithm. The "
        "suffix '.gz'. will be appended to the file names.")
    compress_arg.add_argument('--bzip2',
        action='store_true',
        help="output files should be compressed using the bzip2 algorithm. The "
        "suffix '.bz2' will be appended to the file names.")
    parser.add_argument('--version',
        action='version',
        version='%(prog)s ' + __version__)

    args = parser.parse_args()
    all_args = sys.argv[1:]

    seq_io.program_info('demultiplex_headers', all_args, __version__)

    f_file = sys.stdin if args.f_file == '-' else args.f_file
    iterator = seq_io.get_iterator(f_file, args.r_file, args.interleaved)

    writer = seq_io.fasta_writer if args.out_format == 'fasta' else \
            seq_io.fastq_writer

    if args.gzip:
        compression = '.gz'
    elif args.bzip2:
        compression = '.bz2'
    else:
        compression = ''

    suffix = args.suffix if args.suffix else args.out_format

    # Barcode sequence -> sample name, from the optional mapping file.
    tags = {}
    if args.barcodes:
        names = set()
        for line in args.barcodes:
            try:
                name, tag = line.strip().split('\t')
            except ValueError:
                seq_io.print_error("error: barcode mapping file does not "
                    "appear to be formatted correctly")
            if name in names:
                seq_io.print_error("error: the same sample name is used for "
                    "more than one barcode sequence")
            else:
                names.add(name)

            tags[tag] = name

    # Partition name -> [forward handle, reverse handle].
    outfiles = {}
    i = -1  # stays at -1 when the iterator yields nothing
    for i, (forward, reverse) in enumerate(iterator):
        tag = forward['description'].split(':')[-1]
        if (not tag.isalpha()) or (len(tag) != 6):
            seq_io.print_error("error: unable to determine the format of the "
                "sequence headers")

        # Unmapped barcodes are partitioned under the barcode itself.
        name = tags.get(tag, str(tag))

        # Only the lookup is guarded, so a KeyError raised inside a writer is
        # not mistaken for "partition not opened yet".
        try:
            handle1, handle2 = outfiles[name]
        except KeyError:
            if args.r_file:
                handle1 = seq_io.open_output("{}.forward.{}{}".format(name,
                    suffix, compression))
                handle2 = seq_io.open_output("{}.reverse.{}{}".format(name,
                    suffix, compression))
            elif args.interleaved:
                handle1 = seq_io.open_output("{}.interleaved.{}{}".format(name,
                     suffix, compression))
                handle2 = handle1
            else:
                handle1 = seq_io.open_output("{}.{}{}".format(name, suffix,
                    compression))
                handle2 = ''
            outfiles[name] = [handle1, handle2]

        writer(handle1, forward)
        writer(handle2, reverse)

    num_records = i + 1
    if num_records == 0:
        seq_io.print_error("error: no sequences were found to process")

    num_parts = len(outfiles)
    print("\nRecords processed:\t{!s}\nNumber of partitions:\t{!s}\n".format(
        num_records, num_parts), file=sys.stderr)
Example #3
0
def main():
    """Command-line entry point: quality-trim single- or paired-end reads.

    Builds the ordered list of trimming steps from ``--trim-order`` (only
    steps whose option was supplied are kept), applies them to every record
    through ``apply_trimming`` together with the head-crop, crop and
    ``--trunc-n`` settings, and writes records whose trimmed length meets the
    minimum length. For paired input, a read whose mate was filtered is
    written to the singles output. Per-record lengths go to the optional log
    and a summary is printed to stderr.
    """
    parser = argparse.ArgumentParser(description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('f_file', metavar='in1.fastq',
        help="input reads in fastq format. Can be a file containing either "
        "single-end or forward/interleaved reads if reads are paired-end "
        "[required]")
    input_arg = parser.add_mutually_exclusive_group(required=False)
    input_arg.add_argument('--interleaved',
        action='store_true',
        help="input is interleaved paired-end reads")
    input_arg.add_argument('--force',
        action='store_true',
        help="force process as single-end reads even if input is interleaved "
        "paired-end reads")
    input_arg.add_argument('r_file', metavar='in2.fastq', nargs='?',
        help="input reverse reads in fastq format")
    parser.add_argument('-o', '--out', metavar='FILE', dest='out_f',
        type=seq_io.open_output, default=sys.stdout,
        help="output trimmed reads [required]")
    output_arg = parser.add_mutually_exclusive_group(required=False)
    output_arg.add_argument('-v', '--out-reverse', metavar='FILE', dest='out_r',
        type=seq_io.open_output,
        help="output trimmed reverse reads")
    output_arg.add_argument('--out-interleaved', dest='out_interleaved',
        action='store_true',
        help="output interleaved paired-end reads, even if input is split")
    parser.add_argument('-s', '--singles', metavar='FILE', dest='out_s',
        type=seq_io.open_output,
        help="output trimmed orphaned reads")
    parser.add_argument('-f', '--out-format', metavar='FORMAT',
        dest='out_format', default='fastq',
        choices=['fasta', 'fastq'],
        help="output files format (fastq or fasta) [default: fastq]")
    parser.add_argument('-l', '--log',
        type=seq_io.open_output,
        help="output log file to keep track of trimmed sequences")
    parser.add_argument('-q', '--qual-type', metavar='TYPE', dest='qual_type',
        type=int, default=33,
        choices=[33, 64],
        help="ASCII base quality score encoding [default: 33]. Options are "
            "33 (for phred33) or 64 (for phred64)")
    parser.add_argument('-m', '--min-len', metavar='LEN', dest='minlen',
        type=get_list, default='0',
        help="filter reads shorter than the minimum length threshold "
        "[default: 0,0]. Different values can be provided for the forward and "
        "reverse reads by separating them with a comma (e.g. 80,60)")
    trim_args = parser.add_argument_group('trimming options')
    trim_args.add_argument('-O', '--trim-order', metavar='ORDER',
        dest='trim_order',
        default='ltw',
        help="order of trimming steps [default: ltw (corresponds to leading, "
        "trailing, and sliding-window)]")
    trim_args.add_argument('-W', '--sliding-window', metavar='FRAME',
        dest='sw',
        type=parse_sw_arg,
        help="trim both 5' and 3' ends of a read using a sliding window "
        "approach. Input should be of the form 'window_size:qual_threshold', "
        "where 'qual_threshold' is an integer between 0 and 42 and "
        "'window_size' can either be length in bases or fraction of the total "
        "read length")
    trim_args.add_argument('-H', '--headcrop', metavar='INT,INT',
        type=get_list, default='0',
        help="remove exactly the number of bases specified from the start of "
        "the read. Different values can be provided for the forward and "
        "reverse reads by separating them with a comma (e.g. 2,0)")
    trim_args.add_argument('-C', '--crop', metavar='INT,INT',
        type=get_list, default='0',
        help="remove exactly the number of bases specified from the end of "
        "the read. Different values can be provided for the forward and "
        "reverse reads by separating them with a comma (e.g. 2,0)")
    trim_args.add_argument('-L', '--leading', metavar='SCORE',
        dest='lead_score',
        type=int,
        help="trim by removing low quality bases from the start of the read")
    trim_args.add_argument('-T', '--trailing', metavar='SCORE',
        dest='trail_score',
        type=int,
        help="trim by removing low quality bases from the end of the read")
    trim_args.add_argument('--trunc-n', dest='trunc_n',
        action='store_true',
        help="truncate sequence at the position of the first ambiguous base")
    parser.add_argument('--version',
        action='version',
        version='%(prog)s ' + __version__)
    args = parser.parse_args()
    all_args = sys.argv[1:]

    seq_io.program_info('qtrim', all_args, __version__)

    # Each of these options holds either one value (applied to both strands)
    # or a forward,reverse pair; unpacking fails with ValueError otherwise.
    try:
        fcrop, rcrop = args.crop
    except ValueError:
        fcrop = rcrop = args.crop[0]
    try:
        fheadcrop, rheadcrop = args.headcrop
    except ValueError:
        fheadcrop = rheadcrop = args.headcrop[0]
    try:
        fminlen, rminlen = args.minlen
    except ValueError:
        fminlen = rminlen = args.minlen[0]

    f_file = sys.stdin if args.f_file == '-' else args.f_file
    out_f = args.out_f
    iterator = seq_io.get_iterator(f_file, args.r_file, args.interleaved)

    # Split input needs somewhere to put the reverse reads.
    if args.r_file and not (args.out_r or args.out_interleaved):
        parser.error("one of -v/--out-reverse or --out-interleaved is required "
            "when a reverse reads file (in2.fastq) is given")

    # Step letter -> (trimming function, user-supplied setting).
    trim_tasks = {'l': (trim.trim_leading, args.lead_score),
        't': (trim.trim_trailing, args.trail_score),
        'w': (trim.adaptive_trim, args.sw)}

    # Reject unknown step letters up front instead of failing with KeyError.
    unknown_steps = sorted(set(args.trim_order) - set(trim_tasks))
    if unknown_steps:
        parser.error("argument -O/--trim-order: unknown trimming step(s): "
            "{}".format(", ".join(unknown_steps)))

    # Keep only the steps whose option was actually supplied.
    trim_steps = [trim_tasks[task] for task in args.trim_order
                  if trim_tasks[task][-1]]

    # args.crop / args.headcrop are sequences and therefore truthy even when
    # they only hold the default, so test the values they contain.
    # NOTE(review): assumes get_list yields numbers, so the default is falsy.
    # If it yields strings this guard still never fires (as before) -- confirm.
    crops_requested = any(args.crop) or any(args.headcrop)
    if not trim_steps and not (crops_requested or args.trunc_n):
        seq_io.print_error("error: no trimming steps were applied")

    writer = seq_io.fasta_writer if (args.out_format == 'fasta') else \
        seq_io.fastq_writer

    paired = bool(args.interleaved or args.r_file)

    if paired:
        print("\nProcessing input as paired-end reads", file=sys.stderr)
        seq_io.logger(args.log, "Record\tForward length\tForward trimmed "
            "length\tReverse length\tReverse trimmed length\n")

        out_s = args.out_s if args.out_s else None
        # Interleaved output shares the forward handle unless -v was given.
        out_r = out_f if ((args.interleaved or args.out_interleaved) and not \
            args.out_r) else args.out_r

        pairs_passed = discarded_pairs = fsingles = rsingles = 0
        i = -1  # stays at -1 when the iterator yields nothing
        for i, (forward, reverse) in enumerate(iterator):
            identifier = forward['identifier']
            forig = len(forward['sequence'])
            rorig = len(reverse['sequence'])

            forward = apply_trimming(forward, trim_steps, args.qual_type,
                fheadcrop, fcrop, args.trunc_n)
            ftrim = len(forward['sequence'])

            reverse = apply_trimming(reverse, trim_steps, args.qual_type,
                rheadcrop, rcrop, args.trunc_n)
            rtrim = len(reverse['sequence'])

            forward_ok = ftrim >= fminlen
            reverse_ok = rtrim >= rminlen

            if forward_ok and reverse_ok:
                # both good
                pairs_passed += 1
                writer(out_f, forward)
                writer(out_r, reverse)
            elif forward_ok:
                # forward orphaned, reverse filtered
                fsingles += 1
                writer(out_s, forward)
            elif reverse_ok:
                # reverse orphaned, forward filtered
                rsingles += 1
                writer(out_s, reverse)
            else:
                # both discarded
                discarded_pairs += 1

            seq_io.logger(args.log, "{}\t{}\t{}\t{}\t{}\n".format(identifier,
                forig, ftrim, rorig, rtrim))

        num_pairs = i + 1
        if num_pairs == 0:
            seq_io.print_error("error: no sequences were found to process")

        total = num_pairs * 2
        passed = pairs_passed * 2 + fsingles + rsingles
        print("\nRecords processed:\t{!s} ({!s} pairs)\nPassed filtering:\t"
            "{!s} ({:.2%})\n  Paired reads kept:\t{!s} ({:.2%})\n  Forward "
            "only kept:\t{!s} ({:.2%})\n  Reverse only kept:\t{!s} ({:.2%})"
            "\nRead pairs discarded:\t{!s} ({:.2%})\n".format(total, num_pairs,
            passed, passed / total, pairs_passed, pairs_passed / num_pairs,
            fsingles, fsingles / total, rsingles, rsingles / total,
            discarded_pairs, discarded_pairs / num_pairs), file=sys.stderr)

    else:
        print("\nProcessing input as single-end reads", file=sys.stderr)
        seq_io.logger(args.log, "Record\tLength\tTrimmed length\n")

        if args.out_s:
            print("\nwarning: argument --singles used with single-end reads"
                "... ignoring\n", file=sys.stderr)

        discarded = 0
        i = -1  # stays at -1 when the iterator yields nothing
        for i, record in enumerate(iterator):
            # Two leading records sharing an identifier suggest interleaved
            # paired-end input passed without --interleaved.
            if i == 0:
                first_read = record['identifier']
            elif i == 1:
                if first_read == record['identifier'] and not args.force:
                    seq_io.print_error("warning: the input fastq appears to "
                        "contain interleaved paired-end reads. Please run with "
                        "the --force flag to proceed with processing the data "
                        "as single-end reads")

            origlen = len(record['sequence'])
            record = apply_trimming(record, trim_steps, args.qual_type,
                fheadcrop, fcrop, args.trunc_n)
            trimlen = len(record['sequence'])

            if trimlen >= fminlen:
                writer(out_f, record)
            else:
                discarded += 1

            seq_io.logger(args.log, "{}\t{}\t{}\n".format(record['identifier'],
                origlen, trimlen))

        num_records = i + 1
        if num_records == 0:
            seq_io.print_error("error: no sequences were found to process. Is "
                "the input properly formatted?")

        passed = num_records - discarded
        print("\nRecords processed:\t{!s}\nPassed filtering:\t{!s} "
        "({:.2%})\nRecords discarded:\t{!s} ({:.2%})\n".format(num_records,
        passed, passed / num_records, discarded, discarded / num_records),
        file=sys.stderr)
Example #4
0
def main():
    """Command-line entry point: filter read pairs by expected error count.

    Each read is optionally cropped with ``crop_string``, its quality string
    is converted to integer scores using the ``--qual-type`` offset, and the
    selected error function returns an error estimate for it. A read passes
    when that estimate does not exceed its threshold: the fixed
    ``--max-errors`` value, or ``read length * --uncert`` when ``--uncert`` is
    supplied. Passing pairs go to the paired outputs, a passing read whose
    mate failed goes to the singles output, and failures are written to the
    optional log. A summary is printed to stderr.
    """
    parser = argparse.ArgumentParser(description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('f_file', metavar='in1.fastq',
        help="input reads file in fastq format [required]. Can be a file "
        "containing either forward of interleaved reads")
    input_arg = parser.add_mutually_exclusive_group(required=True)
    input_arg.add_argument('--interleaved',
        action='store_true',
        help="input is interleaved paired-end reads")
    input_arg.add_argument('r_file', metavar='in2.fastq', nargs='?',
        help="input reverse reads file in fastq format")
    parser.add_argument('-o', '--out', metavar='FILE', dest='out_f',
        type=seq_io.open_output, default=sys.stdout,
        help="output file for filtered reads [required]")
    output_arg = parser.add_mutually_exclusive_group(required=False)
    output_arg.add_argument('-v', '--out-reverse', metavar='FILE', dest='out_r',
        type=seq_io.open_output,
        help="output file for filtered reverse reads")
    output_arg.add_argument('--out-interleaved', dest='out_interleaved',
        action='store_true',
        help="output interleaved paired-end reads, even if input is split")
    parser.add_argument('-s', '--singles', metavar='FILE', dest='out_s',
        type=seq_io.open_output,
        help="output file for filtered orphan reads")
    parser.add_argument('-f', '--out-format', metavar='FORMAT',
        dest='out_format', default='fastq',
        choices=['fasta', 'fastq'],
        help="output files format [default: fastq]. Options are fastq or fasta")
    parser.add_argument('-l', '--log',
        type=seq_io.open_output,
        help="output log file")
    parser.add_argument('-q', '--qual-type', metavar='TYPE', dest='qual_type',
        type=int, default=33,
        choices=[33, 64],
        help="ASCII base quality score encoding [default: 33]. Options are "
            "33 (for phred33) or 64 (for phred64)")
    parser.add_argument('-a', '--alpha',
        type=float, default=0.005,
        help="probability of underestimating the actual number of errors in a "
            "sequence [default: 0.005]")
    parser.add_argument('-c', '--crop', metavar='LEN,LEN',
        type=get_list,
        help="trim read to size specified by removing bases from the end of "
            "the read")
    parser.add_argument('-d', '--headcrop', metavar='LEN,LEN',
        type=get_list,
        help="trim of bases from the start of the read")
    parser.add_argument('--ambig',
        action='store_true',
        help="remove sequences with ambiguous bases. Default is to treat "
            "ambiguous bases as errors")
    parser.add_argument('-e', '--error-calc',
        choices = ('poisson_binomial', 'poisson'), default="poisson_binomial",
        help="method to use for calculating the number of errors expected in "
        "a sequence [default: poisson_binomial]")
    # Both defaults are None so the code below can tell which mode the user
    # asked for; a truthy default on --max-errors made --uncert unreachable.
    filter_mode = parser.add_mutually_exclusive_group()
    filter_mode.add_argument('-m', '--max-errors', dest='maxerror',
        type=float, default=None,
        help="maximum number of errors allowed in a sequence [default: 1]")
    filter_mode.add_argument('-u', '--uncert',
        type=float, default=None,
        help="maximum divergence of the observed sequence from the template "
            "due to sequencing error [default: 0.01]")
    parser.add_argument('--version',
        action='version',
        version='%(prog)s ' + __version__)
    args = parser.parse_args()
    all_args = sys.argv[1:]

    seq_io.program_info('error_filter', all_args, __version__)

    # Crop options are unset (None), one value applied to both strands, or a
    # forward,reverse pair.
    if args.crop is None:
        fcrop = rcrop = None
    else:
        try:
            fcrop, rcrop = args.crop
        except ValueError:
            fcrop = rcrop = args.crop[0]
    if args.headcrop is None:
        fheadcrop = rheadcrop = None
    else:
        try:
            fheadcrop, rheadcrop = args.headcrop
        except ValueError:
            fheadcrop = rheadcrop = args.headcrop[0]

    f_file = sys.stdin if args.f_file == '-' else args.f_file
    iterator = seq_io.get_iterator(f_file, args.r_file, args.interleaved)
    out_f = args.out_f

    # Split input needs somewhere to put the reverse reads.
    if args.r_file and not (args.out_r or args.out_interleaved):
        parser.error("one of -v/--out-reverse or --out-interleaved is required "
            "when a reverse reads file (in2.fastq) is given")

    writer = seq_io.fasta_writer if (args.out_format == 'fasta') else \
        seq_io.fastq_writer

    # Interleaved output shares the forward handle unless -v was given.
    out_r = out_f if ((args.interleaved or args.out_interleaved) and not \
        args.out_r) else args.out_r

    out_s = args.out_s if args.out_s else None

    error_func = {'poisson_binomial': bernoulli.calculate_errors_PB,
                  'poisson': calculate_errors_poisson}
    calculate_errors = error_func[args.error_calc]

    # --uncert switches to a length-scaled threshold; otherwise the fixed
    # --max-errors value applies, falling back to its documented default.
    use_uncert = args.uncert is not None
    max_errors = 1.0 if args.maxerror is None else args.maxerror

    # NOTE(review): args.ambig and the second value returned by the error
    # function (fNs / rNs) are never used, so --ambig currently has no
    # effect -- confirm intended behaviour.
    pairs_passed = filtered_pairs = fsingles = rsingles = 0
    i = -1  # stays at -1 when the iterator yields nothing
    for i, (forward, reverse) in enumerate(iterator):
        forward = crop_string(forward, fcrop, fheadcrop)
        fheader = "{} {}".format(forward['identifier'], forward['description'])
        fquals = [ord(j) - args.qual_type for j in forward['quality']]
        flen = len(forward['sequence'])
        fee, fNs = calculate_errors(forward['sequence'], fquals, args.alpha)

        reverse = crop_string(reverse, rcrop, rheadcrop)
        rheader = "{} {}".format(reverse['identifier'], reverse['description'])
        rquals = [ord(j) - args.qual_type for j in reverse['quality']]
        rlen = len(reverse['sequence'])
        ree, rNs = calculate_errors(reverse['sequence'], rquals, args.alpha)

        if use_uncert:
            fthreshold = flen * args.uncert
            rthreshold = rlen * args.uncert
        else:
            fthreshold = rthreshold = max_errors

        forward_ok = fee <= fthreshold
        reverse_ok = ree <= rthreshold

        if forward_ok and reverse_ok:
            # both good
            pairs_passed += 1
            writer(out_f, forward)
            writer(out_r, reverse)
        elif forward_ok:
            # forward orphaned, reverse filtered
            fsingles += 1
            writer(out_s, forward)
            seq_io.logger(args.log, "{}\terrors={!s}\n".format(rheader, ree))
        elif reverse_ok:
            # reverse orphaned, forward filtered
            rsingles += 1
            writer(out_s, reverse)
            seq_io.logger(args.log, "{}\terrors={!s}\n".format(fheader, fee))
        else:
            # both discarded
            filtered_pairs += 1
            seq_io.logger(args.log, "{}\terrors={!s}\n{}\terrors={!s}\n".format(
                fheader, fee, rheader, ree))

    num_pairs = i + 1
    if num_pairs == 0:
        seq_io.print_error("error: no sequences were found to process")

    total = num_pairs * 2
    passed = pairs_passed * 2 + fsingles + rsingles
    print("\nRecords processed:\t{!s} ({!s} pairs)\nPassed filtering:\t"
        "{!s} ({:.2%})\n  Paired reads kept:\t{!s} ({:.2%})\n  Forward "
        "only kept:\t{!s} ({:.2%})\n  Reverse only kept:\t{!s} ({:.2%})"
        "\nRead pairs discarded:\t{!s} ({:.2%})\n".format(total, num_pairs,
        passed, passed / total, pairs_passed, pairs_passed / num_pairs,
        fsingles, fsingles / total, rsingles, rsingles / total,
        filtered_pairs, filtered_pairs / num_pairs), file=sys.stderr)