Esempio n. 1
0
def get_parser():
    epilog = """
    The output is one file for each input file, <input file>.abundtrim, placed
    in the current directory.  This output contains the input sequences
    trimmed at low-abundance k-mers.

    The ``-V/--variable-coverage`` parameter will, if specified,
    prevent elimination of low-abundance reads by only trimming
    low-abundance k-mers from high-abundance reads; use this for
    non-genomic data sets that may have variable coverage.

    Note that the output reads will not necessarily be in the same order
    as the reads in the input files; if this is an important consideration,
    use ``load-into-counting.py`` and ``filter-abund.py``.  However, read
    pairs will be kept together, in "broken-paired" format; you can use
    ``extract-paired-reads.py`` to extract read pairs and orphans.

    Example::

        trim-low-abund.py -x 5e7 -k 20 -C 2 data/100k-filtered.fa
    """

    parser = build_counting_args(
        descr='Trim low-abundance k-mers using a streaming algorithm.',
        epilog=textwrap.dedent(epilog))

    parser.add_argument('input_filenames', nargs='+')

    parser.add_argument('--cutoff', '-C', type=int,
                        help='remove k-mers below this abundance',
                        default=DEFAULT_CUTOFF)

    parser.add_argument('--normalize-to', '-Z', type=int,
                        help='base cutoff on this median k-mer abundance',
                        default=DEFAULT_NORMALIZE_LIMIT)

    parser.add_argument('-o', '--output', metavar="output_filename",
                        type=argparse.FileType('wb'),
                        help='only output a single file with '
                        'the specified filename; use a single dash "-" to '
                        'specify that output should go to STDOUT (the '
                        'terminal)')

    parser.add_argument('--variable-coverage', '-V', action='store_true',
                        default=False,
                        help='Only trim low-abundance k-mers from sequences '
                        'that have high coverage.')

    add_loadgraph_args(parser)
    parser.add_argument('-s', '--savegraph', metavar="filename", default='',
                        help='save the k-mer countgraph to disk after all'
                        'reads are loaded.')

    # expert options
    parser.add_argument('--force', default=False, action='store_true')
    parser.add_argument('--ignore-pairs', default=False, action='store_true')
    parser.add_argument('--tempdir', '-T', type=str, default='./')
    add_output_compression_type(parser)

    return parser
Esempio n. 2
0
def get_parser():
    epilog = """
    The output is one file for each input file, <input file>.corr, placed
    in the current directory.  This output contains the input sequences,
    corrected at low-abundance k-mers.

    Note that the output reads will not necessarily be in the same
    order as the reads in the input files. However, read pairs will be
    kept together, in "broken-paired" format; you can use
    ``extract-paired-reads.py`` to extract read pairs and orphans.

    Example::

        correct-reads.py -x 5e7 -k 20 -C 2 data/100k-filtered.fa
    """

    parser = build_counting_args(
        descr='Correct reads using a semi-streaming algorithm.',
        epilog=textwrap.dedent(epilog))

    parser.add_argument('input_filenames', nargs='+')

    parser.add_argument('--cutoff', '-C', type=int,
                        help='k-mers below this abundance are not trusted',
                        default=DEFAULT_CUTOFF)

    parser.add_argument('--normalize-to', '-Z', type=int,
                        help='base cutoff on this median k-mer abundance',
                        default=DEFAULT_NORMALIZE_LIMIT)

    parser.add_argument('-o', '--out', metavar="filename",
                        type=argparse.FileType('w'),
                        default=None, help='only output a single file with '
                        'the specified filename; use a single dash "-" to '
                        'specify that output should go to STDOUT (the '
                        'terminal)')

    parser.add_argument('--variable-coverage', '-V', action='store_true',
                        default=False,
                        help='Only correct sequences that have high coverage.')

    add_loadgraph_args(parser)
    parser.add_argument('-s', '--savegraph', metavar="filename", default='',
                        help='save the k-mer countgraph to disk after all'
                        'reads are loaded.')

    # expert options
    parser.add_argument('--force', default=False, action='store_true')
    parser.add_argument('--ignore-pairs', default=False, action='store_true')
    parser.add_argument('--tempdir', '-T', type=str, default='./')
    parser.add_argument("--theta", dest="bits_theta", type=float, default=1.0)

    return parser
Esempio n. 3
0
def get_parser():
    epilog = """
    The output is one file for each input file, <input file>.corr, placed
    in the current directory.  This output contains the input sequences,
    corrected at low-abundance k-mers.

    Note that the output reads will not necessarily be in the same
    order as the reads in the input files. However, read pairs will be
    kept together, in "broken-paired" format; you can use
    ``extract-paired-reads.py`` to extract read pairs and orphans.

    Example::

        correct-reads.py -x 5e7 -k 20 -C 2 data/100k-filtered.fa
    """

    parser = build_counting_args(
        descr='Correct reads using a semi-streaming algorithm.',
        epilog=textwrap.dedent(epilog))

    parser.add_argument('input_filenames', nargs='+')

    parser.add_argument('--cutoff', '-C', type=int,
                        help='k-mers below this abundance are not trusted',
                        default=DEFAULT_CUTOFF)

    parser.add_argument('--normalize-to', '-Z', type=int,
                        help='base cutoff on this median k-mer abundance',
                        default=DEFAULT_NORMALIZE_LIMIT)

    parser.add_argument('-o', '--out', metavar="filename",
                        type=argparse.FileType('w'),
                        default=None, help='only output a single file with '
                        'the specified filename; use a single dash "-" to '
                        'specify that output should go to STDOUT (the '
                        'terminal)')

    parser.add_argument('--variable-coverage', '-V', action='store_true',
                        default=False,
                        help='Only correct sequences that have high coverage.')

    add_loadgraph_args(parser)
    parser.add_argument('-s', '--savegraph', metavar="filename", default='',
                        help='save the k-mer countgraph to disk after all'
                        'reads are loaded.')

    # expert options
    parser.add_argument('--force', default=False, action='store_true')
    parser.add_argument('--ignore-pairs', default=False, action='store_true')
    parser.add_argument('--tempdir', '-T', type=str, default='./')
    parser.add_argument("--theta", dest="bits_theta", type=float, default=1.0)

    return parser
Esempio n. 4
0
def get_parser():
    epilog = """\
    Discard sequences based on whether or not their median k-mer abundance lies
    above a specified cutoff. Kept sequences will be placed in <fileN>.keep.

    By default, paired end reads will be considered together; if
    either read should be kept, both will be kept. (This keeps both
    reads from a fragment, and helps with retention of repeats.)
    Unpaired reads are treated individually.

    If :option:`-p`/:option:`--paired` is set, then proper pairing is required
    and the script will exit on unpaired reads, although
    :option:`--unpaired-reads` can be used to supply a file of orphan
    reads to be read after the paired reads.

    :option:`--force_single` will ignore all pairing information and treat
    reads individually.

    With :option:`-s`/:option:`--savegraph`, the k-mer countgraph
    will be saved to the specified file after all sequences have been
    processed. :option:`-l`/:option:`--loadgraph` will load the
    specified k-mer countgraph before processing the specified
    files.  Note that these graphs are are in the same format as those
    produced by :program:`load-into-counting.py` and consumed by
    :program:`abundance-dist.py`.

    To append reads to an output file (rather than overwriting it), send output
    to STDOUT with `--output -` and use UNIX file redirection syntax (`>>`) to
    append to the file.

    Example::

        normalize-by-median.py -k 17 tests/test-data/test-abund-read-2.fa

    Example::

        normalize-by-median.py -p -k 17 \\
        tests/test-data/test-abund-read-paired.fa

    Example::

        normalize-by-median.py -p -k 17 -o - tests/test-data/paired.fq \\
        >> appended-output.fq

    Example::

        normalize-by-median.py -k 17 -f tests/test-data/test-error-reads.fq \\
        tests/test-data/test-fastq-reads.fq

    Example::

        normalize-by-median.py -k 17 -s test.ct \\
        tests/test-data/test-abund-read-2.fa \\
        tests/test-data/test-fastq-reads.fq"""
    parser = build_counting_args(
        descr="Do digital normalization (remove mostly redundant sequences)",
        epilog=textwrap.dedent(epilog),
        citations=['diginorm'])
    parser.add_argument('-q',
                        '--quiet',
                        dest='quiet',
                        default=False,
                        action='store_true')
    parser.add_argument('-p',
                        '--paired',
                        action='store_true',
                        help='require that all sequences be properly paired')
    parser.add_argument('--force_single',
                        dest='force_single',
                        action='store_true',
                        help='treat all sequences as single-ended/unpaired')
    parser.add_argument('-u',
                        '--unpaired-reads',
                        metavar="unpaired_reads_filename",
                        help='include a file of unpaired reads to which '
                        '-p/--paired does not apply.')
    parser.add_argument('-s',
                        '--savegraph',
                        metavar="filename",
                        default=None,
                        help='save the k-mer countgraph to disk after all '
                        'reads are loaded.')
    parser.add_argument('-R',
                        '--report',
                        help='write progress report to report_filename',
                        metavar='report_filename',
                        type=argparse.FileType('w'))
    parser.add_argument('--report-frequency',
                        metavar='report_frequency',
                        type=int,
                        default=100000,
                        help='report progress every report_frequency reads')
    parser.add_argument('-f',
                        '--force',
                        dest='force',
                        help='continue past file reading errors',
                        action='store_true')
    parser.add_argument('-o',
                        '--output',
                        metavar="filename",
                        type=khFileType('wb'),
                        default=None,
                        dest='single_output_file',
                        help='only output a single file with '
                        'the specified filename; use a single dash "-" to '
                        'specify that output should go to STDOUT (the '
                        'terminal)')
    parser.add_argument('input_filenames',
                        metavar='input_sequence_filename',
                        help='Input FAST[AQ] sequence filename.',
                        nargs='+')
    add_loadgraph_args(parser)
    parser.add_argument('-z',
                        '--loadgraph2',
                        metavar="filename",
                        default=None,
                        help='load a second k-mer graph')
    add_output_compression_type(parser)
    return parser
Esempio n. 5
0
def get_parser():
    epilog = """\
    Discard sequences based on whether or not their median k-mer abundance lies
    above a specified cutoff. Kept sequences will be placed in <fileN>.keep.

    By default, paired end reads will be considered together; if
    either read should be kept, both will be kept. (This keeps both
    reads from a fragment, and helps with retention of repeats.)
    Unpaired reads are treated individually.

    If :option:`-p`/:option:`--paired` is set, then proper pairing is required
    and the script will exit on unpaired reads, although
    :option:`--unpaired-reads` can be used to supply a file of orphan
    reads to be read after the paired reads.

    :option:`--force_single` will ignore all pairing information and treat
    reads individually.

    With :option:`-s`/:option:`--savegraph`, the k-mer countgraph
    will be saved to the specified file after all sequences have been
    processed. :option:`-l`/:option:`--loadgraph` will load the
    specified k-mer countgraph before processing the specified
    files.  Note that these graphs are are in the same format as those
    produced by :program:`load-into-counting.py` and consumed by
    :program:`abundance-dist.py`.

    To append reads to an output file (rather than overwriting it), send output
    to STDOUT with `--output -` and use UNIX file redirection syntax (`>>`) to
    append to the file.

    Example::

        normalize-by-median.py -k 17 tests/test-data/test-abund-read-2.fa

    Example::

        normalize-by-median.py -p -k 17 \\
        tests/test-data/test-abund-read-paired.fa

    Example::

        normalize-by-median.py -p -k 17 -o - tests/test-data/paired.fq \\
        >> appended-output.fq

    Example::

        normalize-by-median.py -k 17 -f tests/test-data/test-error-reads.fq \\
        tests/test-data/test-fastq-reads.fq

    Example::

        normalize-by-median.py -k 17 -s test.ct \\
        tests/test-data/test-abund-read-2.fa \\
        tests/test-data/test-fastq-reads.fq"""
    parser = build_counting_args(
        descr="Do digital normalization (remove mostly redundant sequences)",
        epilog=textwrap.dedent(epilog),
        citations=['diginorm'])
    parser.add_argument('-q', '--quiet', dest='quiet', default=False,
                        action='store_true')
    parser.add_argument('-C', '--cutoff', help="when the median "
                        "k-mer coverage level is above this number the "
                        "read is not kept.",
                        type=check_argument_range(0, 256, "cutoff"),
                        default=DEFAULT_DESIRED_COVERAGE)
    parser.add_argument('-p', '--paired', action='store_true',
                        help='require that all sequences be properly paired')
    parser.add_argument('--force_single', dest='force_single',
                        action='store_true',
                        help='treat all sequences as single-ended/unpaired')
    parser.add_argument('-u', '--unpaired-reads',
                        metavar="unpaired_reads_filename",
                        help='include a file of unpaired reads to which '
                        '-p/--paired does not apply.')
    parser.add_argument('-s', '--savegraph', metavar="filename", default=None,
                        help='save the k-mer countgraph to disk after all '
                        'reads are loaded.')
    parser.add_argument('-R', '--report',
                        help='write progress report to report_filename',
                        metavar='report_filename', type=argparse.FileType('w'))
    parser.add_argument('--report-frequency',
                        metavar='report_frequency', type=int, default=100000,
                        help='report progress every report_frequency reads')
    parser.add_argument('-f', '--force', dest='force',
                        help='continue past file reading errors',
                        action='store_true')
    parser.add_argument('-o', '--output', metavar="filename",
                        type=khFileType('wb'),
                        default=None, dest='single_output_file',
                        help='only output a single file with '
                        'the specified filename; use a single dash "-" to '
                        'specify that output should go to STDOUT (the '
                        'terminal)')
    parser.add_argument('input_filenames', metavar='input_sequence_filename',
                        help='Input FAST[AQ] sequence filename.', nargs='+')
    add_loadgraph_args(parser)
    add_output_compression_type(parser)
    return parser
def get_parser():
    epilog = ("""
    Discard sequences based on whether or not their median k-mer abundance lies
    above a specified cutoff. Kept sequences will be placed in <fileN>.keep.

    Paired end reads will be considered together if :option:`-p` is set. If
    either read will be kept, then both will be kept. This should result in
    keeping (or discarding) each sequencing fragment. This helps with retention
    of repeats, especially.

    With :option:`-s`/:option:`--savegraph`, the k-mer countgraph
    will be saved to the specified file after all sequences have been
    processed. With :option:`-d`, the k-mer countgraph will be
    saved every d files for multifile runs; if :option:`-s` is set,
    the specified name will be used, and if not, the name `backup.ct`
    will be used.  :option:`-l`/:option:`--loadgraph` will load the
    specified k-mer countgraph before processing the specified
    files.  Note that these tables are are in the same format as those
    produced by :program:`load-into-counting.py` and consumed by
    :program:`abundance-dist.py`.

    :option:`-f`/:option:`--fault-tolerant` will force the program to continue
    upon encountering a formatting error in a sequence file; the k-mer counting
    table up to that point will be dumped, and processing will continue on the
    next file.

    Example::

        saturate-by-median.py -k 17 tests/test-data/test-abund-read-2.fa

    Example::

""" "        saturate-by-median.py -p -k 17 tests/test-data/test-abund-read-paired.fa"  # noqa
    """

    Example::

""" "        saturate-by-median.py -k 17 -f tests/test-data/test-error-reads.fq tests/test-data/test-fastq-reads.fq"  # noqa
    """

    Example::

""" "        saturate-by-median.py -k 17 -d 2 -s test.ct tests/test-data/test-abund-read-2.fa tests/test-data/test-fastq-reads")   # noqa
    parser = build_counting_args(
        descr="Do digital normalization (remove mostly redundant sequences)",
        epilog=textwrap.dedent(epilog))
    parser.add_argument('-C', '--cutoff', type=int,
                        default=DEFAULT_DESIRED_COVERAGE)
    parser.add_argument('-p', '--paired', action='store_true')
    parser.add_argument('-s', '--savegraph', metavar="filename", default='')
    parser.add_argument('-R', '--report',
                        metavar='filename', type=argparse.FileType('w'))
    parser.add_argument('--report-frequency',
                        metavar='report_frequency', default=100000, type=int)
    parser.add_argument('-f', '--fault-tolerant', dest='force',
                        help='continue on next file if read errors are \
                         encountered', action='store_true')
    parser.add_argument('-o', '--out', metavar="filename",
                        dest='single_output_filename',
                        default='', help='only output a single'
                        ' file with the specified filename')
    parser.add_argument('input_filenames', metavar='input_sequence_filename',
                        help='Input FAST[AQ] sequence filename.', nargs='+')
    add_loadgraph_args(parser)
    return parser
Esempio n. 7
0
def get_parser():
    epilog = """\
    The output is one file for each input file, ``<input file>.abundtrim``,
    placed in the current directory.  This output contains the input sequences
    trimmed at low-abundance k-mers.

    The :option:`-V`/:option:`--variable-coverage` parameter will, if
    specified, prevent elimination of low-abundance reads by only trimming
    low-abundance k-mers from high-abundance reads; use this for
    non-genomic data sets that may have variable coverage.

    Note that the output reads will not necessarily be in the same order
    as the reads in the input files; if this is an important consideration,
    use :program:`load-into-counting.py` and :program:`filter-abund.py`.
    However, read pairs will be kept together, in "broken-paired" format; you
    can use :program:`extract-paired-reads.py` to extract read pairs and
    orphans.

    Example::

        trim-low-abund.py -x 5e7 -k 20 -C 2 data/100k-filtered.fa
    """

    parser = build_counting_args(
        descr='Trim low-abundance k-mers using a streaming algorithm.',
        epilog=textwrap.dedent(epilog))

    parser.add_argument('input_filenames', nargs='+')

    parser.add_argument('--cutoff',
                        '-C',
                        type=int,
                        help='remove k-mers below this abundance',
                        default=DEFAULT_CUTOFF)

    parser.add_argument('--trim-at-coverage',
                        '-Z',
                        '--normalize-to',
                        type=int,
                        help='trim reads when entire read above this coverage',
                        default=DEFAULT_TRIM_AT_COVERAGE)

    parser.add_argument('-o',
                        '--output',
                        metavar="output_filename",
                        type=argparse.FileType('wb'),
                        help='only output a single file with '
                        'the specified filename; use a single dash "-" to '
                        'specify that output should go to STDOUT (the '
                        'terminal)')

    parser.add_argument('--variable-coverage',
                        '-V',
                        action='store_true',
                        default=False,
                        help='Only trim low-abundance k-mers from sequences '
                        'that have high coverage.')

    add_loadgraph_args(parser)
    parser.add_argument('-s',
                        '--savegraph',
                        metavar="filename",
                        default='',
                        help='save the k-mer countgraph to disk after all'
                        'reads are loaded.')
    parser.add_argument('-q',
                        '--quiet',
                        dest='quiet',
                        default=False,
                        action='store_true')

    # expert options
    parser.add_argument('--force', default=False, action='store_true')
    parser.add_argument('--ignore-pairs', default=False, action='store_true')
    parser.add_argument('--tempdir',
                        '-T',
                        type=str,
                        default='./',
                        help="Set location of temporary directory for "
                        "second pass")
    add_output_compression_type(parser)

    parser.add_argument('--diginorm',
                        default=False,
                        action='store_true',
                        help="Eliminate high-coverage reads altogether "
                        "(digital normalization).")
    parser.add_argument('--diginorm-coverage',
                        type=int,
                        default=DEFAULT_DIGINORM_COVERAGE,
                        help="Coverage threshold for --diginorm")
    parser.add_argument('--single-pass',
                        default=False,
                        action='store_true',
                        help="Do not do a second pass across the low coverage "
                        "data")

    return parser
Esempio n. 8
0
def get_parser():
    epilog = (
        """
    Discard sequences based on whether or not their median k-mer abundance lies
    above a specified cutoff. Kept sequences will be placed in <fileN>.keep.

    By default, paired end reads will be considered together; if
    either read should be kept, both will be kept. (This keeps both
    reads from a fragment, and helps with retention of repeats.)
    Unpaired reads are treated individually.

    If :option:`-p`/`--paired` is set, then proper pairing is required
    and the script will exit on unpaired reads, although
    :option:`--unpaired-reads` can be used to supply a file of orphan
    reads to be read after the paired reads.

    :option:`--force-single` will ignore all pairing information and treat
    reads individually.

    With :option:`-s`/:option:`--savegraph`, the k-mer countgraph
    will be saved to the specified file after all sequences have been
    processed. :option:`-l`/:option:`--loadgraph` will load the
    specified k-mer countgraph before processing the specified
    files.  Note that these graphs are are in the same format as those
    produced by :program:`load-into-counting.py` and consumed by
    :program:`abundance-dist.py`.

    To append reads to an output file (rather than overwriting it), send output
    to STDOUT with `--output -` and use UNIX file redirection syntax (`>>`) to
    append to the file.

    Example::

        normalize-by-median.py -k 17 tests/test-data/test-abund-read-2.fa

    Example::

"""
        "        normalize-by-median.py -p -k 17 tests/test-data/test-abund-read-paired.fa"  # noqa
        """

    Example::

"""
        "        normalize-by-median.py -p -k 17 -o - tests/test-data/paired.fq >> appended-output.fq"  # noqa
        """

    Example::

"""
        "        normalize-by-median.py -k 17 -f tests/test-data/test-error-reads.fq tests/test-data/test-fastq-reads.fq"  # noqa
        """

    Example::

"""
        "        normalize-by-median.py -k 17 -d 2 -s test.ct tests/test-data/test-abund-read-2.fa tests/test-data/test-fastq-reads"
    )  # noqa
    parser = build_counting_args(
        descr="Do digital normalization (remove mostly redundant sequences)", epilog=textwrap.dedent(epilog)
    )
    parser.add_argument("-q", "--quiet", dest="quiet", default=False, action="store_true")
    parser.add_argument("-C", "--cutoff", type=int, default=DEFAULT_DESIRED_COVERAGE)
    parser.add_argument("-p", "--paired", action="store_true", help="require that all sequences be properly paired")
    parser.add_argument(
        "--force-single", dest="force_single", action="store_true", help="treat all sequences as single-ended/unpaired"
    )
    parser.add_argument(
        "-u",
        "--unpaired-reads",
        metavar="unpaired_reads_filename",
        help="include a file of unpaired reads to which " "-p/--paired does not apply.",
    )
    parser.add_argument(
        "-s",
        "--savegraph",
        metavar="filename",
        default="",
        help="save the k-mer countgraph to disk after all" "reads are loaded.",
    )
    parser.add_argument("-R", "--report", metavar="report_filename", type=argparse.FileType("w"))
    parser.add_argument("--report-frequency", metavar="report_frequency", type=int, default=100000)
    parser.add_argument("-f", "--force", dest="force", help="continue past file reading errors", action="store_true")
    parser.add_argument(
        "-o",
        "--output",
        metavar="filename",
        type=argparse.FileType("wb"),
        default=None,
        dest="single_output_file",
        help="only output a single file with "
        'the specified filename; use a single dash "-" to '
        "specify that output should go to STDOUT (the "
        "terminal)",
    )
    parser.add_argument(
        "input_filenames", metavar="input_sequence_filename", help="Input FAST[AQ] sequence filename.", nargs="+"
    )
    add_loadgraph_args(parser)
    add_output_compression_type(parser)
    return parser
Esempio n. 9
0
def get_parser():
    epilog = """\
    The output is one file for each input file, ``<input file>.abundtrim``,
    placed in the current directory.  This output contains the input sequences
    trimmed at low-abundance k-mers.

    The :option:`-V`/:option:`--variable-coverage` parameter will, if
    specified, prevent elimination of low-abundance reads by only trimming
    low-abundance k-mers from high-abundance reads; use this for
    non-genomic data sets that may have variable coverage.

    Note that the output reads will not necessarily be in the same order
    as the reads in the input files; if this is an important consideration,
    use :program:`load-into-counting.py` and :program:`filter-abund.py`.
    However, read pairs will be kept together, in "broken-paired" format; you
    can use :program:`extract-paired-reads.py` to extract read pairs and
    orphans.

    Example::

        trim-low-abund.py -x 5e7 -k 20 -C 2 data/100k-filtered.fa
    """

    parser = build_counting_args(
        descr='Trim low-abundance k-mers using a streaming algorithm.',
        epilog=textwrap.dedent(epilog))

    parser.add_argument('input_filenames', nargs='+')

    parser.add_argument('--cutoff', '-C', type=int,
                        help='remove k-mers below this abundance',
                        default=DEFAULT_CUTOFF)

    parser.add_argument('--trim-at-coverage', '-Z', '--normalize-to',
                        type=int,
                        help='trim reads when entire read above this coverage',
                        default=DEFAULT_TRIM_AT_COVERAGE)

    parser.add_argument('-o', '--output', metavar="output_filename",
                        type=argparse.FileType('wb'),
                        help='only output a single file with '
                        'the specified filename; use a single dash "-" to '
                        'specify that output should go to STDOUT (the '
                        'terminal)')

    parser.add_argument('--variable-coverage', '-V', action='store_true',
                        default=False,
                        help='Only trim low-abundance k-mers from sequences '
                        'that have high coverage.')

    add_loadgraph_args(parser)
    parser.add_argument('-s', '--savegraph', metavar="filename", default='',
                        help='save the k-mer countgraph to disk after all'
                        'reads are loaded.')
    parser.add_argument('-q', '--quiet', dest='quiet', default=False,
                        action='store_true')

    # expert options
    parser.add_argument('--force', default=False, action='store_true')
    parser.add_argument('--ignore-pairs', default=False, action='store_true')
    parser.add_argument('--tempdir', '-T', type=str, default='./',
                        help="Set location of temporary directory for "
                        "second pass")
    add_output_compression_type(parser)

    parser.add_argument('--diginorm', default=False, action='store_true',
                        help="Eliminate high-coverage reads altogether "
                        "(digital normalization).")
    parser.add_argument('--diginorm-coverage', type=int,
                        default=DEFAULT_DIGINORM_COVERAGE,
                        help="Coverage threshold for --diginorm")
    parser.add_argument('--single-pass', default=False, action='store_true',
                        help="Do not do a second pass across the low coverage "
                        "data")

    return parser