Beispiel #1
0
def addCommandLineOptions(parser, outfileDefaultName=None):
    """
    Register the standard set of command-line options on a parser.

    The options contributed by C{addCommonOptions} and by
    C{SAMFilter.addFilteringOptions} are added first, followed by the
    C{--outfile} and C{--show} options defined here.

    @param parser: The C{ArgumentParser} instance to add the options to.
    @param outfileDefaultName: The C{str} output file name to fall back on
        when no C{--outfile} is given on the command line.
    """
    # Options supplied by helpers that are defined outside this function.
    addCommonOptions(parser)
    SAMFilter.addFilteringOptions(parser)

    # The options specific to this function, as (name, keyword arguments)
    # pairs, listed in the order they are to be added to the parser.
    ownOptions = (
        ('--outfile', {
            'default': outfileDefaultName,
            'help': 'The filename to store the resulting HTML.',
        }),
        ('--show', {
            'action': 'store_true',
            'default': False,
            'help': 'If specified, show the figure interactively.',
        }),
    )

    for optionName, settings in ownOptions:
        parser.add_argument(optionName, **settings)
from collections import Counter
from numpy import std

from dark.filter import (addFASTAFilteringCommandLineOptions,
                         parseFASTAFilteringCommandLineOptions)
from dark.reads import Reads
from dark.sam import samfile, SAMFilter, samReferences, UnknownReference
from dark.utils import baseCountsToStr, pct

# Build the command-line parser. ArgumentDefaultsHelpFormatter makes the
# generated help text include each option's default value.
parser = argparse.ArgumentParser(
    description=('Print SAM/BAM file coverage statistics by offset. '
                 'Output lines show the offset.'),
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

# Let the FASTA and SAM filtering helpers add their own options, in this
# order. NOTE(review): samfileIsPositional=True presumably makes the SAM/BAM
# filename a positional argument - confirm against SAMFilter.
addFASTAFilteringCommandLineOptions(parser)
SAMFilter.addFilteringOptions(parser, samfileIsPositional=True)

# Both of the following are switches that are False unless given.
parser.add_argument(
    '--noOffsets', action='store_true', default=False,
    help='Do not print per-offset details of base counts.')

parser.add_argument(
    '--noStats', action='store_true', default=False,
    help='Do not print final average and standard deviation statistics.')

parser.add_argument(
    '--noFilter',
    default=False,
Beispiel #3
0
    parser.add_argument('--bam',
                        action='store_const',
                        const='b',
                        default='',
                        help='If given, write (gzip compressed) BAM output.')

    parser.add_argument(
        '--checkResultCount',
        type=int,
        help=('The number of alignments expected in the output. If this '
              'number is not seen, the script exits with status 1 (and an '
              'error message is also printed, unless --quiet was used).'))

    addFASTAFilteringCommandLineOptions(parser)
    SAMFilter.addFilteringOptions(parser)

    args = parser.parse_args()
    reads = parseFASTAFilteringCommandLineOptions(args, Reads())
    samFilter = SAMFilter.parseFilteringOptions(args,
                                                reads.filterRead,
                                                storeQueryIds=True)

    # The following 'if' has a False in it to make it always fail. That's
    # because pysam issue 716 (see below) did not fix the problem as I had
    # hoped. Instead it throws an error if you pass a header that has a
    # modified SQ key with reference ids and there's a difference it
    # doesn't like. It's always safe to use the 'else' below, with the
    # slight downside being that its header will mention all sequence ids,
    # even if you only want a lesser number (via --referenceId). I'm
    # leaving the code here because this is how you would do it, and it
    help=('If given, information about reference sequence insertions will be '
          'printed to standard error. These correspond to "I" CIGAR '
          'operations that for the match would require inserting query bases '
          'into the reference. Because we cannot change the reference (in '
          'fact we typically do not have the reference in the SAM/BAM file), '
          'we cut the inserted bases out of the aligned query and save the '
          'information about what would have been inserted and where. That '
          'information is printed by this option. The output gives the '
          '0-based offset where the inserted base would be placed, followed '
          'by a list of the nucleotides that were suggested as being '
          'inserted and the number of times each nucleotide was suggested. '
          'So for example the output might contain "27: T:3, G:10" which '
          'indicates that 13 query (3 with T and 10 with G) matches would '
          'insert a nucleotide into the reference at offset 27.'))

# Let the SAM and FASTA filtering helpers add their own command-line
# options to the parser (the parser itself is created earlier in the
# script).
SAMFilter.addFilteringOptions(parser)
addFASTAFilteringCommandLineOptions(parser)

args = parser.parse_args()

# Start from an empty Reads instance and apply whatever the FASTA filtering
# helper derives from the parsed arguments. The filterRead method of the
# result is then handed to the SAM filter as its read filtering function.
reads = parseFASTAFilteringCommandLineOptions(args, Reads())
samFilter = SAMFilter.parseFilteringOptions(
    args, filterRead=reads.filterRead)
paddedSAM = PaddedSAM(samFilter)

# Write each query produced by the padded SAM object to standard output in
# FASTA format. end='' stops print from adding a newline of its own
# (presumably toString already terminates its output - TODO confirm).
for read in paddedSAM.queries(rcSuffix=args.rcSuffix, rcNeeded=args.rcNeeded):
    print(read.toString('fasta'), end='')

if args.listReferenceInsertions:
    if paddedSAM.referenceInsertions:
        print('(0-based) insertions into the reference:\n%s' %
              nucleotidesToStr(paddedSAM.referenceInsertions, '  '),
              'intersecting the protein. Use this to prevent reads that '
              'just overlap the protein in a very small number offsets '
              'from being counted.'))

    parser.add_argument(
        '--skipTranslationChecks',
        dest='checkTranslations',
        action='store_false',
        default=True,
        help=('Skip the sanity check that database protein sequences can all '
              'be translated from the database genome sequence.'))

    addFASTAFilteringCommandLineOptions(parser)
    SAMFilter.addFilteringOptions(parser,
                                  samfileIsPositional=False,
                                  samfileAction='append',
                                  samfileNargs='*',
                                  samfileRequired=False)

    args = parser.parse_args()

    samfiles = list(chain.from_iterable(args.samfile)) if args.samfile else []

    if samfiles:
        if args.referenceId:
            referenceIds = args.referenceId
        else:
            # If all SAM files have just one reference and they're all the
            # same, use that. Else complain.
            referenceIds = set()
            for filename in samfiles: