Ejemplo n.º 1
0
def addCommandLineOptions(parser, outfileDefaultName=None):
    """
    Register the standard set of command-line options on a parser.

    The common options and the SAM filtering options are added first,
    followed by C{--outfile} and C{--show}.

    @param parser: The C{ArgumentParser} instance to add the options to.
    @param outfileDefaultName: The C{str} name of the output file to fall
        back on when the user does not pass C{--outfile} on the command
        line.
    """
    # Options contributed by shared helpers.
    addCommonOptions(parser)
    SAMFilter.addFilteringOptions(parser)

    # Options added directly by this function.
    parser.add_argument(
        '--outfile', default=outfileDefaultName,
        help='The filename to store the resulting HTML.')

    parser.add_argument(
        '--show', default=False, action='store_true',
        help='If specified, show the figure interactively.')
Ejemplo n.º 2
0
from collections import Counter
from numpy import std

from dark.filter import (addFASTAFilteringCommandLineOptions,
                         parseFASTAFilteringCommandLineOptions)
from dark.reads import Reads
from dark.sam import samfile, SAMFilter, samReferences, UnknownReference
from dark.utils import baseCountsToStr, pct

# Build the command-line interface for the coverage statistics script. The
# ArgumentDefaultsHelpFormatter makes argparse include default values in
# the generated help text.
parser = argparse.ArgumentParser(
    description=('Print SAM/BAM file coverage statistics by offset. '
                 'Output lines show the offset.'),
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

# Register the FASTA filtering options first, then the SAM filtering
# options (samfileIsPositional=True presumably makes the SAM/BAM filename
# a positional argument -- confirm against SAMFilter).
addFASTAFilteringCommandLineOptions(parser)
SAMFilter.addFilteringOptions(parser, samfileIsPositional=True)

# Boolean switches that turn off parts of the output.
parser.add_argument(
    '--noOffsets', action='store_true', default=False,
    help='Do not print per-offset details of base counts.')

parser.add_argument(
    '--noStats', action='store_true', default=False,
    help='Do not print final average and standard deviation statistics.')

parser.add_argument(
    '--noFilter',
    default=False,
Ejemplo n.º 3
0
    parser.add_argument('--bam',
                        action='store_const',
                        const='b',
                        default='',
                        help='If given, write (gzip compressed) BAM output.')

    parser.add_argument(
        '--checkResultCount',
        type=int,
        help=('The number of alignments expected in the output. If this '
              'number is not seen, the script exits with status 1 (and an '
              'error message is also printed, unless --quiet was used).'))

    addFASTAFilteringCommandLineOptions(parser)
    SAMFilter.addFilteringOptions(parser)

    args = parser.parse_args()
    reads = parseFASTAFilteringCommandLineOptions(args, Reads())
    samFilter = SAMFilter.parseFilteringOptions(args,
                                                reads.filterRead,
                                                storeQueryIds=True)

    # The following 'if' has a False in it to make it always fail. That's
    # because pysam issue 716 (see below) did not fix the problem as I had
    # hoped. Instead it throws an error if you pass a header that has a
    # modified SQ key with reference ids and there's a difference it
    # doesn't like. It's always safe to use the 'else' below, with the
    # slight downside being that its header will mention all sequence ids,
    # even if you only want a lesser number (via --referenceId). I'm
    # leaving the code here because this is how you would do it, and it
Ejemplo n.º 4
0
    help=('If given, information about reference sequence insertions will be '
          'printed to standard error. These correspond to "I" CIGAR '
          'operations that for the match would require inserting query bases '
          'into the reference. Because we cannot change the reference (in '
          'fact we typically do not have the reference in the SAM/BAM file), '
          'we cut the inserted bases out of the aligned query and save the '
          'information about what would have been inserted and where. That '
          'information is printed by this option. The output gives the '
          '0-based offset where the inserted base would be placed, followed '
          'by a list of the nucleotides that were suggested as being '
          'inserted and the number of times each nucleotide was suggested. '
          'So for example the output might contain "27: T:3, G:10" which '
          'indicates that 13 query (3 with T and 10 with G) matches would '
          'insert a nucleotide into the reference at offset 27.'))

# Register the SAM filtering options and then the FASTA filtering options
# on the parser (the registration order determines the order in which they
# appear in the help output).
SAMFilter.addFilteringOptions(parser)
addFASTAFilteringCommandLineOptions(parser)

args = parser.parse_args()
# Apply the FASTA filtering options to an initially empty Reads() instance.
# The filterRead method of the result is passed to the SAM filter below.
reads = parseFASTAFilteringCommandLineOptions(args, Reads())
samFilter = SAMFilter.parseFilteringOptions(
    args, filterRead=reads.filterRead)
paddedSAM = PaddedSAM(samFilter)

# Print every query produced by the padded SAM reader in FASTA format.
# end='' suppresses print's own newline (presumably toString('fasta')
# already ends each record with one -- confirm).
for read in paddedSAM.queries(rcSuffix=args.rcSuffix, rcNeeded=args.rcNeeded):
    print(read.toString('fasta'), end='')

if args.listReferenceInsertions:
    if paddedSAM.referenceInsertions:
        print('(0-based) insertions into the reference:\n%s' %
              nucleotidesToStr(paddedSAM.referenceInsertions, '  '),
Ejemplo n.º 5
0
              'intersecting the protein. Use this to prevent reads that '
              'just overlap the protein in a very small number offsets '
              'from being counted.'))

    parser.add_argument(
        '--skipTranslationChecks',
        dest='checkTranslations',
        action='store_false',
        default=True,
        help=('Skip the sanity check that database protein sequences can all '
              'be translated from the database genome sequence.'))

    addFASTAFilteringCommandLineOptions(parser)
    SAMFilter.addFilteringOptions(parser,
                                  samfileIsPositional=False,
                                  samfileAction='append',
                                  samfileNargs='*',
                                  samfileRequired=False)

    args = parser.parse_args()

    samfiles = list(chain.from_iterable(args.samfile)) if args.samfile else []

    if samfiles:
        if args.referenceId:
            referenceIds = args.referenceId
        else:
            # If all SAM files have just one reference and they're all the
            # same, use that. Else complain.
            referenceIds = set()
            for filename in samfiles: