Ejemplo n.º 1
0
          'any filtering option (other than --referenceId) you also specify '
          'that is provided by the SAMFilter.addFilteringOptions will be '
          'silently ignored!'))

args = parser.parse_args()

# Each of these flags switches off one of the two kinds of output; the
# positive forms are what the rest of the script consults.
printOffsets = not args.noOffsets
printStats = not args.noStats

# With both kinds of output switched off there is nothing left to print,
# so complain on standard error and exit with status 1.
if not (printOffsets or printStats):
    print('You have used both --noOffsets and --noStats, so there is '
          'no output!', file=sys.stderr)
    sys.exit(1)

# No file of reads is involved here. A bare Reads() instance is passed in
# only so that a read filter can be built from the command-line options;
# that filter is handed to the SAM filter, which is then used on the SAM
# file query sequences and to obtain reference lengths.
reads = parseFASTAFilteringCommandLineOptions(args, Reads())
samFilter = SAMFilter.parseFilteringOptions(args, reads.filterRead)

# At most one reference id is supported per run (no ids at all is fine).
if len(samFilter.referenceIds or ()) > 1:
    print('Only one reference id can be given. To calculate coverage for '
          'more than one reference, run this script multiple times.',
          file=sys.stderr)
    sys.exit(1)

try:
    referenceLengths = samFilter.referenceLengths()
except UnknownReference:
Ejemplo n.º 2
0
          'fact we typically do not have the reference in the SAM/BAM file), '
          'we cut the inserted bases out of the aligned query and save the '
          'information about what would have been inserted and where. That '
          'information is printed by this option. The output gives the '
          '0-based offset where the inserted base would be placed, followed '
          'by a list of the nucleotides that were suggested as being '
          'inserted and the number of times each nucleotide was suggested. '
          'So for example the output might contain "27: T:3, G:10" which '
          'indicates that 13 query (3 with T and 10 with G) matches would '
          'insert a nucleotide into the reference at offset 27.'))

# Register the SAM and FASTA filtering options, then parse the command line.
SAMFilter.addFilteringOptions(parser)
addFASTAFilteringCommandLineOptions(parser)
args = parser.parse_args()

# Only the filterRead method of the resulting reads object is used: it is
# given to the SAM filter, which in turn drives the padded SAM reader.
reads = parseFASTAFilteringCommandLineOptions(args, Reads())
samFilter = SAMFilter.parseFilteringOptions(args, filterRead=reads.filterRead)
paddedSAM = PaddedSAM(samFilter)

# Write each query to standard output in FASTA format. No extra newline is
# added (presumably toString already terminates the record -- confirm).
queries = paddedSAM.queries(rcSuffix=args.rcSuffix, rcNeeded=args.rcNeeded)
for read in queries:
    print(read.toString('fasta'), end='')

# The insertion summary goes to standard error, and must be produced only
# after the queries above have been iterated.
if args.listReferenceInsertions:
    insertions = paddedSAM.referenceInsertions
    if insertions:
        report = ('(0-based) insertions into the reference:\n%s' %
                  nucleotidesToStr(insertions, '  '))
    else:
        report = 'No matches required an insertion into the reference.'
    print(report, file=sys.stderr)
Ejemplo n.º 3
0
              'used to force conversion from FASTQ to FASTA'))

    parser.add_argument(
        '--checkResultCount',
        type=int,
        help=('The number of reads expected in the output. If this number '
              'is not seen, the script exits with status 1 and an error '
              'message is printed unless --quiet was used.'))

    # Add the shared FASTA input, filtering and editing options.
    addFASTACommandLineOptions(parser)
    addFASTAFilteringCommandLineOptions(parser)
    addFASTAEditingCommandLineOptions(parser)

    args = parser.parse_args()

    # Build the reads in three stages: read the input, then apply the
    # filtering options, then apply the editing options.
    reads = parseFASTACommandLineOptions(args)
    reads = parseFASTAFilteringCommandLineOptions(args, reads)
    reads = parseFASTAEditingCommandLineOptions(args, reads)

    # An explicit --saveAs wins; otherwise the output format follows the
    # first input-format flag that is set. If none is set, saveAs ends up
    # as the (false) value of args.fasta_ss.
    if args.saveAs:
        saveAs = args.saveAs
    elif args.fasta:
        saveAs = 'fasta'
    elif args.fastq:
        saveAs = 'fastq'
    else:
        saveAs = args.fasta_ss and 'fasta-ss'

    # Reject incompatible read/write formats. FASTQ cannot be written
    # unless the input is FASTQ (otherwise there is no quality
    # information), and in the same way PDB FASTA with secondary structure
    # information cannot be written unless that is present on input.
    if saveAs == 'fastq' and not args.fastq:
        raise ValueError(
            'You have specified --saveAs fastq without using --fastq to '
            'indicate that the input is FASTQ. Please be explicit.')
Ejemplo n.º 4
0
    parser.add_argument(
        '--checkResultCount', type=int,
        help=('The number of reads expected in the output. If this number '
              'is not seen, the script exits with status 1 and an error '
              'message is printed unless --quiet was used.'))

    # Add the shared FASTA input, filtering and editing options, in that
    # order.
    for addOptions in (addFASTACommandLineOptions,
                       addFASTAFilteringCommandLineOptions,
                       addFASTAEditingCommandLineOptions):
        addOptions(parser)

    args = parser.parse_args()

    # Build the reads in three stages: read the input, then apply the
    # filtering options, then apply the editing options.
    reads = parseFASTACommandLineOptions(args)
    reads = parseFASTAFilteringCommandLineOptions(args, reads)
    reads = parseFASTAEditingCommandLineOptions(args, reads)

    # An explicit --saveAs wins; otherwise the output format follows the
    # first input-format flag that is set. If none is set, saveAs ends up
    # as the (false) value of args.fasta_ss.
    if args.saveAs:
        saveAs = args.saveAs
    elif args.fasta:
        saveAs = 'fasta'
    elif args.fastq:
        saveAs = 'fastq'
    else:
        saveAs = args.fasta_ss and 'fasta-ss'

    # Check for incompatible read/write formats. We can't write FASTQ
    # unless we have FASTQ on input (else we won't have quality information),
    # and we can't write PDB FASTA with secondary structure information
    # unless we have that on input.
    if saveAs == 'fastq' and not args.fastq:
        raise ValueError(
            'You have specified --saveAs fastq without using --fastq '
            'to indicate that the input is FASTQ. Please be explicit.')
    elif saveAs == 'fasta-ss' and not args.fasta_ss:
        raise ValueError(
            'You have specified --saveAs fasta-ss without using --fasta-ss '