Ejemplo n.º 1
0
        'quality strings on a single TAB-separated line to stdout.'))

    # The string placed between the fields of each output line. The default
    # is a single TAB character.
    parser.add_argument(
        '--separator', default='\t',
        help=('The character string to separate ids from sequences and '
              'quality strings (if any)'))

    # A flag that, when given, sets args.removeIds to True.
    parser.add_argument(
        '--removeIds', action='store_true', default=False,
        help='Do not print sequence ids')

    # Register the shared FASTA input options on the parser, parse the
    # command line, then hand the parsed options to the FASTA helper to
    # obtain the reads.
    addFASTACommandLineOptions(parser)
    args = parser.parse_args()
    sep = args.separator
    reads = parseFASTACommandLineOptions(args)

    # Duplicate code a little so as not to repeatedly do two tests per read.
    if args.fastq:
        if args.removeIds:
            for read in reads:
                print(read.sequence + sep + read.quality)
        else:
            for read in reads:
                print(read.id + sep + read.sequence + sep + read.quality)
    else:
        if args.removeIds:
            for read in reads:
                print(read.sequence)
        else:
            for read in reads:
Ejemplo n.º 2
0
          'be ignored. The sites must be given in the form e.g., '
          '24,100-200,260.'))

# A flag that, when given, sets args.showDiffs to True.
parser.add_argument(
    '--showDiffs', action='store_true', default=False,
    help='Print (1-based) sites where the sequence nucleotides differ.')

# Register the shared FASTA input options and parse the command line.
addFASTACommandLineOptions(parser)
args = parser.parse_args()

# One is subtracted from each requested index before filtering (presumably
# the command-line indices are 1-based and the filter wants 0-based ones --
# confirm against the option definitions). Because this is a set, giving
# the same index twice leaves a single entry.
keepSequences = {args.index1 - 1, args.index2 - 1}

reads = list(
    parseFASTACommandLineOptions(args).filter(keepSequences=keepSequences))

if len(reads) == 1 and len(keepSequences) == 1:
    # This is ok, they want to compare a sequence with itself.
    reads = Reads([reads[0], reads[0]])
elif len(reads) != 2:
    # Anything other than exactly two reads at this point means a requested
    # index was not matched. Passing a string to sys.exit writes it to
    # stderr and exits with status 1, so the diagnostic cannot get mixed
    # into normal output on stdout.
    sys.exit('Could not find both requested sequence indices. Exiting.')

if args.alignmentFile:
    args.align = True
Ejemplo n.º 3
0
    # Build the command-line parser for this script.
    parser = argparse.ArgumentParser(description=(
        'Given FASTA on stdin, write the ids, sequences, and quality '
        'strings on a single TAB-separated line to stdout.'))

    # The string placed between the fields of each output line. The default
    # is a single TAB character.
    parser.add_argument(
        '--separator',
        default='\t',
        help=('The character string to separate ids from sequences and '
              'quality strings (if any)'))

    # A flag that, when given, sets args.removeIds to True.
    parser.add_argument(
        '--removeIds',
        action='store_true',
        default=False,
        help='Do not print sequence ids')

    # Register the shared FASTA input options on the parser, parse the
    # command line, then hand the parsed options to the FASTA helper to
    # obtain the reads.
    addFASTACommandLineOptions(parser)
    args = parser.parse_args()
    sep = args.separator
    reads = parseFASTACommandLineOptions(args)

    # Duplicate code a little so as not to repeatedly do two tests per read.
    if args.fastq:
        if args.removeIds:
            for read in reads:
                print(read.sequence + sep + read.quality)
        else:
            for read in reads:
                print(read.id + sep + read.sequence + sep + read.quality)
    else:
        if args.removeIds:
            for read in reads:
                print(read.sequence)
        else:
            for read in reads:
Ejemplo n.º 4
0
# A flag that, when given, sets args.force to True.
parser.add_argument(
    '--force', action='store_true', default=False,
    help='If given, overwrite pre-existing files.')

# The output format. argparse rejects any value not listed in choices; when
# the option is omitted args.saveAs is None.
parser.add_argument(
    '--saveAs', choices=('fasta', 'fastq', 'fasta-ss'),
    help=('The output format. The default is to match the input format, so '
          'there is usually no need to specify this option. It can be used '
          'to force conversion from FASTQ to FASTA'))

# Register the shared FASTA input options and parse the command line.
addFASTACommandLineOptions(parser)
args = parser.parse_args()
# NOTE(review): the processing loop further down calls
# parseFASTACommandLineOptions(args) again rather than iterating this
# value -- confirm whether this first call is still needed.
reads = parseFASTACommandLineOptions(args)

# Create the output directory if it does not already exist.
if not exists(args.outDir):
    mkdir(args.outDir)

# An explicit --saveAs wins. Otherwise the output format follows whichever
# input-format flag is set, checked in the order fasta, fastq, fasta-ss. If
# none is set, saveAs ends up as the (falsy) value of args.fasta_ss.
if args.saveAs:
    saveAs = args.saveAs
elif args.fasta:
    saveAs = 'fasta'
elif args.fastq:
    saveAs = 'fastq'
else:
    saveAs = args.fasta_ss and 'fasta-ss'

# Note: we may be reading the FASTA input from stdin, so we cannot read it
# more than once (and I don't want to store it all because it may be very
# large). That's why we do a second phase of processing to renumber the
# files we created if --numeric is used (and --noLeadingZeroes is not).

count = 0
for count, read in enumerate(parseFASTACommandLineOptions(args), start=1):
    id_ = read.id.split()[0]
Ejemplo n.º 5
0
    # An optional integer; args.checkResultCount is None when not given.
    parser.add_argument(
        '--checkResultCount',
        type=int,
        help=('The number of reads expected in the output. If this number is '
              'not seen, the script exits with status 1 and an error message '
              'is printed unless --quiet was used.'))

    # Register the three groups of shared options: FASTA input, filtering,
    # and editing.
    addFASTACommandLineOptions(parser)
    addFASTAFilteringCommandLineOptions(parser)
    addFASTAEditingCommandLineOptions(parser)

    args = parser.parse_args()

    # Build the reads in three stages: the input reads are passed to the
    # filtering helper, and its result is passed to the editing helper.
    inputReads = parseFASTACommandLineOptions(args)
    filteredReads = parseFASTAFilteringCommandLineOptions(args, inputReads)
    reads = parseFASTAEditingCommandLineOptions(args, filteredReads)

    # An explicit --saveAs wins. Otherwise the output format follows
    # whichever input-format flag is set, checked in the order fasta, fastq,
    # fasta-ss. If none is set, saveAs ends up as the (falsy) value of
    # args.fasta_ss.
    if args.saveAs:
        saveAs = args.saveAs
    elif args.fasta:
        saveAs = 'fasta'
    elif args.fastq:
        saveAs = 'fastq'
    else:
        saveAs = args.fasta_ss and 'fasta-ss'

    # Check for incompatible read/write formats. We can't write FASTQ
    # unless we have FASTQ on input (else we won't have quality information),
    # and we can't write PDB FASTA with secondary structure information
    # unless we have that on input.
    if saveAs == 'fastq' and not args.fastq:
        raise ValueError(
            'You have specified --saveAs fastq without using --fastq '
            'to indicate that the input is FASTQ. Please be explicit.')
Ejemplo n.º 6
0
              'start of this sequence will be counted and offsets will be '
              'incremented by that amount (and lower offsets in sequences '
              'that start before the specified sequence will be ignored).'))

    # A flag that, when given, sets args.unknownAreAmbiguous to True.
    parser.add_argument(
        '--unknownAreAmbiguous', default=False, action='store_true',
        help=("Any unknown character (e.g., a '-' gap or '?' unknown base) "
              "will be treated as being fully ambiguous (i.e., could be any "
              "of ACGT). Otherwise, all unknown characters are counted "
              "as '-' characters."))

    # Register the shared FASTA input options and parse the command line.
    addFASTACommandLineOptions(parser)
    args = parser.parse_args()

    # Read all the input into a list up front and wrap it in a Reads
    # instance.
    inputReads = list(parseFASTACommandLineOptions(args))
    reads = Reads(inputReads)

    # Exit successfully when reads is falsy (presumably meaning there was no
    # input -- this depends on how Reads defines truthiness).
    if not reads:
        sys.exit(0)

    if args.reference:
        for read in reads:
            if read.id == args.reference:
                break
        else:
            print('Could not find --reference sequence %r.' % args.reference,
                  file=sys.stderr)
            sys.exit(1)
        baseOffset = len(read.sequence) - len(read.sequence.lstrip('-'))
        reference = read
    else:
Ejemplo n.º 7
0
# An optional string naming the (1-based) sites to keep, in the format
# described by the help text. args.sites is None when not given.
parser.add_argument(
    '--sites',
    help=('Specify (1-based) sequence sites to keep. All other sites will be '
          'ignored. The sites must be given in the form e.g., '
          '24,100-200,260.'))

# A flag that, when given, sets args.showDiffs to True.
parser.add_argument(
    '--showDiffs',
    action='store_true',
    default=False,
    help='Print (1-based) sites where the sequence nucleotides differ.')

# Register the shared FASTA input options and parse the command line.
addFASTACommandLineOptions(parser)
args = parser.parse_args()

# One is subtracted from each requested index before filtering (presumably
# the command-line indices are 1-based and the filter wants 0-based ones --
# confirm against the option definitions). Because this is a set, giving
# the same index twice leaves a single entry.
keepSequences = {index - 1 for index in (args.index1, args.index2)}

selectedReads = parseFASTACommandLineOptions(args).filter(
    keepSequences=keepSequences)
reads = list(selectedReads)

if len(reads) == 1 and len(keepSequences) == 1:
    # This is ok, they want to compare a sequence with itself.
    reads = Reads([reads[0], reads[0]])
elif len(reads) != 2:
    # Anything other than exactly two reads at this point means a requested
    # index was not matched. Passing a string to sys.exit writes it to
    # stderr and exits with status 1, so the diagnostic cannot get mixed
    # into normal output on stdout.
    sys.exit('Could not find both requested sequence indices. Exiting.')

if args.alignmentFile:
    args.align = True
Ejemplo n.º 8
0
    '--noLeadingZeroes', default=False, action='store_true',
    help='If given, numeric filenames will not have leading zeroes.')

# A flag that, when given, sets args.force to True.
parser.add_argument(
    '--force',
    action='store_true',
    default=False,
    help='If given, overwrite pre-existing files.')

# The output format. argparse rejects any value not listed in choices; when
# the option is omitted args.saveAs is None.
parser.add_argument(
    '--saveAs',
    choices=('fasta', 'fastq', 'fasta-ss'),
    help=('The output format. The default is to match the input format, so '
          'there is usually no need to specify this option. It can be used '
          'to force conversion from FASTQ to FASTA'))

# Register the shared FASTA input options, parse the command line, and hand
# the parsed options to the FASTA helper to obtain the reads.
addFASTACommandLineOptions(parser)
args = parser.parse_args()
reads = parseFASTACommandLineOptions(args)

# Create the output directory if it does not already exist.
if not exists(args.outDir):
    mkdir(args.outDir)

# An explicit --saveAs wins. Otherwise fall back to the first input-format
# flag that is set, checked in the order fasta, fastq, fasta-ss. If none is
# set, saveAs ends up as the (falsy) value of args.fasta_ss.
saveAs = args.saveAs
if not saveAs:
    saveAs = ((args.fasta and 'fasta') or (args.fastq and 'fastq') or
              (args.fasta_ss and 'fasta-ss'))

# Note: we may be reading the FASTA input from stdin, so we cannot read it
# more than once (and I don't want to store it all because it may be very
# large). That's why we do a second phase of processing to renumber the
# files we created if --numeric is used (and --noLeadingZeroes is not).
Ejemplo n.º 9
0
    help='If given, numeric filenames will not have leading zeroes.')

# A flag that, when given, sets args.force to True (store_true already
# defaults to False; it is spelled out here for clarity).
parser.add_argument(
    '--force', action='store_true', default=False,
    help='If given, overwrite pre-existing files.')

# The output format. argparse rejects any value not listed in choices; when
# the option is omitted args.saveAs is None.
parser.add_argument(
    '--saveAs', choices=('fasta', 'fastq', 'fasta-ss'),
    help=('The output format. The default is to match the input format, so '
          'there is usually no need to specify this option. It can be used '
          'to force conversion from FASTQ to FASTA'))

# Register the shared FASTA input options, parse the command line, and hand
# the parsed options to the FASTA helper to obtain the reads.
addFASTACommandLineOptions(parser)
args = parser.parse_args()
reads = parseFASTACommandLineOptions(args)

# Create the output directory if it does not already exist.
if not exists(args.outDir):
    mkdir(args.outDir)

# An explicit --saveAs wins. Otherwise the output format follows whichever
# input-format flag is set, checked in the order fasta, fastq, fasta-ss. If
# none is set, saveAs ends up as the (falsy) value of args.fasta_ss.
if args.saveAs:
    saveAs = args.saveAs
elif args.fasta:
    saveAs = 'fasta'
elif args.fastq:
    saveAs = 'fastq'
else:
    saveAs = args.fasta_ss and 'fasta-ss'

# Note: we may be reading the FASTA input from stdin, so we cannot read it
# more than once (and I don't want to store it all because it may be very
# large). That's why we do a second phase of processing to renumber the
# files we created (if --noLeadingZeroes is not used).

outDir = Path(args.outDir)

count = 0
Ejemplo n.º 10
0
        '--checkResultCount',
        type=int,
        help=('The number of reads expected in the output. If this number is '
              'not seen, the script exits with status 1 and an error '
              'message is printed unless --quiet was used.'))

    # Register the three groups of shared options: FASTA input, filtering,
    # and editing.
    addFASTACommandLineOptions(parser)
    addFASTAFilteringCommandLineOptions(parser)
    addFASTAEditingCommandLineOptions(parser)

    args = parser.parse_args()

    # Build the reads in three stages: the input reads are passed to the
    # filtering helper, and its result is passed to the editing helper.
    inputReads = parseFASTACommandLineOptions(args)
    filteredReads = parseFASTAFilteringCommandLineOptions(args, inputReads)
    reads = parseFASTAEditingCommandLineOptions(args, filteredReads)

    # An explicit --saveAs wins. Otherwise fall back to the first
    # input-format flag that is set, checked in the order fasta, fastq,
    # fasta-ss. If none is set, saveAs ends up as the (falsy) value of
    # args.fasta_ss.
    saveAs = args.saveAs
    if not saveAs:
        saveAs = ((args.fasta and 'fasta') or (args.fastq and 'fastq') or
                  (args.fasta_ss and 'fasta-ss'))

    # Check for incompatible read/write formats. We can't write FASTQ
    # unless we have FASTQ on input (else we won't have quality information),
    # and we can't write PDB FASTA with secondary structure information
    # unless we have that on input.
    if saveAs == 'fastq' and not args.fastq:
        raise ValueError(
            'You have specified --saveAs fastq without using --fastq '
            'to indicate that the input is FASTQ. Please be explicit.')
    elif saveAs == 'fasta-ss' and not args.fasta_ss:
        raise ValueError(
            'You have specified --saveAs fasta-ss without using --fasta-ss '