'quality strings on a single TAB-separated line to stdout.')) parser.add_argument('--separator', default='\t', help=('The character string to separate ids from ' 'sequences and quality strings (if any)')) parser.add_argument('--removeIds', default=False, action='store_true', help='Do not print sequence ids') addFASTACommandLineOptions(parser) args = parser.parse_args() sep = args.separator reads = parseFASTACommandLineOptions(args) # Duplicate code a little so as not to repeatedly do two tests per read. if args.fastq: if args.removeIds: for read in reads: print(read.sequence + sep + read.quality) else: for read in reads: print(read.id + sep + read.sequence + sep + read.quality) else: if args.removeIds: for read in reads: print(read.sequence) else: for read in reads:
'be ignored. The sites must be given in the form e.g., ' '24,100-200,260.')) parser.add_argument( '--showDiffs', default=False, action='store_true', help='Print (1-based) sites where the sequence nucleotides differ.') addFASTACommandLineOptions(parser) args = parser.parse_args() keepSequences = set([args.index1 - 1, args.index2 - 1]) reads = list( parseFASTACommandLineOptions(args).filter(keepSequences=keepSequences)) if len(reads) == 1: if len(keepSequences) == 1: # This is ok, they want to compare a sequence with itself. reads = Reads([reads[0], reads[0]]) else: print('Could not find both requested sequence indices. Exiting.') sys.exit(1) elif len(reads) != 2: print('Could not find both requested sequence indices. Exiting.') sys.exit(1) if args.alignmentFile: args.align = True
parser = argparse.ArgumentParser( description=( 'Given FASTA on stdin, write the ids, sequences, and ' 'quality strings on a single TAB-separated line to stdout.')) parser.add_argument('--separator', default='\t', help=('The character string to separate ids from ' 'sequences and quality strings (if any)')) parser.add_argument('--removeIds', default=False, action='store_true', help='Do not print sequence ids') addFASTACommandLineOptions(parser) args = parser.parse_args() sep = args.separator reads = parseFASTACommandLineOptions(args) # Duplicate code a little so as not to repeatedly do two tests for read. if args.fastq: if args.removeIds: for read in reads: print(read.sequence + sep + read.quality) else: for read in reads: print(read.id + sep + read.sequence + sep + read.quality) else: if args.removeIds: for read in reads: print(read.sequence) else: for read in reads:
parser.add_argument('--force', default=False, action='store_true', help='If given, overwrite pre-existing files.') parser.add_argument( '--saveAs', choices=('fasta', 'fastq', 'fasta-ss'), help=('The output format. The default is to match the input format, ' 'so there is usually no need to specify this option. It can be ' 'used to force conversion from FASTQ to FASTA')) addFASTACommandLineOptions(parser) args = parser.parse_args() reads = parseFASTACommandLineOptions(args) if not exists(args.outDir): mkdir(args.outDir) saveAs = (args.saveAs or (args.fasta and 'fasta') or (args.fastq and 'fastq') or (args.fasta_ss and 'fasta-ss')) # Note: we may be reading the FASTA input from stdin, so we cannot read it # more than once (and I don't want to store it all because it may be very # large). That's why we do a second phase of processing to renumber the # files we created if --numeric is used (and --noLeadingZeroes is not). count = 0 for count, read in enumerate(parseFASTACommandLineOptions(args), start=1): id_ = read.id.split()[0]
# Option giving the number of reads that are expected in the output.
parser.add_argument(
    '--checkResultCount',
    type=int,
    help=('The number of reads expected in the output. If this number is '
          'not seen, the script exits with status 1 and an error '
          'message is printed unless --quiet was used.'),
)

# Register the shared FASTA reading, filtering, and editing options, then
# parse the command line.
addFASTACommandLineOptions(parser)
addFASTAFilteringCommandLineOptions(parser)
addFASTAEditingCommandLineOptions(parser)

args = parser.parse_args()

# Build the reads in three stages: read the input, apply the filtering
# options, then apply the editing options.
inputReads = parseFASTACommandLineOptions(args)
filteredReads = parseFASTAFilteringCommandLineOptions(args, inputReads)
reads = parseFASTAEditingCommandLineOptions(args, filteredReads)

# An explicit --saveAs value wins. Otherwise fall back to whichever input
# format flag is set, checked in the order fasta, fastq, fasta-ss.
saveAs = args.saveAs
if not saveAs:
    saveAs = ((args.fasta and 'fasta') or
              (args.fastq and 'fastq') or
              (args.fasta_ss and 'fasta-ss'))

# Check for incompatible read/write formats. We can't write FASTQ
# unless we have FASTQ on input (else we won't have quality information),
# and we can't write PDB FASTA with secondary structure information
# unless we have that on input.
if saveAs == 'fastq' and not args.fastq:
    raise ValueError(
        'You have specified --saveAs fastq without using --fastq '
        'to indicate that the input is FASTQ. Please be explicit.')
'start of this sequence will be counted and offsets will be ' 'incremented by that amount (and lower offsets in sequences ' 'that start before the specified sequence will be ignored).')) parser.add_argument( '--unknownAreAmbiguous', action='store_true', default=False, help=("Any unknown character (e.g., a '-' gap or '?' unknown base) " "will be treated as being fully ambiguous (i.e., could be any " "of ACGT). Otherwise, all unknown characters are counted " "as '-' characters.")) addFASTACommandLineOptions(parser) args = parser.parse_args() reads = Reads(list(parseFASTACommandLineOptions(args))) if not reads: sys.exit(0) if args.reference: for read in reads: if read.id == args.reference: break else: print('Could not find --reference sequence %r.' % args.reference, file=sys.stderr) sys.exit(1) baseOffset = len(read.sequence) - len(read.sequence.lstrip('-')) reference = read else:
# Options controlling which sites are considered and whether the differing
# sites are printed.
parser.add_argument(
    '--sites',
    help=('Specify (1-based) sequence sites to keep. All other sites will '
          'be ignored. The sites must be given in the form e.g., '
          '24,100-200,260.'))

parser.add_argument(
    '--showDiffs', default=False, action='store_true',
    help='Print (1-based) sites where the sequence nucleotides differ.')

addFASTACommandLineOptions(parser)
args = parser.parse_args()

# Both requested indices are decremented before being handed to the filter
# (presumably 1-based on the command line and 0-based in the filter; confirm
# against the option definitions). Using a set means that asking for the
# same index twice leaves a single element.
keepSequences = {args.index1 - 1, args.index2 - 1}

reads = list(parseFASTACommandLineOptions(args).filter(
    keepSequences=keepSequences))

if len(reads) == 1 and len(keepSequences) == 1:
    # This is ok, they want to compare a sequence with itself.
    reads = Reads([reads[0], reads[0]])
elif len(reads) != 2:
    # Covers every other failure: no reads found, or only one read found
    # when two distinct indices were requested. The message goes to stderr
    # so it cannot be mixed into results written to stdout.
    print('Could not find both requested sequence indices. Exiting.',
          file=sys.stderr)
    sys.exit(1)

# Giving an alignment file switches alignment on.
if args.alignmentFile:
    args.align = True
'--noLeadingZeroes', default=False, action='store_true', help='If given, numeric filenames will not have leading zeroes.') parser.add_argument( '--force', default=False, action='store_true', help='If given, overwrite pre-existing files.') parser.add_argument( '--saveAs', choices=('fasta', 'fastq', 'fasta-ss'), help=('The output format. The default is to match the input format, ' 'so there is usually no need to specify this option. It can be ' 'used to force conversion from FASTQ to FASTA')) addFASTACommandLineOptions(parser) args = parser.parse_args() reads = parseFASTACommandLineOptions(args) if not exists(args.outDir): mkdir(args.outDir) saveAs = ( args.saveAs or (args.fasta and 'fasta') or (args.fastq and 'fastq') or (args.fasta_ss and 'fasta-ss')) # Note: we may be reading the FASTA input from stdin, so we cannot read it # more than once (and I don't want to store it all because it may be very # large). That's why we do a second phase of processing to renumber the # files we created if --numeric is used (and --noLeadingZeroes is not).
help='If given, numeric filenames will not have leading zeroes.') parser.add_argument('--force', action='store_true', help='If given, overwrite pre-existing files.') parser.add_argument( '--saveAs', choices=('fasta', 'fastq', 'fasta-ss'), help=('The output format. The default is to match the input format, ' 'so there is usually no need to specify this option. It can be ' 'used to force conversion from FASTQ to FASTA')) addFASTACommandLineOptions(parser) args = parser.parse_args() reads = parseFASTACommandLineOptions(args) if not exists(args.outDir): mkdir(args.outDir) saveAs = (args.saveAs or (args.fasta and 'fasta') or (args.fastq and 'fastq') or (args.fasta_ss and 'fasta-ss')) # Note: we may be reading the FASTA input from stdin, so we cannot read it # more than once (and I don't want to store it all because it may be very # large). That's why we do a second phase of processing to renumber the # files we created (if --noLeadingZeroes is not used). outDir = Path(args.outDir) count = 0
'--checkResultCount', type=int, help=('The number of reads expected in the output. If this number is ' 'not seen, the script exits with status 1 and an error ' 'message is printed unless --quiet was used.')) addFASTACommandLineOptions(parser) addFASTAFilteringCommandLineOptions(parser) addFASTAEditingCommandLineOptions(parser) args = parser.parse_args() reads = parseFASTAEditingCommandLineOptions( args, parseFASTAFilteringCommandLineOptions( args, parseFASTACommandLineOptions(args))) saveAs = (args.saveAs or (args.fasta and 'fasta') or (args.fastq and 'fastq') or (args.fasta_ss and 'fasta-ss')) # Check for incompatible read/write formats. We can't write FASTQ # unless we have FASTQ on input (else we won't have quality information), # and we can't write PDB FASTA with secondary structure information # unless we have that on input. if saveAs == 'fastq' and not args.fastq: raise ValueError( 'You have specified --saveAs fastq without using --fastq ' 'to indicate that the input is FASTQ. Please be explicit.') elif saveAs == 'fasta-ss' and not args.fasta_ss: raise ValueError( 'You have specified --saveAs fasta-ss without using --fasta-ss '