def _fasta_sniffer(fh):
    """Guess whether the open file handle ``fh`` holds FASTA data.

    Returns a ``(is_fasta, kwargs)`` tuple; ``kwargs`` is always an empty
    dict.
    """
    # Strategy:
    #   1. Tolerate at most 5 blank/whitespace-only lines at the top of the
    #      file; more than that and we give up immediately.
    #   2. Parse up to 10 records. The file is considered FASTA only if at
    #      least one record was produced (i.e. the file isn't empty) and the
    #      parser never raised FASTAFormatError.
    #   3. The data parser handed to _parse_fasta_raw is expected to reject
    #      records that look like QUAL. QUAL files can technically be read
    #      as FASTA, but that is rarely what the user wants, and a future
    #      dedicated QUAL sniffer must not compete with this one.
    if _too_many_blanks(fh, 5):
        return False, {}

    max_records = 10
    found_record = False
    try:
        records = _parse_fasta_raw(fh, _sniffer_data_parser,
                                   FASTAFormatError)
        # range() comes first in zip() so that the parser is never advanced
        # past the record limit.
        for _ in zip(range(max_records), records):
            found_record = True
    except FASTAFormatError:
        return False, {}

    return found_record, {}
def _fasta_sniffer(fh):
    """Guess whether the open file handle ``fh`` holds FASTA (not QUAL) data.

    Returns a ``(is_fasta, kwargs)`` tuple; ``kwargs`` is always an empty
    dict.
    """
    # Strategy:
    #   1. Tolerate at most 5 blank/whitespace-only lines at the top of the
    #      file.
    #   2. Read up to 10 FASTA records. Zero records (empty file) or a
    #      FASTAFormatError while reading means "not FASTA".
    #   3. Rewind and try to parse the same file as QUAL, which has stricter
    #      requirements. If *that* succeeds, refuse to call the file FASTA:
    #      QUAL can technically be read as FASTA because sequences aren't
    #      validated, but it probably isn't what the user wanted, and a
    #      future QUAL sniffer must not overlap with this one.
    if _too_many_blanks(fh, 5):
        return False, {}

    record_limit = 10
    try:
        saw_fasta_record = False
        for _ in zip(range(record_limit), _fasta_to_generator(fh)):
            saw_fasta_record = True

        if not saw_fasta_record:
            return False, {}

        # Second pass over the same data, this time with the QUAL parser.
        fh.seek(0)
        try:
            qual_records = _parse_fasta_raw(fh, _parse_quality_scores,
                                            'QUAL')
            for _ in zip(range(record_limit), qual_records):
                pass
        except FASTAFormatError:
            # Failing to parse as QUAL is the *positive* outcome here.
            return True, {}

        # Parsed cleanly as QUAL, so don't claim it as FASTA.
        return False, {}
    except FASTAFormatError:
        return False, {}
def _fastq_sniffer(fh):
    """Guess whether the open file handle ``fh`` holds FASTQ data.

    Returns a ``(is_fastq, kwargs)`` tuple; ``kwargs`` is always an empty
    dict.
    """
    # Strategy:
    #   1. Tolerate at most 5 blank/whitespace-only lines at the top of the
    #      file.
    #   2. Read up to 10 records using phred_offset=33. The file is assumed
    #      to be FASTQ if at least one record is read (i.e. the file isn't
    #      empty) and reading raises neither FASTQFormatError nor
    #      ValueError.
    if _too_many_blanks(fh, 5):
        return False, {}

    max_records = 10
    try:
        # range() comes first in zip() so that the generator is never
        # advanced past the record limit.
        records_read = sum(
            1 for _ in zip(range(max_records),
                           _fastq_to_generator(fh, phred_offset=33)))
    except (FASTQFormatError, ValueError):
        return False, {}

    return records_read > 0, {}