Esempio n. 1
0
def _fasta_sniffer(fh):
    """Guess whether ``fh`` holds FASTA data; return ``(is_fasta, {})``."""
    # Approach:
    #   * Tolerate up to 5 leading blank/whitespace-only lines; beyond that
    #     the file is rejected outright.
    #   * Pull at most 10 records through the raw FASTA parser. A
    #     FASTAFormatError while doing so means "not FASTA".
    #   * A file that yields no records at all (i.e. it is empty) is
    #     rejected as well.
    #   QUAL files can technically be read as FASTA, but sniffing them as
    #   FASTA is probably not what the user wants, and it would clash with a
    #   dedicated QUAL sniffer should one be added later.
    #   NOTE(review): the QUAL rejection presumably lives in
    #   `_sniffer_data_parser` -- confirm against that helper.
    if _too_many_blanks(fh, 5):
        return False, {}

    max_records = 10
    try:
        records = _parse_fasta_raw(fh, _sniffer_data_parser, FASTAFormatError)
        # range() is listed first so zip() stops as soon as the limit is hit
        # and never asks the parser for an extra record.
        records_read = sum(1 for _ in zip(range(max_records), records))
    except FASTAFormatError:
        return False, {}

    return records_read > 0, {}
Esempio n. 2
0
def _fasta_sniffer(fh):
    # How the sniffing works:
    #   1. Up to 5 blank/whitespace-only lines at the top of the file are
    #      tolerated; more than that and we give up immediately.
    #   2. At most 10 records are parsed. If parsing raises
    #      FASTAFormatError the file is not FASTA.
    #   3. If parsing succeeds but produced zero records (the file is
    #      empty), the file is not FASTA either.
    #   A QUAL file must *not* be reported as FASTA: it can technically be
    #   read as FASTA, but that is probably not what the user wanted, and a
    #   future QUAL sniffer should not have to compete with this one.
    #   NOTE(review): QUAL detection is presumably delegated to
    #   `_sniffer_data_parser` -- verify in that helper.
    if _too_many_blanks(fh, 5):
        return False, {}

    record_limit = 10
    found_record = False
    try:
        fasta_records = _parse_fasta_raw(fh, _sniffer_data_parser,
                                         FASTAFormatError)
        # zip() pulls from range() first, so the parser is not advanced once
        # the record limit has been reached.
        for _ in zip(range(record_limit), fasta_records):
            found_record = True
    except FASTAFormatError:
        return False, {}

    return found_record, {}
Esempio n. 3
0
def _fasta_sniffer(fh):
    """Guess whether ``fh`` holds FASTA (and not QUAL) data.

    Returns ``(True, {})`` when the file is taken to be FASTA and
    ``(False, {})`` otherwise.
    """
    # Approach:
    #   * Tolerate up to 5 leading blank/whitespace-only lines.
    #   * Read at most 10 FASTA records. A FASTAFormatError, or no records
    #     at all (empty file), means "not FASTA".
    #   * Rewind and try the same data as QUAL, which is stricter. If the
    #     QUAL parse *succeeds* the file is reported as not FASTA: QUAL can
    #     technically be read as FASTA because sequences are not validated,
    #     but that is probably not what the user wanted, and a future QUAL
    #     sniffer should not overlap with this one.
    if _too_many_blanks(fh, 5):
        return False, {}

    max_records = 10

    # Pass 1: does it parse as FASTA at all?
    saw_record = False
    try:
        # range() first so zip() never over-reads past the record limit.
        for _ in zip(range(max_records), _fasta_to_generator(fh)):
            saw_record = True
    except FASTAFormatError:
        return False, {}

    if not saw_record:
        return False, {}

    # Pass 2: does it also parse as QUAL? Start again from the beginning.
    fh.seek(0)
    try:
        qual_records = _parse_fasta_raw(fh, _parse_quality_scores, 'QUAL')
        for _ in zip(range(max_records), qual_records):
            pass
    except FASTAFormatError:
        # Valid FASTA that is not valid QUAL: accept as FASTA.
        return True, {}

    # Parsed cleanly as QUAL, so do not claim it as FASTA.
    return False, {}
Esempio n. 4
0
def _fastq_sniffer(fh):
    """Guess whether ``fh`` holds FASTQ data; return ``(is_fastq, {})``."""
    # Approach:
    #   * Tolerate up to 5 leading blank/whitespace-only lines.
    #   * Read at most 10 records with a Phred offset of 33. The file is
    #     taken to be FASTQ when at least one record is read (it is not
    #     empty) and reading raises neither FASTQFormatError nor ValueError.
    #     NOTE(review): those errors presumably cover quality scores outside
    #     the printable ASCII range -- confirm in `_fastq_to_generator`.
    if _too_many_blanks(fh, 5):
        return False, {}

    max_records = 10
    try:
        records = _fastq_to_generator(fh, phred_offset=33)
        # range() first so zip() never over-reads past the record limit.
        records_read = sum(1 for _ in zip(range(max_records), records))
    except (FASTQFormatError, ValueError):
        return False, {}

    return records_read > 0, {}
Esempio n. 5
0
def _fastq_sniffer(fh):
    # How the sniffing works:
    #   1. Up to 5 blank/whitespace-only lines at the top of the file are
    #      tolerated; more than that and the file is rejected.
    #   2. At most 10 records are read using a Phred offset of 33.
    #   3. The file is reported as FASTQ only if at least one record was
    #      read (the file is not empty) and no FASTQFormatError/ValueError
    #      was raised along the way.
    #      NOTE(review): per the original design notes this is how quality
    #      scores outside the printable ASCII range are meant to be
    #      rejected -- verify in `_fastq_to_generator`.
    if _too_many_blanks(fh, 5):
        return False, {}

    record_limit = 10
    found_record = False
    try:
        fastq_records = _fastq_to_generator(fh, phred_offset=33)
        # zip() pulls from range() first, so the reader is not advanced once
        # the record limit has been reached.
        for _ in zip(range(record_limit), fastq_records):
            found_record = True
    except (FASTQFormatError, ValueError):
        return False, {}
    else:
        return found_record, {}