예제 #1
0
def _fastq_to_generator(fh, variant=None, phred_offset=None,
                        constructor=Sequence, **kwargs):
    """Yield one object per FASTQ record read from ``fh``.

    Parameters
    ----------
    fh
        Open file handle handed to the line/record parsing helpers.
    variant, phred_offset
        Forwarded unchanged to ``_parse_quality_scores``.
    constructor : callable
        Called as ``constructor(seq, metadata=..., positional_metadata=...,
        **kwargs)`` for every record; its return value is what gets yielded.
    **kwargs
        Extra keyword arguments forwarded to ``constructor``.

    Yields
    ------
    object
        Result of ``constructor`` for each record. Nothing is yielded when
        ``fh`` has no non-blank lines.

    Raises
    ------
    FASTQFormatError
        If the first non-blank line does not start with ``@``, or if a
        quality header is neither a bare ``+`` nor identical (after the
        leading character) to its sequence header.

    """
    # Skip any blank or whitespace-only lines at beginning of file
    try:
        seq_header = next(_line_generator(fh, skip_blanks=True))
    except StopIteration:
        # Empty / all-blank input. A StopIteration leaking out of a generator
        # body is converted to RuntimeError (PEP 479), so end the generator
        # explicitly instead.
        return

    if not seq_header.startswith('@'):
        raise FASTQFormatError(
            "Expected sequence (@) header line at start of file: %r"
            % str(seq_header))

    # _parse_quality_scores hands back the header of the next record; the
    # loop stops once it returns None (presumably at end of input).
    while seq_header is not None:
        id_, desc = _parse_fasta_like_header(seq_header)
        seq, qual_header = _parse_sequence_data(fh, seq_header)

        # The quality header may be a bare '+' or may repeat the sequence
        # header text after the leading marker character.
        if qual_header != '+' and qual_header[1:] != seq_header[1:]:
            raise FASTQFormatError(
                "Sequence (@) and quality (+) header lines do not match: "
                "%r != %r" % (str(seq_header[1:]), str(qual_header[1:])))

        phred_scores, seq_header = _parse_quality_scores(fh, len(seq),
                                                         variant,
                                                         phred_offset,
                                                         qual_header)
        yield constructor(seq, metadata={'id': id_, 'description': desc},
                          positional_metadata={'quality': phred_scores},
                          **kwargs)
예제 #2
0
def _fastq_to_generator(fh,
                        variant=None,
                        phred_offset=None,
                        constructor=Sequence,
                        **kwargs):
    """Yield one object per FASTQ record read from ``fh``.

    Parameters
    ----------
    fh
        Open file handle handed to the line/record parsing helpers.
    variant, phred_offset
        Forwarded unchanged to ``_parse_quality_scores``.
    constructor : callable
        Called as ``constructor(seq, metadata=..., positional_metadata=...,
        **kwargs)`` for every record; its return value is what gets yielded.
    **kwargs
        Extra keyword arguments forwarded to ``constructor``.

    Yields
    ------
    object
        Result of ``constructor`` for each record. Nothing is yielded when
        ``fh`` has no non-blank lines.

    Raises
    ------
    FASTQFormatError
        If the first non-blank line does not start with ``@``, or if a
        quality header is neither a bare ``+`` nor identical (after the
        leading character) to its sequence header.

    """
    # Skip any blank or whitespace-only lines at beginning of file
    try:
        seq_header = next(_line_generator(fh, skip_blanks=True))
    except StopIteration:
        # Empty / all-blank input. A StopIteration leaking out of a generator
        # body is converted to RuntimeError (PEP 479), so end the generator
        # explicitly instead.
        return

    if not seq_header.startswith('@'):
        raise FASTQFormatError(
            "Expected sequence (@) header line at start of file: %r" %
            str(seq_header))

    # _parse_quality_scores hands back the header of the next record; the
    # loop stops once it returns None (presumably at end of input).
    while seq_header is not None:
        id_, desc = _parse_fasta_like_header(seq_header)
        seq, qual_header = _parse_sequence_data(fh, seq_header)

        # The quality header may be a bare '+' or may repeat the sequence
        # header text after the leading marker character.
        if qual_header != '+' and qual_header[1:] != seq_header[1:]:
            raise FASTQFormatError(
                "Sequence (@) and quality (+) header lines do not match: "
                "%r != %r" % (str(seq_header[1:]), str(qual_header[1:])))

        phred_scores, seq_header = _parse_quality_scores(
            fh, len(seq), variant, phred_offset, qual_header)
        yield constructor(seq,
                          metadata={
                              'id': id_,
                              'description': desc
                          },
                          positional_metadata={'quality': phred_scores},
                          **kwargs)
예제 #3
0
def _parse_fasta_raw(fh, data_parser, error_type):
    """Yield raw ``(data, id, description)`` records from a FASTA/QUAL file.

    ``data`` is whatever ``data_parser`` returns when given the list of data
    lines collected for a record; turning the raw values into an in-memory
    object is left to the caller. Nothing is yielded when ``fh`` has no
    non-blank lines. ``error_type`` is raised when the first non-blank line
    is not a ``>`` header, or when a data line follows an empty line.

    """
    # Leading blank or whitespace-only lines are skipped; if nothing is left
    # the generator just ends.
    try:
        first_line = next(_line_generator(fh, skip_blanks=True))
    except StopIteration:
        return

    # Header test is written inline (here and in the loop) for performance.
    if not first_line.startswith('>'):
        raise error_type(
            "Found non-header line when attempting to read the 1st record:"
            "\n%s" % first_line)
    id_, desc = _parse_fasta_like_header(first_line)

    chunks = []
    previous = first_line
    for current in _line_generator(fh, skip_blanks=False):
        if current.startswith('>'):
            # A new header closes the record accumulated so far.
            yield data_parser(chunks), id_, desc
            chunks = []
            id_, desc = _parse_fasta_like_header(current)
        elif current:
            # Data right after an empty line means the blank sat inside a
            # record, which is rejected.
            if not previous:
                raise error_type(
                    "Found blank or whitespace-only line within record.")
            chunks.append(current)
        previous = current

    # Lines gathered after the final header form the last record.
    yield data_parser(chunks), id_, desc
예제 #4
0
def _parse_fasta_raw(fh, data_parser, error_type):
    """Yield raw ``(data, id, description)`` records from a FASTA/QUAL file.

    ``data`` is whatever ``data_parser`` returns when given the list of data
    lines collected for a record; turning the raw values into an in-memory
    object is left to the caller. Nothing is yielded when ``fh`` has no
    non-blank lines. ``error_type`` is raised when the first non-blank line
    is not a ``>`` header, or when a data line follows an empty line.

    """
    # Leading blank or whitespace-only lines are skipped; if nothing is left
    # the generator just ends.
    try:
        first_line = next(_line_generator(fh, skip_blanks=True))
    except StopIteration:
        return

    # Header test is written inline (here and in the loop) for performance.
    if not first_line.startswith('>'):
        raise error_type(
            "Found non-header line when attempting to read the 1st record:"
            "\n%s" % first_line)
    id_, desc = _parse_fasta_like_header(first_line)

    chunks = []
    previous = first_line
    for current in _line_generator(fh, skip_blanks=False):
        if current.startswith('>'):
            # A new header closes the record accumulated so far.
            yield data_parser(chunks), id_, desc
            chunks = []
            id_, desc = _parse_fasta_like_header(current)
        elif current:
            # Data right after an empty line means the blank sat inside a
            # record, which is rejected.
            if not previous:
                raise error_type(
                    "Found blank or whitespace-only line within record.")
            chunks.append(current)
        previous = current

    # Lines gathered after the final header form the last record.
    yield data_parser(chunks), id_, desc
예제 #5
0
 def test_id_and_description(self):
     # Header carrying both an ID and a description, with trailing
     # whitespace that is absent from the expected result.
     header = '>!thus  suht! \t\t  \n'
     self.assertEqual(_parse_fasta_like_header(header), ('!thus', 'suht!'))
예제 #6
0
 def test_description_only(self):
     # A space directly after '>' is expected to give an empty ID and the
     # remaining text as the description.
     header = '> suht! \t\t  \n'
     self.assertEqual(_parse_fasta_like_header(header), ('', 'suht!'))
예제 #7
0
 def test_id_only(self):
     # A single token after '>' is expected to be the ID, with an empty
     # description.
     header = '>suht! \t\t  \n'
     self.assertEqual(_parse_fasta_like_header(header), ('suht!', ''))
예제 #8
0
 def test_no_id_or_description(self):
     # Only whitespace after '>' is expected to give an empty ID and an
     # empty description.
     header = '> \t\t  \n'
     self.assertEqual(_parse_fasta_like_header(header), ('', ''))
예제 #9
0
 def test_id_and_description(self):
     # Header carrying both an ID and a description, with trailing
     # whitespace that is absent from the expected result.
     header = ">!thus  suht! \t\t  \n"
     self.assertEqual(_parse_fasta_like_header(header), ("!thus", "suht!"))
예제 #10
0
 def test_description_only(self):
     # A space directly after '>' is expected to give an empty ID and the
     # remaining text as the description.
     header = "> suht! \t\t  \n"
     self.assertEqual(_parse_fasta_like_header(header), ("", "suht!"))
예제 #11
0
 def test_id_only(self):
     # A single token after '>' is expected to be the ID, with an empty
     # description.
     header = ">suht! \t\t  \n"
     self.assertEqual(_parse_fasta_like_header(header), ("suht!", ""))
예제 #12
0
 def test_no_id_or_description(self):
     # Only whitespace after '>' is expected to give an empty ID and an
     # empty description.
     header = "> \t\t  \n"
     self.assertEqual(_parse_fasta_like_header(header), ("", ""))
예제 #13
0
 def test_id_and_description(self):
     # Header carrying both an ID and a description, with trailing
     # whitespace that is absent from the expected result.
     header = '>!thus  suht! \t\t  \n'
     self.assertEqual(_parse_fasta_like_header(header), ('!thus', 'suht!'))
예제 #14
0
 def test_description_only(self):
     # A space directly after '>' is expected to give an empty ID and the
     # remaining text as the description.
     header = '> suht! \t\t  \n'
     self.assertEqual(_parse_fasta_like_header(header), ('', 'suht!'))
예제 #15
0
 def test_id_only(self):
     # A single token after '>' is expected to be the ID, with an empty
     # description.
     header = '>suht! \t\t  \n'
     self.assertEqual(_parse_fasta_like_header(header), ('suht!', ''))
예제 #16
0
 def test_no_id_or_description(self):
     # Only whitespace after '>' is expected to give an empty ID and an
     # empty description.
     header = '> \t\t  \n'
     self.assertEqual(_parse_fasta_like_header(header), ('', ''))