Beispiel #1
0
    def test_sanger_variant(self):
        # Every character from '!' through '~' in ASCII order; the test
        # expects these to decode to the consecutive scores 0..93.
        printable = ('!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOP'
                     'QRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~')
        decoded = _decode_qual_to_phred(printable, variant='sanger')
        npt.assert_equal(decoded, np.arange(94))

        # A space sorts before '!', so it is outside the accepted characters
        # and the error should report the valid score range.
        with self.assertRaises(ValueError) as ctx:
            _decode_qual_to_phred('a b', variant='sanger')
        error_text = str(ctx.exception)
        self.assertIn('[0, 93]', error_text)
Beispiel #2
0
    def test_sanger_variant(self):
        # Full run of characters '!'..'~' (ASCII order); decoding them as
        # sanger is expected to give the scores 0 through 93.
        sanger_chars = ('!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOP'
                        'QRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~')
        expected_scores = np.arange(94)
        scores = _decode_qual_to_phred(sanger_chars, variant='sanger')
        npt.assert_equal(scores, expected_scores)

        # The embedded space precedes '!' and must be rejected with a message
        # that names the allowed range.
        with self.assertRaises(ValueError) as raised:
            _decode_qual_to_phred('a b', variant='sanger')
        message = str(raised.exception)
        self.assertIn('[0, 93]', message)
Beispiel #3
0
    def test_illumina18_variant(self):
        # Characters '!' through '_' in ASCII order; the test expects them to
        # decode to the consecutive scores 0..62 under illumina1.8.
        valid_chars = ('!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL'
                       'MNOPQRSTUVWXYZ[\\]^_')
        decoded = _decode_qual_to_phred(valid_chars, variant='illumina1.8')
        npt.assert_equal(decoded, np.arange(63))

        # 'a' sorts after '_', so it lies outside the accepted characters.
        with self.assertRaises(ValueError) as ctx:
            _decode_qual_to_phred('AaB', variant='illumina1.8')
        error_text = str(ctx.exception)
        self.assertIn('[0, 62]', error_text)
Beispiel #4
0
    def test_illumina13_variant(self):
        # Characters '@' through '~' in ASCII order; the test expects them to
        # decode to the consecutive scores 0..62 under illumina1.3.
        valid_chars = ('@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijk'
                       'lmnopqrstuvwxyz{|}~')
        decoded = _decode_qual_to_phred(valid_chars, variant='illumina1.3')
        npt.assert_equal(decoded, np.arange(63))

        # '!' sorts before '@', so it lies outside the accepted characters.
        with self.assertRaises(ValueError) as ctx:
            _decode_qual_to_phred('a!b', variant='illumina1.3')
        error_text = str(ctx.exception)
        self.assertIn('[0, 62]', error_text)
Beispiel #5
0
    def test_illumina13_variant(self):
        # Run of characters '@'..'~' (ASCII order); decoding them as
        # illumina1.3 is expected to give the scores 0 through 62.
        illumina13_chars = ('@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijk'
                            'lmnopqrstuvwxyz{|}~')
        expected_scores = np.arange(63)
        scores = _decode_qual_to_phred(illumina13_chars,
                                       variant='illumina1.3')
        npt.assert_equal(scores, expected_scores)

        # The '!' precedes '@' and must be rejected with a message that names
        # the allowed range.
        with self.assertRaises(ValueError) as raised:
            _decode_qual_to_phred('a!b', variant='illumina1.3')
        message = str(raised.exception)
        self.assertIn('[0, 62]', message)
Beispiel #6
0
    def test_illumina18_variant(self):
        # Run of characters '!'..'_' (ASCII order); decoding them as
        # illumina1.8 is expected to give the scores 0 through 62.
        illumina18_chars = ('!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL'
                            'MNOPQRSTUVWXYZ[\\]^_')
        expected_scores = np.arange(63)
        scores = _decode_qual_to_phred(illumina18_chars,
                                       variant='illumina1.8')
        npt.assert_equal(scores, expected_scores)

        # The 'a' follows '_' and must be rejected with a message that names
        # the allowed range.
        with self.assertRaises(ValueError) as raised:
            _decode_qual_to_phred('AaB', variant='illumina1.8')
        message = str(raised.exception)
        self.assertIn('[0, 62]', message)
Beispiel #7
0
def _parse_quality_scores(fh, seq_len, variant, phred_offset, prev):
    """Read and decode the quality lines of one FASTQ record.

    Chunks are pulled from ``_line_generator(fh, skip_blanks=False)`` and
    decoded with ``_decode_qual_to_phred`` until ``seq_len`` quality
    characters have been consumed.

    Returns a 2-tuple ``(scores, header)``: ``scores`` is the ``np.hstack``
    of the decoded chunks, and ``header`` is the '@'-prefixed chunk that
    ended the record, or ``None`` when the input ran out instead.

    ``prev`` is the chunk seen immediately before this call; when a
    non-empty quality chunk follows an empty one, ``_blank_error`` is
    invoked. Raises ``FASTQFormatError`` if more than ``seq_len`` quality
    characters are found, or if the input ends before ``seq_len`` is reached.
    """
    decoded_chunks = []
    n_qual = 0
    for line in _line_generator(fh, skip_blanks=False):
        if not line:
            # Blank chunks are only remembered so the next non-empty chunk
            # can detect that it was preceded by one.
            prev = line
            continue

        # An '@' line only starts a new record once all expected quality
        # characters are in; before that, '@' is a valid quality character.
        if line.startswith('@') and n_qual == seq_len:
            return np.hstack(decoded_chunks), line

        if not prev:
            _blank_error("after '+' or within quality scores")

        n_qual += len(line)
        surplus = n_qual - seq_len
        if surplus > 0:
            raise FASTQFormatError(
                "Found more quality score characters than sequence "
                "characters. Extra quality score characters: %r" %
                line[-surplus:])

        decoded_chunks.append(
            _decode_qual_to_phred(line, variant=variant,
                                  phred_offset=phred_offset))
        prev = line

    if n_qual != seq_len:
        raise FASTQFormatError(
            "Found incomplete/truncated FASTQ record at end of file.")
    return np.hstack(decoded_chunks), None
Beispiel #8
0
def _qseq_to_generator(fh,
                       constructor=Sequence,
                       filter=_will_filter,
                       phred_offset=_default_phred_offset,
                       variant=_default_variant,
                       **kwargs):
    """Yield one ``constructor`` object per accepted line of ``fh``.

    Each line is split by ``_record_parser`` into eleven fields. When
    ``filter`` is truthy, records whose ``filtered`` field is truthy are
    skipped. For every remaining record the raw quality string is decoded
    with ``_decode_qual_to_phred`` and ``constructor`` is called with the
    sequence, a ``metadata`` dict (id, machine name, and the integer-valued
    run/lane/tile/x/y/index/read fields), the decoded scores as the
    'quality' entry of ``positional_metadata``, and any extra ``kwargs``.
    """
    for record in fh:
        (machine_name, run, lane, tile, x, y, index, read, seq, raw_qual,
         filtered) = _record_parser(record)

        # Drop the record only when filtering is on AND it is flagged.
        if filter and filtered:
            continue

        quality = _decode_qual_to_phred(raw_qual, variant, phred_offset)
        seq_id = '%s_%s:%s:%s:%s:%s#%s/%s' % (
            machine_name, run, lane, tile, x, y, index, read)

        metadata = {'id': seq_id, 'machine_name': machine_name}
        # The remaining fields arrive as text and are stored as integers.
        numeric_fields = (
            ('run_number', run),
            ('lane_number', lane),
            ('tile_number', tile),
            ('x', x),
            ('y', y),
            ('index', index),
            ('read_number', read),
        )
        for key, raw_value in numeric_fields:
            metadata[key] = int(raw_value)

        yield constructor(seq,
                          metadata=metadata,
                          positional_metadata={'quality': quality},
                          **kwargs)
Beispiel #9
0
def _parse_quality_scores(fh, seq_len, variant, phred_offset, prev):
    """Consume the quality section of a FASTQ record from ``fh``.

    Non-empty chunks from ``_line_generator(fh, skip_blanks=False)`` are
    decoded via ``_decode_qual_to_phred`` and accumulated until exactly
    ``seq_len`` characters have been read.

    The result is ``(np.hstack(decoded), next_header)``, where
    ``next_header`` is the '@' chunk that terminated the record, or ``None``
    if the generator was exhausted first.

    ``_blank_error`` is called when a non-empty quality chunk comes right
    after an empty one (``prev`` carries the last chunk seen by the caller).
    ``FASTQFormatError`` is raised when the quality data is longer than
    ``seq_len`` or when the input ends with fewer than ``seq_len`` characters.
    """
    scores = []
    seen = 0
    for piece in _line_generator(fh, skip_blanks=False):
        if piece:
            starts_record = piece.startswith('@') and seen == seq_len
            if starts_record:
                # All quality characters are accounted for, so this '@'
                # chunk belongs to the following record.
                return np.hstack(scores), piece

            if not prev:
                _blank_error("after '+' or within quality scores")

            seen = seen + len(piece)
            if seen > seq_len:
                extra = piece[-(seen - seq_len):]
                raise FASTQFormatError(
                    "Found more quality score characters than sequence "
                    "characters. Extra quality score characters: %r" %
                    extra)

            phred = _decode_qual_to_phred(piece, variant=variant,
                                          phred_offset=phred_offset)
            scores.append(phred)
        # Track every chunk, blank or not, for the blank-line check above.
        prev = piece

    if seen == seq_len:
        return np.hstack(scores), None
    raise FASTQFormatError(
        "Found incomplete/truncated FASTQ record at end of file.")
Beispiel #10
0
def _qseq_to_generator(fh, constructor=Sequence, filter=_will_filter,
                       phred_offset=_default_phred_offset,
                       variant=_default_variant):
    """Yield one ``constructor`` object per accepted line of ``fh``.

    Each line is split into eleven fields by ``_record_parser``. When
    ``filter`` is truthy, records whose ``filtered`` field is truthy are
    skipped. Accepted records are built as
    ``constructor(seq, quality=..., id=...)``, with the quality decoded by
    ``_decode_qual_to_phred`` and the id assembled from the machine name,
    run, lane, tile, x, y, index and read fields.
    """
    for record in fh:
        (machine_name, run, lane, tile, x, y, index, read, seq, raw_qual,
         filtered) = _record_parser(record)

        # Drop the record only when filtering is on AND it is flagged.
        if filter and filtered:
            continue

        quality = _decode_qual_to_phred(raw_qual, variant, phred_offset)
        id_parts = (machine_name, run, lane, tile, x, y, index, read)
        seq_id = '%s_%s:%s:%s:%s:%s#%s/%s' % id_parts
        yield constructor(seq, quality=quality, id=seq_id)
Beispiel #11
0
def _qseq_to_generator(fh, constructor=BiologicalSequence, filter=_will_filter,
                       phred_offset=_default_phred_offset,
                       variant=_default_variant):
    """Generate sequence objects from the lines of ``fh``.

    Every line is handed to ``_record_parser``, which yields eleven fields.
    A record is skipped when both ``filter`` and its ``filtered`` field are
    truthy. Otherwise its quality string is decoded with
    ``_decode_qual_to_phred`` and ``constructor(seq, quality=..., id=...)``
    is yielded, the id being formatted from the machine name, run, lane,
    tile, x, y, index and read fields.
    """
    for entry in fh:
        (machine_name, run, lane, tile, x, y, index, read, seq, raw_qual,
         filtered) = _record_parser(entry)

        keep = not filter or not filtered
        if keep:
            scores = _decode_qual_to_phred(raw_qual, variant, phred_offset)
            id_fields = (machine_name, run, lane, tile, x, y, index, read)
            yield constructor(seq, quality=scores,
                              id='%s_%s:%s:%s:%s:%s#%s/%s' % id_fields)
Beispiel #12
0
    def test_custom_phred_offset(self):
        # '*' is ASCII 42, so with an offset of 42 these 51 consecutive
        # characters are expected to decode to 0..50.
        chars = '*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\'
        decoded = _decode_qual_to_phred(chars, phred_offset=42)
        npt.assert_equal(decoded, np.arange(51))

        # Raising the offset by one puts '*' below the valid range; the error
        # should report the range allowed for that offset.
        with self.assertRaises(ValueError) as ctx:
            _decode_qual_to_phred(chars, phred_offset=43)
        self.assertIn('[0, 83]', str(ctx.exception))

        # Offsets of 0 and 127 must each be rejected with a message naming
        # the parameter and mentioning "printable".
        for bad_offset in (0, 127):
            with self.assertRaises(ValueError) as ctx:
                _decode_qual_to_phred(chars, phred_offset=bad_offset)
            error_text = str(ctx.exception)
            self.assertIn('`phred_offset`', error_text)
            self.assertIn('printable', error_text)
Beispiel #13
0
    def test_custom_phred_offset(self):
        # 51 consecutive characters starting at '*' (ASCII 42); decoding with
        # phred_offset=42 is expected to give the scores 0 through 50.
        qual_chars = '*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\'
        expected_scores = np.arange(51)
        scores = _decode_qual_to_phred(qual_chars, phred_offset=42)
        npt.assert_equal(scores, expected_scores)

        # With phred_offset=43 the leading '*' is out of range, and the
        # message should name the range valid for that offset.
        with self.assertRaises(ValueError) as raised:
            _decode_qual_to_phred(qual_chars, phred_offset=43)
        message = str(raised.exception)
        self.assertIn('[0, 83]', message)

        # phred_offset=0 is rejected; the message should mention the
        # parameter name and the word "printable".
        with self.assertRaises(ValueError) as raised:
            _decode_qual_to_phred(qual_chars, phred_offset=0)
        message = str(raised.exception)
        for fragment in ('`phred_offset`', 'printable'):
            self.assertIn(fragment, message)

        # phred_offset=127 is rejected with the same kind of message.
        with self.assertRaises(ValueError) as raised:
            _decode_qual_to_phred(qual_chars, phred_offset=127)
        message = str(raised.exception)
        for fragment in ('`phred_offset`', 'printable'):
            self.assertIn(fragment, message)
Beispiel #14
0
def _parse_quality_scores(fh, seq_len, variant, phred_offset):
    """Read and decode the quality lines of one FASTQ record.

    Chunks from ``_line_generator(fh)`` are decoded with
    ``_decode_qual_to_phred`` and collected in a flat list until
    ``seq_len`` quality characters have been consumed.

    Returns ``(scores, header)`` where ``scores`` is the list of decoded
    values and ``header`` is the '@'-prefixed chunk that ended the record,
    or ``None`` when the input ran out instead.

    Raises ``FASTQFormatError`` if more than ``seq_len`` quality characters
    are found, or if the input ends before ``seq_len`` is reached.
    """
    scores = []
    n_qual = 0
    for line in _line_generator(fh):
        # An '@' line only starts a new record once all expected quality
        # characters are in; before that, '@' is a valid quality character.
        if line.startswith('@') and n_qual == seq_len:
            return scores, line

        n_qual += len(line)
        surplus = n_qual - seq_len
        if surplus > 0:
            raise FASTQFormatError(
                "Found more quality score characters than sequence "
                "characters. Extra quality score characters: %r" %
                line[-surplus:])

        scores.extend(
            _decode_qual_to_phred(line, variant=variant,
                                  phred_offset=phred_offset))

    if n_qual != seq_len:
        raise FASTQFormatError(
            "Found incomplete/truncated FASTQ record at end of file.")
    return scores, None
Beispiel #15
0
def _parse_quality_scores(fh, seq_len, variant, phred_offset):
    """Consume the quality section of a FASTQ record from ``fh``.

    Each chunk produced by ``_line_generator(fh)`` is decoded via
    ``_decode_qual_to_phred`` and appended to a single list until exactly
    ``seq_len`` characters have been read.

    The result is ``(decoded, next_header)``: the list of decoded values
    plus the '@' chunk that terminated the record, or ``None`` if the
    generator was exhausted first.

    ``FASTQFormatError`` is raised when the quality data is longer than
    ``seq_len`` or when the input ends with fewer than ``seq_len`` characters.
    """
    decoded = []
    seen = 0
    for piece in _line_generator(fh):
        starts_record = piece.startswith('@') and seen == seq_len
        if starts_record:
            # All quality characters are accounted for, so this '@' chunk
            # belongs to the following record.
            return decoded, piece

        seen = seen + len(piece)
        if seen > seq_len:
            extra = piece[-(seen - seq_len):]
            raise FASTQFormatError(
                "Found more quality score characters than sequence "
                "characters. Extra quality score characters: %r" %
                extra)

        phred = _decode_qual_to_phred(piece,
                                      variant=variant,
                                      phred_offset=phred_offset)
        decoded.extend(phred)

    if seen == seq_len:
        return decoded, None
    raise FASTQFormatError(
        "Found incomplete/truncated FASTQ record at end of file.")
Beispiel #16
0
 def test_solexa_variant(self):
     # Decoding with the solexa variant is expected to be unimplemented; the
     # message should contain '719' (presumably an issue number -- confirm).
     with self.assertRaises(NotImplementedError) as ctx:
         _decode_qual_to_phred('abcd', variant='solexa')
     error_text = str(ctx.exception)
     self.assertIn('719', error_text)
Beispiel #17
0
 def test_empty_qual_str(self):
     # An empty quality string is expected to decode to an empty list.
     decoded = _decode_qual_to_phred('', variant='sanger')
     self.assertEqual(decoded, [])
Beispiel #18
0
 def test_empty_qual_str(self):
     # No quality characters in, no scores out: the result should equal [].
     scores = _decode_qual_to_phred('', variant='sanger')
     self.assertEqual(scores, [])
Beispiel #19
0
 def test_empty_qual_str(self):
     # An empty quality string is expected to decode to an empty uint8 array.
     decoded = _decode_qual_to_phred('', variant='sanger')
     empty_scores = np.array([], dtype=np.uint8)
     npt.assert_equal(decoded, empty_scores)
Beispiel #20
0
 def test_unrecognized_variant(self):
     # 'illumina' without a version suffix should be rejected, and the error
     # should mention both the parameter and the offending value.
     with self.assertRaises(ValueError) as ctx:
         _decode_qual_to_phred('abcd', variant='illumina')
     error_text = str(ctx.exception)
     for fragment in ('variant', "'illumina'"):
         self.assertIn(fragment, error_text)
Beispiel #21
0
 def test_solexa_variant(self):
     # The solexa variant should raise NotImplementedError with '719' in the
     # message (likely a tracker reference -- TODO confirm).
     with self.assertRaises(NotImplementedError) as raised:
         _decode_qual_to_phred('abcd', variant='solexa')
     message = str(raised.exception)
     self.assertIn('719', message)
Beispiel #22
0
 def test_variant_and_phred_offset_provided(self):
     # Passing `variant` together with `phred_offset` should be rejected, and
     # the error should name both parameters and contain the word "both".
     with self.assertRaises(ValueError) as ctx:
         _decode_qual_to_phred('abcd', variant='sanger', phred_offset=64)
     error_text = str(ctx.exception)
     for fragment in ('both', '`variant`', '`phred_offset`'):
         self.assertIn(fragment, error_text)
Beispiel #23
0
 def test_missing_variant_and_phred_offset(self):
     # Calling with neither `variant` nor `phred_offset` should fail, and the
     # error should name both parameters and contain the word "decode".
     with self.assertRaises(ValueError) as ctx:
         _decode_qual_to_phred('abcd')
     error_text = str(ctx.exception)
     for fragment in ('`variant`', '`phred_offset`', 'decode'):
         self.assertIn(fragment, error_text)
Beispiel #24
0
 def test_missing_variant_and_phred_offset(self):
     # With no decoding scheme supplied at all, a ValueError is expected
     # whose message mentions both parameters and the word "decode".
     with self.assertRaises(ValueError) as raised:
         _decode_qual_to_phred('abcd')
     message = str(raised.exception)
     expected_fragments = ('`variant`', '`phred_offset`', 'decode')
     for fragment in expected_fragments:
         self.assertIn(fragment, message)
Beispiel #25
0
 def test_empty_qual_str(self):
     # No quality characters in, no scores out: the result should equal an
     # empty uint8 array.
     scores = _decode_qual_to_phred('', variant='sanger')
     expected = np.array([], dtype=np.uint8)
     npt.assert_equal(scores, expected)
Beispiel #26
0
 def test_unrecognized_variant(self):
     # An unknown variant name should raise ValueError with a message that
     # contains the word "variant" and the quoted name that was passed.
     with self.assertRaises(ValueError) as raised:
         _decode_qual_to_phred('abcd', variant='illumina')
     message = str(raised.exception)
     expected_fragments = ('variant', "'illumina'")
     for fragment in expected_fragments:
         self.assertIn(fragment, message)
Beispiel #27
0
 def test_variant_and_phred_offset_provided(self):
     # Supplying both decoding options at once is an error; the message
     # should say "both" and name each of the two parameters.
     with self.assertRaises(ValueError) as raised:
         _decode_qual_to_phred('abcd', variant='sanger', phred_offset=64)
     message = str(raised.exception)
     expected_fragments = ('both', '`variant`', '`phred_offset`')
     for fragment in expected_fragments:
         self.assertIn(fragment, message)