コード例 #1
0
    def test_newline_decoder(self):
        """Exercise IncrementalNewlineDecoder wrapped around a UTF-8 decoder.

        Covers multi-byte sequences split across calls, a truncated sequence
        at end of input, a carriage return that is held back until the next
        chunk (or until ``final=True``), and the ``newlines`` attribute,
        including its value after ``reset()``.
        """
        import codecs
        decoder = codecs.getincrementaldecoder("utf-8")()
        decoder = io.IncrementalNewlineDecoder(decoder, translate=True)

        # A complete three-byte sequence decodes in one call.
        self.assertEqual(decoder.decode(b'\xe8\xa2\x88'), u"\u8888")

        # The same sequence fed one byte at a time yields nothing until the
        # last byte arrives.
        self.assertEqual(decoder.decode(b'\xe8'), u"")
        self.assertEqual(decoder.decode(b'\xa2'), u"")
        self.assertEqual(decoder.decode(b'\x88'), u"\u8888")

        # A truncated sequence is an error once the input is declared final.
        self.assertEqual(decoder.decode(b'\xe8'), u"")
        self.assertRaises(UnicodeDecodeError, decoder.decode, b'', final=True)

        # Discard the buffered partial byte before the newline checks.
        decoder.setstate((b'', 0))
        self.assertEqual(decoder.decode(b'\n'), u"\n")
        # A trailing "\r" is withheld: it may be the first half of "\r\n".
        self.assertEqual(decoder.decode(b'\r'), u"")
        self.assertEqual(decoder.decode(b'', final=True), u"\n")
        self.assertEqual(decoder.decode(b'\r', final=True), u"\n")

        # A pending "\r" followed by ordinary text becomes "\n".
        self.assertEqual(decoder.decode(b'\r'), u"")
        self.assertEqual(decoder.decode(b'a'), u"\na")

        # Mixed "\r" and "\r\n" sequences, some split across calls.
        self.assertEqual(decoder.decode(b'\r\r\n'), u"\n\n")
        self.assertEqual(decoder.decode(b'\r'), u"")
        self.assertEqual(decoder.decode(b'\r'), u"\n")
        self.assertEqual(decoder.decode(b'\na'), u"\na")

        # Newline translation combined with multi-byte characters.
        self.assertEqual(decoder.decode(b'\xe8\xa2\x88\r\n'), u"\u8888\n")
        self.assertEqual(decoder.decode(b'\xe8\xa2\x88'), u"\u8888")
        self.assertEqual(decoder.decode(b'\n'), u"\n")
        self.assertEqual(decoder.decode(b'\xe8\xa2\x88\r'), u"\u8888")
        self.assertEqual(decoder.decode(b'\n'), u"\n")

        # A fresh decoder: ``newlines`` accumulates each kind of line ending
        # seen so far and is cleared again by reset().
        decoder = codecs.getincrementaldecoder("utf-8")()
        decoder = io.IncrementalNewlineDecoder(decoder, translate=True)
        self.assertEqual(decoder.newlines, None)
        decoder.decode(b"abc\n\r")
        self.assertEqual(decoder.newlines, u'\n')
        decoder.decode(b"\nabc")
        self.assertEqual(decoder.newlines, ('\n', '\r\n'))
        decoder.decode(b"abc\r")
        # The trailing "\r" is still pending, so it is not recorded yet.
        self.assertEqual(decoder.newlines, ('\n', '\r\n'))
        decoder.decode(b"abc")
        self.assertEqual(decoder.newlines, ('\r', '\n', '\r\n'))
        decoder.decode(b"abc\r")
        decoder.reset()
        self.assertEqual(decoder.decode(b"abc"), "abc")
        self.assertEqual(decoder.newlines, None)
コード例 #2
0
def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Converts a bytes string with python source code to unicode.

    Unicode strings are passed through unchanged. Byte strings are checked
    for the python source file encoding cookie to determine encoding.
    txt can be either a bytes buffer or a string containing the source
    code.

    ``errors`` is handed to ``io.TextIOWrapper`` as its error handler. When
    ``skip_encoding_cookie`` is true, the decoded stream is passed through
    ``strip_encoding_cookie`` and the resulting pieces are joined; otherwise
    the whole decoded text is returned.
    """
    if isinstance(txt, six.text_type):
        return txt
    if isinstance(txt, six.binary_type):
        buffer = io.BytesIO(txt)
    else:
        # Anything else is used as-is; it must offer readline() and seek().
        buffer = txt
    try:
        encoding, _ = detect_encoding(buffer.readline)
    except SyntaxError:
        # Encoding detection failed; fall back to ASCII.
        encoding = "ascii"
    # Rewind: detect_encoding was given readline and may have advanced
    # the stream.
    buffer.seek(0)

    text = io.TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
    # NOTE(review): presumably set for consumers that inspect ``.mode`` -
    # confirm against strip_encoding_cookie / callers.
    text.mode = 'r'
    if skip_encoding_cookie:
        return u"".join(strip_encoding_cookie(text))
    return text.read()
コード例 #3
0
ファイル: abc.py プロジェクト: AntonKozlov/embox-1
    def get_source(self, fullname):
        """Concrete implementation of InspectLoader.get_source.

        Reads the raw source via ``get_filename()`` / ``get_data()``. When
        the module-level ``py3k`` flag is true, the bytes are decoded with
        the encoding reported by ``tokenize.detect_encoding`` and line
        endings are translated to universal newlines; otherwise the raw
        bytes are returned unchanged.

        Raises ImportError if the data cannot be read, the encoding cannot
        be detected, or the bytes cannot be decoded.
        """
        path = self.get_filename(fullname)
        try:
            source_bytes = self.get_data(path)
        except IOError:
            raise ImportError("source not available through get_data()")

        if not py3k:
            return source_bytes  # XXX proper encoding

        import io
        import tokenize

        readsource = io.BytesIO(source_bytes).readline
        try:
            encoding = tokenize.detect_encoding(readsource)[0]
        except SyntaxError:
            raise ImportError("Failed to detect encoding")

        try:
            text = source_bytes.decode(encoding)
        except UnicodeDecodeError:
            raise ImportError("Failed to decode source file")

        # final=True: without it the decoder holds back a trailing "\r"
        # (waiting for a possible "\n") and that last line ending is lost.
        newline_decoder = io.IncrementalNewlineDecoder(None, True)
        return newline_decoder.decode(text, final=True)
コード例 #4
0
 def test_universal_newlines(self):
     # get_source() must return the text with universal-newline translation
     # applied, i.e. what IncrementalNewlineDecoder yields for the same text.
     module_name = 'mod'
     loader = self.SourceOnlyLoaderMock('mod.file')
     raw_text = 'x = 42\r\ny = -13\r\n'
     loader.source = raw_text.encode('utf-8')
     translator = io.IncrementalNewlineDecoder(None, True)
     expected = translator.decode(raw_text)
     self.assertEqual(loader.get_source(module_name), expected)
コード例 #5
0
 def test_universal_newlines(self):
     # PEP 302 says universal newlines should be used.
     crlf_source = "x = 42\r\ny = -13\r\n"
     loader = SourceOnlyLoaderMock('mod.file')
     loader.source = crlf_source.encode('utf-8')
     # Reference result: the same text run through the newline translator.
     translated = io.IncrementalNewlineDecoder(None, True).decode(crlf_source)
     self.assertEqual(loader.get_source('mod'), translated)
コード例 #6
0
def decode_source(source_bytes):
    """Decode bytes representing source code and return the string.

    Decoding is delegated to ``source_to_unicode``; the resulting text is
    then passed through a translating ``io.IncrementalNewlineDecoder`` so
    that universal newline support is applied.
    """
    text = source_to_unicode(source_bytes)
    return io.IncrementalNewlineDecoder(None, True).decode(text)
コード例 #7
0
ファイル: test_io.py プロジェクト: PandoraClub/WizaTV
 def test_newline_decoder(self):
     """Run the newline-decoder checks over several encodings.

     Each encoding gets a fresh translating IncrementalNewlineDecoder that
     is handed to ``check_newline_decoder``; a further fresh UTF-8 decoder
     is then handed to ``check_newline_decoder_utf8``.
     """
     def make_decoder(codec_name):
         # Wrap a new incremental decoder for codec_name in a translating
         # newline decoder.
         inner = codecs.getincrementaldecoder(codec_name)()
         return io.IncrementalNewlineDecoder(inner, translate=True)

     for enc in ('utf-8', 'latin-1',
                 'utf-16', 'utf-16-le', 'utf-16-be',
                 'utf-32', 'utf-32-le', 'utf-32-be'):
         self.check_newline_decoder(make_decoder(enc), enc)

     self.check_newline_decoder_utf8(make_decoder("utf-8"))
コード例 #8
0
def decode_source(source_bytes: bytes) -> str:
    """Decode Python source bytes to text with universal newlines.

    Adapted from importlib._bootstrap_external. The encoding is taken from
    ``tokenize.detect_encoding`` (BOM or coding cookie, defaulting to
    UTF-8), and every line ending is translated to a line feed.

    Unlike the importlib original, the newline decoder is called with
    ``final=True``: a non-final call holds back a trailing carriage return
    in case a line feed follows, which would silently drop the last line
    ending of a source that ends in a bare carriage return.

    Raises SyntaxError if the encoding declaration is invalid and
    UnicodeDecodeError if the bytes do not match the detected encoding.
    """
    source_bytes_readline = io.BytesIO(source_bytes).readline
    encoding, _ = tokenize.detect_encoding(source_bytes_readline)
    text = source_bytes.decode(encoding)
    newline_decoder = io.IncrementalNewlineDecoder(None, True)
    return newline_decoder.decode(text, final=True)