Example #1
0
 def __init__(self, errors='strict', encoding=None,):
     assert encoding
     self.encoding = encoding
     self.errors = errors
     self.decoder = Charset.forName(self.encoding).newDecoder()
     self.output_buffer = CharBuffer.allocate(1024)
     self.buffer = ''
Example #2
0
 def __init__(self, errors='strict', encoding=None,):
     assert encoding
     self.encoding = encoding
     self.errors = errors
     self.decoder = Charset.forName(self.encoding).newDecoder()
     self.output_buffer = CharBuffer.allocate(1024)
     self.buffer = ''
Example #3
0
    def decode(self, input, errors='strict', final=True):
        error_function = codecs.lookup_error(errors)
        input_buffer = ByteBuffer.wrap(array('b', input))
        decoder = Charset.forName(self.encoding).newDecoder()
        output_buffer = CharBuffer.allocate(min(max(int(len(input) / 2), 256), 1024))
        builder = StringBuilder(int(decoder.averageCharsPerByte() * len(input)))

        while True:
            result = decoder.decode(input_buffer, output_buffer, False)
            pos = output_buffer.position()
            output_buffer.rewind()
            builder.append(output_buffer.subSequence(0, pos))
            if result.isUnderflow():
                if final:
                    _process_incomplete_decode(self.encoding, input, error_function, input_buffer, builder)
                break
            _process_decode_errors(self.encoding, input, result, error_function, input_buffer, builder)

        return builder.toString(), input_buffer.position()
Example #4
0
    def encode(self, input, final=False):
        error_function = codecs.lookup_error(self.errors)
        # workaround non-BMP issues - need to get the exact count of chars, not codepoints
        input_buffer = CharBuffer.allocate(StringBuilder(input).length())
        input_buffer.put(input)
        input_buffer.rewind()
        self.output_buffer.rewind()
        builder = StringIO()

        while True:
            result = self.encoder.encode(input_buffer, self.output_buffer, final)
            pos = self.output_buffer.position()
            self.output_buffer.rewind()
            builder.write(self.output_buffer.array()[0:pos].tostring())
            if result.isUnderflow():
                break
            _process_encode_errors(self.encoding, input, result, error_function, input_buffer, builder)

        return builder.getvalue()
Example #5
0
    def decode(self, input, errors='strict', final=True):
        error_function = codecs.lookup_error(errors)
        input_buffer = ByteBuffer.wrap(array('b', input))
        decoder = Charset.forName(self.encoding).newDecoder()
        output_buffer = CharBuffer.allocate(min(max(int(len(input) / 2), 256), 1024))
        builder = StringBuilder(int(decoder.averageCharsPerByte() * len(input)))

        while True:
            result = decoder.decode(input_buffer, output_buffer, False)
            pos = output_buffer.position()
            output_buffer.rewind()
            builder.append(output_buffer.subSequence(0, pos))
            if result.isUnderflow():
                if final:
                    _process_incomplete_decode(self.encoding, input, error_function, input_buffer, builder)
                break
            _process_decode_errors(self.encoding, input, result, error_function, input_buffer, builder)

        return builder.toString(), input_buffer.position()
Example #6
0
    def encode(self, input, final=False):
        error_function = codecs.lookup_error(self.errors)
        # workaround non-BMP issues - need to get the exact count of chars, not codepoints
        input_buffer = CharBuffer.allocate(StringBuilder(input).length())
        input_buffer.put(input)
        input_buffer.rewind()
        self.output_buffer.rewind()
        builder = StringIO()

        while True:
            result = self.encoder.encode(input_buffer, self.output_buffer, final)
            pos = self.output_buffer.position()
            self.output_buffer.rewind()
            builder.write(self.output_buffer.array()[0:pos].tostring())
            if result.isUnderflow():
                break
            _process_encode_errors(self.encoding, input, result, error_function, input_buffer, builder)

        return builder.getvalue()
Example #7
0
    def encode(self, input, errors='strict'):
        error_function = codecs.lookup_error(errors)
        # workaround non-BMP issues - need to get the exact count of chars, not codepoints
        input_buffer = CharBuffer.allocate(StringBuilder(input).length())
        input_buffer.put(input)
        input_buffer.rewind()
        encoder = Charset.forName(self.encoding).newEncoder()
        output_buffer = ByteBuffer.allocate(min(max(len(input) * 2, 256), 1024))
        builder = StringIO()

        while True:
            result = encoder.encode(input_buffer, output_buffer, True)
            pos = output_buffer.position()
            output_buffer.rewind()
            builder.write(output_buffer.array()[0:pos].tostring())
            if result.isUnderflow():
                break
            _process_encode_errors(self.encoding, input, result, error_function, input_buffer, builder)

        return builder.getvalue(), len(input)
Example #8
0
    def encode(self, input, errors='strict'):
        error_function = codecs.lookup_error(errors)
        # workaround non-BMP issues - need to get the exact count of chars, not codepoints
        input_buffer = CharBuffer.allocate(StringBuilder(input).length())
        input_buffer.put(input)
        input_buffer.rewind()
        encoder = Charset.forName(self.encoding).newEncoder()
        output_buffer = ByteBuffer.allocate(min(max(len(input) * 2, 256), 1024))
        builder = StringIO()

        while True:
            result = encoder.encode(input_buffer, output_buffer, True)
            pos = output_buffer.position()
            output_buffer.rewind()
            builder.write(output_buffer.array()[0:pos].tostring())
            if result.isUnderflow():
                break
            _process_encode_errors(self.encoding, input, result, error_function, input_buffer, builder)

        return builder.getvalue(), len(input)