Example #1
0
def utf9encode(string):
    """Takes a string and returns a utf9-encoded version."""
    bits = _bitarray()
    for char in string:
        for idx, byte in enumerate(char.encode('utf-8')):
            bits.append(idx)
            bits.extend('{0:b}'.format(ord(byte)).zfill(8))
    return bits.tobytes()
Example #2
0
def utf9encode(string):
    """Takes a string and returns a utf9-encoded version."""
    bits = _bitarray()
    for char in string:
        for idx, byte in enumerate(char.encode('utf-8')):
            bits.append(idx)
            bits.extend('{0:b}'.format(ord(byte)).zfill(8))
    return bits.tobytes()
Example #3
0
def utf9decode(data):
    """Takes utf9-encoded data and returns the corresponding string."""
    bits = _bitarray()
    bits.frombytes(data)
    chunks = (bits[x:x + 9] for x in xrange(0, len(bits), 9))
    string = u''
    codepoint = ''
    for chunk in chunks:
        if len(chunk) < 9:
            break
        if chunk[0] == 0:
            codepoint, string = '', string + codepoint.decode('utf-8')
        codepoint += chr(int(chunk[1:].to01(), 2))
    return string + codepoint.decode('utf-8')
Example #4
0
def utf9decode(data):
    """Takes utf9-encoded data and returns the corresponding string."""
    bits = _bitarray()
    bits.frombytes(data)
    chunks = (bits[x:x+9] for x in xrange(0, len(bits), 9))
    string = u''
    codepoint = ''
    for chunk in chunks:
        if len(chunk) < 9:
            break
        if chunk[0] == 0:
            codepoint, string = '', string + codepoint.decode('utf-8')
        codepoint += chr(int(chunk[1:].to01(), 2))
    return string + codepoint.decode('utf-8')