예제 #1
0
def test_codepoint_index_at_byte_position(u):
    """Check byte-position -> codepoint-index lookups for every prefix of u.

    For each index from 0 to len(u) inclusive, the byte position is taken
    as the length of the UTF-8 encoding of the first `index` characters,
    and the lookup is expected to return that same index.
    """
    num_codepoints = len(u)
    encoded = u.encode('utf8')
    index_storage = rutf8.create_utf8_index_storage(encoded, num_codepoints)
    for expected_index in range(num_codepoints + 1):
        # Byte offset at which codepoint number `expected_index` starts.
        prefix = u[:expected_index].encode('utf8')
        found_index = rutf8.codepoint_index_at_byte_position(
            encoded, index_storage, len(prefix), num_codepoints)
        assert found_index == expected_index
예제 #2
0
def test_codepoint_position_at_index_inverse(u):
    """Check that the index -> byte-position lookup can be inverted.

    For every codepoint index from 0 to len(u) inclusive, the byte
    position returned by codepoint_position_at_index() is fed back into
    codepoint_index_at_byte_position(), which must return the original
    index.
    """
    # Diagnostic output identifying the input under test.  The repr() is
    # printed rather than the raw value: printing a raw unicode string can
    # raise UnicodeEncodeError when stdout cannot encode it, which would
    # fail the test for a reason unrelated to rutf8.  The parenthesised
    # form also parses as both a Python 2 print statement and a Python 3
    # call.
    print(repr(u))
    b = u.encode('utf8')
    storage = rutf8.create_utf8_index_storage(b, len(u))
    for i in range(len(u) + 1):
        bytepos = rutf8.codepoint_position_at_index(b, storage, i)
        assert rutf8.codepoint_index_at_byte_position(
                       b, storage, bytepos, len(u)) == i
예제 #3
0
 def bytepos_to_charindex(self, bytepos):
     """Convert a 'byte position' into a 'character index'.

     Byte positions are what all methods from rsre_core return; this
     conversion exists for UTF8 handling.  When the match context is not
     a Utf8MatchContext, bytepos is returned unchanged.
     """
     match_ctx = self.ctx
     if not isinstance(match_ctx, rsre_utf8.Utf8MatchContext):
         return bytepos
     w_text = match_ctx.w_unicode_obj
     storage = w_text._get_index_storage()
     return rutf8.codepoint_index_at_byte_position(
         w_text._utf8, storage, bytepos)