Example #1
0
 def make_ctx(self, w_string, pos=0, endpos=sys.maxint):
     """Build the match context used to search inside w_string.

     The kind of context depends on the type of w_string:

     * a unicode object gives an rsre_utf8.Utf8MatchContext whose start
       and end are byte offsets into the UTF-8 encoded string;
     * a bytes object is handed to self._make_str_match_context();
     * anything else is read through space.readbuf_w() and wrapped in an
       rsre_core.BufMatchContext.

     pos and endpos are given in items of the subject (codepoints for
     unicode).  A negative pos is treated as 0, an endpos smaller than pos
     is raised to pos, and both are capped at the end of the subject.
     """
     space = self.space

     # Normalise the requested window before looking at the subject.
     if pos < 0:
         pos = 0
     if endpos < pos:
         endpos = pos

     if space.isinstance_w(w_string, space.w_unicode):
         w_uni = space.convert_arg_to_w_unicode(w_string)
         utf8 = w_uni._utf8
         n_codepoints = w_uni._len()

         # Translate the start from a codepoint index to a byte offset.
         # The two ends of the string need no index lookup.
         if pos <= 0:
             start_byte = 0
         elif pos >= n_codepoints:
             start_byte = len(utf8)
         else:
             storage = w_uni._get_index_storage()
             start_byte = rutf8.codepoint_position_at_index(
                 utf8, storage, pos)

         # Same translation for the end of the window.
         if endpos < n_codepoints:
             storage = w_uni._get_index_storage()
             end_byte = rutf8.codepoint_position_at_index(
                 utf8, storage, endpos)
         else:
             end_byte = len(utf8)

         ctx = rsre_utf8.Utf8MatchContext(utf8, start_byte, end_byte,
                                          self.flags)
         # xxx the unicode object is stored on the ctx too, for
         # W_SRE_Match.bytepos_to_charindex()
         ctx.w_unicode_obj = w_uni
         return ctx

     if space.isinstance_w(w_string, space.w_bytes):
         data = space.bytes_w(w_string)
         limit = len(data)
         if pos > limit:
             pos = limit
         if endpos > limit:
             endpos = limit
         return self._make_str_match_context(data, pos, endpos)

     # Fallback: any other object is accessed through its read buffer.
     buf = space.readbuf_w(w_string)
     size = buf.getlength()
     assert size >= 0
     if pos > size:
         pos = size
     if endpos > size:
         endpos = size
     return rsre_core.BufMatchContext(buf, pos, endpos, self.flags)
Example #2
0
def test_codepoint_position_at_index_inverse(u):
    """Check that codepoint index -> byte position -> index round-trips.

    For every codepoint index from 0 to len(u) inclusive, the byte position
    returned by rutf8.codepoint_position_at_index() must be mapped back to
    the same index by rutf8.codepoint_index_at_byte_position().
    """
    print(u)
    encoded = u.encode('utf8')
    n_chars = len(u)
    index_storage = rutf8.create_utf8_index_storage(encoded, n_chars)
    for char_index in range(n_chars + 1):
        byte_offset = rutf8.codepoint_position_at_index(
            encoded, index_storage, char_index)
        round_tripped = rutf8.codepoint_index_at_byte_position(
            encoded, index_storage, byte_offset, n_chars)
        assert round_tripped == char_index
Example #3
0
def test_codepoint_position_at_index(u):
    """Check codepoint_position_at_index() against a slice-and-encode oracle.

    For every index i from 0 to len(u) inclusive, the byte position reported
    for codepoint index i must equal the length of the UTF-8 encoding of
    u[:i].
    """
    # Encode once: the byte string is identical for every index, and this
    # way the storage is built from the very bytes that are queried.
    encoded = u.encode('utf8')
    index = rutf8.create_utf8_index_storage(encoded, len(u))
    for i in range(len(u) + 1):
        expected = len(u[:i].encode('utf8'))
        assert rutf8.codepoint_position_at_index(encoded, index, i) == expected