def make_ctx(self, w_string, pos=0, endpos=sys.maxint):
    """Create the match context used to search inside ``w_string``.

    ``pos`` and ``endpos`` delimit the searched window.  A negative
    ``pos`` is treated as 0 and an ``endpos`` smaller than ``pos`` is
    raised to ``pos``.  The kind of context depends on ``w_string``:

    * unicode object: a UnicodeAsciiMatchContext when the object reports
      ``is_ascii()``, otherwise a rsre_utf8.Utf8MatchContext.  Both
      bounds are translated with ``_index_to_byte()`` into offsets in
      the object's UTF-8 string.
    * bytes object: a rsre_core.StrMatchContext.
    * anything else: a rsre_core.BufMatchContext over the buffer
      returned by ``space.readbuf_w()``.

    An app-level TypeError is raised when a pattern known to be bytes
    is applied to a unicode object, or a pattern known to be unicode is
    applied to a non-unicode object.
    """
    space = self.space

    # Normalise the window so that 0 <= pos <= endpos.
    if pos < 0:
        pos = 0
    if endpos < pos:
        endpos = pos

    if space.isinstance_w(w_string, space.w_unicode):
        if self.is_known_bytes():
            raise oefmt(space.w_TypeError,
                        "can't use a bytes pattern on a string-like "
                        "object")
        w_unicode_obj = space.convert_arg_to_w_unicode(w_string)
        utf8 = w_unicode_obj._utf8
        nchars = w_unicode_obj._len()
        nbytes = len(utf8)

        # Start bound: both ends of the string are known without a
        # lookup; only an interior index needs _index_to_byte().
        if pos <= 0:
            start_byte = 0
        elif pos < nchars:
            start_byte = w_unicode_obj._index_to_byte(pos)
        else:
            start_byte = nbytes

        # End bound: anything at or past the last character maps to
        # the end of the UTF-8 string.
        if endpos < nchars:
            end_byte = w_unicode_obj._index_to_byte(endpos)
        else:
            end_byte = nbytes

        if w_unicode_obj.is_ascii():
            ctx = UnicodeAsciiMatchContext(utf8, start_byte, end_byte)
        else:
            ctx = rsre_utf8.Utf8MatchContext(utf8, start_byte, end_byte)
        # Keep the unicode object on the context as well; it is needed
        # by W_SRE_Match.bytepos_to_charindex().
        ctx.w_unicode_obj = w_unicode_obj
        return ctx

    # From here on w_string is not a unicode object.
    if self.is_known_unicode():
        raise oefmt(space.w_TypeError,
                    "can't use a string pattern on a bytes-like "
                    "object")

    if space.isinstance_w(w_string, space.w_bytes):
        data = space.bytes_w(w_string)
        limit = len(data)
        if pos > limit:
            pos = limit
        if endpos > limit:
            endpos = limit
        return rsre_core.StrMatchContext(data, pos, endpos)

    # Fallback: any other object is read through its buffer.
    buf = space.readbuf_w(w_string)
    limit = buf.getlength()
    assert limit >= 0
    if pos > limit:
        pos = limit
    if endpos > limit:
        endpos = limit
    return rsre_core.BufMatchContext(buf, pos, endpos)
def fresh_copy(self, ctx):
    """Return a new match context equivalent to ``ctx``.

    The copy is of the same kind as ``ctx`` (Utf8MatchContext,
    StrMatchContext or BufMatchContext), is built over the same
    underlying data, and spans from ``ctx.match_start`` to ``ctx.end``.
    ``match_end`` is carried over from ``ctx``; for a Utf8MatchContext
    the attached ``w_unicode_obj`` is carried over too.

    Raises AssertionError if ``ctx`` is none of the three known kinds.
    """
    if isinstance(ctx, rsre_utf8.Utf8MatchContext):
        start = ctx.match_start
        stop = ctx.end
        clone = rsre_utf8.Utf8MatchContext(ctx._utf8, start, stop,
                                           ctx.flags)
        # The unicode object travels with the context.
        clone.w_unicode_obj = ctx.w_unicode_obj
    elif isinstance(ctx, rsre_core.StrMatchContext):
        start = ctx.match_start
        stop = ctx.end
        # String contexts are built by the pattern's own factory.
        clone = self._make_str_match_context(ctx._string, start, stop)
    elif isinstance(ctx, rsre_core.BufMatchContext):
        start = ctx.match_start
        stop = ctx.end
        clone = rsre_core.BufMatchContext(ctx._buffer, start, stop,
                                          ctx.flags)
    else:
        raise AssertionError("bad ctx type")

    clone.match_end = ctx.match_end
    return clone
def make_ctx(self, w_string, pos=0, endpos=sys.maxint):
    """Create the match context used to search inside ``w_string``.

    ``pos`` and ``endpos`` delimit the searched window.  A negative
    ``pos`` is treated as 0 and an ``endpos`` smaller than ``pos`` is
    raised to ``pos``.  The kind of context depends on ``w_string``:

    * unicode object: a rsre_utf8.Utf8MatchContext.  Both bounds are
      translated with ``rutf8.codepoint_position_at_index()`` into
      offsets in the object's UTF-8 string.
    * bytes object: whatever ``self._make_str_match_context()`` returns.
    * anything else: a rsre_core.BufMatchContext over the buffer
      returned by ``space.readbuf_w()``.

    The pattern's ``self.flags`` are passed to the Utf8MatchContext and
    BufMatchContext constructors.
    """
    space = self.space

    # Normalise the window so that 0 <= pos <= endpos.
    if pos < 0:
        pos = 0
    if endpos < pos:
        endpos = pos

    if space.isinstance_w(w_string, space.w_unicode):
        w_unicode_obj = space.convert_arg_to_w_unicode(w_string)
        utf8 = w_unicode_obj._utf8
        nchars = w_unicode_obj._len()
        nbytes = len(utf8)

        # Start bound: both ends of the string are known without a
        # lookup; the index storage is only fetched for an interior
        # position.
        if pos <= 0:
            start_byte = 0
        elif pos < nchars:
            storage = w_unicode_obj._get_index_storage()
            start_byte = rutf8.codepoint_position_at_index(
                utf8, storage, pos)
        else:
            start_byte = nbytes

        # End bound: anything at or past the last character maps to
        # the end of the UTF-8 string.
        if endpos < nchars:
            storage = w_unicode_obj._get_index_storage()
            end_byte = rutf8.codepoint_position_at_index(
                utf8, storage, endpos)
        else:
            end_byte = nbytes

        ctx = rsre_utf8.Utf8MatchContext(utf8, start_byte, end_byte,
                                         self.flags)
        # xxx keep the unicode object on the context as well; it is
        # needed by W_SRE_Match.bytepos_to_charindex().
        ctx.w_unicode_obj = w_unicode_obj
        return ctx

    if space.isinstance_w(w_string, space.w_bytes):
        data = space.bytes_w(w_string)
        limit = len(data)
        if pos > limit:
            pos = limit
        if endpos > limit:
            endpos = limit
        return self._make_str_match_context(data, pos, endpos)

    # Fallback: any other object is read through its buffer.
    buf = space.readbuf_w(w_string)
    limit = buf.getlength()
    assert limit >= 0
    if pos > limit:
        pos = limit
    if endpos > limit:
        endpos = limit
    return rsre_core.BufMatchContext(buf, pos, endpos, self.flags)