def make_ctx(self, w_string, pos=0, endpos=sys.maxint, flags=0):
    """Build the match context used to search inside w_string.

    self.getstring(w_string) supplies the length of the target together
    with three alternative representations (unicode, string, buffer).
    A UnicodeMatchContext is returned when the unicode representation
    is not None, otherwise a StrMatchContext when the string one is not
    None, otherwise a BufMatchContext.

    pos is clamped into [0, length] and endpos into [pos, length].
    The given flags are OR-ed with self.flags.

    An error built with oefmt(space.w_TypeError, ...) is raised when
    the pattern kind (bytes vs. unicode) does not match the target.
    """
    space = self.space
    length, unicodestr, string, buf = self.getstring(w_string)

    # Clamp the start position first; the end position is then clamped
    # relative to the (already adjusted) start.
    if pos < 0:
        pos = 0
    elif pos > length:
        pos = length
    if endpos < pos:
        endpos = pos
    elif endpos > length:
        endpos = length

    all_flags = self.flags | flags

    if unicodestr is not None:
        if self.is_known_bytes():
            raise oefmt(space.w_TypeError,
                        "can't use a bytes pattern on a string-like "
                        "object")
        return rsre_core.UnicodeMatchContext(unicodestr, pos, endpos,
                                             all_flags)

    # From here on the target is a byte string or a buffer.
    if self.is_known_unicode():
        raise oefmt(space.w_TypeError,
                    "can't use a string pattern on a bytes-like "
                    "object")
    if string is None:
        return rsre_core.BufMatchContext(buf, pos, endpos, all_flags)
    return rsre_core.StrMatchContext(string, pos, endpos, all_flags)
def make_ctx(self, w_string, pos=0, endpos=sys.maxint):
    """Make a BufMatchContext or a UnicodeMatchContext for searching
    in the given w_string object.

    A unicode w_string produces a UnicodeMatchContext; anything else is
    read through space.readbuf_w() and produces a BufMatchContext.
    pos and endpos are clamped so that 0 <= pos <= endpos <= length of
    the target.

    Raises an OperationError wrapping space.w_TypeError when the kind
    of self.w_pattern does not match the kind of the target.  A pattern
    that is None is accepted for both kinds of target.
    """
    space = self.space
    if pos < 0:
        pos = 0
    if endpos < pos:
        endpos = pos

    if space.isinstance_w(w_string, space.w_unicode):
        unicodestr = space.unicode_w(w_string)
        # Unicode target: the pattern must be unicode (or None).  The
        # message names the offending pattern kind first, as CPython
        # does; the two messages used to be swapped.
        if not (space.is_none(self.w_pattern) or
                space.isinstance_w(self.w_pattern, space.w_unicode)):
            raise OperationError(space.w_TypeError, space.wrap(
                "can't use a bytes pattern on a string-like object"))
        length = len(unicodestr)
        if pos > length:
            pos = length
        if endpos > length:
            endpos = length
        return rsre_core.UnicodeMatchContext(self.code, unicodestr,
                                             pos, endpos, self.flags)

    # Buffer target: readbuf_w() runs before the pattern check, so an
    # object that is not readable as a buffer fails there first.
    buf = space.readbuf_w(w_string)
    if (not space.is_none(self.w_pattern) and
            space.isinstance_w(self.w_pattern, space.w_unicode)):
        raise OperationError(space.w_TypeError, space.wrap(
            "can't use a string pattern on a bytes-like object"))
    size = buf.getlength()
    # NOTE(review): presumably a non-negativity hint for the RPython
    # annotator -- confirm before removing.
    assert size >= 0
    if pos > size:
        pos = size
    if endpos > size:
        endpos = size
    return rsre_core.BufMatchContext(self.code, buf,
                                     pos, endpos, self.flags)
def make_ctx(self, w_string, pos=0, endpos=sys.maxint):
    """Return the match context suited to the type of w_string.

    * unicode object  -> rsre_core.UnicodeMatchContext
    * str object      -> rsre_core.StrMatchContext
    * anything else   -> rsre_core.BufMatchContext, built on the
                         buffer obtained from space.readbuf_w()

    pos and endpos are clamped so that 0 <= pos <= endpos <= length of
    the target.  Every context is created with self.code and
    self.flags.
    """
    space = self.space

    # Lower bounds are independent of the target type.
    if pos < 0:
        pos = 0
    if endpos < pos:
        endpos = pos

    if space.isinstance_w(w_string, space.w_unicode):
        unicodestr = space.unicode_w(w_string)
        length = len(unicodestr)
        if pos > length:
            pos = length
        if endpos > length:
            endpos = length
        return rsre_core.UnicodeMatchContext(self.code, unicodestr,
                                             pos, endpos, self.flags)

    if space.isinstance_w(w_string, space.w_str):
        string = space.str_w(w_string)
        length = len(string)
        if pos > length:
            pos = length
        if endpos > length:
            endpos = length
        return rsre_core.StrMatchContext(self.code, string,
                                         pos, endpos, self.flags)

    # Fallback: any other object is accessed through its read buffer.
    buf = space.readbuf_w(w_string)
    size = buf.getlength()
    assert size >= 0
    if pos > size:
        pos = size
    if endpos > size:
        endpos = size
    return rsre_core.BufMatchContext(self.code, buf,
                                     pos, endpos, self.flags)
def make_ctx(self, w_string, pos=0, endpos=sys.maxint):
    """Return the match context suited to the type of w_string.

    * unicode object -> UnicodeAsciiMatchContext when the object
                        reports is_ascii(), else
                        rsre_utf8.Utf8MatchContext
    * bytes object   -> rsre_core.StrMatchContext
    * anything else  -> rsre_core.BufMatchContext, built on the
                        buffer obtained from space.readbuf_w()

    For unicode targets pos and endpos are character indexes; they are
    translated to byte offsets into the utf-8 string before the context
    is built.  For the other targets they are clamped to the length.

    An error built with oefmt(space.w_TypeError, ...) is raised when
    the pattern kind (bytes vs. unicode) does not match the target.
    """
    space = self.space
    if pos < 0:
        pos = 0
    if endpos < pos:
        endpos = pos

    if space.isinstance_w(w_string, space.w_unicode):
        if self.is_known_bytes():
            raise oefmt(space.w_TypeError,
                        "can't use a bytes pattern on a string-like "
                        "object")
        w_unicode_obj = space.convert_arg_to_w_unicode(w_string)
        utf8str = w_unicode_obj._utf8
        length = w_unicode_obj._len()
        nbytes = len(utf8str)

        # Character index -> byte offset.  The index lookup is only
        # needed for positions strictly inside the string.
        bytepos = 0
        if pos > 0:
            if pos >= length:
                bytepos = nbytes
            else:
                bytepos = w_unicode_obj._index_to_byte(pos)
        endbytepos = nbytes
        if endpos < length:
            endbytepos = w_unicode_obj._index_to_byte(endpos)

        if w_unicode_obj.is_ascii():
            ctx = UnicodeAsciiMatchContext(utf8str, bytepos, endbytepos)
        else:
            ctx = rsre_utf8.Utf8MatchContext(utf8str, bytepos,
                                             endbytepos)
        # we store the w_string on the ctx too, for
        # W_SRE_Match.bytepos_to_charindex()
        ctx.w_unicode_obj = w_unicode_obj
        return ctx

    # Not a unicode target: a unicode pattern cannot be used.
    if self.is_known_unicode():
        raise oefmt(space.w_TypeError,
                    "can't use a string pattern on a bytes-like "
                    "object")

    if space.isinstance_w(w_string, space.w_bytes):
        string = space.bytes_w(w_string)
        length = len(string)
        if pos > length:
            pos = length
        if endpos > length:
            endpos = length
        return rsre_core.StrMatchContext(string, pos, endpos)

    buf = space.readbuf_w(w_string)
    size = buf.getlength()
    assert size >= 0
    if pos > size:
        pos = size
    if endpos > size:
        endpos = size
    return rsre_core.BufMatchContext(buf, pos, endpos)
def fresh_copy(self, ctx):
    """Return a new match context of the same kind as ctx.

    The new context is built over the same underlying data and spans
    ctx.match_start .. ctx.end; its match_end is copied from ctx.
    For a Utf8MatchContext the w_unicode_obj attribute is carried over
    as well.  Raises AssertionError for any other context type.
    """
    # The order of the isinstance() checks is kept as is.
    if isinstance(ctx, rsre_utf8.Utf8MatchContext):
        new_ctx = rsre_utf8.Utf8MatchContext(
            ctx._utf8, ctx.match_start, ctx.end, ctx.flags)
        new_ctx.w_unicode_obj = ctx.w_unicode_obj
    elif isinstance(ctx, rsre_core.StrMatchContext):
        new_ctx = self._make_str_match_context(
            ctx._string, ctx.match_start, ctx.end)
    elif isinstance(ctx, rsre_core.BufMatchContext):
        new_ctx = rsre_core.BufMatchContext(
            ctx._buffer, ctx.match_start, ctx.end, ctx.flags)
    else:
        raise AssertionError("bad ctx type")
    new_ctx.match_end = ctx.match_end
    return new_ctx
def make_ctx(self, w_string, pos=0, endpos=sys.maxint):
    """Return the match context suited to the type of w_string.

    * unicode object -> rsre_utf8.Utf8MatchContext
    * bytes object   -> whatever self._make_str_match_context() builds
    * anything else  -> rsre_core.BufMatchContext, built on the
                        buffer obtained from space.readbuf_w()

    For unicode targets pos and endpos are character indexes; they are
    translated to byte offsets into the utf-8 string with
    rutf8.codepoint_position_at_index().  For the other targets they
    are clamped to the length of the target.
    """
    space = self.space
    if pos < 0:
        pos = 0
    if endpos < pos:
        endpos = pos

    if space.isinstance_w(w_string, space.w_unicode):
        w_unicode_obj = space.convert_arg_to_w_unicode(w_string)
        utf8str = w_unicode_obj._utf8
        length = w_unicode_obj._len()
        nbytes = len(utf8str)

        # The index storage is only requested for positions strictly
        # inside the string, exactly where a lookup is needed.
        bytepos = 0
        if pos > 0:
            if pos >= length:
                bytepos = nbytes
            else:
                storage = w_unicode_obj._get_index_storage()
                bytepos = rutf8.codepoint_position_at_index(
                    utf8str, storage, pos)
        endbytepos = nbytes
        if endpos < length:
            storage = w_unicode_obj._get_index_storage()
            endbytepos = rutf8.codepoint_position_at_index(
                utf8str, storage, endpos)

        ctx = rsre_utf8.Utf8MatchContext(
            utf8str, bytepos, endbytepos, self.flags)
        # xxx we store the w_string on the ctx too, for
        # W_SRE_Match.bytepos_to_charindex()
        ctx.w_unicode_obj = w_unicode_obj
        return ctx

    if space.isinstance_w(w_string, space.w_bytes):
        string = space.bytes_w(w_string)
        length = len(string)
        if pos > length:
            pos = length
        if endpos > length:
            endpos = length
        return self._make_str_match_context(string, pos, endpos)

    buf = space.readbuf_w(w_string)
    size = buf.getlength()
    assert size >= 0
    if pos > size:
        pos = size
    if endpos > size:
        endpos = size
    return rsre_core.BufMatchContext(buf, pos, endpos, self.flags)
def match_port(self, w_port):
    """Search for this regexp in the input port w_port.

    Returns None when nothing matches, otherwise the value produced by
    _extract_result(ctx, self.groupcount).

    String input ports are searched directly in w_port.str, starting at
    w_port.ptr, and w_port.ptr is moved to the end of the whole match.
    Any other port is searched through a PortBuffer wrapper.
    """
    self.ensure_compiled()

    if isinstance(w_port, values.W_StringInputPort):
        # fast path
        ctx = rsre_core.search(self.code, w_port.str, start=w_port.ptr)
        if not ctx:
            return None
        # span(0) is the whole match; only its end is needed here.
        _, end = ctx.span(0)
        w_port.ptr = end
        return _extract_result(ctx, self.groupcount)

    buf = PortBuffer(w_port)
    ctx = rsre_core.BufMatchContext(self.code, buf, 0,
                                    buf.getlength(), 0)
    if rsre_core.search_context(ctx):
        return _extract_result(ctx, self.groupcount)
    return None
def _test_sre_ctx_buf_(self, str, start, end):
    """Test helper: wrap str in a StringBuffer and return a
    BufMatchContext over it, spanning start..end."""
    return rsre_core.BufMatchContext(StringBuffer(str), start, end)