def __init__(self, pattern, flags=0):
    """
    Compile `pattern` into a regex object.

    The signature deliberately mirrors re.compile so this class can be
    used where API compatibility with `re` matters. `flags` is accepted
    but not implemented yet: every pattern is compiled with REG_EXTENDED.

    Raises TypeError when `pattern` is not a str/unicode instance, and
    sre_constants.error when the compile call reports anything other
    than REG_OK.
    """
    # guard clause: only string patterns are supported
    if not isinstance(pattern, basestring):
        raise TypeError("first argument must be string or unicode")

    # character type and compile function matching the pattern's type
    char_type, compile_func = _get_specialized('comp', pattern)

    # the real compiled regex - a regex_t instance, kept as a byref handle
    self.preg = byref(regex_t())

    # copy the pattern into a ctypes array of exactly len(pattern) items;
    # the length is passed explicitly to the compile function
    size = len(pattern)
    raw_pattern = (char_type * size)()
    raw_pattern.value = pattern

    code = compile_func(self.preg, raw_pattern, size, REG_EXTENDED)
    symbol = reg_errcode_t[code]
    if symbol in ('REG_EBRACK', 'REG_EPAREN', 'REG_EBRACE'):
        # every kind of unbalanced bracket is reported with one message
        raise sre_constants.error("unbalanced parenthesis")
    if symbol != 'REG_OK':
        raise sre_constants.error(
            'Parse error, symbol %s code %d' % (symbol, code))

    # how many match slots to reserve when executing the regex:
    # the re_nsub field of the regex_t, plus one
    self.match_buffers = self.preg._obj.re_nsub + 1
def finditer(self, string):
    """
    Return an iterator over the successive matches found in `string`.

    For every successful match the generator yields one substring per
    slot of the match array (`self.match_buffers` slots), in slot order.
    Slot 0 is taken to be the whole match and the remaining slots the
    parenthesised subexpressions, as in POSIX regexec -- this assumes the
    exec function returned by _get_specialized follows that convention.

    The search restarts on the remainder of the string after each match,
    so offsets are always relative to the current remainder.

    Raises sre_constants.error('Exec error') when the exec call reports
    anything other than REG_OK or REG_NOMATCH.
    """
    pmatch = (regmatch_t * self.match_buffers)()
    nmatch = c_size_t(self.match_buffers)

    # get the proper types and functions for the string
    string_type, reg_function = _get_specialized('exec', string)

    # loop until no matches are found (REG_NOMATCH)
    while True:
        # (re)build the buffer for whatever is left of the string
        string_buffer = (string_type * len(string))()
        string_buffer.value = string

        result = reg_function(self.preg, string_buffer, len(string),
                              nmatch, pmatch, 0)
        status = reg_errcode_t[result]
        if status == 'REG_NOMATCH':
            # A bare return ends the generator cleanly. Raising
            # StopIteration inside a generator is turned into a
            # RuntimeError by PEP 479 on newer Python versions.
            return
        if status != 'REG_OK':
            raise sre_constants.error('Exec error')

        for match in pmatch:
            yield string[match.rm_so:match.rm_eo]

        # Move the string offset past the WHOLE match (slot 0), not past
        # whichever sub-match slot happened to be iterated last.
        whole = pmatch[0]
        advance = whole.rm_eo
        if whole.rm_so == whole.rm_eo:
            # A zero-length match would never make progress: step one
            # character forward, and stop once the string is exhausted.
            advance += 1
            if advance > len(string):
                return
        string = string[advance:]