def compile_regexp_to_noncapturing(pattern, flags=0):
    """
    Convert all grouping parentheses in the given regexp pattern to
    non-capturing groups, and return the result.  E.g.:

        >>> from nltk.internals import compile_regexp_to_noncapturing
        >>> compile_regexp_to_noncapturing('ab(c(x+)(z*))?d')
        'ab(?:c(?:x+)(?:z*))?d'

    :type pattern: str
    :rtype: str
    """
    def convert_regexp_to_noncapturing_parsed(parsed_pattern):
        res_data = []
        for key, value in parsed_pattern.data:
            if key == sre_constants.SUBPATTERN:
                index, subpattern = value
                value = (None, convert_regexp_to_noncapturing(subpattern))
            elif key == sre_constants.GROUPREF:
                raise ValueError('Regular expressions with back-references are not supported: {0}'.format(pattern))
            res_data.append((key, value))
        parsed_pattern.data = res_data
        parsed_pattern.pattern.groups = 1
        parsed_pattern.pattern.groupdict = {}
        return parsed_pattern

    return sre_compile.compile(convert_regexp_to_noncapturing_parsed(sre_parse.parse(pattern)))
Example #2
0
 def __init__(self, lexicons, init_state=None, flags=0):
     """Build one re.Scanner-style matcher per start state.

     lexicons: dict mapping a start-state name to an iterable of
         (phrase_regexp, action) pairs.
     init_state: optional initial State(); created when omitted, and a
         missing ``start`` attribute is defaulted to None.
     flags: regexp compilation flags shared by every phrase.

     NOTE(review): Python 2 only (``dict.iteritems``) and tied to
     pre-3.6 ``sre_parse`` internals (2-tuple SUBPATTERN payload,
     settable ``Pattern.groups``) -- confirm before porting.
     """
     # All the regexp magic below is copied from re.Scanner from
     # the standard library.
     import sre_compile
     import sre_parse
     from sre_constants import BRANCH, SUBPATTERN
     if init_state is None:
         init_state = State()
     if not hasattr(init_state, 'start'):
         init_state.start = None
     self.init_state = init_state
     self.lexicons = lexicons
     self.scanners = {}
     for start, lexicon in lexicons.iteritems():
         # combine phrases into a compound pattern
         p, a = [], []
         s = sre_parse.Pattern()
         s.flags = flags
         for phrase, action in lexicon:
             # Each phrase becomes capturing group len(p)+1, so a
             # match's lastindex identifies which action to run.
             p.append(sre_parse.SubPattern(s, [
                         (SUBPATTERN, (len(p)+1,
                                       sre_parse.parse(phrase, flags))),
                         ]))
             a.append(action)
         p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
         # NOTE(review): p is the BRANCH SubPattern here, so len(p) == 1;
         # presumably sufficient for the old SRE validator -- confirm.
         s.groups = len(p)
         self.scanners[start] = sre_compile.compile(p).match, a
Example #3
0
    def __init__(self, lexicon, flags=FLAGS):
        """Build a scanner from token objects exposing ``.pattern``.

        Each token pattern becomes capturing group idx+1 of one big
        BRANCH pattern; ``self.actions[i]`` maps match group i back to
        its token (slot 0 is padding, since group numbers start at 1).

        NOTE(review): depends on pre-3.6 ``sre_parse`` internals
        (2-tuple SUBPATTERN payload, settable ``Pattern.groups``) --
        confirm before running on a modern interpreter.
        """
        self.actions = [None]
        # combine phrases into a compound pattern
        s = sre_parse.Pattern()
        s.flags = flags
        p = []
        # NOTE(kgibbs): These lines must be added to make this file work under
        # Python 2.2, which is commonly used at Google.
        def enumerate(obj):
            # Minimal stand-in for the builtin, which Python 2.2 lacks.
            i = -1
            for item in obj:
                i += 1
                yield i, item
        # NOTE(kgibbs): End changes.
        for idx, token in enumerate(lexicon):
            phrase = token.pattern
            try:
                subpattern = sre_parse.SubPattern(s,
                    [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
            except sre_constants.error:
                # Re-raised unchanged; kept as a debugging hook for bad
                # token patterns.
                raise
            p.append(subpattern)
            self.actions.append(token)

        s.groups = len(p)+1  # NOTE(guido): Added to make SRE validation work
        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        self.scanner = sre_compile.compile(p)
Example #4
0
def _compile(*key):
    # internal: compile ``key = (pattern, flags)`` with caching.
    # NOTE(review): Python 2 only (``raise Exc, val`` statements).
    pattern, flags = key
    bypass_cache = flags & DEBUG
    if not bypass_cache:
        # type(pattern) is part of the key so str/unicode patterns that
        # compare equal do not collide.
        cachekey = (type(key[0]),) + key
        p = _cache.get(cachekey)
        if p is not None:
            return p
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError("Cannot process flags argument with a compiled pattern")
        return pattern
    else:
        if not sre_compile.isstring(pattern):
            raise TypeError, "first argument must be string or compiled pattern"
        try:
            p = sre_compile.compile(pattern, flags)
        except error as v:
            raise error, v

        if not bypass_cache:
            if len(_cache) >= _MAXCACHE:
                # flush-everything eviction policy
                _cache.clear()
            _cache[cachekey] = p
        return p
Example #5
0
def _compile(pattern, flags):
    # internal: compile pattern
    if isinstance(flags, RegexFlag):
        flags = flags.value
    try:
        return _cache[type(pattern), pattern, flags]
    except KeyError:
        pass
    if isinstance(pattern, Pattern):
        if flags:
            raise ValueError(
                "cannot process flags argument with a compiled pattern")
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError("first argument must be string or compiled pattern")
    p = sre_compile.compile(pattern, flags)
    if not (flags & DEBUG):
        if len(_cache) >= _MAXCACHE:
            # Drop the oldest item
            try:
                del _cache[next(iter(_cache))]
            except (StopIteration, RuntimeError, KeyError):
                pass
        _cache[type(pattern), pattern, flags] = p
    return p
Example #6
0
    def _compile(*key):
        # internal: compile pattern
        # Taint tracking: fold the argument's taint (if any) into the
        # cache key; the taint set is not hashable, so freeze it.
        taint = _get_taint(key[0])
        if taint is not None: # can't hash the set
            taint = tuple(taint)
        cachekey = (type(key[0]), key, taint)
        p = re._cache.get(cachekey)
        if p is not None:
            return p
        pattern, flags = key
        if isinstance(pattern, re._pattern_type):
            if flags:
                raise ValueError("Cannot process flags argument with"
                                 " a compiled pattern")
            return pattern
        if not sre_compile.isstring(pattern):
            raise TypeError("first argument must be string or compiled"
                            " pattern")

        p = sre_compile.compile(pattern, flags)

        if len(re._cache) >= re._MAXCACHE:
            # flush-everything eviction, matching stdlib re behaviour
            re._cache.clear()
        re._cache[cachekey] = p
        return p
Example #7
0
def _compile(pattern, flags):
    # internal: compile pattern
    try:
        p, loc = _cache[type(pattern), pattern, flags]
        if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE):
            return p
    except KeyError:
        pass
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError(
                "cannot process flags argument with a compiled pattern")
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError("first argument must be string or compiled pattern")
    p = sre_compile.compile(pattern, flags)
    if not (flags & DEBUG):
        if len(_cache) >= _MAXCACHE:
            _cache.clear()
        if p.flags & LOCALE:
            if not _locale:
                return p
            loc = _locale.setlocale(_locale.LC_CTYPE)
        else:
            loc = None
        _cache[type(pattern), pattern, flags] = p, loc
    return p
Example #8
0
 def __init__(self, runtimePath):
     """Discover available rule modules under ``<runtimePath>/rules``.

     Every ``*.py`` file there (except ``__init__``-style files) is
     registered by bare module name in ``self.availRuleNames``; the
     per-category rule lists start empty and are filled when rules are
     actually loaded.
     """
     self.availRuleNames = []
     basePath = os.path.join(runtimePath, "rules")
     ruleFiles = os.listdir(basePath)
     # Raw string so ``\.`` is a regexp literal-dot escape, not a
     # (deprecated) string escape.
     rulePattern = sre_compile.compile(r"^(.*)\.py$")
     for eachRuleFile in ruleFiles:
         if os.path.isfile(os.path.join(basePath, eachRuleFile)):
             ruleMatch = rulePattern.match(eachRuleFile)
             # ``is not None`` instead of ``!= None`` (identity check)
             if ruleMatch is not None and eachRuleFile.find("__init__") == -1:
                 ruleName = ruleMatch.group(1)
                 self.availRuleNames.append(ruleName)
     self.availRuleCount = len(self.availRuleNames)
     self.availRuleModules = {}
     self.loadedRule = []
     self.rules = []
     self.preprocessRules = []
     self.functionNameRules = []
     self.functionScopeRules = []
     self.typeNameRules = []
     self.typeScopeRules = []
     self.lineRules = []
     self.fileEndRules = []
     self.fileStartRules = []
     self.projectRules = []
     self.rollBackImporter = None
Example #9
0
 def Sub(self, pattern, repl, s):
   r"""Replace every match of ``pattern`` in ``s`` with ``repl``,
   caching the compiled regexp keyed by its source string.

   For example: s='a1234a', repl='OOOO', pattern=r'\d+'
   gives 'aOOOOa'.
   """
   # ``pattern not in`` is the idiomatic form of ``not pattern in``.
   if pattern not in self._regexp_compile_cache:
     self._regexp_compile_cache[pattern] = sre_compile.compile(pattern)
   return self._regexp_compile_cache[pattern].sub(repl, s)
Example #10
0
 def Match(self, pattern, s):
   """Matches the string with the pattern, caching the compiled regexp."""
   # The regexp compilation caching is inlined in both Match and Search for
   # performance reasons; factoring it out into a separate function turns out
   # to be noticeably expensive.
   # ``pattern not in`` is the idiomatic form of ``not pattern in``.
   if pattern not in self._regexp_compile_cache:
     self._regexp_compile_cache[pattern] = sre_compile.compile(pattern)
   return self._regexp_compile_cache[pattern].match(s)
Example #11
0
File: re.py Project: earney/pycomo
def _compile_typed(text_bytes_type, pattern, flags):
    # internal: compile pattern
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError(
                "Cannot process flags argument with a compiled pattern")
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError("first argument must be string or compiled pattern")
    return sre_compile.compile(pattern, flags)
Example #12
0
 def __init__(self, lexicon, flags=0):
     """re.Scanner-style constructor: each (phrase, action) pair is
     wrapped in capturing group len(p)+1 of one BRANCH pattern.

     NOTE(review): pre-3.6 ``sre_parse`` internals (2-tuple SUBPATTERN
     payload, settable ``Pattern.groups``) -- confirm before porting.
     """
     from sre_constants import BRANCH, SUBPATTERN
     self.lexicon = lexicon
     p = []
     s = sre_parse.Pattern()
     s.flags = flags
     for (phrase, action) in lexicon:
         p.append(sre_parse.SubPattern(s, [(SUBPATTERN, (len(p) + 1, sre_parse.parse(phrase, flags)))]))
     s.groups = len(p) + 1
     p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
     self.scanner = sre_compile.compile(p)
Example #13
0
    def __init__(self, lexicon):
        """Build a scanner from (phrase_regexp, action) pairs: each
        phrase gets its own capturing group so a match's lastindex
        identifies which lexicon entry fired.

        NOTE(review): pre-3.6 ``sre_parse`` internals -- confirm before
        running on a modern interpreter.
        """
        from sre_constants import BRANCH, SUBPATTERN

        self.lexicon = lexicon
        # combine phrases into a compound pattern
        p = []
        s = sre_parse.Pattern()
        for phrase, action in lexicon:
            # Fixed: group numbers start at 1 (group 0 is reserved for
            # the whole match), so the first phrase must be group
            # len(p)+1, not len(p).
            p.append(sre_parse.SubPattern(s, [(SUBPATTERN, (len(p) + 1, sre_parse.parse(phrase)))]))
        # Fixed: record the group count before p is rebound to the
        # single BRANCH SubPattern (len(p) would then always be 1).
        s.groups = len(p) + 1
        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        self.scanner = sre_compile.compile(p)
Example #14
0
def _compile(*key):
    # internal: compile pattern
    # NOTE(review): this copy appears truncated -- nothing stores ``p``
    # in ``_cache`` or returns it after a successful compile; confirm
    # against the original re module source.  Python 2 raise syntax.
    p = _cache.get(key)
    if p is not None:
        return p
    pattern, flags = key
    if type(pattern) not in sre_compile.STRING_TYPES:
        # anything non-string is assumed to be already compiled
        return pattern
    try:
        p = sre_compile.compile(pattern, flags)
    except error, v:
        raise error, v  # invalid expression
Example #15
0
    def _get_group_pattern(self, flags):
        """Combine ``self.lexicon`` phrases into one BRANCH-of-groups
        pattern and compile it (match group i+1 <-> lexicon entry i).

        NOTE(review): pre-3.6 ``sre_parse`` internals -- confirm before
        running on a modern interpreter.
        """
        # Fixed: the first body line was tab-indented amid space-indented
        # lines, a TabError under Python 3; indentation normalized.
        # combine phrases into a compound pattern
        patterns = []
        sub_pattern = sre_parse.Pattern()
        sub_pattern.flags = flags
        for phrase, action in self.lexicon:
            patterns.append(sre_parse.SubPattern(sub_pattern, [
                (SUBPATTERN, (len(patterns) + 1, sre_parse.parse(phrase, flags))),
                ]))
        sub_pattern.groups = len(patterns) + 1
        group_pattern = sre_parse.SubPattern(sub_pattern, [(BRANCH, (None, patterns))])
        return sre_compile.compile(group_pattern)
Example #16
0
 def __init__(self, lexicon, flags=0):
     """Compile *lexicon* -- (phrase, action) pairs -- into one branch
     pattern, one capturing group per pair (group i+1 <-> pair i).

     NOTE(review): pre-3.6 ``sre_parse`` internals -- confirm before
     porting.
     """
     self.lexicon = lexicon
     # combine phrases into a compound pattern
     p = []
     s = sre_parse.Pattern()
     s.flags = flags
     for phrase, action in lexicon:
         p.append(sre_parse.SubPattern(s, [
             (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))),
             ]))
     s.groups = len(p)+1
     p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
     self.scanner = sre_compile.compile(p)
Example #17
0
def _compile(*key):
    # internal: compile ``key = (pattern, flags)`` with caching.
    # NOTE(review): appears truncated -- the successful-compile path
    # neither caches nor returns ``p``; confirm against the original.
    # Python 2 raise syntax.
    p = _cache.get(key)
    if (p is not None):
        return p
    (pattern, flags,) = key
    if (type(pattern) is _pattern_type):
        return pattern
    if (type(pattern) not in sre_compile.STRING_TYPES):
        raise TypeError, 'first argument must be string or compiled pattern'
    try:
        p = sre_compile.compile(pattern, flags)
    except error, v:
        raise error, v
Example #18
0
    def __init__(self, regexp, negative=False, **property_names):
        """
        Create a new C{RegexpTokenizer} from a given regular expression.

        @type regexp: C{string} or C{SRE_Pattern}
        @param regexp: The regular expression used to tokenized texts.
            Unless C{negative} is true, this regular expression
            specifies the form of a single word type; so the list of
            tokens generated by tokenization includes all non-overlapping
            substrings that match C{regexp}
        @type negative: C{boolean}
        @param negative: An optional parameter that inverts the
            meaning of C{regexp}.  In particular, if C{negative} is
            true, then C{regexp} is taken to specify the form of word
            separators (and not word types); so the list of tokens
            generated by tokenization includes all substrings that
            occur I{between} matches of the regular expression.
        @type property_names: C{dict}
        @param property_names: A dictionary that can be used to override
            the default property names.  Each entry maps from a
            default property name to a new property name.
        """
        assert chktype(1, regexp, str)

        AbstractTokenizer.__init__(self, **property_names)

        # Accept a precompiled pattern by falling back to its source
        # string.
        if hasattr(regexp, "pattern"):
            regexp = regexp.pattern
        self._negative = bool(negative)

        # Replace any grouping parentheses with non-grouping ones.  We
        # need to do this, because the list returned by re.sub will
        # contain an element corresponding to every set of grouping
        # parentheses.  We must not touch escaped parentheses, and
        # need to handle the case of escaped escapes (e.g. "\\(").
        # We also need to handle nested parentheses, which means our
        # regexp contexts must be zero-width. There are also issues with
        # parenthesis appearing in bracketed contexts, hence we've
        # operated on the intermediate parse structure from sre_parse.
        parsed = sre_parse.parse(regexp)
        parsed = _remove_group_identifiers(parsed)

        # Add grouping parentheses around the regexp; this will allow
        # us to access the material that was split on.
        # Need to set the Pattern to expect a single group
        # NOTE(review): pre-3.8 ``sre_parse`` internals (settable
        # ``Pattern.groups``, 2-tuple SUBPATTERN payload) -- confirm
        # before porting to a modern interpreter.
        pattern = sre_parse.Pattern()
        pattern.groups += 1
        grouped = sre_parse.SubPattern(pattern)
        grouped.append((sre_constants.SUBPATTERN, (1, parsed)))

        self._regexp = sre_compile.compile(grouped, re.UNICODE)
def _compile(*key):
    # internal: compile pattern
    # NOTE(review): truncated copy -- the fresh compile is never cached
    # or returned; confirm against the original re module.  Python 2
    # raise syntax.
    p = _cache.get(key)
    if p is not None:
        return p
    pattern, flags = key
    if type(pattern) is _pattern_type:
        return pattern
    if type(pattern) not in sre_compile.STRING_TYPES:
        raise TypeError, "first argument must be string or compiled pattern"
    try:
        p = sre_compile.compile(pattern, flags)
    except error, v:
        raise error, v # invalid expression
Example #20
0
def _compile(*key):
	# internal: compile pattern
	# NOTE(review): truncated copy -- the fresh compile is never cached
	# or returned; confirm against the original re module.  Python 2
	# raise syntax; tab-indented like the rest of this snippet.
	p = _cache.get(key)
	if p is not None:
		return p
	pattern, flags = key
	if isinstance(pattern, _pattern_type):
		return pattern
	if not sre_compile.isstring(pattern):
		raise TypeError, "first argument must be string or compiled pattern"
	try:
		p = sre_compile.compile(pattern, flags)
	except error, v:
		raise error, v # invalid expression
Example #21
0
 def __init__(self, lexicon, flags=0):
     """Scanner constructor for the Python 3.6+ sre internals: groups
     are allocated via opengroup/closegroup and SUBPATTERN carries a
     (gid, add_flags, del_flags, parsed) 4-tuple."""
     from sre_constants import BRANCH, SUBPATTERN
     self.lexicon = lexicon
     # combine phrases into a compound pattern
     p = []
     s = sre_parse.Pattern()
     s.flags = flags
     for phrase, action in lexicon:
         gid = s.opengroup()
         p.append(sre_parse.SubPattern(s, [
             (SUBPATTERN, (gid, 0, 0, sre_parse.parse(phrase, flags))),
             ]))
         s.closegroup(gid, p[-1])
     p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
     self.scanner = sre_compile.compile(p)
def _compile(regexp):
    """Compile *regexp* with all capturing groups stripped, then wrap
    the whole thing in group 1 so callers can recover the matched text.

    NOTE(review): relies on ``_remove_group_identifiers`` (defined
    elsewhere in this file) and pre-3.8 ``sre_parse`` internals.
    """
    parsed = sre_parse.parse(regexp)
    parsed = _remove_group_identifiers(parsed)

    # Add grouping parentheses around the regexp; this will allow
    # us to access the material that was split on.
    # Need to set the Pattern to expect a single group

    pattern = sre_parse.Pattern()
    pattern.groups += 1
    grouped = sre_parse.SubPattern(pattern)
    grouped.append((sre_constants.SUBPATTERN, (1, parsed)))

    return sre_compile.compile(grouped, re.UNICODE | re.MULTILINE | re.DOTALL)
Example #23
0
def ReplaceAll(pattern, rep, s):
  """Substitute every occurrence of a regex pattern in a string.

  Compiled patterns live in a cache shared with Match and Search, so a
  pattern is only compiled the first time it is seen.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  compiled = _regexp_compile_cache.get(pattern)
  if compiled is None:
    compiled = sre_compile.compile(pattern)
    _regexp_compile_cache[pattern] = compiled
  return compiled.sub(rep, s)
Example #24
0
def build_scanner(lexicon, flags=0):
    """Compile *lexicon* -- (phrase, action) pairs -- into one scanner
    whose capturing group i+1 corresponds to pair i.

    NOTE(review): pre-3.6 ``sre_parse`` internals (2-tuple SUBPATTERN
    payload, settable ``Pattern.groups``); will not run unchanged on
    Python 3.8+.
    """
    import sre_parse
    import sre_compile
    from sre_constants import BRANCH, SUBPATTERN
    # combine phrases into a compound pattern
    p = []
    s = sre_parse.Pattern()
    s.flags = flags
    for phrase, action in lexicon:
        p.append(sre_parse.SubPattern(s, [
            (SUBPATTERN, (len(p) + 1, sre_parse.parse(phrase, flags))),
        ]))
    s.groups = len(p) + 1
    p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
    scanner = sre_compile.compile(p)
    return scanner
Example #25
0
def _compile(*key):
    # internal: compile pattern
    # NOTE(review): truncated copy -- the success path never caches or
    # returns ``p``; confirm against the original re module.  Python 2
    # raise syntax.
    cachekey = (type(key[0]),) + key
    p = _cache.get(cachekey)
    if p is not None:
        return p
    pattern, flags = key
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError('Cannot process flags argument with a compiled pattern')
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError, "first argument must be string or compiled pattern"
    try:
        p = sre_compile.compile(pattern, flags)
    except error, v:
        raise error, v # invalid expression
Example #26
0
    def __init__(self, lexicon, flags=FLAGS):
        """Build a scanner from token objects exposing ``.pattern``;
        ``self.actions[i]`` maps match group i to its token (slot 0 is
        padding, since group numbers start at 1).

        NOTE(review): pre-3.6 ``sre_parse`` internals; unlike sibling
        implementations, ``s.groups`` is never adjusted here -- confirm.
        """
        self.actions = [None]
        # combine phrases into a compound pattern
        s = sre_parse.Pattern()
        s.flags = flags
        p = []
        for idx, token in enumerate(lexicon):
            phrase = token.pattern
            try:
                subpattern = sre_parse.SubPattern(s, [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
            except sre_constants.error:
                # re-raised unchanged; debugging hook for bad patterns
                raise
            p.append(subpattern)
            self.actions.append(token)

        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        self.scanner = sre_compile.compile(p)
Example #27
0
def _compile(pattern, flags):
    # internal: compile pattern
    try:
        return _cache[type(pattern), pattern, flags]
    except KeyError:
        pass
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError(
                "Cannot process flags argument with a compiled pattern")
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError("first argument must be string or compiled pattern")
    p = sre_compile.compile(pattern, flags)
    if len(_cache) >= _MAXCACHE:
        _cache.clear()
    _cache[type(pattern), pattern, flags] = p
    return p
Example #28
0
 def __init__(self, lexicon, flags=0):
     """Scanner constructor for the Python 3.6+ sre internals: groups
     are allocated via opengroup/closegroup and SUBPATTERN carries a
     (gid, add_flags, del_flags, parsed) 4-tuple."""
     from sre_constants import BRANCH, SUBPATTERN
     if isinstance(flags, RegexFlag):
         # Pattern.flags must be a plain int, not the RegexFlag enum.
         flags = flags.value
     self.lexicon = lexicon
     # combine phrases into a compound pattern
     p = []
     s = sre_parse.Pattern()
     s.flags = flags
     for phrase, action in lexicon:
         gid = s.opengroup()
         p.append(
             sre_parse.SubPattern(s, [
                 (SUBPATTERN, (gid, 0, 0, sre_parse.parse(phrase, flags))),
             ]))
         s.closegroup(gid, p[-1])
     p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
     self.scanner = sre_compile.compile(p)
Example #29
0
def _compile(*key):
    # internal: compile pattern
    cachekey = (type(key[0]),) + key
    p = _cache.get(cachekey)
    if p is not None:
        return p
    pattern, flags = key
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError("Cannot process flags argument with a compiled pattern")
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError("first argument must be string or compiled pattern")
    p = sre_compile.compile(pattern, flags)
    if len(_cache) >= _MAXCACHE:
        _cache.clear()
    _cache[cachekey] = p
    return p
Example #30
0
def _compile(*key):
    # internal: compile pattern
    cachekey = (type(key[0]),) + key
    p = _cache.get(cachekey)
    if p is not None:
        return p
    pattern, flags = key
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError(
                "Cannot process flags argument with a compiled pattern")
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError("first argument must be string or compiled pattern")
    p = sre_compile.compile(pattern, flags)
    if len(_cache) >= _MAXCACHE:
        _cache.clear()
    _cache[cachekey] = p
    return p
    def __init__(self, lexicon, flags=FLAGS):
        """Build a scanner for token objects exposing ``.pattern``;
        match group idx+1 selects ``self.actions[idx+1]`` (slot 0 is
        padding, since group numbers start at 1).

        NOTE(review): pre-3.6 ``sre_parse`` internals -- confirm before
        running on a modern interpreter.
        """
        self.actions = [None]
        # combine phrases into a compound pattern
        s = sre_parse.Pattern()
        s.flags = flags
        p = []
        for idx, token in enumerate(lexicon):
            phrase = token.pattern
            try:
                subpattern = sre_parse.SubPattern(
                    s,
                    [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
            except sre_constants.error:
                # re-raised unchanged
                raise
            p.append(subpattern)
            self.actions.append(token)

        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        self.scanner = sre_compile.compile(p)
def _compile(*key):
    # internal: compile ``key = (pattern, flags)`` with caching keyed
    # by the pattern's type as well.  Python 2 raise syntax.
    cachekey = (type(key[0]),) + key
    p = _cache.get(cachekey)
    if p is not None:
        return p
    pattern, flags = key
    if isinstance(pattern, _pattern_type):
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError, 'first argument must be string or compiled pattern'
    try:
        p = sre_compile.compile(pattern, flags)
    except error as v:
        raise error, v

    if len(_cache) >= _MAXCACHE:
        # flush-everything eviction
        _cache.clear()
    _cache[cachekey] = p
    return p
Example #33
0
def _compile(pattern, flags):
    bypass_cache = flags & DEBUG
    if not bypass_cache:
        try:
            return _cache[(type(pattern), pattern, flags)]
        except KeyError:
            pass
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError('Cannot process flags argument with a compiled pattern')
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError('first argument must be string or compiled pattern')
    p = sre_compile.compile(pattern, flags)
    if not bypass_cache:
        if len(_cache) >= _MAXCACHE:
            _cache.clear()
        _cache[(type(pattern), pattern, flags)] = p
    return p
Example #34
0
def _compile(pattern, flags):
    # internal: compile pattern
    try:
        return _cache[type(pattern), pattern, flags]
    except KeyError:
        pass
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError(
                "cannot process flags argument with a compiled pattern")
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError("first argument must be string or compiled pattern")
    p = sre_compile.compile(pattern, flags)
    if not (flags & DEBUG):
        if len(_cache) >= _MAXCACHE:
            _cache.clear()
        _cache[type(pattern), pattern, flags] = p
    return p
Example #35
0
def _compile(*key):
    # internal: compile ``key = (pattern, flags)``, caching by
    # (type, pattern, flags).  Python 2 raise syntax.
    cachekey = (type(key[0]), ) + key
    p = _cache.get(cachekey)
    if p is not None:
        return p
    pattern, flags = key
    if isinstance(pattern, _pattern_type):
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError, 'first argument must be string or compiled pattern'
    try:
        p = sre_compile.compile(pattern, flags)
    except error as v:
        raise error, v

    if len(_cache) >= _MAXCACHE:
        # flush-everything eviction
        _cache.clear()
    _cache[cachekey] = p
    return p
Example #36
0
def _compile(*key):
    # internal: compile pattern
    # NOTE(review): truncated copy -- the successful compile is neither
    # cached nor returned; confirm against the original re module.
    # Python 2 except syntax.
    pattern, flags = key
    bypass_cache = flags & DEBUG
    if not bypass_cache:
        cachekey = (type(key[0]), ) + key
        p = _cache.get(cachekey)
        if p is not None:
            return p
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError('Cannot process flags \
                argument with a compiled pattern')
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError("first argument must be string or compiled pattern")
    try:
        p = sre_compile.compile(pattern, flags)
    except error, v:
        raise error(v)  # invalid expression
Example #37
0
def _compile(pattern, flags):
    bypass_cache = flags & DEBUG
    if not bypass_cache:
        try:
            return _cache[(type(pattern), pattern, flags)]
        except KeyError:
            pass
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError(
                'Cannot process flags argument with a compiled pattern')
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError('first argument must be string or compiled pattern')
    p = sre_compile.compile(pattern, flags)
    if not bypass_cache:
        if len(_cache) >= _MAXCACHE:
            _cache.clear()
        _cache[(type(pattern), pattern, flags)] = p
    return p
Example #38
0
def make_scanner(lexicon, flags=FLAGS):
    """Build a JSON-style scan_once callable from *lexicon* (token
    objects exposing ``.pattern``).

    Tokens whose pattern is ``\\[``, ``{`` or ``"`` are dispatched
    directly on the first character (fast path); everything else goes
    through one compiled BRANCH pattern whose match group number
    selects the action.

    NOTE(review): pre-3.6 ``sre_parse`` internals, and ``raise
    StopIteration`` as an end-of-input signal -- confirm before
    porting.
    """
    actions = [None]
    # Combine phrases into a compound pattern
    s = sre_parse.Pattern()
    s.flags = flags
    charpatterns = {}
    p = []
    idx = 0
    for token in lexicon:
        if token.pattern in (r'\[', r'{', r'"'):
            # single-character fast path, keyed on the literal char
            charpatterns[token.pattern[-1]] = token
        idx += 1
        phrase = token.pattern
        try:
            subpattern = sre_parse.SubPattern(
                s, [(SUBPATTERN, (idx, sre_parse.parse(phrase, flags)))])
        except sre_constants.error:
            raise
        p.append(subpattern)
        actions.append(token)

    s.groups = len(p) + 1  # NOTE(guido): Added to make SRE validation work
    p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
    scanner = sre_compile.compile(p).scanner

    def _scan_once(string, idx=0, context=None):
        # Try the single-character dispatch table first.
        try:
            action = charpatterns[string[idx]]
        except KeyError:
            pass
        except IndexError:
            raise StopIteration
        else:
            return action((string, idx + 1), context)

        m = scanner(string, idx).match()
        if m is None or m.end() == idx:
            raise StopIteration
        return actions[m.lastindex](m, context)

    return _scan_once
Example #39
0
def _compile(*key):
    # internal: compile pattern
    # Cached entries are (pattern, locale) pairs so LOCALE-sensitive
    # patterns are revalidated against the current LC_CTYPE.
    # NOTE(review): truncated copy -- the fresh compile is never cached
    # or returned; confirm against the original re module.  Python 2
    # raise syntax.
    pattern, flags = key
    bypass_cache = flags & DEBUG
    if not bypass_cache:
        cachekey = (type(key[0]),) + key
        try:
            p, loc = _cache[cachekey]
            if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE):
                return p
        except KeyError:
            pass
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError('Cannot process flags argument with a compiled pattern')
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError, "first argument must be string or compiled pattern"
    try:
        p = sre_compile.compile(pattern, flags)
    except error, v:
        raise error, v # invalid expression
Example #40
0
def expand_sub(string, template, debug=0, mode='all'):
    """ Given a regular expression and a replacement string, generate
        expansions of the regular expression and for each one return it and
        its transformation as applied by the replacement string.

        string: regular expression to expand
        template: transformation to apply to each regular expression
        mode: can take 3 values
            all: return all possible shortest strings that the regular
                 expression would match
            first: return the first string that all would return
            random: return one random string that the regular expression would match
    """
    # Parse in VERBOSE mode and stash the enumeration mode on the
    # parsed pattern for ``_iterate`` (defined elsewhere) to consult.
    pattern = sre_parse.parse(string, flags=sre_parse.SRE_FLAG_VERBOSE)
    pattern.mode = mode
    template = sre_parse.parse_template(template, sre_compile.compile(pattern))
    if debug:
        print(pattern)
        print(template)
    for s in _iterate(pattern, pattern.data, MatchObj(pattern, "")):
        # NOTE(review): presumably resets the per-solution backtracking
        # budget consumed by _iterate -- confirm against _iterate.
        s.patient = 0
        yield (s.string, sre_parse.expand_template(template, s))
Example #41
0
def compile_regexp_to_noncapturing(pattern, flags=0):
    """
    Compile the regexp pattern after switching all grouping parentheses
    in the given regexp pattern to non-capturing groups.

    :type pattern: str
    :rtype: compiled pattern object
    :raise ValueError: if the pattern contains back-references, which
        cannot survive the removal of group numbers.
    """
    # NOTE(review): tied to pre-3.6 ``sre_parse`` internals (2-tuple
    # SUBPATTERN payload, settable ``pattern.groups``) -- confirm
    # before running on a modern interpreter.
    def convert_regexp_to_noncapturing_parsed(parsed_pattern):
        res_data = []
        for key, value in parsed_pattern.data:
            if key == sre_constants.SUBPATTERN:
                index, subpattern = value
                # a group id of None makes the group non-capturing
                value = (None, convert_regexp_to_noncapturing_parsed(subpattern))
            elif key == sre_constants.GROUPREF:
                raise ValueError('Regular expressions with back-references are not supported: {0}'.format(pattern))
            res_data.append((key, value))
        parsed_pattern.data = res_data
        # only group 0 (the whole match) remains
        parsed_pattern.pattern.groups = 1
        parsed_pattern.pattern.groupdict = {}
        return parsed_pattern

    return sre_compile.compile(convert_regexp_to_noncapturing_parsed(sre_parse.parse(pattern)), flags=flags)
Example #42
0
File: re.py Project: ybay/yZhPy
def _compile(pattern, flags):
    # internal: compile pattern.  (Comments translated from Chinese;
    # the runtime error messages are deliberately left untouched.)
    if isinstance(flags, RegexFlag):
        flags = flags.value
    try:
        return _cache[type(pattern), pattern, flags]
    except KeyError:
        pass
    if isinstance(pattern, Pattern):
        if flags:
            # message: "cannot process flags argument with a compiled pattern"
            raise ValueError("无法使用编译模式处理标志参数")
        return pattern
    if not sre_compile.isstring(pattern):
        # message: "first argument must be a string or compiled pattern"
        raise TypeError("第一个参数必须是字符串或编译模式")
    p = sre_compile.compile(pattern, flags)
    if not (flags & DEBUG):
        if len(_cache) >= _MAXCACHE:
            # drop the oldest item (insertion-ordered dict => FIFO)
            try:
                del _cache[next(iter(_cache))]
            except (StopIteration, RuntimeError, KeyError):
                pass
        _cache[type(pattern), pattern, flags] = p
    return p
Example #43
0
def _compile(pattern, flags):
    # internal: compile pattern
    if isinstance(flags, RegexFlag):
        flags = flags.value
    try:
        return _cache[type(pattern), pattern, flags]
    except KeyError:
        pass
    if isinstance(pattern, Pattern):
        if flags:
            raise ValueError(
                "cannot process flags argument with a compiled pattern")
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError("first argument must be string or compiled pattern")
    p = sre_compile.compile(pattern, flags)
    if not (flags & DEBUG):
        if len(_cache) >= _MAXCACHE:
            try:
                _cache.popitem(last=False)
            except KeyError:
                pass
        _cache[type(pattern), pattern, flags] = p
    return p
Example #44
0
def _compile(pattern, flags):
    """Compile *pattern*, caching results under a string key.

    Brython workaround: tuple keys do not work reliably here, so the
    cache key is rendered as the string ``"type:pattern:flags"``.
    The cache is flushed wholesale once it reaches ``_MAXCACHE``.
    """
    cache_key = "%s:%s:%s" % (type(pattern), pattern, flags)
    try:
        return _cache[cache_key]
    except KeyError:
        pass
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError(
                "Cannot process flags argument with a compiled pattern")
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError("first argument must be string or compiled pattern")
    compiled = sre_compile.compile(pattern, flags)
    if len(_cache) >= _MAXCACHE:
        _cache.clear()               # crude eviction: drop everything
    _cache[cache_key] = compiled
    return compiled
Example #45
0
def _compile(*key):
    """Compile a ``(pattern, flags)`` pair, caching with locale awareness.

    Cached entries remember the ``LC_CTYPE`` locale active at compile
    time; a cache hit is only honoured while that locale is still in
    effect, so ``re.LOCALE`` patterns recompile after a locale change.
    Python 2 source — note the ``raise X, v`` statement syntax.
    """
    pattern, flags = key
    bypass_cache = flags & DEBUG  # DEBUG disables caching entirely
    if not bypass_cache:
        cachekey = (
         type(key[0]),) + key  # include the type: str and unicode keys must differ
        try:
            p, loc = _cache[cachekey]
            # Reuse only if locale-independent (loc is None) or the locale
            # it was compiled under is still the active LC_CTYPE.
            if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE):
                return p
        except KeyError:
            pass

    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError('Cannot process flags argument with a compiled pattern')
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError, 'first argument must be string or compiled pattern'
    try:
        p = sre_compile.compile(pattern, flags)
    except error as v:
        # NOTE(review): re-raising like this loses the original traceback
        # position; likely an artifact of decompilation.
        raise error, v

    if not bypass_cache:
        if len(_cache) >= _MAXCACHE:
            _cache.clear()  # crude eviction: flush everything
        if p.flags & LOCALE:
            if not _locale:  # _locale module unavailable on this build
                return p
            loc = _locale.setlocale(_locale.LC_CTYPE)
        else:
            loc = None
        _cache[cachekey] = (
         p, loc)
    return p
Example #46
0
def _compile(*key):
    """Compile a ``(pattern, flags)`` pair with a simple flush-on-full cache.

    Python 2 source (``raise X, v`` syntax).  Unlike newer variants the
    cache key omits the pattern's type, and flags passed alongside an
    already-compiled pattern are silently ignored.
    """
    p = _cache.get(key)
    if p is not None:
        return p
    
    (pattern, flags) = key
    if type(pattern) is _pattern_type:
        # Already compiled; note flags are NOT validated here.
        return pattern
    
    if type(pattern) not in sre_compile.STRING_TYPES:
        raise TypeError, 'first argument must be string or compiled pattern'
    
    
    try:
        p = sre_compile.compile(pattern, flags)
    except error:
        # NOTE(review): decompilation artifact — re-raises with a None
        # value, discarding the original error message.
        v = None
        raise error, v

    if len(_cache) >= _MAXCACHE:
        _cache.clear()  # crude eviction: flush everything
    
    _cache[key] = p
    return p
def FindAll(pattern, s):
    """Return all matches of *pattern* in *s*, caching the compiled regexp.

    The compiled pattern is memoised in the module-level
    ``_regexp_compile_cache`` dict so repeated calls with the same
    pattern skip recompilation.
    """
    # `pattern not in` is the idiomatic (PEP 8) form of `not pattern in`.
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    return _regexp_compile_cache[pattern].findall(s)
Example #48
0
class Wiki2XHTML(Translator):
    '''Simple conversion from DotClear wiki2xhtml markup to HTML.

    .. Warning::
       Behaviour is quite different from the original DotClear parser and
       a few elements have been left unimplemented.
    '''
    # Precompiled helper patterns, shared by all instances.

    #? first space of each line
    _first_space = sre_compile.compile(r'(?:^|(?<=\n))(?:[ ]|(?=\n))')

    #? first '> ' sequence of each line, the space being optional
    _first_gt_space = sre_compile.compile(r'(?:^|(?<=\n))>(?:[ ]|(?=\n))')

    #? double or more LF
    _block_separator = sre_compile.compile(r'\n\n+(?=\S)')

    #? separate list prefix (# | *) and list value
    _fragment_list = sre_compile.compile(
        r'(?P<type>[*#]+) \s* (?P<value> (?: (?:(?:.|\n)+?) (?:(?=\n[#*])|$) ) | (?:.+\n)$ )',
        sre_compile.SRE_FLAG_MULTILINE | sre_compile.SRE_FLAG_VERBOSE)

    #? use to beautify ``<li>\n\s*value`` to ``<li>value``
    _li_trim = sre_compile.compile(r'(?<=<li>)\s+|(?<![>])(?=\n)\s+?(?=</li>)')

    #? non-word
    _non_word = sre_compile.compile(r'\W+')

    @staticmethod
    def escape(string, entities=False):
        '''Escape special HTML characters.

        Replace characters with a special significance in HTML by their
        HTML entity equivalent.  If the optional argument `entities` is
        set, an entities table built from `htmlentitydefs.entitydefs`
        is used instead of the minimal special-chars table.
        '''
        tr = html_entities if entities else html_special_chars
        return u''.join(c in tr and tr[c] or c for c in string)

    ##### blocks
    @staticmethod
    def b_hr(match):
        """Render a horizontal rule."""
        return u'<hr />\n'

    def b_p(self, match):
        """Render a paragraph, expanding inline markup inside it."""
        return u'<p>%s</p>\n' % p_inline.sub(self.inlines,
                                             match.group('p')).strip()

    def b_xmp(self, match):
        """Render an ``xmp`` block as escaped preformatted text."""
        return u'<pre class="xmp">%s</pre>\n' % self.escape(
            match.group('xmp')).strip()

    def b_pre(self, match):
        """Render a preformatted block, stripping the leading space markers."""
        return u'<pre>%s</pre>\n' % p_inline.sub(
            self.inlines,
            self._first_space.sub('', match.group(match.lastgroup))).rstrip()

    def b_special(self, match):
        """Render a raw-HTML macro block; only the 'html' macro is accepted."""
        assert match.group('macro') == 'html'
        return u'<div class="macro %s">%s</div>\n' % (self.escape(
            match.group('macro')), match.group('special').strip())

    def b_head(self, match):
        """Render a heading; the level is 6 minus the number of '!' markers."""
        return u'<h%(n)u>%(value)s</h%(n)u>\n' % {
            'n': 6 - len(match.group('head_level')),
            'value': p_inline.sub(self.inlines, match.group('head_value'))
        }

    def b_blockquote(self, match):
        """Render a citation: strip '>' prefixes, split into paragraphs."""
        return u'<blockquote><p>%s</p></blockquote>\n' % u'</p>\n<p>'.join(
            self._block_separator.split(
                p_inline.sub(
                    self.inlines,
                    self._first_gt_space.sub('', match.group(
                        match.lastgroup))))).rstrip()

    def b_list(self, match):
        """Render a (possibly nested) ordered/unordered list.

        Builds an intermediate MazNode tree by comparing each item's
        bullet prefix with the previous item's, then serialises the tree
        to HTML.
        """
        ltprev = ''
        #? TODO: i'd like to do it without the nodes tree
        root = node = MazNode('div')
        for m in self._fragment_list.finditer(match.group()):
            ltcurr, value = m.groups()
            # Walk the prefix difference: close levels that disappeared
            # (prev set), open the new ones (curr set).
            for prev, curr in itertools.dropwhile(
                    lambda x: not cmp(x[0], x[1]),
                    itertools.izip_longest(ltprev, ltcurr)):
                if prev:
                    node = node.parent
                    if node.name == 'li':
                        node = node.parent
                if curr:
                    if node.child and node.child.name == 'li':
                        node = node.child.prev
                    # '#' opens an <ol>, '*' opens a <ul>
                    node += MazNode('%sl' % (curr == '#' and 'o' or 'u', ))
                    node = node.child.prev
            node += (MazNode('li') +
                     MazNode(value=p_inline.sub(self.inlines, value)))
            ltprev = ltcurr
        # FIXME: there is a bug in MazNode when descending from a higher level than the root
        root.child.parent = root.child
        return self._li_trim.sub('', node2html(root.child))

    @staticmethod
    def b_nl(match):
        """Swallow empty lines: they produce no output."""
        return ''

    ##### inlines
    def i_code(self, match):
        """Render inline code."""
        return u'<tt class="code">%s</tt>' % p_inline.sub(
            self.inlines, match.group(match.lastgroup))

    def i_em(self, match):
        """Render emphasis."""
        return u'<em>%s</em>' % p_inline.sub(self.inlines,
                                             match.group(match.lastgroup))

    def i_strong(self, match):
        """Render strong emphasis."""
        return u'<strong>%s</strong>' % p_inline.sub(
            self.inlines, match.group(match.lastgroup))

    def i_del(self, match):
        """Render deleted text."""
        return u'<del>%s</del>' % p_inline.sub(self.inlines,
                                               match.group(match.lastgroup))

    def i_ins(self, match):
        """Render inserted text."""
        return u'<ins>%s</ins>' % p_inline.sub(self.inlines,
                                               match.group(match.lastgroup))

    @staticmethod
    def i_br(match):
        """Render a forced line break."""
        return u'<br />'

    def i_anchor(self, match):
        """Render a named anchor; runs of non-word characters become '-'."""
        return u'<a name="%s"></a>' % self._non_word.sub(
            '-', match.group('anchor'))

    def i_acronym(self, match):
        """Render an acronym, with an optional title attribute."""
        return u'<acronym%s>%s</acronym>' % (
            u' title="%s"' % self.escape(match.group('acronym_title').strip())
            if match.group('acronym_title') else '',
            p_inline.sub(self.inlines, match.group('acronym_value')).strip())

    def i_a(self, match):
        """Render a link; hrefs with a URL scheme get class="external"."""
        href = urlparse.urlsplit(match.group('a_href'))
        link = [u'<a href="%s"' % match.group('a_href')]
        if match.group('a_title'):
            link.append(u' title="%s"' % self.escape(match.group('a_title')))
        if match.group('a_lang'):
            link.append(u' hreflang="%s"' % self.escape(match.group('a_lang')))
        if href.scheme:
            # TODO: make a handle for the external using the hostname
            link.append(u' class="external"')
        # Fall back to the (escaped) href itself when no link text was given.
        link.append(u'>%s</a>' % (
         p_inline.sub(self.inlines, match.group('a_value')) \
         if match.group('a_value') \
         else self.escape(match.group('a_href'))
        ))
        return ''.join(link)

    def i_uri(self, match):
        """Render a bare URI as an external link."""
        return u'<a href="%s" class="external">%s</a>' % (match.group(
            match.lastgroup), self.escape(match.group(match.lastgroup)))

    def i_img(self, match):
        """Render an image with optional alt, long description and alignment."""
        link = [u'<img src="%s"' % match.group('img_src')]
        if match.group('img_alt'):
            link.append(u'alt="%s"' % self.escape(match.group('img_alt')))
        if match.group('img_desc'):
            link.append(u'longdesc="%s"' %
                        self.escape(match.group('img_desc')))
        if match.group('img_align'):
            # Only the first letter of the alignment word is significant.
            align = match.group('img_align').strip().lower()[0]
            if align in 'lg':
                #? align left
                link.append('style="float:left; margin: 0 1em 1em 0;"')
            elif align in 'cm':
                #? align center
                link.append('style="display:block; margin:0 auto;"')
            elif align in 'rd':
                #? align right
                link.append('style="float:right; margin: 0 0 1em 1em;"')
            else:
                self.warn(match,
                          'unknown alignment %r' % match.group('img_align'))
        link.append('/>')
        return ' '.join(link)

    def i_cite(self, match):
        """Render an inline quotation with optional lang/cite attributes."""
        r = ['<q']
        if match.group('cite_lang'):
            r.append(u' lang="%s"' % self.escape(match.group('cite_lang')))
        if match.group('cite_cite'):
            # FIXME? use urlencode, not escape
            r.append(u' cite="%s"' % self.escape(match.group('cite_cite')))
        r.append(u'>%s</q>' %
                 p_inline.sub(self.inlines, match.group('cite_value')).strip())
        return ''.join(r)
def search(pattern, s):
    """Search *s* for *pattern*, memoising the compiled regexp."""
    try:
        compiled = _regexp_compile_cache[pattern]
    except KeyError:
        compiled = _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    return compiled.search(s)
Example #50
0
strings.
"""

__date__ = '28 June 2018'
__author__ = ('Rohit Sehgal <*****@*****.**>')

from typing import Any
from importlib import import_module

import re
import os
import builtins
import traceback
import sre_compile

# Recover the match-object type from a trivial successful match — the same
# trick the stdlib re module uses, since the type is not exported directly.
Match = type(sre_compile.compile('', 0).match(''))  # cf. Lib/re.py#L263


class REResolver:
    """
    This class defines how values from the jackson file are translated
    into the in-memory JSON (secret-bearing) file.

    For now it supports translations either from environment variables or
    from other Python functions, as specified in MATCH_REGEX.

    The key values have to be declared like:
        - env.<ENVIRONMENT_VAR_NAME>
        - !foo.bar.baz: the Python notation for calling a method from
            another Python file.
    """
Example #51
0
 def __init__(self, rules, flags=0):
     """Build a scanner regex where every rule gets its own capturing group.

     The group index of a match then identifies which rule fired.
     """
     grouped = ['(%s)' % pattern for pattern in rules]
     combined = '(%s)' % '|'.join(grouped)
     self.scanner = sre_compile.compile(combined, flags)
Example #52
0
    def __init__(self,
                 pattern,
                 flags=0,
                 charset=CHARSET,
                 max_count=None,
                 relaxed=False):
        """Set up an enumerator over the strings matched by *pattern*.

        :param pattern: regex source string, or an already-parsed
            ``sre_parse.SubPattern``.
        :param flags: ``re`` flags; IGNORECASE/UNICODE/LOCALE are rejected
            below (see the linked sre_yield issues).
        :param charset: characters that '.' may expand to; newline is
            removed unless ``re.DOTALL`` is set.
        :param max_count: cap on repetition expansion (defaults to
            ``MAX_REPEAT_COUNT``).
        :param relaxed: if true, lookaround assertions are silently
            ignored instead of raising ``ParseError``.
        """
        # If the RE module cannot compile it, we give up quickly
        if not isinstance(pattern, sre_parse.SubPattern):
            pattern = sre_parse.parse(pattern, flags)
        self.matcher = sre_compile.compile(pattern, flags)
        if not flags & re.DOTALL:
            charset = "".join(c for c in charset if c != "\n")
        self.charset = charset
        self.relaxed = relaxed

        # Maps group names to group numbers (standard re groupindex dict).
        self.named_group_lookup = self.matcher.groupindex

        flags |= DEFAULT_RE_FLAGS  # https://github.com/google/sre_yield/issues/3
        if flags & re.IGNORECASE:
            raise ParseError(
                'Flag "i" not supported. https://github.com/google/sre_yield/issues/4'
            )
        elif flags & re.UNICODE:
            raise ParseError(
                'Flag "u" not supported. https://github.com/google/sre_yield/issues/3'
            )
        elif flags & re.LOCALE:
            raise ParseError(
                'Flag "l" not supported. https://github.com/google/sre_yield/issues/5'
            )

        if max_count is None:
            self.max_count = MAX_REPEAT_COUNT
        else:
            self.max_count = max_count

        # NOTE(review): presumably flipped to True by the groupref backend
        # when a backreference is seen — confirm in self.groupref.
        self.has_groupref = False

        # Configure the parser backends
        # Dispatch table: one handler per sre parse-tree opcode.
        self.backends = {
            sre_constants.LITERAL:
            lambda y: [chr(y)],
            sre_constants.RANGE:
            lambda l, h: [chr(c) for c in range(l, h + 1)],
            sre_constants.SUBPATTERN:
            self.maybe_save,
            sre_constants.BRANCH:
            self.branch_values,
            sre_constants.MIN_REPEAT:
            self.max_repeat_values,
            sre_constants.MAX_REPEAT:
            self.max_repeat_values,
            sre_constants.AT:
            self.nothing_added,
            sre_constants.ASSERT:
            self.lookaround_parse_error,
            sre_constants.ASSERT_NOT:
            self.lookaround_parse_error,
            sre_constants.ANY:
            lambda _: self.in_values(((sre_constants.NEGATE, ), )),
            sre_constants.IN:
            self.in_values,
            sre_constants.NOT_LITERAL:
            self.not_literal,
            sre_constants.CATEGORY:
            self.category,
            sre_constants.GROUPREF:
            self.groupref,
        }
        if self.relaxed:
            # Relaxed mode: treat lookarounds as contributing nothing
            # instead of raising a parse error.
            self.backends.update({
                sre_constants.ASSERT: self.nothing_added,
                sre_constants.ASSERT_NOT: self.nothing_added,
            })

        self.state = STATE_START
        # Now build a generator that knows all possible patterns
        self.raw = self.sub_values(pattern)
        # Configure this class instance to know about that result
        self.length = self.raw.__len__()
Example #53
0
 def test_no_pattern(self):
     """Compiling an empty SubPattern still yields a usable scanner."""
     import sre_compile, sre_parse
     # NOTE(review): sre_parse.Pattern was renamed to State in Python 3.8,
     # so this test only runs as-is on older interpreters.
     sre_pattern = sre_compile.compile(
         sre_parse.SubPattern(sre_parse.Pattern()))
     assert sre_pattern.scanner('s') is not None
Example #54
0
 def test_create_basic(self):
     """Round-trip a parsed pattern through create_subpattern.

     The rebuilt subpattern must still compile and compare equal to the
     original parse result.
     """
     orig = sre_parse.parse(r"a{2}")
     new = create_subpattern(orig.data)
     sre_compile.compile(new)
     assert _val_eq(orig, new)
	r'(?P<pre> (?:(?<=\n\n)|^)   (?:^[ ].+(\n|$))+ )',
	#? list, ordered and unordered Matches them whole, separate items are parsed later. The list *must* start with a single bullet.
	r'(?P<list>^[ \t]*([*][^*\#]|[\#][^\#*]).*$(\n[ \t]*[*\#]+.*$)*)',
	#? head
	r'^\s*(?P<head>(?P<head_level>!{1,4})(?P<head_value>.*?))\s*$',
	#? hr separator
	r'(?P<hr>^\s*----\s*$)',
	#? citation
	r'^(?P<blockquote>>(.*) ([\#]!(\s+.*)?$)?(.|\n)+?)(?:^[^>]|^$)',
	#? paragraph
	r'(?:(?<=(\n\n)(?![/*#!]|----))|^) (?P<p> ^(?:(?:.+\n(?!\n))*(?:.+$)) \n?)',
	#? empty line
	r'(?P<nl>^\s*$ )',
)

# Combine all block-level rules into one alternation; the named groups
# identify which rule matched.
p_block = sre_compile.compile('|'.join(RULES_BLOCK), RULES_FLAGS)

RULES_INLINE = (
	#? URLs (starting with an url scheme like HTTP)
	#TODO: r'(?P<url>(^|(?<=\s|[.,:;!?()/=]))(?P<escaped_url>~)?(?P<url_target> (?P<url_proto>https?|ftps?|ircs?|nntp|news|mailto|telnet|file):\S+?)($|(?=\s|[,.:;!?()](\s|$))))',
	r'(?P<uri>[a-zA-Z]+:/{,3}[%s]+/?[%s]*|%s)' %(
		r'A-Za-z0-9\-\.',
		r'\%\;\/\?\:\@\&\=\+\$\,\[\]A-Za-z0-9\-_\.\!\~\*\'\(\)\w#',
		r'\%\;\/\?\:\@\&\=\+\$\,\[\]A-Za-z0-9\-_\.\!\~\*\'\(\)'
	),
	#? image
	r'(?P<img>\050\050 (?P<img_src>%(word)s) (?: \| (?P<img_alt>%(word)s) (?: \| (?P<img_align>%(word)s) (?: \| (?P<img_desc>%(word)s))?)?)?  \051\051)' % {'word': r'(?: (?<![^\\](?=\|)) .)+'},
	#? escaped character
	r'(?P<escape>[\\] (?P<escaped_char>\S) )',
	#? emphasis
	r"(?:'' (?P<em>.+?) (?<![\\])'')",
Example #56
0
def compile(regexp, flags=0):
    """Compile *regexp* after mapping *flags* through ``get_flags``."""
    effective_flags = get_flags(flags)
    return sre_compile.compile(regexp, effective_flags)
Example #57
0
    alphanum = _alphanum
    for i, c in enumerate(pattern):
        if c not in alphanum:
            if c == "\000":
                s[i] = "\\000"
            else:
                s[i] = "\\" + c
    return pattern[:0].join(s)

# --------------------------------------------------------------------
# internals

_cache = {}        # memoises compiled patterns (flushed when full)
_cache_repl = {}   # presumably caches parsed replacement templates — verify against sub()

# Concrete type of a compiled pattern (not exported by sre_compile directly).
_pattern_type = type(sre_compile.compile("", 0))

_MAXCACHE = 100    # cache size bound

def _compile(*key):
    # internal: compile pattern
    cachekey = (type(key[0]),) + key
    p = _cache.get(cachekey)
    if p is not None:
        return p
    pattern, flags = key
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError('Cannot process flags argument with a compiled pattern')
        return pattern
    if not sre_compile.isstring(pattern):
Example #58
0
    for i in b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f'
}


def escape(pattern):
    """Escape special characters in a string or bytes-like pattern.

    str input is translated directly through ``_special_chars_map``;
    bytes input takes a round-trip through latin-1 (lossless for
    arbitrary byte values) so the same table can be applied.
    """
    if not isinstance(pattern, str):
        # bytes path: decode, translate, re-encode.
        text = str(pattern, 'latin1')
        return text.translate(_special_chars_map).encode('latin1')
    return pattern.translate(_special_chars_map)


# Public aliases for the compiled-pattern and match-object types, recovered
# from a trivial compile/match since they are not exported directly.
Pattern = type(sre_compile.compile('', 0))
Match = type(sre_compile.compile('', 0).match(''))

# --------------------------------------------------------------------
# internals

_cache = {}  # ordered! (presumably relied on for oldest-first eviction in _compile — verify)

_MAXCACHE = 512


def _compile(pattern, flags):
    # internal: compile pattern
    if isinstance(flags, RegexFlag):
        flags = flags.value
    try:
Example #59
0
import sre_compile
import sre_constants

# Smoke/benchmark script for sre_compile (Python 2: print statements, xrange).
r = sre_compile.compile("a(b+)c", 0)
print r.match("")
print r.match("ac")
print r.match("abc").groups()
# Hot loop: exercise match()+groups() 100k times, printing progress every 10k.
for i in xrange(100000):
    r.match("abbc").groups()
    if i % 10000 == 0:
        print i


def identity(o):
    # Trivial fixup callable passed to _optimize_charset below.
    return o


charset = [(sre_constants.RANGE, (128, 65535))]

# NOTE(review): _optimize_charset is a private sre_compile helper whose
# signature differs across Python versions — this call is version-specific.
print sre_compile._optimize_charset(charset, identity)
Example #60
0
    u'(?:<p>(?:&nbsp;|\\s|<br \\/>)*?</p>\\s*)+\\Z',
    u'(?<!\\\\)([aAbBcdDeEfFgGhHiIjlLmMnNoOPrsStTUuwWyYzZ])',
    u'\\\\(.)',
    '((^|[^%])(%%)*%[sy])',
    '(?P<year>\\d{4})-(?P<month>\\d{1,2})-(?P<day>\\d{1,2})$',
    '(?P<hour>\\d{1,2}):(?P<minute>\\d{1,2})(?::(?P<second>\\d{1,2})(?:\\.(?P<microsecond>\\d{1,6})\\d{0,6})?)?',
    '(?P<year>\\d{4})-(?P<month>\\d{1,2})-(?P<day>\\d{1,2})[T ](?P<hour>\\d{1,2}):(?P<minute>\\d{1,2})(?::(?P<second>\\d{1,2})(?:\\.(?P<microsecond>\\d{1,6})\\d{0,6})?)?(?P<tzinfo>Z|[+-]\\d{2}(?::?\\d{2})?)?$',
    '\\?|[-+]?[.\\w]+$',
    u'(?:W/)?"((?:\\\\.|[^"])*)"',
    u'^\\w{3}, (?P<day>\\d{2}) (?P<mon>\\w{3}) (?P<year>\\d{4}) (?P<hour>\\d{2}):(?P<min>\\d{2}):(?P<sec>\\d{2}) GMT$',
    u'^\\w{6,9}, (?P<day>\\d{2})-(?P<mon>\\w{3})-(?P<year>\\d{2}) (?P<hour>\\d{2}):(?P<min>\\d{2}):(?P<sec>\\d{2}) GMT$',
    u'^\\w{3} (?P<mon>\\w{3}) (?P<day>[ \\d]\\d) (?P<hour>\\d{2}):(?P<min>\\d{2}):(?P<sec>\\d{2}) (?P<year>\\d{4})$',
    u'\\s*,\\s*',
    '^From ',
    '[ \\(\\)<>@,;:\\\\"/\\[\\]\\?=]',
    u'(\\{\\%.*?\\%\\}|\\{\\{.*?\\}\\}|\\{\\#.*?\\#\\})',
    u'\n^(?P<constant>(?:\\_\\("[^"\\\\]*(?:\\\\.[^"\\\\]*)*"\\)|\\_\\(\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'\\)|"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"|\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'))|\n^(?P<var>[\\w\\.]+|[-+\\.]?\\d[\\d\\.e]*)|\n (?:\\s*\\|\\s*\n     (?P<filter_name>\\w+)\n         (?:\\:\n             (?:\n              (?P<constant_arg>(?:\\_\\("[^"\\\\]*(?:\\\\.[^"\\\\]*)*"\\)|\\_\\(\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'\\)|"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"|\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'))|\n              (?P<var_arg>[\\w\\.]+|[-+\\.]?\\d[\\d\\.e]*)\n             )\n         )?\n )',
    u'(?:(\\w+)=)?(.+)',
    u'API|TOKEN|KEY|SECRET|PASS|PROFANITIES_LIST|SIGNATURE',
    '\\s*#?\\s*$',
    '[_a-z]\\w*\\.py$',
    u'.*; charset=([\\w\\d-]+);?',
    '[ \\(\\)<>@,;:\\\\"/\\[\\]\\?=]',
    u'\\s+',
    u'^[\\w.@+-]+$',
]

# Precompile every collected pattern to verify they all still compile cleanly.
for pattern in patterns:
    sre_compile.compile(pattern, 0)