def __init__(self, expr):
    """
    Wrap ``expr`` and try to precompile a negative-lookahead regex for it.

    :param expr: expression handed to the parent constructor; its
        ``__regex__()`` pattern is embedded in ``(?!...)``
    """
    super(NotAny, self).__init__(expr)
    # __regex__() yields a pair; only the pattern text (second item) is needed
    _, inner_pattern = self.expr.__regex__()
    try:
        self.regex = regex_compile(f"(?!{inner_pattern})")
    except Exception:
        # best effort: leave the regex unset when the pattern cannot be compiled
        self.regex = None
def __init__(self, match):
    """
    Build a literal whose regex comes from ``regex_caseless(match)``.

    :param match: text to match; stored unchanged in the config, while the
        parent ``Literal`` is initialised with its upper-cased form
    """
    Literal.__init__(self, match.upper())
    caseless_regex = regex_compile(regex_caseless(match))
    self.set_config(match=match, regex=caseless_regex)
    # the parser is named after the text of the compiled pattern
    pattern_text = self.parser_config.regex.pattern
    self.parser_name = repr(pattern_text)
def __init__(self, wordChars=printables):
    """
    Zero-width token for a position where a word begins.

    The compiled regex requires that the preceding character matches the
    ``CharsNotIn(wordChars, exact=1)`` pattern (or that the position is
    ``^``), and that the following character matches ``Char(wordChars)``.

    :param wordChars: characters that make up a word
    """
    super(WordStart, self).__init__()
    # pattern text is the second item of each __regex__() result
    preceding = (CharsNotIn(wordChars, exact=1)).__regex__()[1]
    following = Char(wordChars).__regex__()[1]
    boundary = regex_compile(f"(?:(?<={preceding})|^)(?={following})")
    self.set_config(
        regex=boundary,
        word_chars="".join(sorted(set(wordChars))),
    )
    self.streamlined = True
def __init__(self, wordChars=printables):
    """
    Token for a position where a word ends.

    The compiled regex requires that the preceding character matches
    ``Char(wordChars)`` and that what follows matches the ``~Char(wordChars)``
    pattern or ``$``.

    :param wordChars: characters that make up a word
    """
    super(WordEnd, self).__init__()
    self.engine = PLAIN_ENGINE
    # de-duplicated, sorted copy of the word characters for the config
    word_chars = "".join(sorted(set(wordChars)))
    inside_word = Char(wordChars).__regex__()[1]
    outside_word = (~Char(wordChars)).__regex__()[1]
    self.set_config(
        word_chars=word_chars,
        regex=regex_compile(f"(?<={inside_word})({outside_word}|$)"),
    )
def __init__(self, pattern):
    """
    BUILD A PARSER FROM A REGEX PATTERN

    :param pattern: THE REGEX PATTERN
    """
    # the pattern text is parsed (by the `regex` parser in scope), and the
    # streamlined result becomes the wrapped expression
    expression = regex.parseString(pattern).value().streamline()
    ParseEnhancement.__init__(self, expression)
    # WE ASSUME IT IS SAFE TO ASSIGN regex (NO SERIOUS BACKTRACKING PROBLEMS)
    self.streamlined = True
    self.regex = regex_compile(pattern)
def __init__(self, charset, asKeyword=False, excludeChars=None):
    """
    Represent one character in a given charset

    :param charset: THE CHARACTERS ACCEPTED
    :param asKeyword: WHEN TRUTHY, SURROUND THE PATTERN WITH REGEX WORD BOUNDARIES
    :param excludeChars: CHARACTERS TO REMOVE FROM charset BEFORE BUILDING THE PATTERN
    """
    Token.__init__(self)
    if excludeChars:
        charset = set(charset) - set(excludeChars)
    regex = regex_range(charset)
    if asKeyword:
        # wrap the character pattern itself; formatting `self` here would
        # stringify the not-yet-configured parser instead of the pattern
        regex = r"\b" + regex + r"\b"
    self.set_config(
        regex=regex_compile(regex),
        charset="".join(sorted(set(charset))),
    )
def __init__(self, maps):
    """
    Merge several key -> expressions maps into one prefix-keyed lookup table.

    :param maps: sized collection of mappings; each key must support
        ``startswith`` and each value is an iterable that is appended to a list

    Sets ``self.lookup`` (shortened key -> list), ``self.regex`` (alternation of
    ``regex_caseless`` over the shortened keys) and ``self.all_keys`` (sorted
    list of every key seen).  ``Log.error("not useful")`` is called when the
    size check against ``LOOKUP_COST`` fails.

    NOTE(review): ``max()`` raises ValueError on an empty sequence, which
    happens when ``maps`` is empty or ``compact`` ends up with a single
    entry - confirm callers never reach that.
    """
    ParserElement.__init__(self)

    # gather every key, and the concatenation of all lists registered under it,
    # in first-seen order
    all_keys = set()
    lookup = OrderedDict()
    for m in maps:
        for k, ee in m.items():
            all_keys.add(k)
            lookup.setdefault(k, []).extend(ee)

    # patterns must be mutually exclusive to work
    items = list(lookup.items())
    if len(maps) - max(len(v) for k, v in items) < LOOKUP_COST:
        Log.error("not useful")

    # fold keys that share a prefix into one entry keyed by the shortest prefix
    compact = []
    for k, e in items:
        min_k = k
        # FIND SHORTEST PREFIX
        # (only keys whose list is still non-empty are considered)
        for kk, ee in items:
            if ee and min_k.startswith(kk):
                min_k = kk
        # COLLECT
        acc = []
        for kk, ee in items:
            if kk.startswith(min_k):
                acc.extend(ee)
                # emptied in place so these entries are not collected again
                ee.clear()
        if acc:
            compact.append((min_k, acc))
    if len(maps) - max(len(v) for k, v in compact) < LOOKUP_COST:
        Log.error("not useful")

    # patterns can be shortened so far as they remain exclusive
    # (each key is cut to the largest _distinct() value against the other keys;
    # presumably _distinct returns the prefix length needed to tell two keys
    # apart - verify against its definition)
    shorter = [
        (k[:min_length], e)
        for k, e in sorted(compact, key=lambda p: p[0])
        for min_length in [max(_distinct(k, kk) for kk, _ in compact if kk != k)]
    ]

    self.lookup = {k: e for k, e in shorter}
    self.regex = regex_compile("|".join(regex_caseless(k) for k, _ in shorter))
    self.all_keys = list(sorted(all_keys))
def __init__(self, match, ident_chars=None, caseless=None):
    """
    Token for ``match`` that must not be directly followed by an identifier character.

    :param match: the keyword text
    :param ident_chars: characters that may not follow the keyword; defaults
        to ``self.engine.keyword_chars``
    :param caseless: when truthy, the pattern comes from ``regex_caseless``
        and the instance's class is switched to ``CaselessKeyword``
    """
    Token.__init__(self)
    ident_chars = (
        self.engine.keyword_chars
        if ident_chars is None
        else "".join(sorted(set(ident_chars)))
    )
    pattern = regex_caseless(match) if caseless else re.escape(match)

    # after the keyword: either `$`, or a negative lookahead on the identifier chars
    non_word = "($|(?!" + regex_range(ident_chars) + "))"
    keyword_regex = regex_compile(pattern + non_word)
    self.set_config(ident_chars=ident_chars, match=match, regex=keyword_regex)

    self.parser_name = match
    if caseless:
        self.__class__ = CaselessKeyword
def __init__(self, notChars, min=1, max=0, exact=0):
    """
    Token matching a run of characters that are NOT in ``notChars``.

    :param notChars: characters excluded from the match
    :param min: minimum run length; must be at least 1
    :param max: maximum run length; any value <= 0 is replaced by MAX_INT
    :param exact: when non-zero, overrides both ``min`` and ``max``
    :raises ValueError: when ``min`` < 1
    """
    Token.__init__(self)
    not_chars = "".join(sorted(set(notChars)))

    if min < 1:
        raise ValueError(
            "cannot specify a minimum length < 1; use "
            "Optional(CharsNotIn()) if zero-length char group is permitted"
        )

    max = max if max > 0 else MAX_INT
    if exact:
        min = exact
        max = exact

    if len(notChars) == 1:
        regex = "[^" + regex_range(notChars) + "]"
    else:
        # the slice drops the first character of regex_range's result so the
        # negated opening "[^" can be substituted for it
        regex = "[^" + regex_range(notChars)[1:]

    # regex repetition bounds are written {m,n}; with any other separator the
    # braces are treated as literal text rather than a quantifier
    if not max or max == MAX_INT:
        if min == 0:
            suffix = "*"
        elif min == 1:
            suffix = "+"
        else:
            suffix = "{" + str(min) + ",}"
    elif min == 1 and max == 1:
        suffix = ""
    else:
        suffix = "{" + str(min) + "," + str(max) + "}"

    self.set_config(
        regex=regex_compile(regex + suffix),
        min_len=min,
        max_len=max,
        not_chars=not_chars,
    )
    self.parser_name = text(self)
def __init__(
    self,
    init_chars,
    body_chars=None,
    min=1,
    max=None,
    exact=0,
    asKeyword=False,
    excludeChars=None,
):
    """
    Token for a word: one character from ``init_chars`` followed by
    characters from ``body_chars``.

    :param init_chars: characters allowed in the first position
    :param body_chars: characters allowed afterwards; defaults to ``init_chars``
    :param min: minimum word length; must be at least 1
    :param max: maximum word length; ``None`` or ``MAX_INT`` leaves the
        two-charset form unbounded
    :param exact: when non-zero, overrides both ``min`` and ``max``
    :param asKeyword: when truthy, surround the pattern with regex word boundaries
    :param excludeChars: forwarded to every ``Char`` that is built
    :raises ValueError: when ``min`` < 1
    """
    Token.__init__(self)

    def one_of(chars):
        # single-character matcher honouring excludeChars
        return Char(chars, excludeChars=excludeChars)

    if body_chars is None:
        body_chars = init_chars
    if exact:
        min = max = exact

    if min < 1:
        raise ValueError(
            "cannot specify a minimum length < 1; use Optional(Word()) if"
            " zero-length word is permitted"
        )

    if body_chars == init_chars:
        # one charset throughout: a plain repetition of a single Char
        expression = one_of(init_chars)[min:max]
    else:
        # the first character is consumed separately, so the bounds for the
        # remaining characters are one less; None leaves the slice open-ended
        open_ended = max is None or max == MAX_INT
        tail_max = None if open_ended else max - 1
        expression = one_of(init_chars) + one_of(body_chars)[min - 1:tail_max]
    _, regexp = expression.__regex__()

    if asKeyword:
        regexp = r"\b" + regexp + r"\b"

    self.set_config(regex=regex_compile(regexp), min=min)
def stopOn(self, ender):
    """
    Store the compiled regex of ``ender`` in the ``end`` config entry.

    :param ender: value accepted by ``self.engine.normalize``; nothing is
        changed when it is falsy
    :return: self
    """
    if not ender:
        return self
    stop_expr = self.engine.normalize(ender)
    # only the pattern text (second item of __regex__()) is compiled
    stop_regex = regex_compile(stop_expr.__regex__()[1])
    self.set_config(end=stop_regex)
    return self
def __init__(self):
    """
    Build the token inside an ``Engine(" \\t")`` context and lock it to that engine.

    The regex accepts an optional carriage return followed by a newline or ``$``.
    """
    # presumably the engine argument lists the characters treated as
    # whitespace, so that newlines are not skipped - confirm against Engine
    with Engine(" \t") as line_engine:
        super(LineEnd, self).__init__()
        end_of_line = regex_compile("\\r?(\\n|$)")
        self.set_config(lock_engine=line_engine, regex=end_of_line)
def __init__(
    self,
    quoteChar,
    escChar=None,
    escQuote=None,
    multiline=False,
    unquoteResults=True,
    endQuoteChar=None,
    convertWhitespaceEscapes=True,
):
    """
    Token for text enclosed between an opening and a closing quote marker.

    :param quoteChar: opening quote marker; surrounding whitespace is stripped
    :param escChar: escape character; when given, it followed by any
        printable character is accepted inside the quotes
    :param escQuote: literal accepted inside the quotes
    :param multiline: when falsy, carriage return and newline are not
        accepted inside the quotes
    :param unquoteResults: stored in the config unchanged
    :param endQuoteChar: closing quote marker; defaults to ``quoteChar``
    :param convertWhitespaceEscapes: stored in the config unchanged
    :raises SyntaxError: when either quote marker is empty after stripping
        (a SyntaxWarning is issued first)
    """
    super(QuotedString, self).__init__()

    # remove white space from quote chars - wont work anyway
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn(
            "quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2
        )
        raise SyntaxError()

    if endQuoteChar is not None:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn(
                "endQuoteChar cannot be the empty string",
                SyntaxWarning,
                stacklevel=2,
            )
            raise SyntaxError()
    else:
        endQuoteChar = quoteChar

    self.set_config(
        quote_char=quoteChar,
        end_quote_char=endQuoteChar,
        esc_char=escChar,
        esc_quote=escQuote,
        unquoteResults=unquoteResults,
        convertWhitespaceEscapes=convertWhitespaceEscapes,
    )

    # TODO: FIX THIS MESS. WE SHOULD BE ABLE TO CONSTRUCT REGEX FROM ParserElements
    # `allowed` collects the extra alternatives accepted inside the quotes;
    # `forbidden` collects what an ordinary body character must not start with
    allowed = Empty()
    forbidden = Literal(self.parser_config.end_quote_char)

    if not multiline:
        forbidden |= Char("\r\n")
    if escQuote:
        allowed |= Literal(escQuote)
    if escChar:
        forbidden |= Literal(self.parser_config.esc_char)
        allowed = allowed | escChar + Char(printables)
        self.set_config(
            escCharReplacePattern=re.escape(self.parser_config.esc_char) + "(.)"
        )

    # open quote, any number of body items, close quote
    quoted = (
        Literal(quoteChar)
        + ((~forbidden + AnyChar()) | allowed)[0:]
        + Literal(self.parser_config.end_quote_char)
    )
    _, pattern = quoted.__regex__()

    self.set_config(multiline=multiline, regex=regex_compile(pattern))
    self.parser_name = text(self)