예제 #1
0
 def __init__(self, expr):
     """
     Wrap expr and try to pre-compile a negative-lookahead regex for it

     :param expr: the expression that must NOT match at the current position
     """
     super(NotAny, self).__init__(expr)
     _, pattern = self.expr.__regex__()
     lookahead = f"(?!{pattern})"
     try:
         self.regex = regex_compile(lookahead)
     except Exception:
         # best effort: leave regex unset when the pattern does not compile
         self.regex = None
예제 #2
0
 def __init__(self, match):
     """
     Initialise the base Literal with the upper-cased text, then store the
     original text and a caseless regex in the config

     :param match: the literal text to match
     """
     Literal.__init__(self, match.upper())
     caseless = regex_compile(regex_caseless(match))
     self.set_config(match=match, regex=caseless)
     # the parser is named after the repr of the compiled pattern text
     compiled = self.parser_config.regex
     self.parser_name = repr(compiled.pattern)
예제 #3
0
 def __init__(self, wordChars=printables):
     """
     Zero-width match at the start of a word

     :param wordChars: characters that are considered part of a word
     """
     super(WordStart, self).__init__()
     # preceded by one non-word character (or a line/string start) ...
     before = CharsNotIn(wordChars, exact=1).__regex__()[1]
     # ... and followed by a word character
     after = Char(wordChars).__regex__()[1]
     boundary = f"(?:(?<={before})|^)(?={after})"
     self.set_config(
         regex=regex_compile(boundary),
         word_chars="".join(sorted(set(wordChars))),
     )
     self.streamlined = True
예제 #4
0
 def __init__(self, wordChars=printables):
     """
     Match the end of a word: a word character behind, then a non-word
     character or end-of-input

     :param wordChars: characters that are considered part of a word
     """
     super(WordEnd, self).__init__()
     self.engine = PLAIN_ENGINE
     unique_chars = "".join(sorted(set(wordChars)))
     inside = Char(wordChars).__regex__()[1]
     outside = (~Char(wordChars)).__regex__()[1]
     boundary = f"(?<={inside})({outside}|$)"
     self.set_config(
         word_chars=unique_chars,
         regex=regex_compile(boundary),
     )
예제 #5
0
 def __init__(self, pattern):
     """
     Build the element from a regex pattern: the pattern is parsed with
     `regex.parseString`, and the streamlined result becomes the wrapped
     expression

     :param pattern:  THE REGEX PATTERN
     """
     expression = regex.parseString(pattern).value().streamline()
     ParseEnhancement.__init__(self, expression)
     # WE ASSUME IT IS SAFE TO ASSIGN regex (NO SERIOUS BACKTRACKING PROBLEMS)
     self.streamlined = True
     compiled = regex_compile(pattern)
     self.regex = compiled
예제 #6
0
 def __init__(self, charset, asKeyword=False, excludeChars=None):
     """
     Represent one character in a given charset

     :param charset: the characters that are allowed to match
     :param asKeyword: when truthy, the pattern is wrapped in regex
                       word-boundary anchors
     :param excludeChars: characters removed from charset before the
                          pattern is built
     """
     Token.__init__(self)
     if excludeChars:
         charset = set(charset) - set(excludeChars)
     regex = regex_range(charset)
     if asKeyword:
         # wrap the character-class pattern itself; the previous code
         # interpolated `self` (a not-yet-configured parser) by mistake
         regex = r"\b%s\b" % regex
     self.set_config(
         regex=regex_compile(regex),
         charset="".join(sorted(set(charset))),
     )
예제 #7
0
    def __init__(self, maps):
        """
        Merge several key -> elements maps into one prefix lookup table and a
        single regex that recognises the (shortened) keys.

        Sets:
          * self.lookup   - dict of shortened key -> accumulated elements
          * self.regex    - compiled alternation of regex_caseless(key)
          * self.all_keys - sorted list of every key seen in maps

        :param maps: sized collection of mappings; each maps a string key to
                     an iterable of elements (presumably parser expressions
                     -- TODO confirm against callers)
        """
        ParserElement.__init__(self)

        # merge all maps: elements that share a key end up in the same list
        all_keys = set()
        lookup = OrderedDict()
        for m in maps:
            for k, ee in m.items():
                all_keys.add(k)
                lookup.setdefault(k, []).extend(ee)

        # patterns must be mutually exclusive to work
        items = list(lookup.items())
        # report "not useful" when len(maps) minus the largest bucket is below LOOKUP_COST
        # NOTE(review): max() raises ValueError when no keys were collected -- confirm maps is never empty
        if len(maps) - max(len(v) for k, v in items) < LOOKUP_COST:
            Log.error("not useful")

        # fold keys that are prefixes of one another into a single bucket
        compact = []
        for k, e in items:
            min_k = k
            # FIND SHORTEST PREFIX
            for kk, ee in items:
                # emptied lists (already collected by an earlier pass) are ignored
                if ee and min_k.startswith(kk):
                    min_k = kk
            # COLLECT
            acc = []
            for kk, ee in items:
                if kk.startswith(min_k):
                    acc.extend(ee)
                    # clear in place so these elements are not collected a second time
                    ee.clear()
            if acc:
                compact.append((min_k, acc))
        # same usefulness check, now against the compacted buckets
        if len(maps) - max(len(v) for k, v in compact) < LOOKUP_COST:
            Log.error("not useful")

        # patterns can be shortened so far as they remain exclusive
        # presumably _distinct(k, kk) is the prefix length needed to tell k from kk -- TODO confirm
        # NOTE(review): with a single entry in compact the inner generator is empty and max() raises ValueError
        shorter = [
            (k[:min_length], e)
            for k, e in sorted(compact, key=lambda p: p[0])
            for min_length in [max(_distinct(k, kk) for kk, _ in compact if kk != k)]
        ]

        self.lookup = {k: e for k, e in shorter}
        self.regex = regex_compile("|".join(regex_caseless(k) for k, _ in shorter))
        self.all_keys = list(sorted(all_keys))
예제 #8
0
    def __init__(self, match, ident_chars=None, caseless=None):
        """
        Match the given text only when it is not immediately followed by an
        identifier character

        :param match: the keyword text
        :param ident_chars: characters that may not follow the keyword;
                            defaults to the engine's keyword_chars
        :param caseless: when truthy, match case-insensitively and switch
                         this instance's class to CaselessKeyword
        """
        Token.__init__(self)

        if ident_chars is not None:
            ident_chars = "".join(sorted(set(ident_chars)))
        else:
            ident_chars = self.engine.keyword_chars

        pattern = regex_caseless(match) if caseless else re.escape(match)

        # keyword must be followed by end-of-input or a non-identifier character
        follow_class = regex_range(ident_chars)
        non_word = "($|(?!" + follow_class + "))"
        compiled = regex_compile(pattern + non_word)
        self.set_config(ident_chars=ident_chars, match=match, regex=compiled)

        self.parser_name = match
        if caseless:
            self.__class__ = CaselessKeyword
예제 #9
0
    def __init__(self, notChars, min=1, max=0, exact=0):
        """
        Match a run of characters, none of which appear in notChars

        :param notChars: characters that must not appear in the match
        :param min: minimum run length; must be at least 1
        :param max: maximum run length; values <= 0 mean "no upper limit"
        :param exact: when non-zero, overrides both min and max
        :raises ValueError: if min is less than 1
        """
        Token.__init__(self)
        not_chars = "".join(sorted(set(notChars)))

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use "
                "Optional(CharsNotIn()) if zero-length char group is permitted"
            )

        max = max if max > 0 else MAX_INT
        if exact:
            min = exact
            max = exact

        # NOTE(review): assumes regex_range() returns a bare (escaped) character
        # for a single char and a bracketed class otherwise -- confirm; a
        # notChars with repeated characters (e.g. "aa") may take the wrong branch
        if len(notChars) == 1:
            regex = "[^" + regex_range(notChars) + "]"
        else:
            regex = "[^" + regex_range(notChars)[1:]

        # regex repetition bounds are written {m,n}; the previous code used a
        # colon, which the re module treats as literal text
        if not max or max == MAX_INT:
            if min == 0:
                suffix = "*"
            elif min == 1:
                suffix = "+"
            else:
                suffix = "{" + str(min) + ",}"
        elif min == 1 and max == 1:
            suffix = ""
        else:
            suffix = "{" + str(min) + "," + str(max) + "}"

        self.set_config(
            regex=regex_compile(regex + suffix),
            min_len=min,
            max_len=max,
            not_chars=not_chars,
        )
        self.parser_name = text(self)
예제 #10
0
    def __init__(
        self,
        init_chars,
        body_chars=None,
        min=1,
        max=None,
        exact=0,
        asKeyword=False,
        excludeChars=None,
    ):
        """
        Match a word: one character from init_chars followed by characters
        from body_chars

        :param init_chars: characters allowed in the first position
        :param body_chars: characters allowed after the first; defaults to
                           init_chars
        :param min: minimum word length; must be at least 1
        :param max: maximum word length; None means no upper limit
        :param exact: when non-zero, overrides both min and max
        :param asKeyword: when truthy, surround the pattern with word
                          boundary anchors
        :param excludeChars: characters removed from both character sets
        :raises ValueError: if the effective minimum length is below 1
        """
        Token.__init__(self)

        if exact:
            min = max = exact
        if body_chars is None:
            body_chars = init_chars

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Optional(Word()) if"
                " zero-length word is permitted")

        first = Char(init_chars, excludeChars=excludeChars)
        if body_chars == init_chars:
            # one character class repeated for the whole word
            expr = first[min:max]
        else:
            # the first character is consumed separately, so the bounds for
            # the remaining characters shrink by one
            rest = Char(body_chars, excludeChars=excludeChars)
            if max is None or max == MAX_INT:
                expr = first + rest[min - 1:]
            else:
                expr = first + rest[min - 1:max - 1]
        _, regexp = expr.__regex__()

        if asKeyword:
            regexp = r"\b" + regexp + r"\b"

        self.set_config(regex=regex_compile(regexp), min=min)
예제 #11
0
 def stopOn(self, ender):
     """
     Record a compiled "end" regex built from ender; a falsy ender is ignored

     :param ender: expression (or anything the engine can normalize) that
                   marks where to stop
     :return: self, so calls can be chained
     """
     if not ender:
         return self
     normalized = self.engine.normalize(ender)
     end_pattern = normalized.__regex__()[1]
     self.set_config(end=regex_compile(end_pattern))
     return self
예제 #12
0
 def __init__(self):
     """
     Match an optional carriage return followed by a newline or end-of-input
     """
     # presumably the Engine argument is the set of skippable whitespace
     # (space and tab only) -- TODO confirm
     with Engine(" \t") as line_engine:
         super(LineEnd, self).__init__()
         end_of_line = regex_compile("\\r?(\\n|$)")
         self.set_config(lock_engine=line_engine, regex=end_of_line)
예제 #13
0
    def __init__(
        self,
        quoteChar,
        escChar=None,
        escQuote=None,
        multiline=False,
        unquoteResults=True,
        endQuoteChar=None,
        convertWhitespaceEscapes=True,
    ):
        """
        Match a string delimited by quote characters

        :param quoteChar: opening quote (surrounding whitespace is stripped)
        :param escChar: escape character; escChar followed by a printable
                        character is accepted inside the string
        :param escQuote: literal sequence accepted inside the string
        :param multiline: when falsy, carriage return and newline end the
                          match
        :param unquoteResults: stored in the config
        :param endQuoteChar: closing quote; defaults to quoteChar
        :param convertWhitespaceEscapes: stored in the config
        :raises SyntaxError: if a quote character is empty after stripping
        """
        super(QuotedString, self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn(
                "quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2
            )
            raise SyntaxError()

        if endQuoteChar is not None:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn(
                    "endQuoteChar cannot be the empty string",
                    SyntaxWarning,
                    stacklevel=2,
                )
                raise SyntaxError()
        else:
            endQuoteChar = quoteChar

        self.set_config(
            quote_char=quoteChar,
            end_quote_char=endQuoteChar,
            esc_char=escChar,
            esc_quote=escQuote,
            unquoteResults=unquoteResults,
            convertWhitespaceEscapes=convertWhitespaceEscapes,
        )
        # TODO: FIX THIS MESS. WE SHOULD BE ABLE TO CONSTRUCT REGEX FROM ParserElements
        # `allowed` alternatives are accepted as-is inside the quotes;
        # `forbidden` alternatives may not start an ordinary character
        allowed = Empty()
        forbidden = Literal(self.parser_config.end_quote_char)

        if not multiline:
            forbidden |= Char("\r\n")
        if escQuote:
            allowed |= Literal(escQuote)
        if escChar:
            forbidden |= Literal(self.parser_config.esc_char)
            allowed = allowed | escChar + Char(printables)
            replace_pattern = re.escape(self.parser_config.esc_char) + "(.)"
            self.set_config(escCharReplacePattern=replace_pattern)

        # opening quote, any number of body items, closing quote
        opener = Literal(quoteChar)
        body = ((~forbidden + AnyChar()) | allowed)[0:]
        closer = Literal(self.parser_config.end_quote_char)
        _, pattern = (opener + body + closer).__regex__()

        self.set_config(multiline=multiline, regex=regex_compile(pattern))

        self.parser_name = text(self)