Example #1
from rpython.rlib.parsing.regexparse import parse_regex


def normalize_keywords(keywords):
    # Make each keyword case-insensitive: every alphabetic character is
    # replaced by an alternation over its lower/upper case forms via group().
    ret = []
    for k, t in keywords:
        rule = "".join(
            [group(a.lower(), a.upper()) if a.isalpha() else a for a in k])

        ret.append((parse_regex(rule), t))

    return ret
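
A minimal sketch of how this helper might be exercised. The excerpt does not show group(), so a hypothetical stand-in that builds an alternation such as (i|I) is assumed, and rpython must be importable for parse_regex to work:

# Hypothetical stand-in for the group() helper used above; it presumably
# wraps its alternatives in a regex alternation group.
def group(*alternatives):
    return "(%s)" % "|".join(alternatives)

# "if" becomes the rule parse_regex("(i|I)(f|F)") paired with 'T_IF',
# so keyword matching is case-insensitive.
rules = normalize_keywords([("if", 'T_IF'), ("else", 'T_ELSE')])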
Example #2
def normalize_keywords(keywords):
    ret = []
    for k, t in keywords:
        rule = "".join([
            group(a.lower(), a.upper()) if a.isalpha() else a for a in k
        ])

        ret.append((parse_regex(rule), t))

    return ret
Example #3
    def __init__(self):
        self.token_class = Token

        RULES_FOR_CONTEXT_BRACKETS = [
            (parse_regex("[a-zA-Z_][a-zA-Z_0-9]*"), 'T_VARIABLE')
        ]

        self.rules = {
            CONTEXT_NORMAL: KEYWORDS + RULES,
            CONTEXT_OBJECT_ACCESS: RULES,
            CONTEXT_DOUBLEQUOTE: RULES_FOR_DOUBLE_QUOTE,
            CONTEXT_CURLY_BRACES: KEYWORDS + RULES_FOR_CONTEXT_BRACKETS + RULES,
            CONTEXT_BRACKETS: RULES_FOR_BRACKETS,
            CONTEXT_HEREDOC: RULES_FOR_HEREDOC,
            CONTEXT_BACKTICK: RULES_FOR_BACKTICK
        }

        self.runners_context = {}
        for context, rules in self.rules.items():

            base_dir = rpath.dirname(__file__)
            runner_name = 'runner_%s' % context

            try:
                lexer_runner = __import__(
                    'hippy.%s.%s' % ("lexer_cache", runner_name), None, None,
                    ['recognize', 'automaton'])
                if getattr(lexer_runner, "rulehash", 0) != hash(str(rules)):
                    raise ImportError  # stale cache: force regeneration below

                self.runners_context[context] = (lexer_runner.recognize,
                                                 lexer_runner.automaton)
            except ImportError:
                runner_file = rpath.join(
                    base_dir,
                    ["lexer_cache", "%s.py" % runner_name])

                names, regexs = [], []
                for rule, name in rules:
                    names.append(name)
                    regexs.append(rule)

                rex = regex.LexingOrExpression(regexs, names)
                automaton = rex.make_automaton()
                automaton = automaton.make_deterministic(names)
                automaton.optimize()
                code = automaton.generate_lexing_code()
                with open(runner_file, "w") as f:
                    f.write(code)
                    f.write("\n\nrulehash = %s" % hash(str(rules)))

                # Executing the generated source defines recognize() (and
                # rebinds automaton) in this scope; Python 2 exec statement.
                exec py.code.Source(code).compile()
                self.runners_context[context] = (recognize, automaton)
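
The try/except above implements a simple on-disk cache: import a previously generated runner module, invalidate it when the stored rulehash no longer matches the current rules, and regenerate on ImportError. A stripped-down sketch of the same pattern, using the builtin compile()/exec() instead of py.code.Source; the lexer_cache package and the build_code() generator here are assumptions, not part of the excerpt:

import os

def load_or_generate(cache_dir, name, rules, build_code):
    # Reuse the cached module only if its rulehash stamp still matches.
    try:
        mod = __import__("lexer_cache.%s" % name, None, None, ["recognize"])
        if getattr(mod, "rulehash", 0) != hash(str(rules)):
            raise ImportError  # stale cache: fall through and regenerate
        return mod.recognize
    except ImportError:
        code = build_code(rules)  # assumed to emit source defining recognize()
        with open(os.path.join(cache_dir, "%s.py" % name), "w") as f:
            f.write(code)
            f.write("\n\nrulehash = %s" % hash(str(rules)))
        namespace = {}
        exec(compile(code, name, "exec"), namespace)
        return namespace["recognize"]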
Example #4
    def get_regex(self, r):
        from rpython.rlib.parsing.regexparse import parse_regex
        if r in self.matchers:
            return self.matchers[r]
        regex = parse_regex(r)
        if regex is None:
            raise ValueError(
                "%s is not a valid regular expression" % r)
        automaton = regex.make_automaton().make_deterministic()
        automaton.optimize()
        matcher = automaton.make_lexing_code()
        # The cache stores a py.code.Source wrapper, so later lookups return
        # the wrapper while the first call returns the raw code string.
        self.matchers[r] = py.code.Source(matcher)
        return matcher
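
The method chains several steps; unrolled for a single pattern (assuming rpython and the py library are importable) the pipeline looks like this:

from rpython.rlib.parsing.regexparse import parse_regex
import py

regex = parse_regex("[0-9]+")                 # parse the regex source
automaton = regex.make_automaton().make_deterministic()
automaton.optimize()                          # minimize the DFA
matcher = automaton.make_lexing_code()        # emit matcher source code
source = py.code.Source(matcher)              # what the cache stores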
Example #5
    def __init__(self):
        self.token_class = Token

        RULES_FOR_CONTEXT_BRACKETS = [
            (parse_regex("[a-zA-Z_][a-zA-Z_0-9]*"), 'T_VARIABLE')
        ]

        self.rules = {
            CONTEXT_NORMAL: KEYWORDS + RULES,
            CONTEXT_OBJECT_ACCESS: RULES,
            CONTEXT_DOUBLEQUOTE: RULES_FOR_DOUBLE_QUOTE,
            CONTEXT_CURLY_BRACES: KEYWORDS + RULES_FOR_CONTEXT_BRACKETS + RULES,
            CONTEXT_BRACKETS: RULES_FOR_BRACKETS,
            CONTEXT_HEREDOC: RULES_FOR_HEREDOC,
            CONTEXT_BACKTICK: RULES_FOR_BACKTICK
        }

        self.runners_context = {}
        for context, rules in self.rules.items():

            base_dir = rpath.dirname(__file__)
            runner_name = 'runner_%s' % context

            try:
                lexer_runner = __import__(
                    'hippy.%s.%s' % ("lexer_cache", runner_name), None, None,
                    ['recognize', 'automaton'])

                self.runners_context[context] = (
                    lexer_runner.recognize, lexer_runner.automaton)
            except ImportError:
                runner_file = rpath.join(
                    base_dir, ["lexer_cache", "%s.py" % runner_name])

                names, regexs = [], []
                for rule, name in rules:
                    names.append(name)
                    regexs.append(rule)

                rex = regex.LexingOrExpression(regexs, names)
                automaton = rex.make_automaton()
                automaton = automaton.make_deterministic(names)
                automaton.optimize()
                code = automaton.generate_lexing_code()
                with open(runner_file, "w") as f:
                    f.write(code)

                exec py.code.Source(code).compile()
                self.runners_context[context] = (recognize, automaton)
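
Note that recognize is never assigned explicitly: executing the generated source with the Python 2 exec statement injects it into the enclosing scope (automaton is already bound above). The same mechanism, made explicit with a namespace dict and a placeholder payload:

# Placeholder generated source; the real code comes from
# automaton.generate_lexing_code().
generated = "def recognize(runner, i):\n    return i\n"

namespace = {}
exec(compile(generated, "<generated>", "exec"), namespace)
recognize = namespace["recognize"]  # equivalent to what the bare exec does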
Example #6
    (r"\(", '('),
    (r"\)", ')'),
    (r"\{", '{'),
    (r"\}", '}'),
    (r"\~", '~'),
    (r"\@", '@'),
    (r"\$", '$'),
    ('"', '"'),
    ("`", '`'),
    (r"\n", 'H_NEW_LINE'),
    (r"\r\n", 'H_NEW_LINE'),
    (r"\t", 'H_TABULATURE'),
    (" ", 'H_WHITESPACE')
)

RULES = [(parse_regex(rule), name) for rule, name in _RULES]

_RULES_FOR_DOUBLE_QUOTE = (
    ("\$[a-zA-Z_][0-9a-zA-Z_]*(->[a-zA-Z_][0-9a-zA-Z_]*)?", 'T_VARIABLE'),
    (r"\{\$|\$\{", "T_DOLLAR_OPEN_CURLY_BRACES"),
    (r"([^\"\$\{\\]|\\.|\$[^a-zA-Z\"\{]|{[^\$])+", "T_ENCAPSED_AND_WHITESPACE"),
    (r"\$", "T_DOLLAR"),
    ('"', '"'),
)

RULES_FOR_DOUBLE_QUOTE = [(parse_regex(rule), name)
                          for rule, name in _RULES_FOR_DOUBLE_QUOTE]

_RULES_FOR_BACKTICK = (
    (r"\{\$|\$\{", "T_DOLLAR_OPEN_CURLY_BRACES"),
    ("`", '`'),
    ("\}", '}'),
Example #7
def make_lexer():
    # Each name in tokens is also the name of a module-level variable holding
    # that token's regex source string, hence the globals() lookup.
    return Lexer([parse_regex(globals()[r]) for r in tokens], tokens[:])
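
A self-contained sketch of the module-level setup this convention assumes; the token definitions here are made up, and Lexer is assumed to be rpython.rlib.parsing.lexer.Lexer:

from rpython.rlib.parsing.lexer import Lexer
from rpython.rlib.parsing.regexparse import parse_regex

INTEGER = r"[0-9]+"                   # hypothetical token definitions
NAME = r"[a-zA-Z_][a-zA-Z0-9_]*"
tokens = ["INTEGER", "NAME"]

def make_lexer():
    return Lexer([parse_regex(globals()[r]) for r in tokens], tokens[:])

lexer = make_lexer()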