def normalize_keywords(keywords):
    # Expand each literal keyword into a case-insensitive regex rule:
    # every letter becomes a (lower|upper) alternation, e.g. "if" ->
    # "(i|I)(f|F)", which is then parsed into a rule tuple.
    ret = []
    for k, t in keywords:
        rule = "".join(
            [group(a.lower(), a.upper()) if a.isalpha() else a for a in k])
        ret.append((parse_regex(rule), t))
    return ret
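# Hedged sketch of the helper `normalize_keywords` relies on: the real
# `group` and `parse_regex` live elsewhere in this code base, so this
# stand-in is an assumption for illustration only.
def group(*alternatives):
    # group("i", "I") -> "(i|I)"
    return "(%s)" % "|".join(alternatives)

# With such a `group`, the keyword "if" expands to the rule "(i|I)(f|F)",
# i.e. keywords match case-insensitively without lower-casing the input.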
def __init__(self):
    self.token_class = Token
    RULES_FOR_CONTEXT_BRACKETS = [
        (parse_regex("[a-zA-Z_][a-zA-Z_0-9]*"), 'T_VARIABLE')]
    self.rules = {
        CONTEXT_NORMAL: KEYWORDS + RULES,
        CONTEXT_OBJECT_ACCESS: RULES,
        CONTEXT_DOUBLEQUOTE: RULES_FOR_DOUBLE_QUOTE,
        CONTEXT_CURLY_BRACES: KEYWORDS + RULES_FOR_CONTEXT_BRACKETS + RULES,
        CONTEXT_BRACKETS: RULES_FOR_BRACKETS,
        CONTEXT_HEREDOC: RULES_FOR_HEREDOC,
        CONTEXT_BACKTICK: RULES_FOR_BACKTICK,
    }
    self.runners_context = {}
    for context, rules in self.rules.items():
        base_dir = rpath.dirname(__file__)
        runner_name = 'runner_%s' % context
        try:
            # Try to reuse a previously generated lexer runner module.
            lexer_runner = __import__(
                'hippy.%s.%s' % ("lexer_cache", runner_name),
                None, None, ['recognize', 'automaton'])
            # Invalidate the cached module if the rules changed since
            # it was generated.
            if getattr(lexer_runner, "rulehash", 0) != hash(str(rules)):
                raise ImportError   # hack: fall through to regeneration
            self.runners_context[context] = (
                lexer_runner.recognize, lexer_runner.automaton)
        except ImportError:
            # Regenerate the lexing code and cache it on disk.
            runner_file = rpath.join(
                base_dir, ["lexer_cache", "%s.py" % runner_name])
            names, regexs = [], []
            for rule, name in rules:
                names.append(name)
                regexs.append(rule)
            rex = regex.LexingOrExpression(regexs, names)
            automaton = rex.make_automaton()
            automaton = automaton.make_deterministic(names)
            automaton.optimize()
            code = automaton.generate_lexing_code()
            with open(runner_file, "w") as f:
                f.write(code)
                f.write("\n\nrulehash = %s" % hash(str(rules)))
            exec py.code.Source(code).compile()
            self.runners_context[context] = (recognize, automaton)
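# Self-contained sketch (an assumption for illustration, not hippy's actual
# API) of the cache scheme used above: the generated module carries a
# `rulehash` constant; if it no longer matches a hash of the current rules,
# the ImportError path regenerates the lexing code.
def load_cached_runner(module_name, rules, regenerate):
    try:
        mod = __import__(module_name, None, None, ['recognize', 'automaton'])
        if getattr(mod, "rulehash", 0) != hash(str(rules)):
            raise ImportError  # cached runner is stale: rules changed
        return mod.recognize, mod.automaton
    except ImportError:
        return regenerate(rules)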
def get_regex(self, r):
    from rpython.rlib.parsing.regexparse import parse_regex
    # Compiled matchers are cached, keyed by the regex source string.
    if r in self.matchers:
        return self.matchers[r]
    regex = parse_regex(r)
    if regex is None:
        raise ValueError(
            "%s is not a valid regular expression" % r)
    automaton = regex.make_automaton().make_deterministic()
    automaton.optimize()
    matcher = automaton.make_lexing_code()
    self.matchers[r] = py.code.Source(matcher)
    return matcher
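# Usage sketch (assumes `self.matchers` is a plain dict set up in the
# constructor): the first call per pattern parses it and compiles the
# automaton; later calls with the same source string hit the cache.
#
#   lexer.get_regex("[0-9]+")   # parses, builds DFA, caches lexing code
#   lexer.get_regex("[0-9]+")   # cache hit: no recompilation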
    ('\(', '('), ('\)', ')'),
    ("\{", '{'), ("\}", '}'),
    ("\~", '~'), ("\@", '@'),
    ("\$", '$'), ("\"", '"'),
    ("`", '`'),
    ("\\n", 'H_NEW_LINE'), (r"\r\n", 'H_NEW_LINE'),
    ("\\t", 'H_TABULATURE'), (" ", 'H_WHITESPACE'),
)
RULES = [(parse_regex(rule), name) for rule, name in _RULES]

_RULES_FOR_DOUBLE_QUOTE = (
    ("\$[a-zA-Z_][0-9a-zA-Z_]*(->[a-zA-Z_][0-9a-zA-Z_]*)?", 'T_VARIABLE'),
    (r"\{\$|\$\{", "T_DOLLAR_OPEN_CURLY_BRACES"),
    (r"([^\"\$\{\\]|\\.|\$[^a-zA-Z\"\{]|{[^\$])+",
     "T_ENCAPSED_AND_WHITESPACE"),
    (r"\$", "T_DOLLAR"),
    ('"', '"'),
)
RULES_FOR_DOUBLE_QUOTE = [(parse_regex(rule), name)
                          for rule, name in _RULES_FOR_DOUBLE_QUOTE]

_RULES_FOR_BACKTICK = (
    (r"\{\$|\$\{", "T_DOLLAR_OPEN_CURLY_BRACES"),
    ("`", '`'),
    ("\}", '}'),
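# Hand-worked illustration of the double-quote rules above (an assumption
# derived from the regexes, not captured lexer output): inside a
# double-quoted PHP string,
#
#   "$name is {$obj->x}"
#
# would tokenize roughly as T_VARIABLE("$name"),
# T_ENCAPSED_AND_WHITESPACE(" is "), T_DOLLAR_OPEN_CURLY_BRACES("{$"),
# followed by the curly-braces context rules for `obj->x`.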
def make_lexer():
    return Lexer([parse_regex(globals()[r]) for r in tokens], tokens[:])
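# Hedged usage sketch of the convention `make_lexer` assumes: every name in
# `tokens` is also a module-level global bound to that token's regex source,
# so globals()[r] resolves the pattern by name. The names below are invented
# for illustration:
#
#   tokens = ["T_NUMBER", "T_NAME"]
#   T_NUMBER = "[0-9]+"
#   T_NAME = "[a-zA-Z_][a-zA-Z_0-9]*"
#   lexer = make_lexer()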