def create_parser(self, pickle_id = None): startrule = self.ast.children[1] # startrule grammar = startrule.children[1] parser = grammar.children[0] assert parser.symbol.name == "parser" self.parse_rules(parser) if self.implicit_ws(): ws_rule = Rule() ws_rule.symbol = Nonterminal("WS") ws_rule.add_alternative([Terminal("<ws>"), Nonterminal("WS")]) ws_rule.add_alternative([Terminal("<return>"), Nonterminal("WS")]) ws_rule.add_alternative([Terminal("<backslash>"), Terminal("<return>"), Nonterminal("WS")]) ws_rule.add_alternative([]) # or empty self.rules[ws_rule.symbol] = ws_rule # allow whitespace/comments at beginning of file start_rule = Rule() start_rule.symbol = Nonterminal("Startrule") start_rule.add_alternative([Nonterminal("WS"), self.start_symbol]) self.rules[start_rule.symbol] = start_rule self.start_symbol = start_rule.symbol incparser = IncParser() incparser.from_dict(self.rules, self.start_symbol, self.lr_type, self.implicit_ws(), pickle_id, self.precedences) incparser.init_ast() self.incparser = incparser
def test_input():
    """Deprecated smoke test for incremental re-parsing; skipped unconditionally."""
    pytest.skip("deprecated")
    # Everything below is unreachable (skip raises); kept for reference.
    parser = IncParser(grammar, LR1)
    assert parser.inc_parse("1 * 2") == True
    parser.get_ast().pprint()
    original_root = parser.ast_stack[0]
    assert parser.inc_parse("1 + 2") == True
    parser.stack[1].pprint()
    # the AST root must be reused across incremental parses
    assert original_root is parser.ast_stack[0]
    assert False
def test_empty_start():
    """Deprecated test for grammars whose start rule can derive epsilon; skipped."""
    pytest.skip("deprecated")
    # Unreachable after skip(); preserved for reference.
    grammar = """ S ::= "a" | """
    parser = IncParser(grammar, LR1)
    assert parser.inc_parse("") == True
    parser.get_ast().pprint()
    assert parser.inc_parse("a") == True
    parser.stack[1].pprint()
    assert False
def get_parser_lexer_for_language(self, grammar, whitespaces):
    """Return an (incparser, inclexer) pair for the given grammar object.

    Accepts either a Language (parser built from its grammar text) or an
    EcoFile (delegates to its load()); anything else prints an error and
    returns None.
    """
    # NOTE: these imports may register modules as a side effect; kept even
    # though the names are not referenced in every branch.
    from grammar_parser.bootstrap import BootstrapParser
    from jsonmanager import JsonManager
    if isinstance(grammar, EcoFile):
        parser, lexer = grammar.load()
        return parser, lexer
    if isinstance(grammar, Language):
        parser = IncParser(grammar.grammar, 1, whitespaces)
        parser.init_ast()
        lexer = IncrementalLexer(grammar.priorities)
        return parser, lexer
    print("Grammar Error: could not determine grammar type")
    return
def set_language(self, lang, whitespace):
    """Install `lang` as the editor's main language.

    Dispatches on the concrete type of `lang`: Language (build parser
    from grammar text), EcoGrammar (bootstrap-parse it) or EcoFile
    (load cached parser/lexer).
    """
    if isinstance(lang, Language):
        parser = IncParser(str(lang.grammar), 1, whitespace)
        parser.init_ast()
        inclexer = IncrementalLexer(str(lang.priorities))
        self.editor.set_mainlanguage(parser, inclexer, lang.name)
    elif isinstance(lang, EcoGrammar):
        bp = BootstrapParser(lr_type=1, whitespaces=whitespace)
        bp.parse(lang.grammar)
        self.editor.set_mainlanguage(bp.incparser, bp.inclexer, lang.name)
    elif isinstance(lang, EcoFile):
        parser, inclexer = lang.load()
        self.editor.set_mainlanguage(parser, inclexer, lang.name)
def set_language(self, lang, whitespace):
    """Install `lang` as the editor's main language.

    Dispatches on the concrete type of `lang`: Language (build parser
    from grammar text), EcoGrammar (bootstrap-parse it) or EcoFile
    (load cached parser/lexer).
    """
    if isinstance(lang, Language):
        # (removed two dead locals `grammar`/`new_priorities` that
        # duplicated the str() calls below and were never used)
        lrp = IncParser(str(lang.grammar), 1, whitespace)
        lrp.init_ast()
        lexer = IncrementalLexer(str(lang.priorities))
        self.editor.set_mainlanguage(lrp, lexer, lang.name)
    elif isinstance(lang, EcoGrammar):
        bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespace)
        bootstrap.parse(lang.grammar)
        self.editor.set_mainlanguage(bootstrap.incparser, bootstrap.inclexer, lang.name)
    elif isinstance(lang, EcoFile):
        incparser, inclexer = lang.load()
        self.editor.set_mainlanguage(incparser, inclexer, lang.name)
def parse(self, ecogrammar):
    """Parse an Eco grammar source text and build its parser and lexer.

    Loads the meta-grammar, imports `ecogrammar` into a tree manager,
    then derives options, rules, parser and lexer from the resulting AST.
    Raises Exception if the grammar text has syntax errors.
    """
    # this is only called for grammars based on Eco Grammar (not Eco Grammar (Eco))
    from grammars.eco_grammar import eco_grammar as grammar
    self.lexer = IncrementalLexer(grammar.priorities)
    self.parser = IncParser(grammar.grammar, 1, True)
    self.parser.init_ast()
    # root of the (empty) parse tree the imported file will populate
    self.ast = self.parser.previous_version.parent
    self.treemanager = TreeManager()
    self.treemanager.add_parser(self.parser, self.lexer, grammar.name)
    self.treemanager.import_file(ecogrammar)
    if self.parser.last_status == False:
        raise Exception("Invalid input grammar due to syntax errors")
    # order matters: options must be read before rules are interpreted
    self.read_options()
    self.parse_both()
    self.create_parser()
    self.create_lexer()
def test_multiple_changes_at_once():
    """Deprecated test: one token edit expanding into several tokens; skipped."""
    pytest.skip("deprecated")
    # Unreachable after skip(); preserved for reference.
    grammar = """ S ::= C S | C C ::= "a" | "b" """
    incparser = IncParser(grammar, LR1)
    incparser.check("a a a")
    incparser.previous_version = incparser.get_ast()
    tree = incparser.previous_version
    # tree shape: root -> bos, S -> (C "a", S -> (C "a", ...))
    c_node = tree.parent.children[1].children[1].children[0]
    assert c_node.symbol == Nonterminal("C")
    assert c_node.children[0].symbol == Terminal("a")
    # rewrite this terminal so it now holds three tokens at once
    target = c_node.children[0]
    target.symbol.name = "b b a"
    apply_change(incparser, target)
    incparser.previous_version.pprint()
    incparser.inc_parse()
    incparser.stack[1].pprint()
    assert False
def test_incparser_ast():
    """Deprecated test: reparsing must keep the same AST root node; skipped."""
    pytest.skip("lrp.check is deprecated")
    # Unreachable after skip(); preserved for reference.
    parser = IncParser(grammar)
    parser.check("1 + 2 * 3")
    first = parser.get_ast()
    parser.check("1 + 2 * 1")
    second = parser.get_ast()
    # reparsing should reuse parent node
    assert first.parent is second.parent
def load(self):
    """Return an (incparser, inclexer) pair for this file, using _cache.

    Cache hit: rebuild the parser from the cached pickle_id, but re-parse
    the grammar rules from disk since the incremental parser needs them
    to detect comments. Cache miss: bootstrap everything and populate
    the cache.
    """
    from grammar_parser.bootstrap import BootstrapParser
    from jsonmanager import JsonManager
    # fixed: dict.has_key() (Python 2 only) -> `in`, consistent with the
    # other load() implementation in this project
    if self.name + "::parser" in _cache:
        root, language, whitespaces = _cache[self.name + "::json"]
        # parse rules as they are needed by the incremental parser to
        # detect comments
        manager = JsonManager(unescape=True)
        root, language, whitespaces = manager.load(self.filename)[0]
        pickle_id = hash(self)
        bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespaces)
        bootstrap.ast = root
        bootstrap.parse_rules(root.children[1].children[1].children[0])
        pickle_id, whitespace = _cache[self.name + "::parser"]
        from incparser.incparser import IncParser
        incparser = IncParser()
        incparser.from_dict(bootstrap.rules, None, None, whitespace, pickle_id, None)
        incparser.init_ast()
        inclexer = _cache[self.name + "::lexer"]
        incparser.lexer = inclexer # give parser a reference to its lexer (needed for multiline comments)
        return (incparser, inclexer)
    else:
        manager = JsonManager(unescape=True)
        root, language, whitespaces = manager.load(self.filename)[0]
        pickle_id = hash(self)
        bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespaces)
        bootstrap.ast = root
        bootstrap.extra_alternatives = self.alts
        bootstrap.change_startrule = self.extract
        bootstrap.read_options()
        bootstrap.parse_both()
        bootstrap.create_parser(pickle_id)
        bootstrap.create_lexer()
        whitespace = bootstrap.implicit_ws()
        _cache[self.name + "::lexer"] = bootstrap.inclexer
        _cache[self.name + "::json"] = (root, language, whitespaces)
        _cache[self.name + "::parser"] = (pickle_id, whitespace)
        bootstrap.incparser.lexer = bootstrap.inclexer
        return (bootstrap.incparser, bootstrap.inclexer)
def load(self):
    """Return an (incparser, inclexer) pair for this file, using _cache.

    Cache hit: reconstruct the parser purely from the cached pickle_id.
    Cache miss: bootstrap parser and lexer from the grammar file and
    populate the cache.
    """
    from grammar_parser.bootstrap import BootstrapParser
    from jsonmanager import JsonManager
    # fixed: dict.has_key() (Python 2 only) -> `in`, consistent with the
    # other load() implementation in this project
    if self.name + "::parser" in _cache:
        root, language, whitespaces = _cache[self.name + "::json"]
        pickle_id, whitespace = _cache[self.name + "::parser"]
        from incparser.incparser import IncParser
        incparser = IncParser()
        incparser.from_dict(None, None, None, whitespace, pickle_id, None)
        incparser.init_ast()
        inclexer = _cache[self.name + "::lexer"]
        return (incparser, inclexer)
    else:
        manager = JsonManager(unescape=True)
        root, language, whitespaces = manager.load(self.filename)[0]
        pickle_id = hash(self)
        bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespaces)
        bootstrap.ast = root
        bootstrap.extra_alternatives = self.alts
        bootstrap.change_startrule = self.extract
        bootstrap.read_options()
        bootstrap.create_parser(pickle_id)
        whitespace = bootstrap.implicit_ws()
        bootstrap.create_lexer()
        _cache[self.name + "::lexer"] = bootstrap.inclexer
        _cache[self.name + "::json"] = (root, language, whitespaces)
        _cache[self.name + "::parser"] = (pickle_id, whitespace)
        return (bootstrap.incparser, bootstrap.inclexer)
def parse(self, ecogrammar):
    """Parse an Eco grammar source text and build its parser and lexer.

    Loads the meta-grammar, imports `ecogrammar` into a tree manager,
    then derives options, parser and lexer from the resulting AST.
    Raises Exception (with the offending node) on syntax errors.
    """
    # this is only called for grammars based on Eco Grammar (not Eco Grammar (Eco))
    from grammars.eco_grammar import eco_grammar as grammar
    self.lexer = IncrementalLexer(grammar.priorities)
    self.parser = IncParser(grammar.grammar, 1, True)
    self.parser.init_ast()
    # root of the (empty) parse tree the imported file will populate
    self.ast = self.parser.previous_version.parent
    self.treemanager = TreeManager()
    self.treemanager.add_parser(self.parser, self.lexer, grammar.name)
    self.treemanager.import_file(ecogrammar)
    if self.parser.last_status == False:
        raise Exception("Invalid input grammar: at %s %s" % (self.parser.error_node.prev_term, self.parser.error_node))
    # order matters: options must be read before rules are interpreted
    self.read_options()
    self.create_parser()
    self.create_lexer()
def load(self, buildlexer=True):
    """Return an (incparser, inclexer) pair for this file, using _cache.

    Cache hit: rebuild a parser directly from the cached syntax table and
    whitespace flag. Cache miss: bootstrap parser and lexer from the
    grammar file and populate the cache.

    buildlexer: when False, create_lexer stores raw (names, regexs)
    instead of a compiled lexer (forwarded to bootstrap.create_lexer).
    """
    from grammar_parser.bootstrap import BootstrapParser
    from jsonmanager import JsonManager
    from incparser.incparser import IncParser
    if self.name + "::parser" in _cache:
        syntaxtable, whitespaces = _cache[self.name + "::parser"]
        incparser = IncParser()
        incparser.syntaxtable = syntaxtable
        incparser.whitespaces = whitespaces
        incparser.init_ast()
        incparser.lang = self.name
        inclexer = _cache[self.name + "::lexer"]
        incparser.lexer = inclexer # give parser a reference to its lexer (needed for multiline comments)
        incparser.previous_version.parent.name = self.name
        return (incparser, inclexer)
    else:
        manager = JsonManager(unescape=True)
        root, language, whitespaces = manager.load(self.filename)[0]
        bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespaces)
        bootstrap.ast = root
        bootstrap.extra_alternatives = self.alts
        bootstrap.change_startrule = self.extract
        bootstrap.read_options()
        # whitespace flag must be known before pickling id is computed
        whitespace = bootstrap.implicit_ws()
        pickle_id = self.pickleid(whitespace)
        bootstrap.parse_both()
        bootstrap.create_parser(pickle_id)
        bootstrap.create_lexer(buildlexer)
        _cache[self.name + "::lexer"] = bootstrap.inclexer
        _cache[self.name + "::json"] = (root, language, whitespaces)
        _cache[self.name + "::parser"] = (bootstrap.incparser.syntaxtable, whitespace)
        bootstrap.incparser.lang = self.name
        bootstrap.incparser.previous_version.parent.name = self.name
        bootstrap.incparser.lexer = bootstrap.inclexer
        return (bootstrap.incparser, bootstrap.inclexer)
def test_multiple_changes_3():
    """Deprecated test: changing an operator token and reparsing; skipped."""
    pytest.skip("deprecated")
    # Unreachable after skip(); preserved for reference.
    incparser = IncParser(grammar, LR1)
    incparser.check("1 + 2")
    incparser.previous_version = incparser.get_ast()
    tree = incparser.previous_version
    Viewer().show_tree(incparser.previous_version.parent.children[1])
    op_node = tree.parent.children[1].children[1]
    assert op_node.symbol == Terminal("+")
    # mutate the operator in place, then reparse incrementally
    op_node.symbol.name = "*"
    apply_change(incparser, op_node)
    incparser.inc_parse()
    incparser.stack[1].pprint()
    Viewer().show_tree(incparser.stack[1])
    assert False
def load(self):
    """Return an (incparser, inclexer) pair for this file, using _cache.

    Cache hit: rebuild the parser from the cached pickle_id, but re-parse
    the grammar rules from disk since the incremental parser needs them
    to detect comments. Cache miss: bootstrap everything and populate
    the cache.
    """
    from grammar_parser.bootstrap import BootstrapParser
    from jsonmanager import JsonManager
    # fixed: dict.has_key() (Python 2 only) -> `in`, consistent with the
    # other load() implementation in this project
    if self.name + "::parser" in _cache:
        root, language, whitespaces = _cache[self.name + "::json"]
        # parse rules as they are needed by the incremental parser to
        # detect comments
        manager = JsonManager(unescape=True)
        root, language, whitespaces = manager.load(self.filename)[0]
        pickle_id = hash(self)
        bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespaces)
        bootstrap.ast = root
        bootstrap.parse_rules(root.children[1].children[1].children[0])
        pickle_id, whitespace = _cache[self.name + "::parser"]
        from incparser.incparser import IncParser
        incparser = IncParser()
        incparser.from_dict(bootstrap.rules, None, None, whitespace, pickle_id, None)
        incparser.init_ast()
        inclexer = _cache[self.name + "::lexer"]
        incparser.lexer = inclexer # give parser a reference to its lexer (needed for multiline comments)
        return (incparser, inclexer)
    else:
        manager = JsonManager(unescape=True)
        root, language, whitespaces = manager.load(self.filename)[0]
        pickle_id = hash(self)
        bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespaces)
        bootstrap.ast = root
        bootstrap.extra_alternatives = self.alts
        bootstrap.change_startrule = self.extract
        bootstrap.read_options()
        bootstrap.create_parser(pickle_id)
        whitespace = bootstrap.implicit_ws()
        bootstrap.create_lexer()
        _cache[self.name + "::lexer"] = bootstrap.inclexer
        _cache[self.name + "::json"] = (root, language, whitespaces)
        _cache[self.name + "::parser"] = (pickle_id, whitespace)
        bootstrap.incparser.lexer = bootstrap.inclexer
        return (bootstrap.incparser, bootstrap.inclexer)
def test_deletion():
    """Deprecated test: deleting a terminal node and reparsing; skipped."""
    pytest.skip("deprecated")
    # Unreachable after skip(); preserved for reference.
    grammar = """ S ::= C S | C C ::= "a" | "b" """
    incparser = IncParser(grammar, LR1)
    incparser.check("a a a")
    incparser.previous_version = incparser.get_ast()
    tree = incparser.previous_version
    Viewer().show_tree(incparser.previous_version.parent)
    c_node = tree.parent.children[1].children[1].children[0]
    assert c_node.symbol == Nonterminal("C")
    assert c_node.children[0].symbol == Terminal("a")
    # delete terminal node
    c_node.children.pop(0)
    apply_change(incparser, c_node)
    incparser.inc_parse()
    Viewer().show_tree(incparser.stack[1])
    assert False
class BootstrapParser(object):
    """Builds an incremental parser and lexer from a parsed Eco grammar AST.

    The grammar AST (self.ast) is walked to collect parser rules
    (self.rules), lexer rules (self.lrules), options and precedences;
    create_parser()/create_lexer() then turn these into an IncParser and
    an incremental lexer.

    Fix applied: all dict.has_key() calls (Python 2 only) replaced with
    the `in` operator; no other behavior changed.
    """

    def __init__(self, lr_type=1, whitespaces=False):
        self.lr_type = lr_type
        self.whitespaces = whitespaces
        # load (old) parser for grammar grammar
        self.rules = {}
        self.lrules = []
        self.start_symbol = None
        self.incparser = None
        self.inclexer = None
        self.terminals = set()
        self.extra_alternatives = {}
        self.change_startrule = None
        self.options = {"nowhitespace": []}
        self.precedences = []
        self.current_rulename = ""
        self.all_terminals = set()
        self.functions = []
        self.prod_ids = {}

    def implicit_ws(self):
        """True if the grammar's "implicit_ws" option is the string "true"."""
        if "implicit_ws" in self.options:
            if self.options["implicit_ws"] == "true":
                return True
        return False

    def implicit_newlines(self):
        """True unless the "implicit_newlines" option is the string "false"."""
        if "implicit_newlines" in self.options:
            if self.options["implicit_newlines"] == "false":
                return False
        return True

    def indentation_based(self):
        """True if the grammar's "indentation" option is the string "true"."""
        if "indentation" in self.options:
            if self.options["indentation"] == "true":
                return True
        return False

    def parse(self, ecogrammar):
        """Parse Eco grammar source text and build parser and lexer from it."""
        # this is only called for grammars based on Eco Grammar (not Eco Grammar (Eco))
        from grammars.eco_grammar import eco_grammar as grammar
        self.lexer = IncrementalLexer(grammar.priorities)
        self.parser = IncParser(grammar.grammar, 1, True)
        self.parser.init_ast()
        self.ast = self.parser.previous_version.parent
        self.treemanager = TreeManager()
        self.treemanager.add_parser(self.parser, self.lexer, grammar.name)
        self.treemanager.import_file(ecogrammar)
        if self.parser.last_status == False:
            raise Exception("Invalid input grammar due to syntax errors")
        self.read_options()
        self.parse_both()
        self.create_parser()
        self.create_lexer()

    def parse_both(self):
        """Collect both parser rules and lexer rules from self.ast."""
        # parse rules
        startrule = self.ast.children[1] # startrule
        grammar = startrule.children[1]
        parser = grammar.children[0]
        assert parser.symbol.name == "parser"
        self.parse_rules(parser)
        # parse lexer
        startrule = self.ast.children[1] # startrule
        grammar = startrule.children[1]
        for element in grammar.children:
            if element.symbol.name == "lexer":
                break
        lexer = element
        assert lexer.symbol.name == "lexer"
        self.parse_lexer(lexer)
        for name, regex in self.lrules:
            # collect terminals for parser modifications
            self.all_terminals.add(name)

    def read_options(self):
        """Locate the grammar's "options" subtree and parse it into self.options."""
        startrule = self.ast.children[1] # startrule
        assert startrule.symbol.name == "Startrule"
        grammar = startrule.children[1]
        assert grammar.symbol.name == "grammar"
        for element in grammar.children:
            if element.symbol.name == "options":
                break
        if element.symbol.name != "options":
            # grammar has no options
            print("warning: grammar has no options")
            # backwards compatibility
            if self.whitespaces:
                self.options["implicit_ws"] = "true"
            return
        options = element
        assert options.symbol.name == "options"
        self.parse_options(options)

    def parse_options(self, options):
        if options.children == []:
            return
        if len(options.children) > 0:
            assert options.children[0].symbol.name == "settings"
            self.parse_settings(options.children[0])
        if len(options.children) > 1:
            assert options.children[1].symbol.name == "precedences"
            self.parse_precedences(options.children[1])

    def parse_settings(self, options):
        """Recursively parse `name = choice` settings into self.options."""
        if options.children == []:
            return
        if len(options.children) == 2:
            more = options.children[0]
            self.parse_settings(more)
            option = options.children[1]
        else:
            option = options.children[0]
        name = option.children[2].symbol.name
        choice = option.children[6]
        assert choice.symbol.name == "choice"
        if choice.children[0].symbol.name == "choice_list":
            self.options[name] = self.parse_choicelist(choice.children[0])
        else:
            self.options[name] = choice.children[0].symbol.name

    def parse_choicelist(self, symbol):
        """Flatten a comma-separated choice list into a list of names."""
        s = []
        for c in symbol.children:
            if c.symbol.name == ",":
                continue
            if c.symbol.name == "WS":
                continue
            if c.lookup == "nonterminal":
                s.append(c.symbol.name)
                continue
            if c.symbol.name == "choice_list":
                # NOTE(review): recurses on symbol.children[0], not on c --
                # correct only if the nested list is always child 0; confirm
                rec_s = self.parse_choicelist(symbol.children[0])
                s.extend(rec_s)
                continue
        return s

    def parse_precedences(self, precedences):
        if precedences.children == []:
            return
        # recursively parse other precedences
        if len(precedences.children) == 2:
            more = precedences.children[0]
            self.parse_precedences(more)
            precedence = precedences.children[1]
        else:
            precedence = precedences.children[0]
        # parse single precedence
        name = precedence.children[0].symbol.name
        terminals = self.parse_precedence_symbols(precedence.children[2])
        self.precedences.append((name, terminals))

    def parse_precedence_symbols(self, symbol):
        """Collect the (unquoted) terminals of one precedence declaration."""
        s = []
        for c in symbol.children:
            if c.symbol.name == "WS":
                continue
            if c.symbol.name == "terminals":
                # NOTE(review): recurses on symbol.children[0], not on c -- confirm
                rec_s = self.parse_precedence_symbols(symbol.children[0])
                s.extend(rec_s)
            if c.lookup == "terminal":
                s.append(c.symbol.name[1:-1])
        return s

    def create_parser(self, pickle_id=None):
        """Turn the collected rules into an IncParser (stored in self.incparser).

        Also synthesises rules for *match_until grammar functions and, if
        implicit whitespace is enabled, the WS and Startrule rules.
        """
        self.all_terminals.update(self.terminals)
        for fname, terminals, parentrule in self.functions:
            if fname.startswith("*match_until"):
                if Nonterminal(fname) not in self.rules:
                    # match anything except the stop terminals, or nothing
                    r = Rule(Nonterminal(fname))
                    for t in self.all_terminals:
                        if t not in terminals:
                            r.add_alternative([Nonterminal(fname), Terminal(t)], None, t)
                    r.add_alternative([])
                    self.rules[r.symbol] = r
                # remove whitespace before special rule from parent rule, e.g.
                # multistring ::= "MLS" WS *match_until "MLS" WS
                #                       ^ this WS causes shift/reduce conflicts
                prule = self.rules[Nonterminal(parentrule)]
                for a in prule.alternatives:
                    for i in range(len(a)):
                        sym = a[i]
                        if sym.name == "WS":
                            if len(a) > i + 1 and a[i + 1].name.startswith("*match_until"):
                                a.pop(i)
                                break
        if self.implicit_ws():
            ws_rule = Rule()
            ws_rule.symbol = Nonterminal("WS")
            ws_rule.add_alternative([Nonterminal("WS"), Terminal("<ws>")])
            # get comment rule
            if 'comment_rule' in self.options:
                cmt_rules = self.options['comment_rule']
                for cmt_rule in cmt_rules:
                    # NOTE(review): always adds Nonterminal("comment"),
                    # regardless of cmt_rule's own name -- confirm intended
                    if Nonterminal(cmt_rule) in self.rules:
                        ws_rule.add_alternative([Nonterminal("WS"), Nonterminal("comment")])
            if self.implicit_newlines():
                ws_rule.add_alternative([Nonterminal("WS"), Terminal("<return>")])
                ws_rule.add_alternative([Nonterminal("WS"), Terminal("<backslash>"), Terminal("<return>")])
            ws_rule.add_alternative([]) # or empty
            self.rules[ws_rule.symbol] = ws_rule
            for a in ws_rule.alternatives:
                self.prod_ids[Production(ws_rule.symbol, a)] = len(self.prod_ids)
            # allow whitespace/comments at beginning of file
            start_rule = Rule()
            start_rule.symbol = Nonterminal("Startrule")
            start_rule.add_alternative([Nonterminal("WS"), self.start_symbol])
            self.rules[start_rule.symbol] = start_rule
            self.prod_ids[Production(start_rule.symbol, start_rule.alternatives[0])] = len(self.prod_ids)
            self.start_symbol = start_rule.symbol
        incparser = IncParser()
        incparser.from_dict(self.rules, self.start_symbol, self.lr_type,
                            self.implicit_ws(), pickle_id, self.precedences,
                            self.prod_ids)
        incparser.init_ast()
        self.incparser = incparser

    def parse_rules(self, node):
        if node.children[0].symbol.name == "parser":
            self.parse_rules(node.children[0])
            self.parse_rule(node.children[3])
        elif node.children[0].symbol.name == "rule":
            self.parse_rule(node.children[0])

    def parse_rule(self, node):
        """Parse one `name ::= alternatives` rule into self.rules."""
        name = node.children[0].symbol.name
        self.current_rulename = name
        alternatives = self.parse_alternatives(node.children[4])
        symbol = Nonterminal(name)
        # first rule seen becomes the start symbol unless overridden
        if self.start_symbol is None:
            self.start_symbol = symbol
        if self.change_startrule and symbol.name == self.change_startrule:
            self.start_symbol = symbol
        r = Rule(symbol)
        for a in alternatives:
            r.add_alternative(a[0], a[1], a[2])
            self.prod_ids[Production(symbol, a[0])] = len(self.prod_ids)
        # add additional alternatives to the grammar (grammar extension feature, e.g. languageboxes)
        if symbol.name in self.extra_alternatives:
            for n in self.extra_alternatives[symbol.name]:
                a = [MagicTerminal(n), Nonterminal("WS")]
                r.add_alternative(a)
                self.prod_ids[Production(symbol, a)] = len(self.prod_ids)
        self.rules[symbol] = r

    def parse_alternatives(self, node):
        if node.children[0].symbol.name == "alternatives":
            alternatives = self.parse_alternatives(node.children[0])
            alternative = self.parse_alternative(node.children[3])
            alternatives.append(alternative)
            return alternatives
        elif node.children[0].symbol.name == "right":
            return [self.parse_alternative(node.children[0])]

    def parse_alternative(self, node):
        """Return (symbols, annotation, prec) for one alternative."""
        if len(node.children) > 0:
            annotation = None
            prec = None
            for c in node.children:
                if c.symbol.name == "symbols":
                    symbols = self.parse_symbols(c)
                if c.symbol.name == "prec":
                    prec = self.parse_prec(c)
                if c.symbol.name == "annotations":
                    annotation = self.parse_annotation(c)
            return (symbols, annotation, prec)
        else:
            return ([], None, None)

    def parse_prec(self, node):
        if node.children:
            c = node.children[2]
            return c.symbol.name[1:-1]

    def parse_symbols(self, node):
        """Collect the symbols of one alternative, interleaving WS if enabled."""
        if node.children[0].symbol.name == "symbols":
            symbols = self.parse_symbols(node.children[0])
            symbol = self.parse_symbol(node.children[1])
            symbols.append(symbol)
            if (isinstance(symbol, Terminal) or isinstance(symbol, MagicTerminal)) and self.implicit_ws() and self.current_rulename not in self.options["nowhitespace"]:
                symbols.append(Nonterminal("WS"))
            return symbols
        elif node.children[0].symbol.name == "symbol":
            l = []
            symbol = self.parse_symbol(node.children[0])
            l.append(symbol)
            if isinstance(symbol, Terminal) and self.implicit_ws() and self.current_rulename not in self.options["nowhitespace"]:
                l.append(Nonterminal("WS"))
            return l

    def parse_symbol(self, node):
        node = node.children[0]
        if node.lookup == "nonterminal":
            return Nonterminal(node.symbol.name)
        elif node.lookup == "terminal":
            # strip the surrounding quotes; <eos> is not a real terminal
            if node.symbol.name != "\"<eos>\"":
                self.terminals.add(node.symbol.name[1:-1])
            return Terminal(node.symbol.name[1:-1])
        elif node.lookup == "languagebox":
            return MagicTerminal(node.symbol.name)
        elif node.symbol.name == "function":
            return self.parse_function(node)

    def parse_function(self, node):
        """Register a grammar function (e.g. match_until) under a unique name."""
        fname = node.children[0].symbol.name
        terminals = self.parse_fargs(node.children[4])
        safe_name = "*%s%s" % (fname, hash(frozenset(terminals)))
        self.functions.append((safe_name, terminals, self.current_rulename))
        return Nonterminal(safe_name)

    def parse_fargs(self, symbol):
        """Collect the (unquoted) terminal arguments of a grammar function."""
        s = []
        for c in symbol.children:
            if c.symbol.name == ",":
                continue
            if c.symbol.name == "WS":
                continue
            if c.lookup == "terminal":
                s.append(c.symbol.name[1:-1])
                continue
            if c.symbol.name == "f_args":
                # NOTE(review): recurses on symbol.children[0], not on c -- confirm
                rec_s = self.parse_fargs(symbol.children[0])
                s.extend(rec_s)
        return s

    def parse_annotation(self, node):
        a_options = node.children[2]
        assert a_options.symbol.name == "a_options"
        if a_options.children[0].symbol.name == "astnode":
            return self.parse_astnode(a_options.children[0])
        elif a_options.children[0].symbol.name == "expression":
            return self.parse_expression(a_options.children[0])
        elif a_options.children[0].symbol.name == "forloop":
            return self.parse_foreach(a_options.children[0])

    def parse_astnode(self, node):
        name = node.children[0].symbol.name
        children = self.parse_astnode_children(node.children[4])
        d = {}
        for n, expr in children:
            d[n] = expr
        return AstNode(name, d)

    def parse_astnode_children(self, node):
        assert node.symbol.name == "astnode_children"
        if node.children[0].symbol.name == "astnode_child":
            return [self.parse_astnode_child(node.children[0])]
        elif node.children[0].symbol.name == "astnode_children":
            children = self.parse_astnode_children(node.children[0])
            child = self.parse_astnode_child(node.children[3])
            children.append(child)
            return children

    def parse_astnode_child(self, node):
        assert node.symbol.name == "astnode_child"
        name = node.children[0].symbol.name
        if node.children[4].symbol.name == "expression":
            expr = self.parse_expression(node.children[4])
        elif node.children[4].symbol.name == "reference":
            expr = self.parse_reference(node.children[4])
        return (name, expr)

    def parse_expression(self, node):
        if node.children[0].symbol.name == "node":
            return self.parse_node(node.children[0])
        elif node.children[0].symbol.name == "list":
            return self.parse_list(node.children[0])
        elif node.children[0].symbol.name == "node_ref":
            return self.parse_noderef(node.children[0])
        else:
            # binary addition of two sub-expressions
            expr1 = self.parse_expression(node.children[0])
            if node.children[3].symbol.name == "node":
                expr2 = self.parse_node(node.children[3])
            else:
                expr2 = self.parse_list(node.children[3])
            return AddExpr(expr1, expr2)

    def parse_foreach(self, node):
        item = self.parse_node(node.children[4])
        expr = self.parse_astnode(node.children[7])
        return Foreach(node.symbol.name, item, expr)

    def parse_noderef(self, node):
        lookup = self.parse_node(node.children[0])
        attr = node.children[3]
        lookup.attribute = attr.symbol.name
        return lookup

    def parse_node(self, node):
        return LookupExpr(int(node.children[2].symbol.name))

    def parse_list(self, node):
        return ListExpr(self.parse_listloop(node.children[2]))

    def parse_reference(self, node):
        base = node.children[0].symbol.name
        ref = node.children[4].symbol.name
        return ReferenceExpr(base, ref)

    def parse_listloop(self, node):
        if len(node.children) == 0:
            return []
        if node.children[0].symbol.name == "list_loop":
            l = self.parse_listloop(node.children[0])
            element = self.parse_unknown(node.children[3])
            l.append(element)
            return l
        else:
            return [self.parse_unknown(node.children[0])]

    def parse_unknown(self, node):
        if node.symbol.name == "node":
            return self.parse_node(node)
        elif node.symbol.name == "astnode":
            return self.parse_astnode(node)

    def create_lexer(self, buildlexer=True):
        """Build the incremental lexer (or raw name/regex lists) from self.lrules."""
        names = []
        regexs = []
        for name, regex in self.lrules:
            names.append(name)
            self.all_terminals.add(name)
            regexs.append(regex)
        # add so far undefined terminals
        undefined_terminals = self.terminals.difference(set(names))
        import re
        for t in undefined_terminals:
            names.insert(0, t)
            regexs.insert(0, re.escape(t))
        if not buildlexer:
            self.inclexer = (names, regexs)
            return
        self.inclexer = IncrementalLexerCF()
        self.inclexer.from_name_and_regex(names, regexs)
        if self.indentation_based():
            self.inclexer.indentation_based = True

    def parse_lexer(self, lexer):
        if lexer.children[0].symbol.name == "lrule":
            self.parse_lrule(lexer.children[0])
        elif lexer.children[0].symbol.name == "lexer":
            self.parse_lexer(lexer.children[0])
            self.parse_lrule(lexer.children[1])

    def parse_lrule(self, lrule):
        """Record one `tokenname: "regex"` lexer rule (quotes stripped)."""
        assert lrule.children[0].symbol.name == "tokenname"
        name = lrule.children[0].children[0].symbol.name
        regex = lrule.children[3].symbol.name[1:-1]
        self.lrules.append((name, regex))
def create_parser(self, pickle_id=None):
    """Turn the collected rules into an IncParser (stored in self.incparser).

    Also synthesises rules for *match_until grammar functions and, if
    implicit whitespace is enabled, the WS and Startrule rules.

    Fix applied: dict.has_key() (Python 2 only) replaced with `in`.

    pickle_id: identifier forwarded to IncParser.from_dict (used for
    caching/pickling of the generated tables; None disables it).
    """
    self.all_terminals.update(self.terminals)
    for fname, terminals, parentrule in self.functions:
        if fname.startswith("*match_until"):
            if Nonterminal(fname) not in self.rules:
                # match anything except the stop terminals, or nothing
                r = Rule(Nonterminal(fname))
                for t in self.all_terminals:
                    if t not in terminals:
                        r.add_alternative([Nonterminal(fname), Terminal(t)], None, t)
                r.add_alternative([])
                self.rules[r.symbol] = r
            # remove whitespace before special rule from parent rule, e.g.
            # multistring ::= "MLS" WS *match_until "MLS" WS
            #                       ^ this WS causes shift/reduce conflicts
            prule = self.rules[Nonterminal(parentrule)]
            for a in prule.alternatives:
                for i in range(len(a)):
                    sym = a[i]
                    if sym.name == "WS":
                        if len(a) > i + 1 and a[i + 1].name.startswith("*match_until"):
                            a.pop(i)
                            break
    if self.implicit_ws():
        ws_rule = Rule()
        ws_rule.symbol = Nonterminal("WS")
        ws_rule.add_alternative([Nonterminal("WS"), Terminal("<ws>")])
        # get comment rule
        if 'comment_rule' in self.options:
            cmt_rules = self.options['comment_rule']
            for cmt_rule in cmt_rules:
                # NOTE(review): always adds Nonterminal("comment"),
                # regardless of cmt_rule's own name -- confirm intended
                if Nonterminal(cmt_rule) in self.rules:
                    ws_rule.add_alternative([Nonterminal("WS"), Nonterminal("comment")])
        if self.implicit_newlines():
            ws_rule.add_alternative([Nonterminal("WS"), Terminal("<return>")])
            ws_rule.add_alternative([Nonterminal("WS"), Terminal("<backslash>"), Terminal("<return>")])
        ws_rule.add_alternative([]) # or empty
        self.rules[ws_rule.symbol] = ws_rule
        for a in ws_rule.alternatives:
            self.prod_ids[Production(ws_rule.symbol, a)] = len(self.prod_ids)
        # allow whitespace/comments at beginning of file
        start_rule = Rule()
        start_rule.symbol = Nonterminal("Startrule")
        start_rule.add_alternative([Nonterminal("WS"), self.start_symbol])
        self.rules[start_rule.symbol] = start_rule
        self.prod_ids[Production(start_rule.symbol, start_rule.alternatives[0])] = len(self.prod_ids)
        self.start_symbol = start_rule.symbol
    incparser = IncParser()
    incparser.from_dict(self.rules, self.start_symbol, self.lr_type,
                        self.implicit_ws(), pickle_id,
                        self.precedences, self.prod_ids)
    incparser.init_ast()
    self.incparser = incparser
# IN THE SOFTWARE. """Program for quick viewing the AST of a given program using a given annotated grammar""" from viewer import Viewer from grammars.grammars import python275_annotated from treemanager import TreeManager from incparser.incparser import IncParser from inclexer.inclexer import IncrementalLexer from incparser.astree import BOS, EOS grammar = python275_annotated whitespace = True lexer = IncrementalLexer(grammar.priorities) parser = IncParser(grammar.grammar, 1, whitespace) parser.init_ast() ast = parser.previous_version treemanager = TreeManager() treemanager.add_parser(parser, lexer, grammar.name) treemanager.set_font_test(7, 17) # hard coded. PyQt segfaults in test suite inputstring = """import abc.xyz as efg from x import z class Test: def x(): if x == 1: z = 3 + 4 * 5 elif x == 2: for x in range(2,10):
class BootstrapParser(object):
    """Bootstraps a language implementation from an Eco grammar.

    Parses a grammar file written in the Eco meta-grammar, walks the
    resulting parse tree to extract options, precedences, grammar rules and
    lexer rules, and finally builds an incremental parser/lexer pair
    (``self.incparser`` / ``self.inclexer``).

    Fix vs. original: the three uses of ``dict.has_key()`` were replaced with
    the ``in`` operator — ``has_key`` was removed in Python 3, while ``in``
    behaves identically on Python 2 and 3.

    NOTE: the many ``node.children[i]`` index accesses throughout encode the
    exact shape of the Eco meta-grammar's parse tree; they must stay in sync
    with that grammar.
    """

    def __init__(self, lr_type=1, whitespaces=False):
        self.lr_type = lr_type
        self.whitespaces = whitespaces
        # load (old) parser for grammar grammar
        self.rules = {}               # Nonterminal -> Rule
        self.lrules = []              # (token name, regex) pairs, in grammar order
        self.start_symbol = None      # first rule parsed becomes the start symbol
        self.incparser = None
        self.inclexer = None
        self.terminals = set()        # terminal names seen while parsing rules
        self.extra_alternatives = {}  # rule name -> extra languagebox alternatives
        self.change_startrule = None  # optional override for the start rule name
        self.options = {}             # option name -> chosen value (strings)
        self.precedences = []         # (assoc name, [terminal, ...]) pairs

    def implicit_ws(self):
        """Return True if the grammar enables implicit whitespace handling."""
        if "implicit_ws" in self.options:
            if self.options["implicit_ws"] == "true":
                return True
        return False

    def indentation_based(self):
        """Return True if the grammar declares indentation-based lexing."""
        if "indentation" in self.options:
            if self.options["indentation"] == "true":
                return True
        return False

    def parse(self, ecogrammar):
        """Parse an Eco grammar source string and build parser and lexer.

        Raises Exception if the grammar itself does not parse.
        """
        # this is only called for grammars based on Eco Grammar (not Eco Grammar (Eco))
        from grammars.eco_grammar import eco_grammar as grammar
        self.lexer = IncrementalLexer(grammar.priorities)
        self.parser = IncParser(grammar.grammar, 1, True)
        self.parser.init_ast()
        self.ast = self.parser.previous_version.parent
        self.treemanager = TreeManager()
        self.treemanager.add_parser(self.parser, self.lexer, grammar.name)
        self.treemanager.import_file(ecogrammar)
        if self.parser.last_status == False:
            raise Exception("Invalid input grammar: at %s %s" % (self.parser.error_node.prev_term, self.parser.error_node))
        self.read_options()
        self.create_parser()
        self.create_lexer()

    def read_options(self):
        """Locate the "options" subtree of the parsed grammar and parse it.

        If the grammar has no options section, fall back to the
        ``whitespaces`` constructor flag for backwards compatibility.
        """
        startrule = self.ast.children[1] # startrule
        assert startrule.symbol.name == "Startrule"
        grammar = startrule.children[1]
        assert grammar.symbol.name == "grammar"
        for element in grammar.children:
            if element.symbol.name == "options":
                break
        if element.symbol.name != "options":
            # grammar has no options
            print("warning: grammar has no options")
            # backwards compatibility
            if self.whitespaces:
                self.options["implicit_ws"] = "true"
            return
        options = element
        assert options.symbol.name == "options"
        self.parse_options(options)

    def parse_options(self, options):
        """Parse the options subtree: settings first, then precedences."""
        if options.children == []:
            return
        if len(options.children) > 0:
            assert options.children[0].symbol.name == "settings"
            self.parse_settings(options.children[0])
        if len(options.children) > 1:
            assert options.children[1].symbol.name == "precedences"
            self.parse_precedences(options.children[1])

    def parse_settings(self, options):
        """Recursively parse a (left-recursive) list of name/choice settings."""
        if options.children == []:
            return
        if len(options.children) == 2:
            more = options.children[0]
            self.parse_settings(more)
            option = options.children[1]
        else:
            option = options.children[0]
        name = option.children[2].symbol.name
        choice = option.children[6]
        assert choice.symbol.name == "choice"
        self.options[name] = choice.children[0].symbol.name

    def parse_precedences(self, precedences):
        """Recursively parse precedence declarations into self.precedences."""
        if precedences.children == []:
            return
        # recursively parse other precedences
        if len(precedences.children) == 2:
            more = precedences.children[0]
            self.parse_precedences(more)
            precedence = precedences.children[1]
        else:
            precedence = precedences.children[0]
        # parse single precedence
        name = precedence.children[0].symbol.name
        terminals = self.parse_precedence_symbols(precedence.children[2])
        self.precedences.append((name, terminals))

    def parse_precedence_symbols(self, symbol):
        """Collect the terminal names (quotes stripped) of one precedence."""
        s = []
        for c in symbol.children:
            if c.symbol.name == "WS":
                continue
            if c.symbol.name == "terminals":
                # NOTE(review): recurses on symbol.children[0] rather than c —
                # presumably the nested "terminals" node is always the first
                # child; confirm against the meta-grammar before changing.
                rec_s = self.parse_precedence_symbols(symbol.children[0])
                s.extend(rec_s)
            if c.lookup == "terminal":
                s.append(c.symbol.name[1:-1])
        return s

    def create_parser(self, pickle_id=None):
        """Extract grammar rules from the tree and build the IncParser.

        When implicit whitespace is enabled, a WS rule and a wrapping
        Startrule (allowing leading whitespace/comments) are synthesised.
        """
        startrule = self.ast.children[1] # startrule
        grammar = startrule.children[1]
        parser = grammar.children[0]
        assert parser.symbol.name == "parser"
        self.parse_rules(parser)
        if self.implicit_ws():
            # WS ::= <ws> WS | <return> WS | <backslash> <return> WS | <empty>
            ws_rule = Rule()
            ws_rule.symbol = Nonterminal("WS")
            ws_rule.add_alternative([Terminal("<ws>"), Nonterminal("WS")])
            ws_rule.add_alternative([Terminal("<return>"), Nonterminal("WS")])
            ws_rule.add_alternative([Terminal("<backslash>"), Terminal("<return>"), Nonterminal("WS")])
            ws_rule.add_alternative([]) # or empty
            self.rules[ws_rule.symbol] = ws_rule
            # allow whitespace/comments at beginning of file
            start_rule = Rule()
            start_rule.symbol = Nonterminal("Startrule")
            start_rule.add_alternative([Nonterminal("WS"), self.start_symbol])
            self.rules[start_rule.symbol] = start_rule
            self.start_symbol = start_rule.symbol
        incparser = IncParser()
        incparser.from_dict(self.rules, self.start_symbol, self.lr_type, self.implicit_ws(), pickle_id, self.precedences)
        incparser.init_ast()
        self.incparser = incparser

    def parse_rules(self, node):
        """Recursively walk the (left-recursive) list of grammar rules."""
        if node.children[0].symbol.name == "parser":
            self.parse_rules(node.children[0])
            self.parse_rule(node.children[3])
        elif node.children[0].symbol.name == "rule":
            self.parse_rule(node.children[0])

    def parse_rule(self, node):
        """Parse a single rule, registering it in self.rules.

        The first rule parsed becomes the start symbol unless
        ``change_startrule`` names a different one.
        """
        name = node.children[0].symbol.name
        alternatives = self.parse_alternatives(node.children[4])
        symbol = Nonterminal(name)
        if self.start_symbol is None:
            self.start_symbol = symbol
        if self.change_startrule and symbol.name == self.change_startrule:
            self.start_symbol = symbol
        r = Rule(symbol)
        for a in alternatives:
            r.add_alternative(a[0], a[1], a[2])
        # add additional alternatives to the grammar (grammar extension feature, e.g. languageboxes)
        if symbol.name in self.extra_alternatives:
            for n in self.extra_alternatives[symbol.name]:
                r.add_alternative([MagicTerminal(n), Nonterminal("WS")], None)
        self.rules[symbol] = r

    def parse_alternatives(self, node):
        """Return the list of parsed alternatives of a rule."""
        if node.children[0].symbol.name == "alternatives":
            alternatives = self.parse_alternatives(node.children[0])
            alternative = self.parse_alternative(node.children[3])
            alternatives.append(alternative)
            return alternatives
        elif node.children[0].symbol.name == "right":
            return [self.parse_alternative(node.children[0])]

    def parse_alternative(self, node):
        """Parse one alternative into (symbols, annotation, precedence)."""
        if len(node.children) > 0:
            annotation = None
            prec = None
            for c in node.children:
                if c.symbol.name == "symbols":
                    symbols = self.parse_symbols(c)
                if c.symbol.name == "prec":
                    prec = self.parse_prec(c)
                if c.symbol.name == "annotations":
                    annotation = self.parse_annotation(c)
            return (symbols, annotation, prec)
        else:
            return ([], None, None)

    def parse_prec(self, node):
        """Return the precedence terminal's name (quotes stripped), if any."""
        if node.children:
            c = node.children[2]
            return c.symbol.name[1:-1]

    def parse_symbols(self, node):
        """Parse the symbol list of an alternative.

        With implicit whitespace enabled, a WS nonterminal is appended after
        each (magic) terminal.
        """
        if node.children[0].symbol.name == "symbols":
            symbols = self.parse_symbols(node.children[0])
            symbol = self.parse_symbol(node.children[1])
            symbols.append(symbol)
            if (isinstance(symbol, Terminal) or isinstance(symbol, MagicTerminal)) and self.implicit_ws():
                symbols.append(Nonterminal("WS"))
            return symbols
        elif node.children[0].symbol.name == "symbol":
            l = []
            symbol = self.parse_symbol(node.children[0])
            l.append(symbol)
            if isinstance(symbol, Terminal) and self.implicit_ws():
                l.append(Nonterminal("WS"))
            return l

    def parse_symbol(self, node):
        """Convert a symbol node into a Nonterminal/Terminal/MagicTerminal."""
        node = node.children[0]
        if node.lookup == "nonterminal":
            return Nonterminal(node.symbol.name)
        elif node.lookup == "terminal":
            # strip surrounding quotes and remember the terminal for the lexer
            self.terminals.add(node.symbol.name[1:-1])
            return Terminal(node.symbol.name[1:-1])
        elif node.lookup == "languagebox":
            return MagicTerminal(node.symbol.name)

    def parse_annotation(self, node):
        """Parse an AST annotation: an astnode, expression or foreach loop."""
        a_options = node.children[2]
        assert a_options.symbol.name == "a_options"
        if a_options.children[0].symbol.name == "astnode":
            return self.parse_astnode(a_options.children[0])
        elif a_options.children[0].symbol.name == "expression":
            return self.parse_expression(a_options.children[0])
        elif a_options.children[0].symbol.name == "forloop":
            return self.parse_foreach(a_options.children[0])

    def parse_astnode(self, node):
        """Build an AstNode(name, {child name: expression})."""
        name = node.children[0].symbol.name
        children = self.parse_astnode_children(node.children[4])
        d = {}
        for n, expr in children:
            d[n] = expr
        return AstNode(name, d)

    def parse_astnode_children(self, node):
        """Return the list of (name, expression) pairs of an astnode."""
        assert node.symbol.name == "astnode_children"
        if node.children[0].symbol.name == "astnode_child":
            return [self.parse_astnode_child(node.children[0])]
        elif node.children[0].symbol.name == "astnode_children":
            children = self.parse_astnode_children(node.children[0])
            child = self.parse_astnode_child(node.children[3])
            children.append(child)
            return children

    def parse_astnode_child(self, node):
        """Parse one astnode child into a (name, expression) pair."""
        assert node.symbol.name == "astnode_child"
        name = node.children[0].symbol.name
        if node.children[4].symbol.name == "expression":
            expr = self.parse_expression(node.children[4])
        elif node.children[4].symbol.name == "reference":
            expr = self.parse_reference(node.children[4])
        return (name, expr)

    def parse_expression(self, node):
        """Parse an annotation expression (node, list, node_ref or addition)."""
        if node.children[0].symbol.name == "node":
            return self.parse_node(node.children[0])
        elif node.children[0].symbol.name == "list":
            return self.parse_list(node.children[0])
        elif node.children[0].symbol.name == "node_ref":
            return self.parse_noderef(node.children[0])
        else:
            # binary addition: expression + (node | list)
            expr1 = self.parse_expression(node.children[0])
            if node.children[3].symbol.name == "node":
                expr2 = self.parse_node(node.children[3])
            else:
                expr2 = self.parse_list(node.children[3])
            return AddExpr(expr1, expr2)

    def parse_foreach(self, node):
        """Parse a foreach annotation into a Foreach expression."""
        item = self.parse_node(node.children[4])
        expr = self.parse_astnode(node.children[7])
        return Foreach(node.symbol.name, item, expr)

    def parse_noderef(self, node):
        """Parse a node reference (lookup with an attribute access)."""
        lookup = self.parse_node(node.children[0])
        attr = node.children[3]
        lookup.attribute = attr.symbol.name
        return lookup

    def parse_node(self, node):
        """Parse a numbered lookup, e.g. node(2), into a LookupExpr."""
        return LookupExpr(int(node.children[2].symbol.name))

    def parse_list(self, node):
        """Parse a list annotation into a ListExpr."""
        return ListExpr(self.parse_listloop(node.children[2]))

    def parse_reference(self, node):
        """Parse a base.ref reference into a ReferenceExpr."""
        base = node.children[0].symbol.name
        ref = node.children[4].symbol.name
        return ReferenceExpr(base, ref)

    def parse_listloop(self, node):
        """Recursively collect the elements of a list annotation."""
        if len(node.children) == 0:
            return []
        if node.children[0].symbol.name == "list_loop":
            l = self.parse_listloop(node.children[0])
            element = self.parse_unknown(node.children[3])
            l.append(element)
            return l
        else:
            return [self.parse_unknown(node.children[0])]

    def parse_unknown(self, node):
        """Dispatch a list element: either a node lookup or a nested astnode."""
        if node.symbol.name == "node":
            return self.parse_node(node)
        elif node.symbol.name == "astnode":
            return self.parse_astnode(node)

    def create_lexer(self):
        """Extract lexer rules from the tree and build the incremental lexer.

        Terminals that appear in grammar rules but have no lexer rule are
        prepended as literal (regex-escaped) tokens.
        """
        startrule = self.ast.children[1] # startrule
        grammar = startrule.children[1]
        for element in grammar.children:
            if element.symbol.name == "lexer":
                break
        lexer = element
        assert lexer.symbol.name == "lexer"
        self.parse_lexer(lexer)
        names = []
        regexs = []
        for name, regex in self.lrules:
            names.append(name)
            regexs.append(regex)
        # add so far undefined terminals
        undefined_terminals = self.terminals.difference(set(names))
        import re
        for t in undefined_terminals:
            # insert at the front so literal terminals take priority
            names.insert(0, t)
            regexs.insert(0, re.escape(t))
        self.inclexer = IncrementalLexerCF()
        self.inclexer.from_name_and_regex(names, regexs)
        if self.indentation_based():
            self.inclexer.indentation_based = True

    def parse_lexer(self, lexer):
        """Recursively walk the (left-recursive) list of lexer rules."""
        if lexer.children[0].symbol.name == "lrule":
            self.parse_lrule(lexer.children[0])
        elif lexer.children[0].symbol.name == "lexer":
            self.parse_lexer(lexer.children[0])
            self.parse_lrule(lexer.children[1])

    def parse_lrule(self, lrule):
        """Parse one lexer rule into a (token name, regex) pair."""
        assert lrule.children[0].symbol.name == "tokenname"
        name = lrule.children[0].children[0].symbol.name
        regex = lrule.children[3].symbol.name[1:-1]
        self.lrules.append((name, regex))
def setup_class(cls):
    """One-time fixture: build the calc1 lexer/parser pair for this test class."""
    calc_lexer = IncrementalLexer(calc1.priorities)
    calc_parser = IncParser(calc1.grammar, 1, True)
    calc_parser.init_ast()
    # publish the initialised objects on the class for the tests to use
    cls.lexer = calc_lexer
    cls.parser = calc_parser
    cls.ast = calc_parser.previous_version