Beispiel #1
0
    def create_parser(self, pickle_id=None):
        """Collect the parser rules from the grammar AST and build the
        incremental parser, adding implicit-whitespace support if enabled."""
        start = self.ast.children[1]  # startrule
        parser_node = start.children[1].children[0]
        assert parser_node.symbol.name == "parser"
        self.parse_rules(parser_node)

        if self.implicit_ws():
            # Synthesise a WS rule matching spaces, newlines, escaped
            # newlines, or nothing at all.
            ws = Rule()
            ws.symbol = Nonterminal("WS")
            for alternative in ([Terminal("<ws>"), Nonterminal("WS")],
                                [Terminal("<return>"), Nonterminal("WS")],
                                [Terminal("<backslash>"), Terminal("<return>"), Nonterminal("WS")],
                                []):
                ws.add_alternative(alternative)
            self.rules[ws.symbol] = ws

            # Wrap the start symbol so whitespace/comments are accepted at
            # the beginning of the file.
            wrapper = Rule()
            wrapper.symbol = Nonterminal("Startrule")
            wrapper.add_alternative([Nonterminal("WS"), self.start_symbol])
            self.rules[wrapper.symbol] = wrapper
            self.start_symbol = wrapper.symbol

        parser = IncParser()
        parser.from_dict(self.rules, self.start_symbol, self.lr_type,
                         self.implicit_ws(), pickle_id, self.precedences)
        parser.init_ast()
        self.incparser = parser
Beispiel #2
0
def test_input():
    """Deprecated: incremental reparsing should reuse the AST root."""
    pytest.skip("deprecated")
    parser = IncParser(grammar, LR1)
    assert parser.inc_parse("1 * 2") == True
    parser.get_ast().pprint()
    root_before = parser.ast_stack[0]
    assert parser.inc_parse("1 + 2") == True
    parser.stack[1].pprint()
    # the AST root must survive the reparse unchanged
    assert root_before is parser.ast_stack[0]
    assert False
Beispiel #3
0
def test_empty_start():
    """Deprecated: a grammar with an empty alternative accepts "" and "a"."""
    pytest.skip("deprecated")
    grammar = """
        S ::= "a"
            |
    """
    parser = IncParser(grammar, LR1)
    assert parser.inc_parse("") == True
    parser.get_ast().pprint()
    assert parser.inc_parse("a") == True
    parser.stack[1].pprint()
    assert False
Beispiel #4
0
 def get_parser_lexer_for_language(self, grammar, whitespaces):
     """Return an (incparser, inclexer) pair for the given grammar object."""
     from grammar_parser.bootstrap import BootstrapParser
     from jsonmanager import JsonManager
     if isinstance(grammar, Language):
         parser = IncParser(grammar.grammar, 1, whitespaces)
         parser.init_ast()
         lexer = IncrementalLexer(grammar.priorities)
         return parser, lexer
     if isinstance(grammar, EcoFile):
         # EcoFile knows how to build (and cache) its own parser/lexer
         return grammar.load()
     print("Grammar Error: could not determine grammar type")
     return
Beispiel #5
0
 def set_language(self, lang, whitespace):
     """Make *lang* the editor's main language (parser + lexer)."""
     if isinstance(lang, Language):
         parser = IncParser(str(lang.grammar), 1, whitespace)
         parser.init_ast()
         lexer = IncrementalLexer(str(lang.priorities))
         self.editor.set_mainlanguage(parser, lexer, lang.name)
     elif isinstance(lang, EcoGrammar):
         # bootstrap the parser/lexer from the Eco grammar description
         bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespace)
         bootstrap.parse(lang.grammar)
         self.editor.set_mainlanguage(bootstrap.incparser,
                                      bootstrap.inclexer, lang.name)
     elif isinstance(lang, EcoFile):
         parser, lexer = lang.load()
         self.editor.set_mainlanguage(parser, lexer, lang.name)
Beispiel #6
0
 def set_language(self, lang, whitespace):
     """Make *lang* the editor's main language (parser + lexer)."""
     if isinstance(lang, Language):
         # NOTE(review): the original computed unused locals
         # (grammar/new_priorities copies of lang.grammar/lang.priorities)
         # here; removed as dead code.
         lrp = IncParser(str(lang.grammar), 1, whitespace)
         lrp.init_ast()
         lexer = IncrementalLexer(str(lang.priorities))
         self.editor.set_mainlanguage(lrp, lexer, lang.name)
     elif isinstance(lang, EcoGrammar):
         bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespace)
         bootstrap.parse(lang.grammar)
         self.editor.set_mainlanguage(bootstrap.incparser, bootstrap.inclexer, lang.name)
     elif isinstance(lang, EcoFile):
         incparser, inclexer = lang.load()
         self.editor.set_mainlanguage(incparser, inclexer, lang.name)
 def parse(self, ecogrammar):
     """Parse an Eco grammar source string and build parser + lexer from it.

     Raises:
         Exception: when the input grammar contains syntax errors.
     """
     # this is only called for grammars based on Eco Grammar (not Eco Grammar (Eco))
     from grammars.eco_grammar import eco_grammar as grammar
     self.lexer = IncrementalLexer(grammar.priorities)
     self.parser = IncParser(grammar.grammar, 1, True)
     self.parser.init_ast()
     self.ast = self.parser.previous_version.parent
     self.treemanager = TreeManager()
     self.treemanager.add_parser(self.parser, self.lexer, grammar.name)
     # feed the grammar text through the tree manager (lex + parse)
     self.treemanager.import_file(ecogrammar)
     if self.parser.last_status == False:
         raise Exception("Invalid input grammar due to syntax errors")
     self.read_options()
     self.parse_both()
     self.create_parser()
     self.create_lexer()
Beispiel #8
0
def test_multiple_changes_at_once():
    """Deprecated: editing one terminal into several tokens, then reparsing."""
    pytest.skip("deprecated")
    grammar = """
        S ::= C S
            | C
        C ::= "a" | "b"
    """
    parser = IncParser(grammar, LR1)
    parser.check("a a a")
    parser.previous_version = parser.get_ast()
    tree = parser.previous_version
    # expected tree shape:
    #root
    #    bos
    #    S
    #        C a
    #        S
    #            C a
    #            ...
    c_node = tree.parent.children[1].children[1].children[0]
    assert c_node.symbol == Nonterminal("C")
    assert c_node.children[0].symbol == Terminal("a")
    # put insertion into this Node
    edited = c_node.children[0]
    edited.symbol.name = "b b a"
    apply_change(parser, edited)
    parser.previous_version.pprint()
    parser.inc_parse()
    parser.stack[1].pprint()
    assert False
Beispiel #9
0
def test_incparser_ast():
    """Deprecated: reparsing should reuse the existing root node."""
    pytest.skip("lrp.check is deprecated")
    parser = IncParser(grammar)

    parser.check("1 + 2 * 3")
    first_ast = parser.get_ast()

    parser.check("1 + 2 * 1")
    second_ast = parser.get_ast()

    # reparsing should reuse parent node
    assert first_ast.parent is second_ast.parent
Beispiel #10
0
    def load(self):
        """Return (incparser, inclexer) for this file, using the module cache
        when a parser for this language was built before.

        Note: even on the cached path the grammar JSON is re-read, because the
        incremental parser needs the parse rules to detect comments.
        """
        from grammar_parser.bootstrap import BootstrapParser
        from jsonmanager import JsonManager

        # BUGFIX: dict.has_key() was removed in Python 3; use `in` instead.
        if self.name + "::parser" in _cache:

            root, language, whitespaces = _cache[self.name + "::json"]

            # parse rules as they are needed by the incremental parser to
            # detect comments

            manager = JsonManager(unescape=True)
            root, language, whitespaces = manager.load(self.filename)[0]

            pickle_id = hash(self)
            bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespaces)
            bootstrap.ast = root
            bootstrap.parse_rules(root.children[1].children[1].children[0])

            # the cached pickle_id/whitespace take precedence
            pickle_id, whitespace = _cache[self.name + "::parser"]
            from incparser.incparser import IncParser
            incparser = IncParser()
            incparser.from_dict(bootstrap.rules, None, None, whitespace,
                                pickle_id, None)
            incparser.init_ast()

            inclexer = _cache[self.name + "::lexer"]
            incparser.lexer = inclexer  # give parser a reference to its lexer (needed for multiline comments)

            return (incparser, inclexer)
        else:
            manager = JsonManager(unescape=True)
            root, language, whitespaces = manager.load(self.filename)[0]

            pickle_id = hash(self)
            bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespaces)
            bootstrap.ast = root
            bootstrap.extra_alternatives = self.alts
            bootstrap.change_startrule = self.extract
            bootstrap.read_options()

            bootstrap.parse_both()
            bootstrap.create_parser(pickle_id)
            bootstrap.create_lexer()
            whitespace = bootstrap.implicit_ws()

            _cache[self.name + "::lexer"] = bootstrap.inclexer
            _cache[self.name + "::json"] = (root, language, whitespaces)
            _cache[self.name + "::parser"] = (pickle_id, whitespace)

            bootstrap.incparser.lexer = bootstrap.inclexer
            return (bootstrap.incparser, bootstrap.inclexer)
Beispiel #11
0
    def load(self):
        """Return (incparser, inclexer), rebuilding and caching on first use."""
        from grammar_parser.bootstrap import BootstrapParser
        from jsonmanager import JsonManager

        # BUGFIX: dict.has_key() was removed in Python 3; use `in` instead.
        if self.name + "::parser" in _cache:

            root, language, whitespaces = _cache[self.name + "::json"]

            pickle_id, whitespace = _cache[self.name + "::parser"]
            from incparser.incparser import IncParser
            incparser = IncParser()
            incparser.from_dict(None, None, None, whitespace, pickle_id, None)
            incparser.init_ast()

            inclexer = _cache[self.name + "::lexer"]

            return (incparser, inclexer)
        else:
            manager = JsonManager(unescape=True)
            root, language, whitespaces = manager.load(self.filename)[0]

            pickle_id = hash(self)
            bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespaces)
            bootstrap.ast = root
            bootstrap.extra_alternatives = self.alts
            bootstrap.change_startrule = self.extract
            bootstrap.read_options()

            bootstrap.create_parser(pickle_id)
            whitespace = bootstrap.implicit_ws()

            bootstrap.create_lexer()

            _cache[self.name + "::lexer"] = bootstrap.inclexer
            _cache[self.name + "::json"] = (root, language, whitespaces)
            _cache[self.name + "::parser"] = (pickle_id, whitespace)
            return (bootstrap.incparser, bootstrap.inclexer)
Beispiel #12
0
 def parse(self, ecogrammar):
     """Parse an Eco grammar source string and build parser + lexer from it.

     Raises:
         Exception: with the error location when the grammar has syntax errors.
     """
     # this is only called for grammars based on Eco Grammar (not Eco Grammar (Eco))
     from grammars.eco_grammar import eco_grammar as grammar
     self.lexer = IncrementalLexer(grammar.priorities)
     self.parser = IncParser(grammar.grammar, 1, True)
     self.parser.init_ast()
     self.ast = self.parser.previous_version.parent
     self.treemanager = TreeManager()
     self.treemanager.add_parser(self.parser, self.lexer, grammar.name)
     # feed the grammar text through the tree manager (lex + parse)
     self.treemanager.import_file(ecogrammar)
     if self.parser.last_status == False:
         raise Exception("Invalid input grammar: at %s %s" % (self.parser.error_node.prev_term, self.parser.error_node))
     self.read_options()
     self.create_parser()
     self.create_lexer()
Beispiel #13
0
    def load(self, buildlexer=True):
        """Return (incparser, inclexer) for this Eco file.

        A cached syntax table is reused when available; otherwise the grammar
        is bootstrapped from its JSON file and the results are cached.
        """
        from grammar_parser.bootstrap import BootstrapParser
        from jsonmanager import JsonManager
        from incparser.incparser import IncParser

        if self.name + "::parser" in _cache:

            # fast path: rebuild a parser around the cached syntax table
            syntaxtable, whitespaces = _cache[self.name + "::parser"]
            incparser = IncParser()
            incparser.syntaxtable = syntaxtable
            incparser.whitespaces = whitespaces
            incparser.init_ast()
            incparser.lang = self.name

            inclexer = _cache[self.name + "::lexer"]
            incparser.lexer = inclexer  # give parser a reference to its lexer (needed for multiline comments)
            incparser.previous_version.parent.name = self.name

            return (incparser, inclexer)
        else:
            # slow path: bootstrap parser and lexer from the grammar's JSON
            manager = JsonManager(unescape=True)
            root, language, whitespaces = manager.load(self.filename)[0]

            bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespaces)
            bootstrap.ast = root
            bootstrap.extra_alternatives = self.alts
            bootstrap.change_startrule = self.extract
            bootstrap.read_options()
            whitespace = bootstrap.implicit_ws()

            pickle_id = self.pickleid(whitespace)

            bootstrap.parse_both()
            bootstrap.create_parser(pickle_id)
            bootstrap.create_lexer(buildlexer)

            _cache[self.name + "::lexer"] = bootstrap.inclexer
            _cache[self.name + "::json"] = (root, language, whitespaces)
            _cache[self.name + "::parser"] = (bootstrap.incparser.syntaxtable,
                                              whitespace)

            bootstrap.incparser.lang = self.name
            bootstrap.incparser.previous_version.parent.name = self.name
            bootstrap.incparser.lexer = bootstrap.inclexer
            return (bootstrap.incparser, bootstrap.inclexer)
Beispiel #14
0
def test_multiple_changes_3():
    """Deprecated: changing an operator token and incrementally reparsing."""
    pytest.skip("deprecated")
    parser = IncParser(grammar, LR1)
    parser.check("1 + 2")
    parser.previous_version = parser.get_ast()
    tree = parser.previous_version
    Viewer().show_tree(parser.previous_version.parent.children[1])
    op_node = tree.parent.children[1].children[1]
    assert op_node.symbol == Terminal("+")
    op_node.symbol.name = "*"
    apply_change(parser, op_node)
    parser.inc_parse()
    parser.stack[1].pprint()
    Viewer().show_tree(parser.stack[1])
    assert False
Beispiel #15
0
    def load(self):
        """Return (incparser, inclexer) for this file, using the module cache
        when a parser for this language was built before.

        Note: even on the cached path the grammar JSON is re-read, because the
        incremental parser needs the parse rules to detect comments.
        """
        from grammar_parser.bootstrap import BootstrapParser
        from jsonmanager import JsonManager

        # BUGFIX: dict.has_key() was removed in Python 3; use `in` instead.
        if self.name + "::parser" in _cache:

            root, language, whitespaces = _cache[self.name + "::json"]

            # parse rules as they are needed by the incremental parser to
            # detect comments

            manager = JsonManager(unescape=True)
            root, language, whitespaces = manager.load(self.filename)[0]

            pickle_id = hash(self)
            bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespaces)
            bootstrap.ast = root
            bootstrap.parse_rules(root.children[1].children[1].children[0])

            # the cached pickle_id/whitespace take precedence
            pickle_id, whitespace = _cache[self.name + "::parser"]
            from incparser.incparser import IncParser
            incparser = IncParser()
            incparser.from_dict(bootstrap.rules, None, None, whitespace, pickle_id, None)
            incparser.init_ast()

            inclexer = _cache[self.name + "::lexer"]
            incparser.lexer = inclexer # give parser a reference to its lexer (needed for multiline comments)

            return (incparser, inclexer)
        else:
            manager = JsonManager(unescape=True)
            root, language, whitespaces = manager.load(self.filename)[0]

            pickle_id = hash(self)
            bootstrap = BootstrapParser(lr_type=1, whitespaces=whitespaces)
            bootstrap.ast = root
            bootstrap.extra_alternatives = self.alts
            bootstrap.change_startrule = self.extract
            bootstrap.read_options()

            bootstrap.create_parser(pickle_id)
            whitespace = bootstrap.implicit_ws()

            bootstrap.create_lexer()

            _cache[self.name + "::lexer"] = bootstrap.inclexer
            _cache[self.name + "::json"] = (root, language, whitespaces)
            _cache[self.name + "::parser"] = (pickle_id, whitespace)

            bootstrap.incparser.lexer = bootstrap.inclexer
            return (bootstrap.incparser, bootstrap.inclexer)
Beispiel #16
0
def test_deletion():
    """Deprecated: deleting a terminal node and incrementally reparsing."""
    pytest.skip("deprecated")
    grammar = """
        S ::= C S
            | C
        C ::= "a" | "b"
    """
    parser = IncParser(grammar, LR1)
    parser.check("a a a")
    parser.previous_version = parser.get_ast()
    tree = parser.previous_version
    Viewer().show_tree(parser.previous_version.parent)

    c_node = tree.parent.children[1].children[1].children[0]
    assert c_node.symbol == Nonterminal("C")
    assert c_node.children[0].symbol == Terminal("a")
    # delete terminal node
    c_node.children.pop(0)
    apply_change(parser, c_node)
    parser.inc_parse()
    Viewer().show_tree(parser.stack[1])
    assert False
class BootstrapParser(object):
    """Bootstraps Eco grammars: parses a grammar description with an
    existing parser and builds a new incremental parser and lexer from it."""

    def __init__(self, lr_type=1, whitespaces=False):
        self.lr_type = lr_type
        self.whitespaces = whitespaces  # legacy flag; see read_options()
        # load (old) parser for grammar grammar
        self.rules = {}              # Nonterminal -> Rule (see parse_rule)
        self.lrules = []             # (name, regex) lexer rules (see create_lexer)
        self.start_symbol = None     # first rule parsed, or change_startrule
        self.incparser = None        # set by create_parser()
        self.inclexer = None         # set by create_lexer()
        self.terminals = set()       # terminal names seen in parser rules
        self.extra_alternatives = {} # rule name -> extra languagebox alternatives
        self.change_startrule = None # override the default start symbol
        self.options = {"nowhitespace": []}  # grammar options (see parse_settings)
        self.precedences = []        # (name, terminals) pairs (see parse_precedences)
        self.current_rulename = ""   # rule currently being parsed
        self.all_terminals = set()   # terminals from parser and lexer rules
        self.functions = []          # (safe_name, terminals, parentrule) (see parse_function)
        self.prod_ids = {}           # Production -> numeric id

    def implicit_ws(self):
        if self.options.has_key("implicit_ws"):
            if self.options["implicit_ws"] == "true":
                return True
        return False

    def implicit_newlines(self):
        if self.options.has_key("implicit_newlines"):
            if self.options["implicit_newlines"] == "false":
                return False
        return True

    def indentation_based(self):
        if self.options.has_key("indentation"):
            if self.options["indentation"] == "true":
                return True
        return False

    def parse(self, ecogrammar):
        """Parse an Eco grammar source string and build parser + lexer.

        Raises:
            Exception: when the input grammar contains syntax errors.
        """
        # this is only called for grammars based on Eco Grammar (not Eco Grammar (Eco))
        from grammars.eco_grammar import eco_grammar as grammar
        self.lexer = IncrementalLexer(grammar.priorities)
        self.parser = IncParser(grammar.grammar, 1, True)
        self.parser.init_ast()
        self.ast = self.parser.previous_version.parent
        self.treemanager = TreeManager()
        self.treemanager.add_parser(self.parser, self.lexer, grammar.name)
        # feed the grammar text through the tree manager (lex + parse)
        self.treemanager.import_file(ecogrammar)
        if self.parser.last_status == False:
            raise Exception("Invalid input grammar due to syntax errors")
        self.read_options()
        self.parse_both()
        self.create_parser()
        self.create_lexer()

    def parse_both(self):
        """Walk the grammar AST, collecting both parser rules and lexer rules."""
        # parse rules
        startrule = self.ast.children[1]  # startrule
        grammar = startrule.children[1]
        parser = grammar.children[0]
        assert parser.symbol.name == "parser"
        self.parse_rules(parser)

        # parse lexer
        startrule = self.ast.children[1]  # startrule
        grammar = startrule.children[1]
        # find the "lexer" section among the grammar's children
        for element in grammar.children:
            if element.symbol.name == "lexer":
                break
        lexer = element
        assert lexer.symbol.name == "lexer"
        self.parse_lexer(lexer)
        for name, regex in self.lrules:
            # collect terminals for parser modifications
            self.all_terminals.add(name)

    def read_options(self):
        """Locate the grammar's `options` section and parse it into self.options.

        Falls back to the legacy `whitespaces` constructor flag when the
        grammar declares no options section at all.
        """
        startrule = self.ast.children[1]  # startrule
        assert startrule.symbol.name == "Startrule"
        grammar = startrule.children[1]
        assert grammar.symbol.name == "grammar"
        # find the "options" section among the grammar's children
        for element in grammar.children:
            if element.symbol.name == "options":
                break
        if element.symbol.name != "options":
            # grammar has no options
            print("warning: grammar has no options")
            # backwards compatibility
            if self.whitespaces:
                self.options["implicit_ws"] = "true"
            return
        options = element
        assert options.symbol.name == "options"
        self.parse_options(options)

    def parse_options(self, options):
        if options.children == []:
            return
        if len(options.children) > 0:
            assert options.children[0].symbol.name == "settings"
            self.parse_settings(options.children[0])
        if len(options.children) > 1:
            assert options.children[1].symbol.name == "precedences"
            self.parse_precedences(options.children[1])

    def parse_settings(self, options):
        """Recursively parse a left-recursive `settings` node into self.options.

        Each option node carries its name at child index 2 and its `choice`
        at child index 6; a choice is either a single name or a choice_list.
        """
        if options.children == []:
            return
        if len(options.children) == 2:
            # left-recursive list: handle the earlier settings first
            more = options.children[0]
            self.parse_settings(more)
            option = options.children[1]
        else:
            option = options.children[0]
        name = option.children[2].symbol.name
        choice = option.children[6]
        assert choice.symbol.name == "choice"
        if choice.children[0].symbol.name == "choice_list":
            self.options[name] = self.parse_choicelist(choice.children[0])
        else:
            self.options[name] = choice.children[0].symbol.name

    def parse_choicelist(self, symbol):
        s = []
        for c in symbol.children:
            if c.symbol.name == ",":
                continue
            if c.symbol.name == "WS":
                continue
            if c.lookup == "nonterminal":
                s.append(c.symbol.name)
                continue
            if c.symbol.name == "choice_list":
                rec_s = self.parse_choicelist(symbol.children[0])
                s.extend(rec_s)
                continue
        return s

    def parse_precedences(self, precedences):
        if precedences.children == []:
            return
        # recursively parse other precedences
        if len(precedences.children) == 2:
            more = precedences.children[0]
            self.parse_precedences(more)
            precedence = precedences.children[1]
        else:
            precedence = precedences.children[0]
        # parse single precedence
        name = precedence.children[0].symbol.name
        terminals = self.parse_precedence_symbols(precedence.children[2])
        self.precedences.append((name, terminals))

    def parse_precedence_symbols(self, symbol):
        s = []
        for c in symbol.children:
            if c.symbol.name == "WS":
                continue
            if c.symbol.name == "terminals":
                rec_s = self.parse_precedence_symbols(symbol.children[0])
                s.extend(rec_s)
            if c.lookup == "terminal":
                s.append(c.symbol.name[1:-1])
        return s

    def create_parser(self, pickle_id=None):
        """Finalise the rule set and build self.incparser.

        Synthesises rules for *match_until functions and, if enabled,
        implicit whitespace (including comment alternatives), then
        instantiates the incremental parser from the collected rules.
        """
        self.all_terminals.update(self.terminals)

        for fname, terminals, parentrule in self.functions:
            if fname.startswith("*match_until"):
                if Nonterminal(fname) not in self.rules:
                    # build a rule matching any terminal not listed in `terminals`
                    r = Rule(Nonterminal(fname))
                    for t in self.all_terminals:
                        if t not in terminals:
                            r.add_alternative(
                                [Nonterminal(fname),
                                 Terminal(t)], None, t)
                    r.add_alternative([])
                    self.rules[r.symbol] = r
                # remove whitespace before special rule from parent rule, e.g.
                # multistring ::= "MLS" WS *match_until "MLS" WS
                #                       ^ this WS causes shift/reduce conflicts
                prule = self.rules[Nonterminal(parentrule)]
                for a in prule.alternatives:
                    for i in range(len(a)):
                        sym = a[i]
                        if sym.name == "WS":
                            if len(a) > i + 1 and a[i + 1].name.startswith(
                                    "*match_until"):
                                a.pop(i)
                                break

        if self.implicit_ws():
            ws_rule = Rule()
            ws_rule.symbol = Nonterminal("WS")
            ws_rule.add_alternative([Nonterminal("WS"), Terminal("<ws>")])
            # get comment rule
            # BUGFIX: dict.has_key() was removed in Python 3; use `in`.
            if 'comment_rule' in self.options:
                cmt_rules = self.options['comment_rule']
                for cmt_rule in cmt_rules:
                    if Nonterminal(cmt_rule) in self.rules:
                        ws_rule.add_alternative(
                            [Nonterminal("WS"),
                             Nonterminal("comment")])
            if self.implicit_newlines():
                ws_rule.add_alternative(
                    [Nonterminal("WS"),
                     Terminal("<return>")])
                ws_rule.add_alternative([
                    Nonterminal("WS"),
                    Terminal("<backslash>"),
                    Terminal("<return>")
                ])
            ws_rule.add_alternative([])  # or empty
            self.rules[ws_rule.symbol] = ws_rule
            for a in ws_rule.alternatives:
                self.prod_ids[Production(ws_rule.symbol,
                                         a)] = len(self.prod_ids)

            # allow whitespace/comments at beginning of file
            start_rule = Rule()
            start_rule.symbol = Nonterminal("Startrule")
            start_rule.add_alternative([Nonterminal("WS"), self.start_symbol])
            self.rules[start_rule.symbol] = start_rule
            self.prod_ids[Production(start_rule.symbol,
                                     start_rule.alternatives[0])] = len(
                                         self.prod_ids)
            self.start_symbol = start_rule.symbol

        incparser = IncParser()
        incparser.from_dict(self.rules, self.start_symbol, self.lr_type,
                            self.implicit_ws(), pickle_id, self.precedences,
                            self.prod_ids)
        incparser.init_ast()
        self.incparser = incparser

    def parse_rules(self, node):
        if node.children[0].symbol.name == "parser":
            self.parse_rules(node.children[0])
            self.parse_rule(node.children[3])
        elif node.children[0].symbol.name == "rule":
            self.parse_rule(node.children[0])

    def parse_rule(self, node):
        """Parse a single grammar rule node into a Rule and register it.

        The first rule parsed becomes the start symbol unless
        change_startrule selects a different one.
        """
        name = node.children[0].symbol.name
        self.current_rulename = name
        alternatives = self.parse_alternatives(node.children[4])
        symbol = Nonterminal(name)
        if self.start_symbol is None:
            self.start_symbol = symbol
        if self.change_startrule and symbol.name == self.change_startrule:
            self.start_symbol = symbol
        r = Rule(symbol)
        for a in alternatives:
            r.add_alternative(a[0], a[1], a[2])
            self.prod_ids[Production(symbol, a[0])] = len(self.prod_ids)
        # add additional alternatives to the grammar (grammar extension feature, e.g. languageboxes)
        # BUGFIX: dict.has_key() was removed in Python 3; use `in`.
        if symbol.name in self.extra_alternatives:
            for n in self.extra_alternatives[symbol.name]:
                a = [MagicTerminal(n), Nonterminal("WS")]
                r.add_alternative(a)
                self.prod_ids[Production(symbol, a)] = len(self.prod_ids)
        self.rules[symbol] = r

    def parse_alternatives(self, node):
        if node.children[0].symbol.name == "alternatives":
            alternatives = self.parse_alternatives(node.children[0])
            alternative = self.parse_alternative(node.children[3])
            alternatives.append(alternative)
            return alternatives
        elif node.children[0].symbol.name == "right":
            return [self.parse_alternative(node.children[0])]

    def parse_alternative(self, node):
        if len(node.children) > 0:
            annotation = None
            prec = None
            for c in node.children:
                if c.symbol.name == "symbols":
                    symbols = self.parse_symbols(c)
                if c.symbol.name == "prec":
                    prec = self.parse_prec(c)
                if c.symbol.name == "annotations":
                    annotation = self.parse_annotation(c)
            return (symbols, annotation, prec)
        else:
            return ([], None, None)

    def parse_prec(self, node):
        if node.children:
            c = node.children[2]
            return c.symbol.name[1:-1]

    def parse_symbols(self, node):
        """Recursively flatten a `symbols` node into a list of symbol objects.

        When implicit whitespace is enabled (and the current rule is not
        listed under the "nowhitespace" option), a WS nonterminal is appended
        after each terminal.
        """
        if node.children[0].symbol.name == "symbols":
            symbols = self.parse_symbols(node.children[0])
            symbol = self.parse_symbol(node.children[1])
            symbols.append(symbol)
            if (
                    isinstance(symbol, Terminal)
                    or isinstance(symbol, MagicTerminal)
            ) and self.implicit_ws(
            ) and self.current_rulename not in self.options["nowhitespace"]:
                symbols.append(Nonterminal("WS"))
            return symbols
        elif node.children[0].symbol.name == "symbol":
            l = []
            symbol = self.parse_symbol(node.children[0])
            l.append(symbol)
            # NOTE(review): unlike the branch above, a MagicTerminal does NOT
            # get a trailing WS here — confirm whether that is intentional.
            if isinstance(symbol, Terminal) and self.implicit_ws(
            ) and self.current_rulename not in self.options["nowhitespace"]:
                l.append(Nonterminal("WS"))
            return l

    def parse_symbol(self, node):
        """Convert a `symbol` node into a Nonterminal/Terminal/MagicTerminal.

        Terminals are registered in self.terminals with their surrounding
        quotes stripped; "<eos>" is excluded from registration. `function`
        nodes yield a synthesised nonterminal (see parse_function).
        """
        node = node.children[0]
        if node.lookup == "nonterminal":
            return Nonterminal(node.symbol.name)
        elif node.lookup == "terminal":
            if node.symbol.name != "\"<eos>\"":
                self.terminals.add(node.symbol.name[1:-1])
            return Terminal(node.symbol.name[1:-1])
        elif node.lookup == "languagebox":
            return MagicTerminal(node.symbol.name)
        elif node.symbol.name == "function":
            return self.parse_function(node)

    def parse_function(self, node):
        """Parse a grammar function call (e.g. match_until) into a nonterminal.

        The generated name is prefixed with "*" and includes a hash of the
        argument terminals so identical calls can share one rule.
        """
        fname = node.children[0].symbol.name
        terminals = self.parse_fargs(node.children[4])
        safe_name = "*%s%s" % (fname, hash(frozenset(terminals)))
        self.functions.append((safe_name, terminals, self.current_rulename))
        return Nonterminal(safe_name)

    def parse_fargs(self, symbol):
        s = []
        for c in symbol.children:
            if c.symbol.name == ",":
                continue
            if c.symbol.name == "WS":
                continue
            if c.lookup == "terminal":
                s.append(c.symbol.name[1:-1])
                continue
            if c.symbol.name == "f_args":
                rec_s = self.parse_fargs(symbol.children[0])
                s.extend(rec_s)
        return s

    def parse_annotation(self, node):
        a_options = node.children[2]
        assert a_options.symbol.name == "a_options"
        if a_options.children[0].symbol.name == "astnode":
            return self.parse_astnode(a_options.children[0])
        elif a_options.children[0].symbol.name == "expression":
            return self.parse_expression(a_options.children[0])
        elif a_options.children[0].symbol.name == "forloop":
            return self.parse_foreach(a_options.children[0])

    def parse_astnode(self, node):
        """Build an AstNode from its name and its name=expr children."""
        name = node.children[0].symbol.name
        attrs = dict(self.parse_astnode_children(node.children[4]))
        return AstNode(name, attrs)

    def parse_astnode_children(self, node):
        assert node.symbol.name == "astnode_children"
        if node.children[0].symbol.name == "astnode_child":
            return [self.parse_astnode_child(node.children[0])]
        elif node.children[0].symbol.name == "astnode_children":
            children = self.parse_astnode_children(node.children[0])
            child = self.parse_astnode_child(node.children[3])
            children.append(child)
            return children

    def parse_astnode_child(self, node):
        """Parse one `name=expr` child of an astnode into a (name, expr) pair."""
        assert node.symbol.name == "astnode_child"
        name = node.children[0].symbol.name
        # NOTE(review): if children[4] is neither "expression" nor "reference",
        # `expr` is unbound and the return raises NameError — presumably the
        # grammar guarantees one of the two; confirm.
        if node.children[4].symbol.name == "expression":
            expr = self.parse_expression(node.children[4])
        elif node.children[4].symbol.name == "reference":
            expr = self.parse_reference(node.children[4])
        return (name, expr)

    def parse_expression(self, node):
        """Parse an expression node into an expression object; compound
        expressions become an AddExpr of their two operands."""
        head = node.children[0]
        kind = head.symbol.name
        if kind == "node":
            return self.parse_node(head)
        if kind == "list":
            return self.parse_list(head)
        if kind == "node_ref":
            return self.parse_noderef(head)
        # compound: <expression> + <node|list>
        left = self.parse_expression(head)
        operand = node.children[3]
        if operand.symbol.name == "node":
            right = self.parse_node(operand)
        else:
            right = self.parse_list(operand)
        return AddExpr(left, right)

    def parse_foreach(self, node):
        """Build a Foreach annotation from a forloop node."""
        loop_item = self.parse_node(node.children[4])
        body = self.parse_astnode(node.children[7])
        return Foreach(node.symbol.name, loop_item, body)

    def parse_noderef(self, node):
        """Parse a node reference (node.attribute) by attaching the attribute
        name to the parsed lookup expression."""
        expr = self.parse_node(node.children[0])
        expr.attribute = node.children[3].symbol.name
        return expr

    def parse_node(self, node):
        """Parse a positional node lookup into a LookupExpr by index."""
        index_text = node.children[2].symbol.name
        return LookupExpr(int(index_text))

    def parse_list(self, node):
        """Parse a list expression into a ListExpr of its elements."""
        elements = self.parse_listloop(node.children[2])
        return ListExpr(elements)

    def parse_reference(self, node):
        """Parse a `base.ref` reference into a ReferenceExpr."""
        base_name = node.children[0].symbol.name
        ref_name = node.children[4].symbol.name
        return ReferenceExpr(base_name, ref_name)

    def parse_listloop(self, node):
        """Flatten a (possibly left-recursive) list_loop into a Python list."""
        if not node.children:
            return []
        head = node.children[0]
        if head.symbol.name != "list_loop":
            # single element
            return [self.parse_unknown(head)]
        # recursive case: flatten the prefix, then append this element
        elements = self.parse_listloop(head)
        elements.append(self.parse_unknown(node.children[3]))
        return elements

    def parse_unknown(self, node):
        """Dispatch a list element to the node or astnode parser."""
        kind = node.symbol.name
        if kind == "astnode":
            return self.parse_astnode(node)
        if kind == "node":
            return self.parse_node(node)

    def create_lexer(self, buildlexer=True):
        """Build the incremental lexer from the collected lexer rules.

        Terminals that appear in the grammar but have no explicit lexer
        rule are prepended as literal (regex-escaped) matches.  With
        buildlexer=False only the (names, regexs) pair is stored on
        self.inclexer instead of a real lexer.
        """
        import re

        names = []
        regexs = []
        for rule_name, rule_regex in self.lrules:
            names.append(rule_name)
            self.all_terminals.add(rule_name)
            regexs.append(rule_regex)
        # terminals without an explicit lexer rule become literal matches
        # NOTE(review): set iteration makes their relative order
        # nondeterministic across runs -- confirm this is acceptable
        for terminal in self.terminals.difference(set(names)):
            names.insert(0, terminal)
            regexs.insert(0, re.escape(terminal))
        if not buildlexer:
            self.inclexer = (names, regexs)
            return
        self.inclexer = IncrementalLexerCF()
        self.inclexer.from_name_and_regex(names, regexs)
        if self.indentation_based():
            self.inclexer.indentation_based = True

    def parse_lexer(self, lexer):
        """Walk the (left-recursive) lexer section, parsing every lrule."""
        head = lexer.children[0]
        if head.symbol.name == "lexer":
            # recurse into the prefix, then handle the trailing rule
            self.parse_lexer(head)
            self.parse_lrule(lexer.children[1])
        elif head.symbol.name == "lrule":
            self.parse_lrule(head)

    def parse_lrule(self, lrule):
        """Record a single lexer rule as a (token name, regex) pair.

        The regex literal's surrounding quote characters are stripped.
        """
        assert lrule.children[0].symbol.name == "tokenname"
        token = lrule.children[0].children[0].symbol.name
        pattern = lrule.children[3].symbol.name[1:-1]
        self.lrules.append((token, pattern))
    def create_parser(self, pickle_id=None):
        """Assemble the IncParser from the collected grammar rules.

        Synthesizes rules for `*match_until` helper functions, and --
        when implicit whitespace is enabled -- a WS rule plus a wrapping
        start rule so whitespace/comments may appear at the beginning of
        a file.  Production ids are registered for the synthesized
        alternatives before handing everything to IncParser.from_dict.
        """
        self.all_terminals.update(self.terminals)

        for fname, terminals, parentrule in self.functions:
            if fname.startswith("*match_until"):
                if Nonterminal(fname) not in self.rules:
                    # *match_until consumes any terminal not in its stop set
                    r = Rule(Nonterminal(fname))
                    for t in self.all_terminals:
                        if t not in terminals:
                            r.add_alternative(
                                [Nonterminal(fname),
                                 Terminal(t)], None, t)
                    r.add_alternative([])
                    self.rules[r.symbol] = r
                # remove whitespace before special rule from parent rule, e.g.
                # multistring ::= "MLS" WS *match_until "MLS" WS
                #                       ^ this WS causes shift/reduce conflicts
                prule = self.rules[Nonterminal(parentrule)]
                for a in prule.alternatives:
                    for i in range(len(a)):
                        sym = a[i]
                        if sym.name == "WS":
                            if len(a) > i + 1 and a[i + 1].name.startswith(
                                    "*match_until"):
                                a.pop(i)
                                break

        if self.implicit_ws():
            ws_rule = Rule()
            ws_rule.symbol = Nonterminal("WS")
            ws_rule.add_alternative([Nonterminal("WS"), Terminal("<ws>")])
            # get comment rule
            # FIX: dict.has_key was removed in Python 3; `in` is equivalent
            # and works in both Python 2 and 3
            if 'comment_rule' in self.options:
                cmt_rules = self.options['comment_rule']
                for cmt_rule in cmt_rules:
                    # NOTE(review): adds the same [WS, comment] alternative
                    # for every matching rule instead of using `cmt_rule`
                    # itself -- confirm this is intended
                    if Nonterminal(cmt_rule) in self.rules:
                        ws_rule.add_alternative(
                            [Nonterminal("WS"),
                             Nonterminal("comment")])
            if self.implicit_newlines():
                ws_rule.add_alternative(
                    [Nonterminal("WS"),
                     Terminal("<return>")])
                ws_rule.add_alternative([
                    Nonterminal("WS"),
                    Terminal("<backslash>"),
                    Terminal("<return>")
                ])
            ws_rule.add_alternative([])  # or empty
            self.rules[ws_rule.symbol] = ws_rule
            for a in ws_rule.alternatives:
                self.prod_ids[Production(ws_rule.symbol,
                                         a)] = len(self.prod_ids)

            # allow whitespace/comments at beginning of file
            start_rule = Rule()
            start_rule.symbol = Nonterminal("Startrule")
            start_rule.add_alternative([Nonterminal("WS"), self.start_symbol])
            self.rules[start_rule.symbol] = start_rule
            self.prod_ids[Production(start_rule.symbol,
                                     start_rule.alternatives[0])] = len(
                                         self.prod_ids)
            self.start_symbol = start_rule.symbol

        incparser = IncParser()
        incparser.from_dict(self.rules, self.start_symbol, self.lr_type,
                            self.implicit_ws(), pickle_id, self.precedences,
                            self.prod_ids)
        incparser.init_ast()
        self.incparser = incparser
Beispiel #19
0
# IN THE SOFTWARE.

"""Program for quick viewing the AST of a given program using a given annotated grammar"""

from viewer import Viewer
from grammars.grammars import python275_annotated
from treemanager import TreeManager
from incparser.incparser import IncParser
from inclexer.inclexer import IncrementalLexer
from incparser.astree import BOS, EOS

# Build a lexer/parser pair for the annotated Python 2.7.5 grammar and
# attach them to a TreeManager for incremental editing.
grammar = python275_annotated
whitespace = True

lexer = IncrementalLexer(grammar.priorities)
# second argument presumably selects LR(1) -- TODO confirm against IncParser
parser = IncParser(grammar.grammar, 1, whitespace)
parser.init_ast()
ast = parser.previous_version
treemanager = TreeManager()
treemanager.add_parser(parser, lexer, grammar.name)
treemanager.set_font_test(7, 17) # hard coded. PyQt segfaults in test suite

inputstring = """import abc.xyz as efg
from x import z

class Test:
    def x():
        if x == 1:
            z = 3 + 4 * 5
        elif x == 2:
            for x in range(2,10):
Beispiel #20
0
class BootstrapParser(object):
    """Bootstrap an incremental parser/lexer pair from an Eco grammar.

    The grammar text is parsed with a pre-existing parser for the
    grammar language; this class then walks the resulting AST to
    collect options, precedences, parser rules and lexer rules, and
    finally builds an IncParser and an IncrementalLexerCF from them.
    """

    def __init__(self, lr_type=1, whitespaces=False):
        self.lr_type = lr_type
        self.whitespaces = whitespaces
        # load (old) parser for grammar grammar
        self.rules = {}               # Nonterminal -> Rule
        self.lrules = []              # lexer rules as (token name, regex)
        self.start_symbol = None
        self.incparser = None
        self.inclexer = None
        self.terminals = set()        # terminal names seen in parser rules
        self.extra_alternatives = {}  # grammar extensions, e.g. language boxes
        self.change_startrule = None
        self.options = {}
        self.precedences = []

    def implicit_ws(self):
        """Return True if the "implicit_ws" option is set to "true"."""
        # FIX: dict.has_key was removed in Python 3; `in` works in 2 and 3
        if "implicit_ws" in self.options:
            if self.options["implicit_ws"] == "true":
                return True
        return False

    def indentation_based(self):
        """Return True if the "indentation" option is set to "true"."""
        if "indentation" in self.options:
            if self.options["indentation"] == "true":
                return True
        return False

    def parse(self, ecogrammar):
        """Parse the given grammar text and build parser and lexer from it.

        Raises Exception when the grammar text itself does not parse.
        """
        # this is only called for grammars based on Eco Grammar (not Eco Grammar (Eco))
        from grammars.eco_grammar import eco_grammar as grammar
        self.lexer = IncrementalLexer(grammar.priorities)
        self.parser = IncParser(grammar.grammar, 1, True)
        self.parser.init_ast()
        self.ast = self.parser.previous_version.parent
        self.treemanager = TreeManager()
        self.treemanager.add_parser(self.parser, self.lexer, grammar.name)
        self.treemanager.import_file(ecogrammar)
        if self.parser.last_status == False:
            raise Exception("Invalid input grammar: at %s %s" % (self.parser.error_node.prev_term, self.parser.error_node))
        self.read_options()
        self.create_parser()
        self.create_lexer()

    def read_options(self):
        """Locate the grammar's options section and parse it, if present."""
        startrule = self.ast.children[1] # startrule
        assert startrule.symbol.name == "Startrule"
        grammar = startrule.children[1]
        assert grammar.symbol.name == "grammar"
        for element in grammar.children:
            if element.symbol.name == "options":
                break
        if element.symbol.name != "options":
            # grammar has no options
            print("warning: grammar has no options")
            # backwards compatibility: honour the constructor flag instead
            if self.whitespaces:
                self.options["implicit_ws"] = "true"
            return
        options = element
        assert options.symbol.name == "options"
        self.parse_options(options)

    def parse_options(self, options):
        """Parse the options node: settings followed by optional precedences."""
        if options.children == []:
            return
        if len(options.children) > 0:
            assert options.children[0].symbol.name == "settings"
            self.parse_settings(options.children[0])
        if len(options.children) > 1:
            assert options.children[1].symbol.name == "precedences"
            self.parse_precedences(options.children[1])

    def parse_settings(self, options):
        """Recursively collect `name = choice` settings into self.options."""
        if options.children == []:
            return
        if len(options.children) == 2:
            more = options.children[0]
            self.parse_settings(more)
            option = options.children[1]
        else:
            option = options.children[0]
        name = option.children[2].symbol.name
        choice = option.children[6]
        assert choice.symbol.name == "choice"
        self.options[name] = choice.children[0].symbol.name

    def parse_precedences(self, precedences):
        """Recursively collect (associativity, terminals) precedence pairs."""
        if precedences.children == []:
            return
        # recursively parse other precedences
        if len(precedences.children) == 2:
            more = precedences.children[0]
            self.parse_precedences(more)
            precedence = precedences.children[1]
        else:
            precedence = precedences.children[0]
        # parse single precedence
        name = precedence.children[0].symbol.name
        terminals = self.parse_precedence_symbols(precedence.children[2])
        self.precedences.append((name, terminals))

    def parse_precedence_symbols(self, symbol):
        """Collect the terminal names (quotes stripped) of one precedence."""
        s = []
        for c in symbol.children:
            if c.symbol.name == "WS":
                continue
            if c.symbol.name == "terminals":
                # NOTE(review): recurses into symbol.children[0] rather than
                # into `c` -- correct only if the nested "terminals" node is
                # always the first child; confirm against the grammar
                rec_s = self.parse_precedence_symbols(symbol.children[0])
                s.extend(rec_s)
            if c.lookup == "terminal":
                s.append(c.symbol.name[1:-1])
        return s

    def create_parser(self, pickle_id = None):
        """Build the IncParser from the parsed rules.

        With implicit whitespace enabled, synthesizes a WS rule and a
        wrapping start rule so whitespace may appear at the start of a
        file.
        """
        startrule = self.ast.children[1] # startrule
        grammar = startrule.children[1]
        parser = grammar.children[0]
        assert parser.symbol.name == "parser"
        self.parse_rules(parser)

        if self.implicit_ws():
            ws_rule = Rule()
            ws_rule.symbol = Nonterminal("WS")
            ws_rule.add_alternative([Terminal("<ws>"), Nonterminal("WS")])
            ws_rule.add_alternative([Terminal("<return>"), Nonterminal("WS")])
            ws_rule.add_alternative([Terminal("<backslash>"), Terminal("<return>"), Nonterminal("WS")])
            ws_rule.add_alternative([]) # or empty
            self.rules[ws_rule.symbol] = ws_rule

            # allow whitespace/comments at beginning of file
            start_rule = Rule()
            start_rule.symbol = Nonterminal("Startrule")
            start_rule.add_alternative([Nonterminal("WS"), self.start_symbol])
            self.rules[start_rule.symbol] = start_rule
            self.start_symbol = start_rule.symbol

        incparser = IncParser()
        incparser.from_dict(self.rules, self.start_symbol, self.lr_type, self.implicit_ws(), pickle_id, self.precedences)
        incparser.init_ast()
        self.incparser = incparser

    def parse_rules(self, node):
        """Walk the (left-recursive) rule list, parsing each rule."""
        if node.children[0].symbol.name == "parser":
            self.parse_rules(node.children[0])
            self.parse_rule(node.children[3])
        elif node.children[0].symbol.name == "rule":
            self.parse_rule(node.children[0])

    def parse_rule(self, node):
        """Parse one grammar rule and register it in self.rules.

        The first rule becomes the start symbol unless change_startrule
        overrides it.
        """
        name = node.children[0].symbol.name
        alternatives = self.parse_alternatives(node.children[4])
        symbol = Nonterminal(name)
        if self.start_symbol is None:
            self.start_symbol = symbol
        if self.change_startrule and symbol.name == self.change_startrule:
            self.start_symbol = symbol
        r = Rule(symbol)
        for a in alternatives:
            r.add_alternative(a[0], a[1], a[2])
        # add additional alternatives to the grammar (grammar extension feature, e.g. languageboxes)
        # FIX: dict.has_key was removed in Python 3; `in` works in 2 and 3
        if symbol.name in self.extra_alternatives:
            for n in self.extra_alternatives[symbol.name]:
                r.add_alternative([MagicTerminal(n), Nonterminal("WS")], None)
        self.rules[symbol] = r

    def parse_alternatives(self, node):
        """Flatten the (left-recursive) alternatives list of a rule."""
        if node.children[0].symbol.name == "alternatives":
            alternatives = self.parse_alternatives(node.children[0])
            alternative = self.parse_alternative(node.children[3])
            alternatives.append(alternative)
            return alternatives
        elif node.children[0].symbol.name == "right":
            return [self.parse_alternative(node.children[0])]

    def parse_alternative(self, node):
        """Parse one alternative into a (symbols, annotation, prec) tuple."""
        if len(node.children) > 0:
            annotation = None
            prec = None
            for c in node.children:
                if c.symbol.name == "symbols":
                    symbols = self.parse_symbols(c)
                if c.symbol.name == "prec":
                    prec = self.parse_prec(c)
                if c.symbol.name == "annotations":
                    annotation = self.parse_annotation(c)
            return (symbols, annotation, prec)
        else:
            return ([], None, None)

    def parse_prec(self, node):
        """Return the precedence terminal name (quotes stripped), or None."""
        if node.children:
            c = node.children[2]
            return c.symbol.name[1:-1]

    def parse_symbols(self, node):
        """Flatten the symbol list, inserting WS after terminals if needed."""
        if node.children[0].symbol.name == "symbols":
            symbols = self.parse_symbols(node.children[0])
            symbol = self.parse_symbol(node.children[1])
            symbols.append(symbol)
            if (isinstance(symbol, Terminal) or isinstance(symbol, MagicTerminal)) and self.implicit_ws():
                symbols.append(Nonterminal("WS"))
            return symbols
        elif node.children[0].symbol.name == "symbol":
            l = []
            symbol = self.parse_symbol(node.children[0])
            l.append(symbol)
            # NOTE(review): unlike the branch above, this one does not add WS
            # after a MagicTerminal -- confirm whether that is intended
            if isinstance(symbol, Terminal) and self.implicit_ws():
                l.append(Nonterminal("WS"))
            return l

    def parse_symbol(self, node):
        """Convert a symbol node into a Nonterminal/Terminal/MagicTerminal."""
        node = node.children[0]
        if node.lookup == "nonterminal":
            return Nonterminal(node.symbol.name)
        elif node.lookup == "terminal":
            # strip surrounding quotes; remember the terminal for the lexer
            self.terminals.add(node.symbol.name[1:-1])
            return Terminal(node.symbol.name[1:-1])
        elif node.lookup == "languagebox":
            return MagicTerminal(node.symbol.name)

    def parse_annotation(self, node):
        """Dispatch an annotation to the astnode/expression/foreach parser."""
        a_options = node.children[2]
        assert a_options.symbol.name == "a_options"
        if a_options.children[0].symbol.name == "astnode":
            return self.parse_astnode(a_options.children[0])
        elif a_options.children[0].symbol.name == "expression":
            return self.parse_expression(a_options.children[0])
        elif a_options.children[0].symbol.name == "forloop":
            return self.parse_foreach(a_options.children[0])

    def parse_astnode(self, node):
        """Parse an astnode annotation into an AstNode(name, children)."""
        name = node.children[0].symbol.name
        children = self.parse_astnode_children(node.children[4])
        d = {}
        for n, expr in children:
            d[n] = expr
        return AstNode(name, d)

    def parse_astnode_children(self, node):
        """Flatten the (left-recursive) astnode child list."""
        assert node.symbol.name == "astnode_children"
        if node.children[0].symbol.name == "astnode_child":
            return [self.parse_astnode_child(node.children[0])]
        elif node.children[0].symbol.name == "astnode_children":
            children = self.parse_astnode_children(node.children[0])
            child = self.parse_astnode_child(node.children[3])
            children.append(child)
            return children

    def parse_astnode_child(self, node):
        """Parse one `name=value` astnode entry into a (name, expr) tuple."""
        assert node.symbol.name == "astnode_child"
        name = node.children[0].symbol.name
        if node.children[4].symbol.name == "expression":
            expr = self.parse_expression(node.children[4])
        elif node.children[4].symbol.name == "reference":
            expr = self.parse_reference(node.children[4])
        return (name, expr)

    def parse_expression(self, node):
        """Parse an annotation expression (lookup, list, ref or AddExpr)."""
        if node.children[0].symbol.name == "node":
            return self.parse_node(node.children[0])
        elif node.children[0].symbol.name == "list":
            return self.parse_list(node.children[0])
        elif node.children[0].symbol.name == "node_ref":
            return self.parse_noderef(node.children[0])
        else:
            # left-recursive `expr + (node|list)` form
            expr1 = self.parse_expression(node.children[0])
            if node.children[3].symbol.name == "node":
                expr2 = self.parse_node(node.children[3])
            else:
                expr2 = self.parse_list(node.children[3])
            return AddExpr(expr1, expr2)

    def parse_foreach(self, node):
        """Parse a foreach annotation: loop item plus astnode body."""
        item = self.parse_node(node.children[4])
        expr = self.parse_astnode(node.children[7])
        return Foreach(node.symbol.name, item, expr)

    def parse_noderef(self, node):
        """Parse a node reference: a lookup with an attribute attached."""
        lookup = self.parse_node(node.children[0])
        attr = node.children[3]
        lookup.attribute = attr.symbol.name
        return lookup

    def parse_node(self, node):
        """Parse a positional node lookup into a LookupExpr by index."""
        return LookupExpr(int(node.children[2].symbol.name))

    def parse_list(self, node):
        """Parse a list expression into a ListExpr of its elements."""
        return ListExpr(self.parse_listloop(node.children[2]))

    def parse_reference(self, node):
        """Parse a `base.ref` reference into a ReferenceExpr."""
        base = node.children[0].symbol.name
        ref = node.children[4].symbol.name
        return ReferenceExpr(base, ref)

    def parse_listloop(self, node):
        """Flatten a (possibly left-recursive) list_loop into a list."""
        if len(node.children) == 0:
            return []
        if node.children[0].symbol.name == "list_loop":
            l = self.parse_listloop(node.children[0])
            element = self.parse_unknown(node.children[3])
            l.append(element)
            return l
        else:
            return [self.parse_unknown(node.children[0])]

    def parse_unknown(self, node):
        """Dispatch a list element to the node or astnode parser."""
        if node.symbol.name == "node":
            return self.parse_node(node)
        elif node.symbol.name == "astnode":
            return self.parse_astnode(node)

    def create_lexer(self):
        """Locate the grammar's lexer section and build the lexer from it."""
        startrule = self.ast.children[1] # startrule
        grammar = startrule.children[1]
        for element in grammar.children:
            if element.symbol.name == "lexer":
                break
        lexer = element
        assert lexer.symbol.name == "lexer"
        self.parse_lexer(lexer)
        names = []
        regexs = []
        for name, regex in self.lrules:
            names.append(name)
            regexs.append(regex)
        # add so far undefined terminals as literal (escaped) matches
        undefined_terminals = self.terminals.difference(set(names))
        import re
        for t in undefined_terminals:
            names.insert(0, t)
            regexs.insert(0, re.escape(t))
        self.inclexer = IncrementalLexerCF()
        self.inclexer.from_name_and_regex(names, regexs)
        if self.indentation_based():
            self.inclexer.indentation_based = True

    def parse_lexer(self, lexer):
        """Walk the (left-recursive) lexer section, parsing every lrule."""
        if lexer.children[0].symbol.name == "lrule":
            self.parse_lrule(lexer.children[0])
        elif lexer.children[0].symbol.name == "lexer":
            self.parse_lexer(lexer.children[0])
            self.parse_lrule(lexer.children[1])

    def parse_lrule(self, lrule):
        """Record one lexer rule as a (token name, regex) pair."""
        assert lrule.children[0].symbol.name == "tokenname"
        name = lrule.children[0].children[0].symbol.name
        regex = lrule.children[3].symbol.name[1:-1]
        self.lrules.append((name, regex))
Beispiel #21
0
 def setup_class(cls):
     cls.lexer = IncrementalLexer(calc1.priorities)
     cls.parser = IncParser(calc1.grammar, 1, True)
     cls.parser.init_ast()
     cls.ast = cls.parser.previous_version