Ejemplo n.º 1
0
 def load(self):
     """Create the incremental lexer from ``lexingrules`` and a fresh parser.

     Returns the ``(parser, lexer)`` pair.
     """
     from inclexer.inclexer import IncrementalLexerCF
     lexer = IncrementalLexerCF()
     # Materialise the (name, regex) pairs once, then split them into the
     # two parallel lists the lexer expects.
     pairs = [(name, regex) for name, regex in lexingrules]
     token_names = [pair[0] for pair in pairs]
     token_regexs = [pair[1] for pair in pairs]
     lexer.from_name_and_regex(token_names, token_regexs)
     parser = RubyParser()
     parser.init_ast()
     return parser, lexer
Ejemplo n.º 2
0
class BootstrapParser(object):
    def __init__(self, lr_type=1, whitespaces=False):
        self.lr_type = lr_type
        self.whitespaces = whitespaces
        # load (old) parser for grammar grammar
        self.rules = {}
        self.lrules = []
        self.start_symbol = None
        self.incparser = None
        self.inclexer = None
        self.terminals = set()
        self.extra_alternatives = {}
        self.change_startrule = None
        self.options = {"nowhitespace": []}
        self.precedences = []
        self.current_rulename = ""
        self.all_terminals = set()
        self.functions = []
        self.prod_ids = {}

    def implicit_ws(self):
        if self.options.has_key("implicit_ws"):
            if self.options["implicit_ws"] == "true":
                return True
        return False

    def implicit_newlines(self):
        if self.options.has_key("implicit_newlines"):
            if self.options["implicit_newlines"] == "false":
                return False
        return True

    def indentation_based(self):
        if self.options.has_key("indentation"):
            if self.options["indentation"] == "true":
                return True
        return False

    def parse(self, ecogrammar):
        """Parse the grammar text ``ecogrammar`` and build parser and lexer.

        The text is imported into a tree manager driven by the Eco grammar's
        own parser/lexer; the resulting tree is then read by the
        ``read_options`` / ``parse_both`` / ``create_parser`` /
        ``create_lexer`` steps, in that order.

        Raises:
            Exception: if the grammar parser reports a failed parse.
        """
        # this is only called for grammars based on Eco Grammar (not Eco Grammar (Eco))
        from grammars.eco_grammar import eco_grammar as grammar
        self.lexer = IncrementalLexer(grammar.priorities)
        self.parser = IncParser(grammar.grammar, 1, True)
        self.parser.init_ast()
        # root of the tree that the later steps walk via self.ast.children
        self.ast = self.parser.previous_version.parent
        self.treemanager = TreeManager()
        self.treemanager.add_parser(self.parser, self.lexer, grammar.name)
        self.treemanager.import_file(ecogrammar)
        if self.parser.last_status == False:
            raise Exception("Invalid input grammar due to syntax errors")
        # options are read first: rule parsing consults implicit_ws()
        self.read_options()
        self.parse_both()
        self.create_parser()
        self.create_lexer()

    def parse_both(self):
        # parse rules
        startrule = self.ast.children[1]  # startrule
        grammar = startrule.children[1]
        parser = grammar.children[0]
        assert parser.symbol.name == "parser"
        self.parse_rules(parser)

        # parse lexer
        startrule = self.ast.children[1]  # startrule
        grammar = startrule.children[1]
        for element in grammar.children:
            if element.symbol.name == "lexer":
                break
        lexer = element
        assert lexer.symbol.name == "lexer"
        self.parse_lexer(lexer)
        for name, regex in self.lrules:
            # collect terminals for parser modifications
            self.all_terminals.add(name)

    def read_options(self):
        startrule = self.ast.children[1]  # startrule
        assert startrule.symbol.name == "Startrule"
        grammar = startrule.children[1]
        assert grammar.symbol.name == "grammar"
        for element in grammar.children:
            if element.symbol.name == "options":
                break
        if element.symbol.name != "options":
            # grammar has no options
            print("warning: grammar has no options")
            # backwards compatibility
            if self.whitespaces:
                self.options["implicit_ws"] = "true"
            return
        options = element
        assert options.symbol.name == "options"
        self.parse_options(options)

    def parse_options(self, options):
        if options.children == []:
            return
        if len(options.children) > 0:
            assert options.children[0].symbol.name == "settings"
            self.parse_settings(options.children[0])
        if len(options.children) > 1:
            assert options.children[1].symbol.name == "precedences"
            self.parse_precedences(options.children[1])

    def parse_settings(self, options):
        if options.children == []:
            return
        if len(options.children) == 2:
            more = options.children[0]
            self.parse_settings(more)
            option = options.children[1]
        else:
            option = options.children[0]
        name = option.children[2].symbol.name
        choice = option.children[6]
        assert choice.symbol.name == "choice"
        if choice.children[0].symbol.name == "choice_list":
            self.options[name] = self.parse_choicelist(choice.children[0])
        else:
            self.options[name] = choice.children[0].symbol.name

    def parse_choicelist(self, symbol):
        s = []
        for c in symbol.children:
            if c.symbol.name == ",":
                continue
            if c.symbol.name == "WS":
                continue
            if c.lookup == "nonterminal":
                s.append(c.symbol.name)
                continue
            if c.symbol.name == "choice_list":
                rec_s = self.parse_choicelist(symbol.children[0])
                s.extend(rec_s)
                continue
        return s

    def parse_precedences(self, precedences):
        if precedences.children == []:
            return
        # recursively parse other precedences
        if len(precedences.children) == 2:
            more = precedences.children[0]
            self.parse_precedences(more)
            precedence = precedences.children[1]
        else:
            precedence = precedences.children[0]
        # parse single precedence
        name = precedence.children[0].symbol.name
        terminals = self.parse_precedence_symbols(precedence.children[2])
        self.precedences.append((name, terminals))

    def parse_precedence_symbols(self, symbol):
        s = []
        for c in symbol.children:
            if c.symbol.name == "WS":
                continue
            if c.symbol.name == "terminals":
                rec_s = self.parse_precedence_symbols(symbol.children[0])
                s.extend(rec_s)
            if c.lookup == "terminal":
                s.append(c.symbol.name[1:-1])
        return s

    def create_parser(self, pickle_id=None):
        """Build ``self.incparser`` from the rules collected by ``parse_rules``.

        Steps, in order:
          1. add a rule for every recorded ``*match_until`` function and
             drop the "WS" symbol directly preceding it in the parent rule;
          2. if implicit whitespace is enabled, add the "WS" rule and wrap
             the start symbol in a new "Startrule";
          3. create the ``IncParser`` from the resulting rule dictionary.

        ``pickle_id`` is passed through unchanged to ``IncParser.from_dict``.
        """
        self.all_terminals.update(self.terminals)

        for fname, terminals, parentrule in self.functions:
            if fname.startswith("*match_until"):
                if Nonterminal(fname) not in self.rules:
                    r = Rule(Nonterminal(fname))
                    # one alternative per terminal that is NOT in the
                    # function's argument list
                    for t in self.all_terminals:
                        if t not in terminals:
                            r.add_alternative(
                                [Nonterminal(fname),
                                 Terminal(t)], None, t)
                    r.add_alternative([])
                    self.rules[r.symbol] = r
                # remove whitespace before special rule from parent rule, e.g.
                # multistring ::= "MLS" WS *match_until "MLS" WS
                #                       ^ this WS causes shift/reduce conflicts
                prule = self.rules[Nonterminal(parentrule)]
                for a in prule.alternatives:
                    for i in range(len(a)):
                        sym = a[i]
                        if sym.name == "WS":
                            if len(a) > i + 1 and a[i + 1].name.startswith(
                                    "*match_until"):
                                # safe despite mutating `a`: the index loop
                                # is left immediately afterwards
                                a.pop(i)
                                break

        if self.implicit_ws():
            ws_rule = Rule()
            ws_rule.symbol = Nonterminal("WS")
            ws_rule.add_alternative([Nonterminal("WS"), Terminal("<ws>")])
            # get comment rule
            # (`in` instead of dict.has_key, which was removed in Python 3)
            if 'comment_rule' in self.options:
                cmt_rules = self.options['comment_rule']
                for cmt_rule in cmt_rules:
                    if Nonterminal(cmt_rule) in self.rules:
                        # NOTE(review): the alternative always refers to the
                        # nonterminal "comment", not to `cmt_rule` -- confirm
                        # whether that is intended.
                        ws_rule.add_alternative(
                            [Nonterminal("WS"),
                             Nonterminal("comment")])
            if self.implicit_newlines():
                ws_rule.add_alternative(
                    [Nonterminal("WS"),
                     Terminal("<return>")])
                ws_rule.add_alternative([
                    Nonterminal("WS"),
                    Terminal("<backslash>"),
                    Terminal("<return>")
                ])
            ws_rule.add_alternative([])  # or empty
            self.rules[ws_rule.symbol] = ws_rule
            for a in ws_rule.alternatives:
                self.prod_ids[Production(ws_rule.symbol,
                                         a)] = len(self.prod_ids)

            # allow whitespace/comments at beginning of file
            start_rule = Rule()
            start_rule.symbol = Nonterminal("Startrule")
            start_rule.add_alternative([Nonterminal("WS"), self.start_symbol])
            self.rules[start_rule.symbol] = start_rule
            self.prod_ids[Production(start_rule.symbol,
                                     start_rule.alternatives[0])] = len(
                                         self.prod_ids)
            self.start_symbol = start_rule.symbol

        incparser = IncParser()
        incparser.from_dict(self.rules, self.start_symbol, self.lr_type,
                            self.implicit_ws(), pickle_id, self.precedences,
                            self.prod_ids)
        incparser.init_ast()
        self.incparser = incparser

    def parse_rules(self, node):
        if node.children[0].symbol.name == "parser":
            self.parse_rules(node.children[0])
            self.parse_rule(node.children[3])
        elif node.children[0].symbol.name == "rule":
            self.parse_rule(node.children[0])

    def parse_rule(self, node):
        """Turn one "rule" node into a ``Rule`` and store it in ``self.rules``.

        The first rule parsed becomes the start symbol unless
        ``self.change_startrule`` names another rule. Every alternative is
        also given a running id in ``self.prod_ids``.
        """
        name = node.children[0].symbol.name
        # remembered so that nested parsing steps know the enclosing rule
        self.current_rulename = name
        alternatives = self.parse_alternatives(node.children[4])
        symbol = Nonterminal(name)
        if self.start_symbol is None:
            self.start_symbol = symbol
        if self.change_startrule and symbol.name == self.change_startrule:
            self.start_symbol = symbol
        r = Rule(symbol)
        for a in alternatives:
            r.add_alternative(a[0], a[1], a[2])
            self.prod_ids[Production(symbol, a[0])] = len(self.prod_ids)
        # add additional alternatives to the grammar (grammar extension feature, e.g. languageboxes)
        # (`in` instead of dict.has_key, which was removed in Python 3)
        if symbol.name in self.extra_alternatives:
            for n in self.extra_alternatives[symbol.name]:
                a = [MagicTerminal(n), Nonterminal("WS")]
                r.add_alternative(a)
                self.prod_ids[Production(symbol, a)] = len(self.prod_ids)
        self.rules[symbol] = r

    def parse_alternatives(self, node):
        if node.children[0].symbol.name == "alternatives":
            alternatives = self.parse_alternatives(node.children[0])
            alternative = self.parse_alternative(node.children[3])
            alternatives.append(alternative)
            return alternatives
        elif node.children[0].symbol.name == "right":
            return [self.parse_alternative(node.children[0])]

    def parse_alternative(self, node):
        if len(node.children) > 0:
            annotation = None
            prec = None
            for c in node.children:
                if c.symbol.name == "symbols":
                    symbols = self.parse_symbols(c)
                if c.symbol.name == "prec":
                    prec = self.parse_prec(c)
                if c.symbol.name == "annotations":
                    annotation = self.parse_annotation(c)
            return (symbols, annotation, prec)
        else:
            return ([], None, None)

    def parse_prec(self, node):
        if node.children:
            c = node.children[2]
            return c.symbol.name[1:-1]

    def parse_symbols(self, node):
        """Return the list of grammar symbols below a "symbols" node.

        With implicit whitespace enabled, and unless the current rule is
        listed under the "nowhitespace" option, a ``Nonterminal("WS")`` is
        appended after each terminal. In the nested case a MagicTerminal
        triggers this too; in the single-symbol case only a Terminal does.
        """
        head = node.children[0]
        if head.symbol.name == "symbols":
            result = self.parse_symbols(head)
            parsed = self.parse_symbol(node.children[1])
            result.append(parsed)
            token_like = isinstance(parsed, (Terminal, MagicTerminal))
        elif head.symbol.name == "symbol":
            parsed = self.parse_symbol(head)
            result = [parsed]
            token_like = isinstance(parsed, Terminal)
        else:
            return None
        if (token_like and self.implicit_ws()
                and self.current_rulename not in self.options["nowhitespace"]):
            result.append(Nonterminal("WS"))
        return result

    def parse_symbol(self, node):
        node = node.children[0]
        if node.lookup == "nonterminal":
            return Nonterminal(node.symbol.name)
        elif node.lookup == "terminal":
            if node.symbol.name != "\"<eos>\"":
                self.terminals.add(node.symbol.name[1:-1])
            return Terminal(node.symbol.name[1:-1])
        elif node.lookup == "languagebox":
            return MagicTerminal(node.symbol.name)
        elif node.symbol.name == "function":
            return self.parse_function(node)

    def parse_function(self, node):
        """Register a grammar function call and return a nonterminal for it.

        The nonterminal's name combines the function name with a hash of the
        argument set; the call is remembered in ``self.functions`` together
        with the name of the rule currently being parsed.
        """
        function_name = node.children[0].symbol.name
        args = self.parse_fargs(node.children[4])
        args_hash = hash(frozenset(args))
        safe_name = "*%s%s" % (function_name, args_hash)
        record = (safe_name, args, self.current_rulename)
        self.functions.append(record)
        return Nonterminal(safe_name)

    def parse_fargs(self, symbol):
        s = []
        for c in symbol.children:
            if c.symbol.name == ",":
                continue
            if c.symbol.name == "WS":
                continue
            if c.lookup == "terminal":
                s.append(c.symbol.name[1:-1])
                continue
            if c.symbol.name == "f_args":
                rec_s = self.parse_fargs(symbol.children[0])
                s.extend(rec_s)
        return s

    def parse_annotation(self, node):
        a_options = node.children[2]
        assert a_options.symbol.name == "a_options"
        if a_options.children[0].symbol.name == "astnode":
            return self.parse_astnode(a_options.children[0])
        elif a_options.children[0].symbol.name == "expression":
            return self.parse_expression(a_options.children[0])
        elif a_options.children[0].symbol.name == "forloop":
            return self.parse_foreach(a_options.children[0])

    def parse_astnode(self, node):
        """Build an ``AstNode`` from the node's name and its parsed
        ``name -> expression`` children."""
        node_name = node.children[0].symbol.name
        pairs = self.parse_astnode_children(node.children[4])
        mapping = {}
        for key, expr in pairs:
            mapping[key] = expr
        return AstNode(node_name, mapping)

    def parse_astnode_children(self, node):
        assert node.symbol.name == "astnode_children"
        if node.children[0].symbol.name == "astnode_child":
            return [self.parse_astnode_child(node.children[0])]
        elif node.children[0].symbol.name == "astnode_children":
            children = self.parse_astnode_children(node.children[0])
            child = self.parse_astnode_child(node.children[3])
            children.append(child)
            return children

    def parse_astnode_child(self, node):
        assert node.symbol.name == "astnode_child"
        name = node.children[0].symbol.name
        if node.children[4].symbol.name == "expression":
            expr = self.parse_expression(node.children[4])
        elif node.children[4].symbol.name == "reference":
            expr = self.parse_reference(node.children[4])
        return (name, expr)

    def parse_expression(self, node):
        if node.children[0].symbol.name == "node":
            return self.parse_node(node.children[0])
        elif node.children[0].symbol.name == "list":
            return self.parse_list(node.children[0])
        elif node.children[0].symbol.name == "node_ref":
            return self.parse_noderef(node.children[0])
        else:
            expr1 = self.parse_expression(node.children[0])
            if node.children[3].symbol.name == "node":
                expr2 = self.parse_node(node.children[3])
            else:
                expr2 = self.parse_list(node.children[3])
            return AddExpr(expr1, expr2)

    def parse_foreach(self, node):
        """Build a ``Foreach`` from the loop item (child 4) and the AST node
        body (child 7)."""
        loop_item = self.parse_node(node.children[4])
        body = self.parse_astnode(node.children[7])
        loop_name = node.symbol.name
        return Foreach(loop_name, loop_item, body)

    def parse_noderef(self, node):
        lookup = self.parse_node(node.children[0])
        attr = node.children[3]
        lookup.attribute = attr.symbol.name
        return lookup

    def parse_node(self, node):
        """Return a ``LookupExpr`` for the integer written in child 2."""
        index = int(node.children[2].symbol.name)
        return LookupExpr(index)

    def parse_list(self, node):
        """Return a ``ListExpr`` holding the elements parsed from child 2."""
        elements = self.parse_listloop(node.children[2])
        return ListExpr(elements)

    def parse_reference(self, node):
        """Return a ``ReferenceExpr`` built from the names of child 0 and
        child 4."""
        kids = node.children
        return ReferenceExpr(kids[0].symbol.name, kids[4].symbol.name)

    def parse_listloop(self, node):
        if len(node.children) == 0:
            return []
        if node.children[0].symbol.name == "list_loop":
            l = self.parse_listloop(node.children[0])
            element = self.parse_unknown(node.children[3])
            l.append(element)
            return l
        else:
            return [self.parse_unknown(node.children[0])]

    def parse_unknown(self, node):
        if node.symbol.name == "node":
            return self.parse_node(node)
        elif node.symbol.name == "astnode":
            return self.parse_astnode(node)

    def create_lexer(self, buildlexer=True):
        names = []
        regexs = []
        for name, regex in self.lrules:
            names.append(name)
            self.all_terminals.add(name)
            regexs.append(regex)
        # add so far undefined terminals
        undefined_terminals = self.terminals.difference(set(names))
        import re
        for t in undefined_terminals:
            names.insert(0, t)
            regexs.insert(0, re.escape(t))
        if not buildlexer:
            self.inclexer = (names, regexs)
            return
        self.inclexer = IncrementalLexerCF()
        self.inclexer.from_name_and_regex(names, regexs)
        if self.indentation_based():
            self.inclexer.indentation_based = True

    def parse_lexer(self, lexer):
        if lexer.children[0].symbol.name == "lrule":
            self.parse_lrule(lexer.children[0])
        elif lexer.children[0].symbol.name == "lexer":
            self.parse_lexer(lexer.children[0])
            self.parse_lrule(lexer.children[1])

    def parse_lrule(self, lrule):
        assert lrule.children[0].symbol.name == "tokenname"
        name = lrule.children[0].children[0].symbol.name
        regex = lrule.children[3].symbol.name[1:-1]
        self.lrules.append((name, regex))
Ejemplo n.º 3
0
class BootstrapParser(object):

    def __init__(self, lr_type=1, whitespaces=False):
        self.lr_type = lr_type
        self.whitespaces = whitespaces
        # load (old) parser for grammar grammar
        self.rules = {}
        self.lrules = []
        self.start_symbol = None
        self.incparser = None
        self.inclexer = None
        self.terminals = set()
        self.extra_alternatives = {}
        self.change_startrule = None
        self.options = {}
        self.precedences = []

    def implicit_ws(self):
        if self.options.has_key("implicit_ws"):
            if self.options["implicit_ws"] == "true":
                return True
        return False

    def indentation_based(self):
        if self.options.has_key("indentation"):
            if self.options["indentation"] == "true":
                return True
        return False

    def parse(self, ecogrammar):
        """Parse the grammar text ``ecogrammar`` and build parser and lexer.

        The text is imported into a tree manager driven by the Eco grammar's
        own parser/lexer; the resulting tree is then read by
        ``read_options``, ``create_parser`` and ``create_lexer``, in that
        order.

        Raises:
            Exception: if the grammar parser reports a failed parse; the
                message includes the parser's error node and its
                ``prev_term``.
        """
        # this is only called for grammars based on Eco Grammar (not Eco Grammar (Eco))
        from grammars.eco_grammar import eco_grammar as grammar
        self.lexer = IncrementalLexer(grammar.priorities)
        self.parser = IncParser(grammar.grammar, 1, True)
        self.parser.init_ast()
        # root of the tree that the later steps walk via self.ast.children
        self.ast = self.parser.previous_version.parent
        self.treemanager = TreeManager()
        self.treemanager.add_parser(self.parser, self.lexer, grammar.name)
        self.treemanager.import_file(ecogrammar)
        if self.parser.last_status == False:
            raise Exception("Invalid input grammar: at %s %s" % (self.parser.error_node.prev_term, self.parser.error_node))
        # options are read first: rule parsing consults implicit_ws()
        self.read_options()
        self.create_parser()
        self.create_lexer()

    def read_options(self):
        startrule = self.ast.children[1] # startrule
        assert startrule.symbol.name == "Startrule"
        grammar = startrule.children[1]
        assert grammar.symbol.name == "grammar"
        for element in grammar.children:
            if element.symbol.name == "options":
                break
        if element.symbol.name != "options":
            # grammar has no options
            print("warning: grammar has no options")
            # backwards compatibility
            if self.whitespaces:
                self.options["implicit_ws"] = "true"
            return
        options = element
        assert options.symbol.name == "options"
        self.parse_options(options)

    def parse_options(self, options):
        if options.children == []:
            return
        if len(options.children) > 0:
            assert options.children[0].symbol.name == "settings"
            self.parse_settings(options.children[0])
        if len(options.children) > 1:
            assert options.children[1].symbol.name == "precedences"
            self.parse_precedences(options.children[1])

    def parse_settings(self, options):
        if options.children == []:
            return
        if len(options.children) == 2:
            more = options.children[0]
            self.parse_settings(more)
            option = options.children[1]
        else:
            option = options.children[0]
        name = option.children[2].symbol.name
        choice = option.children[6]
        assert choice.symbol.name == "choice"
        self.options[name] = choice.children[0].symbol.name

    def parse_precedences(self, precedences):
        if precedences.children == []:
            return
        # recursively parse other precedences
        if len(precedences.children) == 2:
            more = precedences.children[0]
            self.parse_precedences(more)
            precedence = precedences.children[1]
        else:
            precedence = precedences.children[0]
        # parse single precedence
        name = precedence.children[0].symbol.name
        terminals = self.parse_precedence_symbols(precedence.children[2])
        self.precedences.append((name, terminals))

    def parse_precedence_symbols(self, symbol):
        s = []
        for c in symbol.children:
            if c.symbol.name == "WS":
                continue
            if c.symbol.name == "terminals":
                rec_s = self.parse_precedence_symbols(symbol.children[0])
                s.extend(rec_s)
            if c.lookup == "terminal":
                s.append(c.symbol.name[1:-1])
        return s

    def create_parser(self, pickle_id = None):
        """Parse the grammar's rules and build ``self.incparser`` from them.

        With implicit whitespace enabled a "WS" rule is added and the start
        symbol is wrapped in a new "Startrule" that begins with "WS".
        ``pickle_id`` is passed through unchanged to ``IncParser.from_dict``.
        """
        startrule = self.ast.children[1] # startrule
        grammar = startrule.children[1]
        # the parser section is expected to be the grammar's first child
        parser = grammar.children[0]
        assert parser.symbol.name == "parser"
        self.parse_rules(parser)

        if self.implicit_ws():
            # WS ::= <ws> WS | <return> WS | <backslash> <return> WS | (empty)
            ws_rule = Rule()
            ws_rule.symbol = Nonterminal("WS")
            ws_rule.add_alternative([Terminal("<ws>"), Nonterminal("WS")])
            ws_rule.add_alternative([Terminal("<return>"), Nonterminal("WS")])
            ws_rule.add_alternative([Terminal("<backslash>"), Terminal("<return>"), Nonterminal("WS")])
            ws_rule.add_alternative([]) # or empty
            self.rules[ws_rule.symbol] = ws_rule

            # allow whitespace/comments at beginning of file
            start_rule = Rule()
            start_rule.symbol = Nonterminal("Startrule")
            # must use the old start symbol before it is replaced below
            start_rule.add_alternative([Nonterminal("WS"), self.start_symbol])
            self.rules[start_rule.symbol] = start_rule
            self.start_symbol = start_rule.symbol

        incparser = IncParser()
        incparser.from_dict(self.rules, self.start_symbol, self.lr_type, self.implicit_ws(), pickle_id, self.precedences)
        incparser.init_ast()
        self.incparser = incparser

    def parse_rules(self, node):
        if node.children[0].symbol.name == "parser":
            self.parse_rules(node.children[0])
            self.parse_rule(node.children[3])
        elif node.children[0].symbol.name == "rule":
            self.parse_rule(node.children[0])

    def parse_rule(self, node):
        """Turn one "rule" node into a ``Rule`` and store it in ``self.rules``.

        The first rule parsed becomes the start symbol unless
        ``self.change_startrule`` names another rule.
        """
        name = node.children[0].symbol.name
        alternatives = self.parse_alternatives(node.children[4])
        symbol = Nonterminal(name)
        if self.start_symbol is None:
            self.start_symbol = symbol
        if self.change_startrule and symbol.name == self.change_startrule:
            self.start_symbol = symbol
        r = Rule(symbol)
        for a in alternatives:
            r.add_alternative(a[0], a[1], a[2])
        # add additional alternatives to the grammar (grammar extension feature, e.g. languageboxes)
        # (`in` instead of dict.has_key, which was removed in Python 3)
        if symbol.name in self.extra_alternatives:
            for n in self.extra_alternatives[symbol.name]:
                r.add_alternative([MagicTerminal(n), Nonterminal("WS")], None)
        self.rules[symbol] = r

    def parse_alternatives(self, node):
        if node.children[0].symbol.name == "alternatives":
            alternatives = self.parse_alternatives(node.children[0])
            alternative = self.parse_alternative(node.children[3])
            alternatives.append(alternative)
            return alternatives
        elif node.children[0].symbol.name == "right":
            return [self.parse_alternative(node.children[0])]

    def parse_alternative(self, node):
        if len(node.children) > 0:
            annotation = None
            prec = None
            for c in node.children:
                if c.symbol.name == "symbols":
                    symbols = self.parse_symbols(c)
                if c.symbol.name == "prec":
                    prec = self.parse_prec(c)
                if c.symbol.name == "annotations":
                    annotation = self.parse_annotation(c)
            return (symbols, annotation, prec)
        else:
            return ([], None, None)

    def parse_prec(self, node):
        if node.children:
            c = node.children[2]
            return c.symbol.name[1:-1]

    def parse_symbols(self, node):
        """Return the list of grammar symbols below a "symbols" node.

        With implicit whitespace enabled a ``Nonterminal("WS")`` is appended
        after each terminal. In the nested case a MagicTerminal triggers
        this too; in the single-symbol case only a Terminal does.
        """
        head = node.children[0]
        if head.symbol.name == "symbols":
            result = self.parse_symbols(head)
            parsed = self.parse_symbol(node.children[1])
            result.append(parsed)
            token_like = isinstance(parsed, (Terminal, MagicTerminal))
        elif head.symbol.name == "symbol":
            parsed = self.parse_symbol(head)
            result = [parsed]
            token_like = isinstance(parsed, Terminal)
        else:
            return None
        if token_like and self.implicit_ws():
            result.append(Nonterminal("WS"))
        return result

    def parse_symbol(self, node):
        node = node.children[0]
        if node.lookup == "nonterminal":
            return Nonterminal(node.symbol.name)
        elif node.lookup == "terminal":
            self.terminals.add(node.symbol.name[1:-1])
            return Terminal(node.symbol.name[1:-1])
        elif node.lookup == "languagebox":
            return MagicTerminal(node.symbol.name)

    def parse_annotation(self, node):
        a_options = node.children[2]
        assert a_options.symbol.name == "a_options"
        if a_options.children[0].symbol.name == "astnode":
            return self.parse_astnode(a_options.children[0])
        elif a_options.children[0].symbol.name == "expression":
            return self.parse_expression(a_options.children[0])
        elif a_options.children[0].symbol.name == "forloop":
            return self.parse_foreach(a_options.children[0])

    def parse_astnode(self, node):
        """Build an ``AstNode`` from the node's name and its parsed
        ``name -> expression`` children."""
        node_name = node.children[0].symbol.name
        pairs = self.parse_astnode_children(node.children[4])
        mapping = {}
        for key, expr in pairs:
            mapping[key] = expr
        return AstNode(node_name, mapping)

    def parse_astnode_children(self, node):
        assert node.symbol.name == "astnode_children"
        if node.children[0].symbol.name == "astnode_child":
            return [self.parse_astnode_child(node.children[0])]
        elif node.children[0].symbol.name == "astnode_children":
            children = self.parse_astnode_children(node.children[0])
            child = self.parse_astnode_child(node.children[3])
            children.append(child)
            return children

    def parse_astnode_child(self, node):
        assert node.symbol.name == "astnode_child"
        name = node.children[0].symbol.name
        if node.children[4].symbol.name == "expression":
            expr = self.parse_expression(node.children[4])
        elif node.children[4].symbol.name == "reference":
            expr = self.parse_reference(node.children[4])
        return (name, expr)

    def parse_expression(self, node):
        if node.children[0].symbol.name == "node":
            return self.parse_node(node.children[0])
        elif node.children[0].symbol.name == "list":
            return self.parse_list(node.children[0])
        elif node.children[0].symbol.name == "node_ref":
            return self.parse_noderef(node.children[0])
        else:
            expr1 = self.parse_expression(node.children[0])
            if node.children[3].symbol.name == "node":
                expr2 = self.parse_node(node.children[3])
            else:
                expr2 = self.parse_list(node.children[3])
            return AddExpr(expr1, expr2)

    def parse_foreach(self, node):
        """Build a ``Foreach`` from the loop item (child 4) and the AST node
        body (child 7)."""
        loop_item = self.parse_node(node.children[4])
        body = self.parse_astnode(node.children[7])
        loop_name = node.symbol.name
        return Foreach(loop_name, loop_item, body)

    def parse_noderef(self, node):
        lookup = self.parse_node(node.children[0])
        attr = node.children[3]
        lookup.attribute = attr.symbol.name
        return lookup

    def parse_node(self, node):
        """Return a ``LookupExpr`` for the integer written in child 2."""
        index = int(node.children[2].symbol.name)
        return LookupExpr(index)

    def parse_list(self, node):
        """Return a ``ListExpr`` holding the elements parsed from child 2."""
        elements = self.parse_listloop(node.children[2])
        return ListExpr(elements)

    def parse_reference(self, node):
        """Return a ``ReferenceExpr`` built from the names of child 0 and
        child 4."""
        kids = node.children
        return ReferenceExpr(kids[0].symbol.name, kids[4].symbol.name)

    def parse_listloop(self, node):
        if len(node.children) == 0:
            return []
        if node.children[0].symbol.name == "list_loop":
            l = self.parse_listloop(node.children[0])
            element = self.parse_unknown(node.children[3])
            l.append(element)
            return l
        else:
            return [self.parse_unknown(node.children[0])]

    def parse_unknown(self, node):
        if node.symbol.name == "node":
            return self.parse_node(node)
        elif node.symbol.name == "astnode":
            return self.parse_astnode(node)

    def create_lexer(self):
        """Parse the grammar's lexer section and build ``self.inclexer``.

        Terminals used in the grammar without a lexer rule get an escaped
        literal regex and are placed in front of the declared rules.
        """
        import re
        grammar = self.ast.children[1].children[1]
        # pick the first grammar child named "lexer"
        for candidate in grammar.children:
            if candidate.symbol.name == "lexer":
                break
        assert candidate.symbol.name == "lexer"
        self.parse_lexer(candidate)

        names = []
        regexs = []
        for token_name, token_regex in self.lrules:
            names.append(token_name)
            regexs.append(token_regex)
        # add so far undefined terminals
        for missing in self.terminals.difference(set(names)):
            names.insert(0, missing)
            regexs.insert(0, re.escape(missing))

        self.inclexer = IncrementalLexerCF()
        self.inclexer.from_name_and_regex(names, regexs)
        if self.indentation_based():
            self.inclexer.indentation_based = True

    def parse_lexer(self, lexer):
        if lexer.children[0].symbol.name == "lrule":
            self.parse_lrule(lexer.children[0])
        elif lexer.children[0].symbol.name == "lexer":
            self.parse_lexer(lexer.children[0])
            self.parse_lrule(lexer.children[1])

    def parse_lrule(self, lrule):
        assert lrule.children[0].symbol.name == "tokenname"
        name = lrule.children[0].children[0].symbol.name
        regex = lrule.children[3].symbol.name[1:-1]
        self.lrules.append((name, regex))