def add_production(self, prod_name, syms, func, precedence):
    """
    Record a new production ``prod_name -> syms`` handled by ``func``.

    ``prod_name`` must not be the name of a terminal. The production's
    precedence is looked up in ``self.precedence``: under the explicit
    ``precedence`` key when one is given, otherwise under whatever
    ``rightmost_terminal(syms, self.terminals)`` returns, falling back to
    ``("right", 0)`` when that key has no entry.

    Raises ``ParserGeneratorError`` if ``prod_name`` is a terminal or if an
    explicit ``precedence`` key is unknown.
    """
    if prod_name in self.terminals:
        raise ParserGeneratorError("Illegal rule name %r" % prod_name)

    if precedence is not None:
        # An explicitly requested precedence has to have been declared.
        try:
            prod_prec = self.precedence[precedence]
        except KeyError:
            raise ParserGeneratorError(
                "Precedence %r doesn't exist" % precedence
            )
    else:
        precname = rightmost_terminal(syms, self.terminals)
        prod_prec = self.precedence.get(precname, ("right", 0))

    # The new production's number is its position in ``self.productions``.
    index = len(self.productions)
    # Make sure the left-hand side is known as a nonterminal even if no
    # production uses it on a right-hand side.
    self.nonterminals.setdefault(prod_name, [])

    # Record, per symbol, which productions mention it.
    for sym in syms:
        if sym in self.terminals:
            users = self.terminals[sym]
        else:
            users = self.nonterminals.setdefault(sym, [])
        users.append(index)

    rule = Production(index, prod_name, syms, prod_prec, func)
    self.productions.append(rule)
    self.prod_names.setdefault(prod_name, []).append(rule)
def set_precedence(self, term, assoc, level):
    """
    Store the associativity and precedence level for ``term``.

    The pair ``(assoc, level)`` is saved in ``self.precedence`` under
    ``term``. Each term may be given a precedence only once, and ``assoc``
    has to be ``"left"``, ``"right"`` or ``"nonassoc"``; otherwise
    ``ParserGeneratorError`` is raised.
    """
    table = self.precedence
    if term in table:
        raise ParserGeneratorError(
            "Precedence already specified for %s" % term
        )

    allowed = ("left", "right", "nonassoc")
    if assoc not in allowed:
        raise ParserGeneratorError(
            "Precedence must be one of left, right, nonassoc; not %s" % assoc
        )

    table[term] = (assoc, level)
def production(self, rule, precedence=None):
    """
    Return a decorator that registers a function for a production rule.

    ``rule`` is a whitespace-separated string of the form
    ``name : SYMBOL1 SYMBOL2 ...``; the whitespace around the colon is
    required because the rule is split on whitespace. The symbol list may
    be empty. ``precedence`` is stored alongside the rule unchanged.

    The returned decorator appends the tuple
    ``(name, symbols, func, precedence)`` to ``self.productions`` and hands
    back ``func`` itself, so the decorated function stays usable.

    Raises ``ParserGeneratorError`` if the rule does not consist of a name
    followed by a standalone ``:``.
    """
    parts = rule.split()
    # Check the length before indexing so that an empty rule or a rule with
    # only a name is reported as a grammar error, not as an IndexError.
    if len(parts) < 2 or parts[1] != ":":
        raise ParserGeneratorError("Expecting :")
    production_name = parts[0]
    syms = parts[2:]

    def inner(func):
        self.productions.append((production_name, syms, func, precedence))
        return func

    return inner
def production(self, rule, precedence=None):
    """
    A decorator that defines one or many production rules and registers
    the decorated function to be called with the terminals and
    non-terminals matched by those rules.

    A `rule` should consist of a name defining the non-terminal returned
    by the decorated function and one or more sequences of pipe-separated
    non-terminals and terminals that are supposed to be replaced::

        replacing_non_terminal : TERMINAL1 non_term1 | TERMINAL2 non_term2

    The name of the non-terminal replacing the sequence is on the left,
    separated from the sequence by a colon. The whitespace around the colon
    is required.

    Knowing this we can define productions::

        pg = ParserGenerator(['NUMBER', 'ADD'])

        @pg.production('number : NUMBER')
        def expr_number(p):
            return BoxInt(int(p[0].getstr()))

        @pg.production('expr : number ADD number')
        def expr_add(p):
            return BoxInt(p[0].getint() + p[2].getint())

    If a state was passed to the parser, the decorated function is
    additionally called with that state as first argument.

    Each pipe-separated alternative is appended to ``self.productions`` as
    its own ``(name, symbols, func, precedence)`` tuple, all sharing the
    same function and ``precedence`` value.

    Raises ``ParserGeneratorError`` if the rule does not consist of a name
    followed by a standalone ``:``.
    """
    parts = rule.split()
    # Check the length before indexing so that an empty rule or a rule with
    # only a name is reported as a grammar error, not as an IndexError.
    if len(parts) < 2 or parts[1] != ":":
        raise ParserGeneratorError("Expecting :")
    production_name = parts[0]

    # Re-join the right-hand side so that "|" separates the alternatives
    # regardless of how the whitespace around it was written.
    body = " ".join(parts[2:])
    alternatives = body.split("|")

    def inner(func):
        for alternative in alternatives:
            syms = alternative.split()
            self.productions.append(
                (production_name, syms, func, precedence))
        return func

    return inner
def from_grammar(cls, grammar):
    """
    Build the action and goto tables for ``grammar`` and return them
    wrapped in an ``LRTable``.

    The states come from ``cls.lr0_items`` and are annotated by
    ``cls.add_lalr_lookaheads``. For every state a mapping from symbol to
    action is filled in, using this encoding:

    * ``0`` under ``"$end"`` for the item named ``"S'"`` (accept),
    * ``-p.number`` for an item ``p`` whose ``lr_index`` is at its last
      position (reduce by that production),
    * the non-negative state index found in ``cidhash`` for a terminal
      transition (shift).

    When two actions compete for the same symbol the conflict is resolved
    through the precedence data on ``grammar`` and, where the code below
    says so, recorded in the shift/reduce or reduce/reduce conflict lists
    that are passed on to ``LRTable``.

    Side effect: the ``reduced`` counter of the productions in
    ``grammar.productions`` is incremented/decremented as reductions are
    installed or displaced.

    Raises ``ParserGeneratorError`` when a symbol would need two different
    shift targets, or when a competing action is already ``0``.
    """
    cidhash = IdentityDict()
    goto_cache = {}
    add_count = Counter()
    # ``C`` is iterated below as the list of states; every entry ``I`` is
    # iterated as a collection of items.
    C = cls.lr0_items(grammar, add_count, cidhash, goto_cache)

    cls.add_lalr_lookaheads(grammar, C, add_count, cidhash, goto_cache)

    # One action table and one goto table per state, filled in by index.
    lr_action = [None] * len(C)
    lr_goto = [None] * len(C)
    sr_conflicts = []
    rr_conflicts = []
    for st, I in enumerate(C):
        st_action = {}   # symbol -> encoded action for this state
        st_actionp = {}  # symbol -> item that installed that action
        st_goto = {}     # nonterminal -> target state index
        for p in I:
            if p.getlength() == p.lr_index + 1:
                # ``lr_index`` is at the item's last position, so this item
                # contributes an accept or reduce action.
                if p.name == "S'":
                    # Start symbol. Accept!
                    st_action["$end"] = 0
                    st_actionp["$end"] = p
                else:
                    laheads = p.lookaheads[st]
                    for a in laheads:
                        if a in st_action:
                            # Another item already claimed this lookahead.
                            r = st_action[a]
                            if r > 0:
                                # Existing action is a shift: shift/reduce
                                # conflict.
                                # NOTE(review): here the "s" pair is the
                                # precedence of the production behind the
                                # existing shift item and the "r" pair is the
                                # lookahead token's own precedence; the
                                # production ``p`` being reduced is not
                                # consulted. Confirm this is intended.
                                # ``sprec`` itself is never read.
                                sprec, slevel = grammar.productions[
                                    st_actionp[a].number].prec
                                rprec, rlevel = grammar.precedence.get(
                                    a, ("right", 0))
                                if (slevel < rlevel) or (slevel == rlevel and rprec == "left"):
                                    # Replace the shift with a reduce by ``p``.
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    if not slevel and not rlevel:
                                        # Both levels are 0: record the
                                        # conflict as resolved by reducing.
                                        sr_conflicts.append(
                                            (st, repr(a), "reduce"))
                                    grammar.productions[
                                        p.number].reduced += 1
                                elif not (slevel == rlevel and rprec == "nonassoc"):
                                    # The shift stays; it is recorded as a
                                    # conflict only when the token level is 0.
                                    # Equal levels with "nonassoc" fall
                                    # through without any change or record.
                                    if not rlevel:
                                        sr_conflicts.append(
                                            (st, repr(a), "shift"))
                            elif r < 0:
                                # Existing action is a reduce: reduce/reduce
                                # conflict. The production with the smaller
                                # number wins.
                                oldp = grammar.productions[-r]
                                pp = grammar.productions[p.number]
                                if oldp.number > pp.number:
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    chosenp, rejectp = pp, oldp
                                    # Move the reduction count from the
                                    # displaced production to the new one.
                                    grammar.productions[
                                        p.number].reduced += 1
                                    grammar.productions[
                                        oldp.number].reduced -= 1
                                else:
                                    chosenp, rejectp = oldp, pp
                                rr_conflicts.append(
                                    (st, repr(chosenp), repr(rejectp)))
                            else:
                                # r == 0: the slot already holds the accept
                                # action.
                                raise ParserGeneratorError(
                                    "Unknown conflict in state %d" % st)
                        else:
                            # No competing action: install the reduce.
                            st_action[a] = -p.number
                            st_actionp[a] = p
                            grammar.productions[p.number].reduced += 1
            else:
                # The item is not at its last position; look at the symbol
                # following ``lr_index``.
                i = p.lr_index
                a = p.prod[i + 1]
                if a in grammar.terminals:
                    g = cls.lr0_goto(I, a, add_count, goto_cache)
                    # ``-1`` marks a goto result that has no state index.
                    j = cidhash.get(g, -1)
                    if j >= 0:
                        if a in st_action:
                            r = st_action[a]
                            if r > 0:
                                # Already shifting on ``a``; the target must
                                # be the same state.
                                if r != j:
                                    raise ParserGeneratorError(
                                        "Shift/shift conflict in state %d" % st)
                            elif r < 0:
                                # Existing action is a reduce: shift/reduce
                                # conflict. "r" pair: precedence of the
                                # production behind the existing reduce;
                                # "s" pair: precedence of the token ``a``.
                                rprec, rlevel = grammar.productions[
                                    st_actionp[a].number].prec
                                sprec, slevel = grammar.precedence.get(
                                    a, ("right", 0))
                                if (slevel > rlevel) or (slevel == rlevel and rprec == "right"):
                                    # Replace the reduce with a shift to ``j``.
                                    grammar.productions[
                                        st_actionp[a].number].reduced -= 1
                                    st_action[a] = j
                                    st_actionp[a] = p
                                    if not rlevel:
                                        sr_conflicts.append(
                                            (st, repr(a), "shift"))
                                elif not (slevel == rlevel and rprec == "nonassoc"):
                                    # The reduce stays; recorded as a conflict
                                    # only when both levels are 0.
                                    if not slevel and not rlevel:
                                        sr_conflicts.append(
                                            (st, repr(a), "reduce"))
                            else:
                                # r == 0: the slot already holds the accept
                                # action.
                                raise ParserGeneratorError(
                                    "Unknown conflict in state %d" % st)
                        else:
                            # No competing action: install the shift.
                            st_action[a] = j
                            st_actionp[a] = p
        # Collect every nonterminal mentioned by the items of this state and
        # record the state reached on each of them.
        nkeys = set()
        for ii in I:
            for s in ii.unique_syms:
                if s in grammar.nonterminals:
                    nkeys.add(s)
        for n in nkeys:
            g = cls.lr0_goto(I, n, add_count, goto_cache)
            j = cidhash.get(g, -1)
            if j >= 0:
                st_goto[n] = j

        lr_action[st] = st_action
        lr_goto[st] = st_goto

    # A state whose actions all share one single negative value gets that
    # value as its default reduction; every other state keeps 0.
    default_reductions = [0] * len(lr_action)
    for state, actions in enumerate(lr_action):
        actions = set(itervalues(actions))
        if len(actions) == 1 and next(iter(actions)) < 0:
            default_reductions[state] = next(iter(actions))
    return LRTable(grammar, lr_action, lr_goto, default_reductions, sr_conflicts, rr_conflicts)