def add_terminal(self, loc: SrcLoc, name: str, lang_name: str, tags: List[Tuple[str, Optional[int]]]) -> None:
    """Register a new terminal symbol and apply its tags.

    Supported tags: "skip" (lexer discards matches), "error" (designates
    the error terminal), "eof" (designates the end-of-file terminal).
    Raises CCError on duplicate names, duplicate error/eof designation,
    or an unknown tag.
    """
    previous = self.terminals.get(name)
    if previous is not None:
        raise CCError(
            loc,
            f"terminal '{name}' already defined at {previous.loc}")
    terminal = Terminal(loc, name, lang_name)
    self.terminals[name] = terminal
    self.terminals_list.append(terminal)
    for tag, value in tags:
        if tag == "skip":
            terminal.skip = True
            continue
        if tag == "error":
            # Only one terminal may be designated as {error}.
            if self.term_error:
                raise CCError(
                    loc,
                    f"error terminal {self.term_error.name} already defined at {self.term_error.loc}")
            self.term_error = terminal
        elif tag == "eof":
            # Only one terminal may be designated as {eof}.
            if self.term_eof:
                raise CCError(
                    loc,
                    f"eof terminal {self.term_eof.name} already defined at {self.term_eof.loc}")
            self.term_eof = terminal
        else:
            raise CCError(loc, f"invalid tag {tag}")
def _construct_nonterminals(self) -> None:
    """Turn raw parser rules into template nonterminal rules.

    For each parsed rule this (1) resolves/creates the target template
    nonterminal, (2) validates captures against the rule's action text,
    and (3) records a TemplateNonTerminalRule on the nonterminal.
    Raises CCError on conflicting arity, duplicate/ambiguous/undefined
    captures, or an explicitly-named capture the action never uses.
    """
    for (loc, name, param_names, cond, symbols, action) in self.parser_rules:
        nt = self._get_nt(loc, name, len(param_names))
        # A nonterminal's parameter count must agree across all its rules.
        if nt.param_count != len(param_names):
            raise CCError(
                loc,
                f"nonterminal '{name}' has conflicting definitions, first definition at {nt.loc}")
        # forced: explicit `capture=` names -> their declaration location.
        forced_captures: Dict[str, SrcLoc] = dict()
        # unforced: symbol names usable as implicit captures -> occurrence count
        # (a count > 1 makes an implicit reference ambiguous).
        unforced_captures: Dict[str, int] = defaultdict(lambda: 0)
        implicit_captures: Set[str] = set()
        used_captures: Set[str] = set()
        for symbol in symbols:
            if symbol.capture is not None:
                if symbol.capture in forced_captures:
                    raise CCError(
                        symbol.loc,
                        f"capture '{symbol.capture}' already made at {forced_captures[symbol.capture]}")
                forced_captures[symbol.capture] = symbol.loc
                # The symbol's own name also counts toward implicit lookup.
                unforced_captures[symbol.name] += 1
            elif self.is_simple_name(symbol.name):
                unforced_captures[symbol.name] += 1
        new_symbols: List[TemplateSymbol] = []
        if action is not None:
            # Every $name reference in the action must resolve to exactly
            # one capture: a forced one, or a unique implicit one.
            for capture_group in CaptureRe.finditer(action.text):
                capture = capture_group.group(1)
                if capture in forced_captures:
                    used_captures.add(capture)
                elif (capture in unforced_captures):
                    if unforced_captures[capture] > 1:
                        raise CCError(action.loc, f"ambiguous capture '${capture}'")
                    implicit_captures.add(capture)
                else:
                    raise CCError(action.loc, f"undefined capture '${capture}'")
        # Explicitly-declared captures that the action never references
        # are reported as errors.
        for capture, capture_loc in forced_captures.items():
            if capture not in used_captures:
                raise CCError(capture_loc, f"capture '${capture}' is not used")
        # Promote implicit captures to real ones on the rebuilt symbols.
        for symbol in symbols:
            symbol_capture = symbol.capture
            if (symbol_capture is None) and (symbol.name in implicit_captures):
                symbol_capture = symbol.name
            new_symbols.append(
                TemplateSymbol(symbol.loc, symbol.name, symbol.params, symbol_capture))
        rule = TemplateNonTerminalRule(loc, nt, param_names, cond, new_symbols, action)
        nt.add_rule(rule)
def _apply_types(self) -> None:
    """Apply explicitly declared type assignments.

    The special name "terminal" types the grammar's terminal symbols;
    any other name must resolve to a known template nonterminal.
    Raises CCError on a duplicate assignment or an unknown nonterminal.
    """
    assigned_at: Dict[str, SrcLoc] = {}
    for loc, name, type_name in self.types:
        declared = self._get_type(loc, type_name)
        if name in assigned_at:
            raise CCError(
                loc,
                f"'{name}' type already assigned at {assigned_at[name]}")
        if name == "terminal":
            unify_type(loc, self.grammar.terminal_type, declared)
        else:
            nt = self.template.find_template(name)
            if nt is None:
                raise CCError(loc, f"nonterminal '{name}' not found")
            unify_type(loc, nt.type, declared)
        assigned_at[name] = loc
def _construct_terminals(self) -> None:
    """Mirror all shared terminals into the grammar and bind the eof terminal.

    Raises CCError when no terminal was designated as {eof}.
    """
    for term in self.shared.terminals.values():
        self.grammar.add_terminal(SymbolTerminal(term))
    eof_term = self.shared.term_eof
    if not eof_term:
        raise CCError(None, "no terminal designated for {eof}")
    self.grammar.eof = self.grammar.find_terminal(eof_term.name)
def set_parser_source(self, loc: SrcLoc, contents: str) -> None:
    """Record the parser.source code block; at most one is allowed."""
    grammar = self.parser_generator.grammar
    if grammar.parser_source:
        raise CCError(
            loc,
            f"parser.source block already defined at {grammar.parser_source.loc}")
    grammar.parser_source = CodeBlock(loc, contents)
def set_parser_header(self, loc: SrcLoc, contents: str) -> None:
    """Record the parser.header code block; at most one is allowed."""
    grammar = self.parser_generator.grammar
    if grammar.parser_header:
        raise CCError(
            loc,
            f"parser.header block already defined at {grammar.parser_header.loc}")
    grammar.parser_header = CodeBlock(loc, contents)
def _typecheck_parser(self) -> None:
    """Solve type constraints by fixed-point iteration, then verify closure.

    A constraint whose parameters are all void unifies its type with Void;
    one with exactly one non-void parameter unifies with that type. Solved
    constraints are dropped; iteration stops when a pass makes no progress.
    Raises CCError if any template's type is still an unbound variable.
    """
    while True:
        resolved: Set[TypeConstraint] = set()
        for constraint in self.template.type_constraints:
            # Types whose representative is not void participate in inference.
            live = [ty for _, ty in constraint.params
                    if not isinstance(ty.repr(), TypeVoid)]
            if len(live) == 0:
                unify_type(constraint.loc, constraint.type, Void)
            elif len(live) == 1:
                unify_type(constraint.loc, constraint.type, live[0])
            else:
                continue  # still ambiguous; retry on a later pass
            resolved.add(constraint)
        if not resolved:
            break
        self.template.type_constraints = [
            c for c in self.template.type_constraints if c not in resolved]
    # After the fixed point, every template must have a concrete type.
    for template in self.template.templates.values():
        if isinstance(template.type.repr(), TypeVariable):
            raise CCError(template.loc, f"cannot infer type for '{template.name}'")
def register_vm_action(self, loc: SrcLoc, name: str, action: Tuple[SrcLoc, str]):
    """Register a named VM action on the grammar; duplicates are an error."""
    vm_actions = self.parser_generator.grammar.vm_actions
    existing = vm_actions.get(name)
    if existing is not None:
        raise CCError(loc, f"{name} vm action already defined at {existing[0]}")
    # Stored as (definition location, name, action body).
    vm_actions[name] = (loc, name, action)
def instantiate(self, loc: SrcLoc, values: Tuple[int, ...]) -> SymbolNonTerminal:
    """Return the instance of this template for `values`, creating it on demand.

    Instances are memoized in self.instances. Raises CCError when the
    argument count does not match the template's parameter count.
    """
    if values in self.instances:
        return self.instances[values]
    if len(values) != self.param_count:
        # Bug fix: the original message had got/expected swapped — it
        # reported the declared count as "got" and the supplied count as
        # "expected".
        raise CCError(
            loc, f"mismatch number of template arguments " +
            f"for {self.name}: " +
            f"got {len(values)}, expected {self.param_count}")
    return self._create_instance(values)
def _populate_parser(self) -> None:
    """Create an exported wrapper nonterminal for every exposed name.

    Each wrapper has a single rule expanding to the (argument-less)
    instantiation of the corresponding template nonterminal, and is
    registered in the grammar's exports and keep sets.
    Raises CCError when an exposed name has no template.
    """
    for loc, name in self.exposed_nt:
        template = self.template.find_template(name)
        if not template:
            raise CCError(loc, f"nonterminal '{name}' not found")
        export = SymbolNonTerminal(f"{name}")
        export.exported = True
        export.add_rule([template.instantiate(loc, ())], None)
        self.grammar.add_nonterminal(export)
        self.grammar.exports[name] = export
        self.grammar.keep.add(export)
def construct(self) -> None:
    """Build an NFA fragment for each lexer rule and record its accept state.

    Raises CCError when a rule names an unknown terminal.
    """
    for idx, (loc, name, re) in enumerate(self.lexer_rules):
        term = self.shared.terminals.get(name)
        if term is None:
            raise CCError(loc, f"terminal '{name}' not found")
        accept = NFAState()
        rule = NFARule(idx, loc, term)
        # The accepting state carries the rule it recognizes.
        accept.rule = rule
        re.build_nfa(self.nfa_ctx, self.nfa_init, accept)
        self.nfa_rules.append(rule)
        self.nfa_ends.append(accept)
def build(self) -> None:
    """Compute the perfect-hash parameter `n` for the keyword set.

    On failure, dumps every keyword string to stderr and raises CCError.
    """
    n = self.find_n()
    if n is not None:
        self.n = n
        return
    # Show the offending keyword set before bailing out.
    print("KEYWORDS:", file=sys.stderr)
    for keyword in self.keywords:
        for string in keyword.strings:
            print(f"  {json.dumps(string)}", file=sys.stderr)
    raise CCError(
        None,
        "too many (or conflicting) keywords, cannot compute perfect hash functions")
def _get_nt(self, loc: SrcLoc, name: str, param_count: int) -> TemplateNonTerminal: term = self.grammar.find_terminal(name) if term is not None: raise CCError( loc, f"name '{name}' is already assigned to a terminal at {term.terminal.loc}" ) nt = self.template.find_template(name) if not nt: nt = TemplateNonTerminal(self.template, loc, name, param_count) self.template.add_template(nt) return nt return nt
def _instantiate_symbol(self, ctx: 'TemplateGrammar', symbol: TemplateSymbol, vars: InstanceVars, type_stack: List[TypeParam], symbols: List[Symbol]) -> None: template = ctx.find_template(symbol.name) if template: vals = [] if symbol.params is not None: for expr in symbol.params: vals.append(expr.eval(vars)) symbols.append(template.instantiate(symbol.loc, tuple(vals))) type_stack.append((symbol.capture, template.type)) else: terminal = ctx.grammar.find_terminal(symbol.name) if not terminal: raise CCError(symbol.loc, f"unresolved name '{symbol.name}'") if symbol.params is not None: raise CCError( symbol.loc, f"terminal '{symbol.name}' doesn't expect template arguments" ) symbols.append(terminal) assert ctx.grammar.terminal_type is not None type_stack.append((symbol.capture, ctx.grammar.terminal_type))
def run(self) -> None:
    """Drive the lexer pipeline: NFA -> DFA -> minimized DFA -> codegen.

    Raises CCError when no {error} terminal was declared.
    """
    if not self.shared.term_error:
        raise CCError(None, "no {error} terminal found")
    builder = Builder(self.shared.term_error, self.nfa_rules, 0)
    dfa = builder.build(self.nfa_init)
    # Fix: removed the dead local `keywords = builder.keywords` — a plain
    # attribute read whose result was never used.
    min_dfa = minimize(dfa)
    self.inject_error_state(min_dfa)
    # NOTE(review): list_states is not read afterwards; the visit() call is
    # kept in case the traversal itself has side effects — confirm and
    # remove if it does not.
    list_states: List[DFAState] = []
    min_dfa.visit(lambda state: list_states.append(state))
    codegen = Codegen(self.lexer_grammar, min_dfa)
    codegen.run()
def unify_type(loc: SrcLoc, ty1: Type, ty2: Type) -> None:
    """Unify two types.

    If either representative is a type variable it is merged with the
    other side; equal representatives unify trivially. Otherwise a
    CCError is raised, mentioning ty2's origin when it has one.
    """
    left = ty1.repr()
    right = ty2.repr()
    if isinstance(left, TypeVariable):
        left.merge(loc, right)
        return
    if isinstance(right, TypeVariable):
        right.merge(loc, left)
        return
    if left == right:
        return
    origin = ty2.get_origin()
    origin_str = f" (from {origin})" if origin else ""
    raise CCError(loc, f"cannot unify type '{left}' with '{right}'{origin_str}")
def _instantiate_action(self, ctx: 'TemplateGrammar', action: TemplateAction,
                        type_stack: List[TypeParam]) -> Action:
    """Turn a template action into a concrete Action, validating captures.

    Every $name reference in the action text must be a known capture.
    An action that is exactly "$name" forwards that capture's type to
    the action's result type. On return, type_stack is collapsed to the
    single (None, result-type) entry.
    """
    source = action.text.strip()
    prod_action: Action = Action(action.loc, type_stack, TypeVariable(None), action.text)
    known_names = set()
    for capture_name, capture_type in type_stack:
        if capture_name is None:
            continue
        known_names.add(capture_name)
        if source == "$" + capture_name:
            unify_type(prod_action.loc, prod_action.type, capture_type)
    for match in CaptureRe.finditer(source):
        ref = match.group(1)
        if ref not in known_names:
            raise CCError(prod_action.loc, f"unresolved reference '{ref}'")
    # The action consumes all captured values and produces one result.
    type_stack.clear()
    type_stack.append((None, prod_action.type))
    return prod_action
def get_fragment(self, loc: SrcLoc, name: str) -> Tuple[NFAState, NFAState]:
    """Return the (start, end) NFA state pair for a named fragment.

    Raises CCError when the fragment name is unknown.
    """
    fragment = self.fragments.get(name)
    if fragment is None:
        raise CCError(loc, f"fragment '{name}' not found")
    return fragment.build(self)
def add_fragment(self, loc: SrcLoc, name: str, re: 'Re') -> None:
    """Register a named regex fragment; redefinition is an error."""
    existing = self.fragments.get(name)
    if existing is not None:
        raise CCError(
            loc,
            f"duplicate fragment '{name}', previous definition at {existing.loc}")
    self.fragments[name] = NFAFragment(loc, name, re)
def report(self, message: str, loc: Optional[SrcLoc] = None) -> NoReturn:
    """Raise a CCError at `loc`, defaulting to the current source location."""
    raise CCError(self.loc() if loc is None else loc, message)