def visit_Rule(self, node: Rule) -> None:
    """Emit the Python parser method for one grammar rule.

    Prints, in order: a decorator line, the ``def`` header, a comment that
    echoes the flattened rule, the method preamble, the code produced by
    visiting the flattened right-hand side, and the fall-through ``return``.

    Args:
        node: The rule for which a parser method is generated.
    """
    is_loop = node.is_loop()
    is_gather = node.is_gather()
    rhs = node.flatten()

    # Exactly one decorator line is emitted per rule.
    if not node.left_recursive:
        decorator = "@memoize"
    elif node.leader:
        decorator = "@memoize_left_rec"
    else:
        # Non-leader rules in a cycle are not memoized,
        # but they must still be logged.
        decorator = "@logger"
    self.print(decorator)

    return_annotation = node.type or "Any"
    self.print(f"def {node.name}(self) -> Optional[{return_annotation}]:")
    with self.indent():
        self.print(f"# {node.name}: {rhs}")
        self.print("mark = self._mark()")
        if self.alts_uses_locations(node.rhs.alts):
            # Emit code that records where the next token starts.
            self.print("tok = self._tokenizer.peek()")
            self.print("start_lineno, start_col_offset = tok.start")
        if is_loop:
            self.print("children = []")
        self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
        # Fall-through result when no alternative returned earlier.
        self.print("return children" if is_loop else "return None")
def name_gather(self, node: Gather) -> str:
    """Register the two artificial rules backing a gather and return the gather rule's name.

    Two counter values are consumed: the first names the ``_gather_N`` rule,
    the second names its helper ``_loop0_M`` rule. The helper rule is stored
    in ``self.todo`` before the gather rule.

    Args:
        node: The gather node; its ``separator`` and ``node`` attributes are used.

    Returns:
        The name of the newly registered ``_gather_N`` rule.
    """
    self.counter += 1
    gather_name = f"_gather_{self.counter}"
    self.counter += 1
    loop_name = f"_loop0_{self.counter}"

    # Helper rule: "separator elem" with the action "elem".
    loop_items = [
        NamedItem(None, node.separator),
        NamedItem("elem", node.node),
    ]
    loop_alt = Alt(loop_items, action="elem")
    self.todo[loop_name] = Rule(loop_name, None, Rhs([loop_alt]))

    # Gather rule: one "elem" followed by the helper rule, bound as "seq".
    gather_items = [
        NamedItem("elem", node.node),
        NamedItem("seq", NameLeaf(loop_name)),
    ]
    gather_alt = Alt(gather_items)
    self.todo[gather_name] = Rule(gather_name, None, Rhs([gather_alt]))

    return gather_name
def visit_Rule(self, node: Rule) -> None:
    """Emit the Python parser method for one grammar rule.

    Prints a decorator line, the ``def`` header, a comment echoing the
    flattened rule (plus a ``nullable`` comment when ``node.nullable`` is
    truthy), the preamble, the code produced by visiting the flattened
    right-hand side, and the fall-through ``return``.

    Args:
        node: The rule for which a parser method is generated.
    """
    is_loop = node.is_loop()
    is_gather = node.is_gather()
    rhs = node.flatten()

    # Exactly one decorator line is emitted per rule.
    if not node.left_recursive:
        decorator = "@memoize"
    elif node.leader:
        decorator = "@memoize_left_rec"
    else:
        # Non-leader rules in a cycle are not memoized,
        # but they must still be logged.
        decorator = "@logger"
    self.print(decorator)

    return_annotation = node.type or "Any"
    self.print(f"def {node.name}(self) -> Optional[{return_annotation}]:")
    with self.indent():
        self.print(f"# {node.name}: {rhs}")
        if node.nullable:
            self.print(f"# nullable={node.nullable}")
        self.print("mark = self.mark()")
        if is_loop:
            self.print("children = []")
        self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
        # Fall-through result when no alternative returned earlier.
        self.print("return children" if is_loop else "return None")
def visit_Rule(self, node: Rule) -> None:
    """Emit the C function for one grammar rule.

    Prints the rule text as ``//`` comments, the function header, and the
    braces, delegating the body to ``_handle_loop_rule_body`` or
    ``_handle_default_rule_body``. For a left-recursive leader a forward
    declaration of ``<name>_raw`` is printed first and
    ``_set_up_rule_memoization`` is called after the header.

    Args:
        node: The rule for which a C function is generated.
    """
    is_loop = node.is_loop()
    is_gather = node.is_gather()
    rhs = node.flatten()

    # Loops and gathers produce sequences; otherwise use the declared type,
    # falling back to an untyped pointer.
    if is_loop or is_gather:
        result_type = "asdl_seq *"
    else:
        result_type = node.type or "void *"

    for comment_line in str(node).splitlines():
        self.print(f"// {comment_line}")

    is_leader = node.left_recursive and node.leader
    if is_leader:
        self.print(f"static {result_type} {node.name}_raw(Parser *);")

    self.print(f"static {result_type}")
    self.print(f"{node.name}_rule(Parser *p)")

    if is_leader:
        self._set_up_rule_memoization(node, result_type)

    self.print("{")
    if is_loop:
        self._handle_loop_rule_body(node, rhs)
    else:
        self._handle_default_rule_body(node, rhs, result_type)
    self.print("}")
def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
    """Emit the C body of a loop rule.

    The emitted code allocates a ``_children`` buffer, runs the code
    produced by visiting ``rhs``, copies the collected children into a new
    ``asdl_seq``, frees the buffer, stores the sequence in the memo and
    returns it. Rules whose name starts with ``_loop1`` additionally emit
    an early ``return NULL`` when nothing was collected or an error is set.

    Args:
        node: The loop rule being generated.
        rhs: The flattened right-hand side of ``node``.
    """
    memoize = self._should_memoize(node)
    is_repeat1 = node.name.startswith("_loop1")

    with self.indent():
        self._check_for_errors()
        self.print("void *_res = NULL;")
        if memoize:
            self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res))")
            with self.indent():
                self.print("return _res;")
        self.print("int _mark = p->mark;")
        self.print("int _start_mark = p->mark;")
        self.print("void **_children = PyMem_Malloc(sizeof(void *));")
        self.out_of_memory_return("!_children")
        self.print("ssize_t _children_capacity = 1;")
        self.print("ssize_t _n = 0;")

        # Start-token metadata is only set up when some action mentions EXTRA.
        uses_extra = any(
            alternative.action and "EXTRA" in alternative.action
            for alternative in rhs.alts
        )
        if uses_extra:
            self._set_up_token_start_metadata_extraction()

        self.visit(
            rhs,
            is_loop=True,
            is_gather=node.is_gather(),
            rulename=node.name,
        )

        if is_repeat1:
            self.print("if (_n == 0 || p->error_indicator) {")
            with self.indent():
                self.print("PyMem_Free(_children);")
                self.print("return NULL;")
            self.print("}")

        self.print("asdl_seq *_seq = _Py_asdl_seq_new(_n, p->arena);")
        # The buffer must be released if the sequence allocation fails.
        self.out_of_memory_return("!_seq", cleanup_code="PyMem_Free(_children);")
        self.print("for (int i = 0; i < _n; i++) asdl_seq_SET(_seq, i, _children[i]);")
        self.print("PyMem_Free(_children);")
        if node.name:
            self.print(f"_PyPegen_insert_memo(p, _start_mark, {node.name}_type, _seq);")
        self.print("return _seq;")
def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
    """Emit the C body of a non-loop rule.

    The emitted code declares ``res``, optionally consults the memo, runs
    the code produced by visiting ``rhs``, resets ``res`` to ``NULL`` on
    fall-through, and ends with a ``done:`` label that optionally inserts
    the memo entry and returns ``res``.

    Args:
        node: The rule being generated.
        rhs: The flattened right-hand side of ``node``.
        result_type: C type used to declare the result variable.
    """
    memoize = self._should_memoize(node)
    # The rule name is only passed down to the visitor for memoized rules.
    rulename = node.name if memoize else None

    with self.indent():
        self.print(f"{result_type} res = NULL;")
        if memoize:
            self.print(f"if (is_memoized(p, {node.name}_type, &res))")
            with self.indent():
                self.print("return res;")
        self.print("int mark = p->mark;")

        # Start-token metadata is only set up when some action mentions EXTRA.
        uses_extra = any(
            alternative.action and "EXTRA" in alternative.action
            for alternative in rhs.alts
        )
        if uses_extra:
            self._set_up_token_start_metadata_extraction()

        self.visit(
            rhs,
            is_loop=False,
            is_gather=node.is_gather(),
            rulename=rulename,
        )

        if self.debug:
            self.print(f'fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark);')
        self.print("res = NULL;")

    # The label is printed outside the indented body.
    self.print(" done:")
    with self.indent():
        if memoize:
            self.print(f"insert_memo(p, mark, {node.name}_type, res);")
        self.print("return res;")
def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
    """Emit the C body of a non-loop rule.

    The emitted code declares ``_res``, optionally consults the memo, runs
    the code produced by visiting ``rhs``, resets ``_res`` to ``NULL`` on
    fall-through, and ends with a ``done:`` label that optionally inserts
    the memo entry and returns ``_res`` via ``add_return``.

    Args:
        node: The rule being generated.
        rhs: The flattened right-hand side of ``node``.
        result_type: C type used to declare the result variable.
    """
    memoize = self._should_memoize(node)

    with self.indent():
        self.add_level()
        self._check_for_errors()
        self.print(f"{result_type} _res = NULL;")
        if memoize:
            self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
            with self.indent():
                self.add_return("_res")
            self.print("}")
        self.print("int _mark = p->mark;")

        # Start-token metadata is only set up when some action mentions EXTRA.
        uses_extra = any(
            alternative.action and "EXTRA" in alternative.action
            for alternative in rhs.alts
        )
        if uses_extra:
            self._set_up_token_start_metadata_extraction()

        self.visit(
            rhs,
            is_loop=False,
            is_gather=node.is_gather(),
            rulename=node.name,
        )

        if self.debug:
            self.print(f'D(fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark));')
        self.print("_res = NULL;")

    # The label is printed outside the indented body.
    self.print(" done:")
    with self.indent():
        if memoize:
            self.print(f"_PyPegen_insert_memo(p, _mark, {node.name}_type, _res);")
        self.add_return("_res")
def name_loop(self, node: Plain, is_repeat1: bool) -> str:
    """Register an artificial loop rule wrapping ``node`` and return its name.

    Args:
        node: The item to be repeated; it becomes the rule's only item.
        is_repeat1: Selects the ``_loop1_`` name prefix when true,
            ``_loop0_`` otherwise.

    Returns:
        The name under which the new rule was stored in ``self.todo``.
    """
    self.counter += 1
    # TODO: It's ugly to signal via the name.
    prefix = '_loop1_' if is_repeat1 else '_loop0_'
    loop_name = f'{prefix}{self.counter}'
    single_alt = Alt([NamedItem(None, node)])
    self.todo[loop_name] = Rule(loop_name, None, Rhs([single_alt]))
    return loop_name
def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str:
    """Register an artificial loop rule wrapping ``node`` and return its name.

    Args:
        node: The item to be repeated; it becomes the rule's only item.
        is_repeat1: Selects the ``_loop1_`` name prefix when true,
            ``_loop0_`` otherwise.

    Returns:
        The name under which the new rule was stored in ``self.all_rules``.
    """
    self.counter += 1
    prefix = "_loop1_" if is_repeat1 else "_loop0_"
    loop_name = f"{prefix}{self.counter}"
    single_alt = Alt([NamedItem(None, node)])
    self.all_rules[loop_name] = Rule(loop_name, None, Rhs([single_alt]))
    return loop_name
def visit_Rule(self, node: Rule) -> None:
    """Emit the C function for one grammar rule.

    Prints the rule text as ``//`` comments, the function header, and the
    braces, delegating the body to ``_handle_loop_rule_body`` or
    ``_handle_default_rule_body``. For a left-recursive leader a forward
    declaration of ``<name>_raw`` is printed first and
    ``_set_up_rule_memoization`` is called after the header.

    For rules whose name ends with ``without_invalid`` the emitted function
    saves ``p->call_invalid_rules`` and sets it to 0, and a statement that
    restores it is pushed on ``self.cleanup_statements`` while the body is
    generated.

    Args:
        node: The rule for which a C function is generated.
    """
    is_loop = node.is_loop()
    is_gather = node.is_gather()
    rhs = node.flatten()

    # Loops and gathers produce sequences; otherwise use the declared type,
    # falling back to an untyped pointer.
    if is_loop or is_gather:
        result_type = "asdl_seq *"
    else:
        result_type = node.type or "void *"

    for comment_line in str(node).splitlines():
        self.print(f"// {comment_line}")

    is_leader = node.left_recursive and node.leader
    if is_leader:
        self.print(f"static {result_type} {node.name}_raw(Parser *);")

    self.print(f"static {result_type}")
    self.print(f"{node.name}_rule(Parser *p)")

    if is_leader:
        self._set_up_rule_memoization(node, result_type)

    self.print("{")

    suppress_invalid = node.name.endswith("without_invalid")
    if suppress_invalid:
        with self.indent():
            self.print("int _prev_call_invalid = p->call_invalid_rules;")
            self.print("p->call_invalid_rules = 0;")
            self.cleanup_statements.append(
                "p->call_invalid_rules = _prev_call_invalid;"
            )

    if is_loop:
        self._handle_loop_rule_body(node, rhs)
    else:
        self._handle_default_rule_body(node, rhs, result_type)

    if suppress_invalid:
        # Drop the restore statement pushed above.
        self.cleanup_statements.pop()

    self.print("}")
def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
    """Emit the C body of a loop rule.

    The emitted code bails out when ``p->error_indicator`` is set,
    allocates a ``children`` buffer, runs the code produced by visiting
    ``rhs``, copies the collected children into a new ``asdl_seq``, frees
    the buffer, stores the sequence in the memo and returns it. Rules whose
    name starts with ``_loop1`` additionally emit an early ``return NULL``
    when nothing was collected.

    Args:
        node: The loop rule being generated.
        rhs: The flattened right-hand side of ``node``.
    """
    memoize = self._should_memoize(node)
    is_repeat1 = node.name.startswith("_loop1")
    # The rule name is only passed down to the visitor for memoized rules.
    rulename = node.name if memoize else None

    with self.indent():
        self.print("if (p->error_indicator) {")
        with self.indent():
            self.print("return NULL;")
        self.print("}")
        self.print("void *res = NULL;")
        if memoize:
            self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &res))")
            with self.indent():
                self.print("return res;")
        self.print("int mark = p->mark;")
        self.print("int start_mark = p->mark;")
        self.print("void **children = PyMem_Malloc(sizeof(void *));")
        self.out_of_memory_return("!children", "NULL")
        self.print("ssize_t children_capacity = 1;")
        self.print("ssize_t n = 0;")

        # Start-token metadata is only set up when some action mentions EXTRA.
        uses_extra = any(
            alternative.action and "EXTRA" in alternative.action
            for alternative in rhs.alts
        )
        if uses_extra:
            self._set_up_token_start_metadata_extraction()

        self.visit(
            rhs,
            is_loop=True,
            is_gather=node.is_gather(),
            rulename=rulename,
        )

        if is_repeat1:
            self.print("if (n == 0) {")
            with self.indent():
                self.print("PyMem_Free(children);")
                self.print("return NULL;")
            self.print("}")

        self.print("asdl_seq *seq = _Py_asdl_seq_new(n, p->arena);")
        # The buffer must be released if the sequence allocation fails.
        self.out_of_memory_return(
            "!seq",
            "NULL",
            message=f"asdl_seq_new {node.name}",
            cleanup_code="PyMem_Free(children);",
        )
        self.print("for (int i = 0; i < n; i++) asdl_seq_SET(seq, i, children[i]);")
        self.print("PyMem_Free(children);")
        if node.name:
            self.print(f"_PyPegen_insert_memo(p, start_mark, {node.name}_type, seq);")
        self.print("return seq;")
def __init__(
    self,
    grammar: grammar.Grammar,
    file: Optional[IO[Text]],
    *,
    tokens: Dict[int, str] = token.tok_name,
    skip_actions: bool = False,
):
    """Set up the generator for ``grammar``.

    Reads the ``keywords`` meta through ``parse_bool`` (default ``True``)
    into ``use_reserved_words``. When ``skip_actions`` is set and the
    grammar has neither a ``start`` rule nor a ``trailer`` meta, a
    ``start`` rule that refers to the grammar's first rule is added to
    ``grammar.rules`` before the base class is initialised.

    Args:
        grammar: The grammar to generate a parser for; may be mutated as
            described above.
        file: Output stream handed to the base class.
        tokens: Token-number-to-name mapping handed to the base class.
        skip_actions: Stored on the instance as ``skip_actions``.
    """
    self.use_reserved_words = self.parse_bool(
        grammar.metas.get("keywords"), "keywords", True
    )

    has_entry_point = "start" in grammar.rules or "trailer" in grammar.metas
    if skip_actions and not has_entry_point:
        # Synthesize "start" as a plain reference to the first rule.
        entry_rule = next(iter(grammar.rules))
        start_alt = Alt([NamedItem(None, NameLeaf(entry_rule))])
        grammar.rules["start"] = Rule("start", None, Rhs([start_alt]))

    super().__init__(grammar, tokens, file)
    self.skip_actions = skip_actions
    self.callmakervisitor: PythonCallMakerVisitor = PythonCallMakerVisitor(self)
def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
    """Emit the C body of a loop rule.

    The emitted code allocates a ``children`` buffer, runs the code
    produced by visiting ``rhs``, copies the collected children into a new
    ``asdl_seq``, frees the buffer, stores the sequence in the memo and
    returns it. Rules whose name starts with ``_loop1`` additionally emit
    an early ``return NULL`` when nothing was collected. Memo lookup is
    emitted only for rules that are not left-recursive.

    Args:
        node: The loop rule being generated.
        rhs: The flattened right-hand side of ``node``.
    """
    memoize = not node.left_recursive
    is_repeat1 = node.name.startswith("_loop1")
    # The rule name is only passed down to the visitor for memoized rules.
    rulename = node.name if memoize else None

    with self.indent():
        self.print("void *res = NULL;")
        if memoize:
            self.print(f"if (is_memoized(p, {node.name}_type, &res))")
            with self.indent():
                self.print("return res;")
        self.print("int mark = p->mark;")
        self.print("void **children = PyMem_Malloc(sizeof(void *));")
        self.out_of_memory_return("!children", "NULL")
        self.print("ssize_t children_capacity = 1;")
        self.print("ssize_t n = 0;")
        self._set_up_token_start_metadata_extraction()

        self.visit(
            rhs,
            is_loop=True,
            is_gather=node.is_gather(),
            rulename=rulename,
        )

        if is_repeat1:
            self.print("if (n == 0) {")
            with self.indent():
                self.print("PyMem_Free(children);")
                self.print("return NULL;")
            self.print("}")

        self.print("asdl_seq *seq = _Py_asdl_seq_new(n, p->arena);")
        # NOTE(review): no cleanup code is passed here, so the emitted
        # out-of-memory branch presumably returns without freeing
        # `children` - confirm against out_of_memory_return.
        self.out_of_memory_return(
            "!seq",
            "NULL",
            message=f"asdl_seq_new {node.name}",
        )
        self.print("for (int i = 0; i < n; i++) asdl_seq_SET(seq, i, children[i]);")
        self.print("PyMem_Free(children);")
        if node.name:
            self.print(f"insert_memo(p, mark, {node.name}_type, seq);")
        self.print("return seq;")
def name_node(self, rhs: Rhs) -> str:
    """Register ``rhs`` as a new artificial ``_tmp_N`` rule and return its name.

    Args:
        rhs: The right-hand side that becomes the body of the new rule.

    Returns:
        The name under which the new rule was stored in ``self.todo``.
    """
    self.counter += 1
    # TODO: Pick a nicer name.
    tmp_name = f'_tmp_{self.counter}'
    tmp_rule = Rule(tmp_name, None, rhs)
    self.todo[tmp_name] = tmp_rule
    return tmp_name
def visit_Rule(self, rule: Rule) -> None:
    """Visit the flattened form of ``rule``.

    Args:
        rule: The rule whose ``flatten()`` result is passed to ``self.visit``.
    """
    flattened = rule.flatten()
    self.visit(flattened)
def visit_Rule(self, node: Rule) -> None:
    """Emit the C function (or function pair) for one grammar rule.

    A ``// <rule>`` comment and the ``<name>_rule`` header are always
    printed. For a left-recursive leader, a memoizing wrapper body is
    emitted first: it repeatedly calls ``<name>_raw`` while the parse
    position keeps advancing, and the real rule body is then emitted under
    the ``<name>_raw`` header. The rule body itself is either the loop form
    (collect children into an ``asdl_seq``) or the default form (fall
    through to a ``done:`` label).

    Args:
        node: The rule for which C code is generated.
    """
    is_loop = node.is_loop()
    is_repeat1 = node.name.startswith("_loop1")
    memoize = not node.left_recursive
    rhs = node.flatten()

    # Loops produce sequences; otherwise use the declared type, falling
    # back to an untyped pointer.
    if is_loop:
        result_type = "asdl_seq *"
    else:
        result_type = node.type or "void *"

    is_leader = node.left_recursive and node.leader

    self.print(f"// {node}")
    if is_leader:
        self.print(f"static {result_type} {node.name}_raw(Parser *);")

    self.print(f"static {result_type}")
    self.print(f"{node.name}_rule(Parser *p)")

    if is_leader:
        # Wrapper: seed the memo, then re-run the raw rule until it stops
        # consuming more input than the previous attempt.
        self.print("{")
        with self.indent():
            self.print(f"{result_type} res = NULL;")
            self.print(f"if (is_memoized(p, {node.name}_type, &res))")
            with self.indent():
                self.print("return res;")
            self.print("int mark = p->mark;")
            self.print("int resmark = p->mark;")
            self.print("while (1) {")
            with self.indent():
                self.call_with_errorcheck_return(
                    f"update_memo(p, mark, {node.name}_type, res)", "res"
                )
                self.print("p->mark = mark;")
                self.print(f"void *raw = {node.name}_raw(p);")
                self.print("if (raw == NULL || p->mark <= resmark)")
                with self.indent():
                    self.print("break;")
                self.print("resmark = p->mark;")
                self.print("res = raw;")
            self.print("}")
            self.print("p->mark = resmark;")
            self.print("return res;")
        self.print("}")
        self.print(f"static {result_type}")
        self.print(f"{node.name}_raw(Parser *p)")

    self.print("{")
    with self.indent():
        if is_loop:
            self.print("void *res = NULL;")
        else:
            self.print(f"{result_type} res = NULL;")
        if memoize:
            self.print(f"if (is_memoized(p, {node.name}_type, &res))")
            with self.indent():
                self.print("return res;")
        self.print("int mark = p->mark;")
        if is_loop:
            self.print("void **children = PyMem_Malloc(0);")
            self.out_of_memory_return("!children", "NULL")
            self.print("ssize_t n = 0;")

        self.visit(
            rhs,
            is_loop=is_loop,
            rulename=node.name if memoize else None,
        )

        if is_loop:
            if is_repeat1:
                self.print("if (n == 0) {")
                with self.indent():
                    self.print("PyMem_Free(children);")
                    self.print("return NULL;")
                self.print("}")
            self.print("asdl_seq *seq = _Py_asdl_seq_new(n, p->arena);")
            self.out_of_memory_return(
                "!seq",
                "NULL",
                message=f"asdl_seq_new {node.name}",
            )
            self.print("for (int i = 0; i < n; i++) asdl_seq_SET(seq, i, children[i]);")
            self.print("PyMem_Free(children);")
            if node.name:
                self.print(f"insert_memo(p, mark, {node.name}_type, seq);")
            self.print("return seq;")
        else:
            if self.debug:
                self.print(f'fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark);')
            self.print("res = NULL;")

    if not is_loop:
        # The label is printed outside the indented body.
        self.print(" done:")
        with self.indent():
            if memoize:
                self.print(f"insert_memo(p, mark, {node.name}_type, res);")
            self.print("return res;")
    self.print("}")
if cut: return None return None @memoize def rule(self) -> Optional[Rule]: # rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" alts NEWLINE mark = self.mark() cut = False if ((rulename := self.rulename()) and (opt := self.memoflag(), ) and (literal := self.expect(":")) and (alts := self.alts()) and (newline_ := self.expect('NEWLINE')) and (indent_ := self.expect('INDENT')) and (more_alts := self.more_alts()) and (dedent_ := self.expect('DEDENT'))): return Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts), memo=opt) self.reset(mark) if cut: return None cut = False if ((rulename := self.rulename()) and (opt := self.memoflag(), ) and (literal := self.expect(":")) and (newline_ := self.expect('NEWLINE')) and (indent_ := self.expect('INDENT')) and (more_alts := self.more_alts()) and (dedent_ := self.expect('DEDENT'))): return Rule(rulename[0], rulename[1], more_alts, memo=opt) self.reset(mark) if cut: return None cut = False if ((rulename := self.rulename()) and (opt := self.memoflag(), )
def artifical_rule_from_rhs(self, rhs: Rhs) -> str:
    """Register ``rhs`` as a new artificial ``_tmp_N`` rule and return its name.

    Args:
        rhs: The right-hand side that becomes the body of the new rule.

    Returns:
        The name under which the new rule was stored in ``self.all_rules``.
    """
    self.counter += 1
    # TODO: Pick a nicer name.
    tmp_name = f"_tmp_{self.counter}"
    tmp_rule = Rule(tmp_name, None, rhs)
    self.all_rules[tmp_name] = tmp_rule
    return tmp_name
if ( (rulename := self.rulename()) and (literal := self.expect(":")) and (alts := self.alts()) and (newline := self.expect('NEWLINE')) and (indent := self.expect('INDENT')) and (more_alts := self.more_alts()) and (dedent := self.expect('DEDENT')) ): return Rule ( rulename [ 0 ] , rulename [ 1 ] , Rhs ( alts . alts + more_alts . alts ) ) self.reset(mark) if cut: return None cut = False if ( (rulename := self.rulename()) and (literal := self.expect(":")) and (newline := self.expect('NEWLINE')) and (indent := self.expect('INDENT')) and (more_alts := self.more_alts()) and (dedent := self.expect('DEDENT'))