def rule(self, ast, *args):
    """Build and register a grammar rule from its parsed AST node.

    Handles redefinition checking (the ``@override`` decorator), optional
    keyword parameters, and rule inheritance via ``base``.  Returns the
    constructed rule object after storing it in ``self.rules``.
    """
    decorators = ast.decorators
    rule_name = ast.name
    expression = ast.exp
    base_name = ast.base
    params = ast.params
    kwparams = OrderedDict(ast.kwparams) if ast.kwparams else None

    # A redefinition without @override is reported through new_name();
    # an explicit @override requires the name to be known already.
    if 'override' not in decorators and rule_name in self.rules:
        self.new_name(rule_name)
    elif 'override' in decorators:
        self.known_name(rule_name)

    if base_name:
        # Inheriting rule: the base must have been defined earlier.
        self.known_name(base_name)
        parent = self.rules[base_name]
        built = grammars.BasedRule(
            ast, rule_name, expression, parent, params, kwparams,
            decorators=decorators,
        )
    else:
        built = grammars.Rule(
            ast, rule_name, expression, params, kwparams,
            decorators=decorators,
        )

    self.rules[rule_name] = built
    return built
def token(self, ast):
    """Record a token definition and return its expression.

    A token with a value becomes a ``model.Token`` stored in
    ``self.tokens``; a valueless token can never match, so a synthetic
    rule wrapping ``model.Fail()`` is appended instead, keeping later
    references to the name resolvable.
    """
    token_name = ast.name
    if ast.value:
        expression = model.Token(ast.value)
        self.tokens[token_name] = expression
    else:
        expression = model.Fail()
        synthetic = model.Rule(ast, token_name, expression, [], {})
        self.synthetic_rules.append(synthetic)
    return expression
def generate_tatsu_grammar(grammar: gll_grammar.Grammar, extra_rules=()) -> tatsu_grammars.Grammar:
    """Translate a GLL grammar into an equivalent TatSu grammar.

    *extra_rules* are placed first in the resulting rule list, followed by
    one TatSu rule per entry of ``grammar.rules`` (converted via
    ``node_to_tatsu``).
    """
    converted = [
        tatsu_grammars.Rule(
            ast=None,
            name=symbol,
            exp=node_to_tatsu(rule),
            params=None,
            kwparams=None,
        )
        for symbol, rule in grammar.rules.items()
    ]
    return tatsu_grammars.Grammar(
        name=grammar.name,
        rules=list(extra_rules) + converted,
    )
class LIFETIME:
    # The lifetime's identifier text, without the leading quote.
    lifetime: str

    # Matches a quote followed by an identifier, tolerating whitespace.
    RULE = grammars.Rule(
        ast=None,
        name="LIFETIME",
        exp=grammars.Pattern(r"\s*'\s*" + IDENT._IDENTIFIER),
        params=None,
        kwparams=None,
    )

    @classmethod
    def from_ast(cls, ast: str) -> LIFETIME:
        """Build a LIFETIME from matched text: trim, drop the quote, trim again."""
        text = ast.strip()
        return cls(text[1:].strip())
class PUNCT:
    # The single punctuation character that was matched.
    punct: str

    # Character set from
    # https://github.com/rust-lang/rust/blob/1.46.0/src/librustc_lexer/src/lib.rs#L72-L126
    # excluding (){}[] — delimiters are handled by TOKEN_TREE, not here.
    RULE = grammars.Rule(
        ast=None,
        name="PUNCT",
        exp=grammars.Pattern(r"\s*[;,.@#~?:$=!<>\-&+*/^%]"),
        params=None,
        kwparams=None,
    )

    @classmethod
    def from_ast(cls, ast: str) -> PUNCT:
        """Build a PUNCT from matched text, discarding surrounding whitespace."""
        return cls(ast.strip())
def rule(self, ast):
    # Translate one parsed rule. Uppercase-initial names follow the ANTLR
    # convention for lexer/token rules and get special handling; everything
    # else becomes an ordinary model.Rule.
    # NOTE(review): reconstructed from collapsed source — the nesting of the
    # final `name = name.lower()` inside the isupper() branch is the most
    # plausible reading; confirm against the original file.
    name = camel2py(ast.name)
    exp = ast.exp
    if name[0].isupper():
        name = name.upper()
        if isinstance(exp, model.Token):
            # Plain token rule: merge into token_rules instead of emitting a rule.
            if name in self.token_rules:
                self.token_rules[name].exp = exp  # it is a model._Decorator
            else:
                self.token_rules[name] = exp
            return None
        elif not ast.fragment and not isinstance(exp, model.Sequence):
            # Non-fragment, non-sequence token rule: leave a reference to the
            # lowercased grammar rule that will be emitted below.
            ref = model.RuleRef(name.lower())
            if name in self.token_rules:
                self.token_rules[name].exp = ref
            else:
                self.token_rules[name] = ref
        # Emitted grammar rules use lowercase names.
        name = name.lower()
    return model.Rule(ast, name, exp, ast.params, ast.kwparams)
class IDENT:
    # The identifier text as matched (leading/trailing whitespace removed).
    ident: str

    # Grammar per https://doc.rust-lang.org/reference/identifiers.html
    _IDENTIFIER_OR_KEYWORD = "([a-zA-Z][a-zA-Z0-9_]*|_[a-zA-Z0-9_]+)"
    _RAW_IDENTIFIER = (
        "r#" + _IDENTIFIER_OR_KEYWORD
    )  # TODO: Except crate, self, super, Self
    _NON_KEYWORD_IDENTIFIER = (
        _IDENTIFIER_OR_KEYWORD
    )  # TODO: Except a strict or reserved keyword
    _IDENTIFIER = rf"({_NON_KEYWORD_IDENTIFIER}|{_RAW_IDENTIFIER})"

    RULE = grammars.Rule(
        ast=None,
        name="IDENT",
        exp=grammars.Pattern(r"\s*" + _IDENTIFIER),
        params=None,
        kwparams=None,
    )

    @classmethod
    def from_ast(cls, ast: str) -> IDENT:
        """Build an IDENT from matched text, discarding surrounding whitespace."""
        return cls(ast.strip())
class TOKEN_TREE:
    # The parsed children of this token tree.
    tokens: list

    # A token tree is any atomic token, or a (), {}, [] delimited group of
    # nested token trees. The three delimited alternatives are structurally
    # identical, so they are generated from the delimiter pairs.
    RULE = grammars.Rule(
        ast=None,
        name="TOKEN_TREE",
        exp=grammars.Choice(
            [
                grammars.RuleRef("LITERAL"),
                grammars.RuleRef("IDENT"),
                grammars.RuleRef("LIFETIME"),
                grammars.RuleRef("PUNCT"),
            ]
            + [
                grammars.Sequence(
                    AST(sequence=[
                        grammars.Token(opening),
                        grammars.Closure(grammars.RuleRef("TOKEN_TREE")),
                        grammars.Token(closing),
                    ]))
                for (opening, closing) in (("(", ")"), ("{", "}"), ("[", "]"))
            ]
        ),
        params=None,
        kwparams=None,
    )

    @classmethod
    def from_ast(cls, ast) -> TOKEN_TREE:
        """Wrap the raw AST list in a TOKEN_TREE."""
        return cls(ast)
class LITERAL:
    # The literal's source text (leading/trailing whitespace removed).
    literal: str

    # from https://github.com/rust-lang/rust/blob/1.46.0/src/librustc_lexer/src/lib.rs#L133-L150
    _PATTERNS = [
        r"0b([01_]+\.?[01_]*|[01_]*\.[01_]+)([fui][0-9]+)?",  # int/float (bin)
        r"0x([0-9a-f_]+\.?[0-9a-f_]*|[0-9a-f_]*\.[0-9a-f_]+)([fui][0-9]+)?",  # int/float (hex)
        r"([0-9][0-9_]*\.?[0-9_]*|([0-9][0-9_]*)?\.[0-9_]+)([fui][0-9]+)?",  # int/float (dec)
        r"b?'(\\\\|[^\\])'",  # char and byte
        r'b?"([^\\]|\\\\)*"',  # str and bytestr
        # TODO: rawstr and rawbytestr
    ]

    RULE = grammars.Rule(
        ast=None,
        name="LITERAL",
        exp=grammars.Pattern(rf"\s*({'|'.join(_PATTERNS)})"),
        params=None,
        kwparams=None,
    )

    @classmethod
    def from_ast(cls, ast: str) -> LITERAL:
        """Build a LITERAL from matched text, discarding surrounding whitespace.

        Fixed: the return annotation previously said ``PUNCT`` — a
        copy-paste error from the PUNCT class; this method constructs
        and returns a LITERAL.
        """
        return cls(ast.strip())