def run(token_cls):
    print('== {} =='.format(token_cls.__name__))

    class FooNode(ASTNode):
        pass

    class Identifier(FooNode):
        token_node = True

    class Number(FooNode):
        token_node = True

    foo_lexer = Lexer(token_cls)
    foo_lexer.add_rules(
        (Pattern('[0-9]+'), token_cls.Number),
        (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), token_cls.Identifier),
    )

    emit_and_print_errors(lkt_file='foo.lkt', lexer=foo_lexer,
                          generate_unparser=True)

    # Reset the names that the Lexer above assigned to the BaseToken members,
    # so that run() can be invoked again with another token class.
    BaseToken.Number.name = None
    BaseToken.Identifier.name = None
    print('')
python_lexer.add_rules(
    (Pattern('(u|U)?(r|R)?'
             '({MLSTRING_SQ}|{MLSTRING_DBQ}'
             '|{STRING_SQ}|{STRING_DBQ})'), Token.String),

    (Pattern(r'[ \r\t]+'), Ignore()),
    (Pattern(r"#(.?)+"), Token.Comment),

    (Literal('>>='), Token.RshAssign),
    (Literal('is'), Token.Is),
    (Literal('=='), Token.Equals),
    (Literal('def'), Token.Def),
    (Literal('<='), Token.Lte),
    (Literal('raise'), Token.Raise),
    (Literal('%'), Token.Mod),
    (Literal('yield'), Token.Yield),
    (Literal('^='), Token.XorAssign),
    (Literal('as'), Token.As),
    (Literal('lambda'), Token.Lambda),
    (Literal('`'), Token.Backtick),
    (Literal('try'), Token.Try),
    (Literal('/'), Token.Divide),
    (Literal('~'), Token.Invert),
    (Literal('return'), Token.Return),
    (Literal('assert'), Token.Assert),
    (Literal('^'), Token.Xor),
    (Literal('break'), Token.Break),
    (Literal(']'), Token.Rbrack),
    (Literal('**='), Token.PowerAssign),
    (Literal('import'), Token.Import),
    (Literal('exec'), Token.Exec),
    (Literal(','), Token.Comma),
    (Literal('('), Token.LPar),
    (Literal('.'), Token.Dot),
    (Literal('>='), Token.Gte),
    (Literal('//='), Token.FloordivAssign),
    (Literal('*'), Token.Multiply),
    (Literal('/='), Token.DivAssign),
    (Literal('@'), Token.At),
    (Literal('='), Token.Assign),
    (Literal('//'), Token.Floordiv),
    (Literal('!='), Token.Notequal),
    (Literal('*='), Token.MultAssign),
    (Literal('%='), Token.ModAssign),
    (Literal('>'), Token.Gt),
    (Literal('**'), Token.Power),
    (Literal('&'), Token.Amp),
    (Literal('not'), Token.Not),
    (Literal(':'), Token.Colon),
    (Literal('<>'), Token.Diamond),
    (Literal('in'), Token.In),
    (Literal('{'), Token.LCurl),
    (Literal('class'), Token.Class),
    (Literal('|='), Token.OrAssign),
    (Literal('elif'), Token.Elif),
    (Literal('and'), Token.And),
    (Literal(';'), Token.Semicolon),
    (Literal('+='), Token.AddAsign),
    (Literal('print'), Token.Print),
    (Literal('<<'), Token.Lsh),
    (Literal('continue'), Token.Continue),
    (Literal('while'), Token.While),
    (Literal('except'), Token.Except),
    (Literal('if'), Token.If),
    (Literal('else'), Token.Else),
    (Literal('del'), Token.Del),
    (Literal('-='), Token.MinusAssign),
    (Literal('or'), Token.Or),
    (Literal('-'), Token.Minus),
    (Literal('['), Token.Lbrack),
    (Literal('&='), Token.AndAssign),
    (Literal(')'), Token.RPar),
    (Literal('global'), Token.Global),
    (Literal('for'), Token.For),
    (Literal('from'), Token.From),
    (Literal('>>'), Token.Rsh),
    (Literal('finally'), Token.Finally),
    (Literal('pass'), Token.Pass),
    (Literal('<<='), Token.LshAssign),
    (Literal('|'), Token.BinOr),
    (Literal('}'), Token.Rcurl),
    (Literal('with'), Token.With),
    (Literal('+'), Token.Plus),
    (Literal('<'), Token.Lt),

    (Pattern('[0-9]+'), Token.Number),
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),
)
                              RBrace, Equal, Plus)
    Comments = TokenFamily(Comment)


foo_lexer = Lexer(Token)
foo_lexer.add_rules(
    (Pattern(r'[ \n\r\t]+'), Token.Whitespace),
    (Literal('def'), Token.Def),
    (Literal('var'), Token.Var),
    (Literal('error'), Token.Error),
    (Literal('example'), Token.Example),
    (Literal('null'), Token.Null),
    (Literal(','), Token.Comma),
    (Literal('.'), Token.Dot),
    (Literal(';'), Token.Semicolon),
    (Literal('('), Token.LPar),
    (Literal(')'), Token.RPar),
    (Literal('{'), Token.LBrace),
    (Literal('}'), Token.RBrace),
    (Literal('='), Token.Equal),
    (Literal('+'), Token.Plus),
    (Literal('-'), Token.Minus),
    (Literal('<'), Token.LessThan),
    (Pattern('[0-9]+'), Token.Number),
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),
    (Pattern(r'\"[^\"]*\"'), Token.String),
    (Pattern('#(.?)+'), Token.Comment),
)
foo_lexer.add_spacing((Token.Alphanumericals, Token.Alphanumericals))
foo_lexer.add_newline_after(Token.Comment)
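# For reference, a minimal self-contained sketch of the pattern whose head is
# truncated above: token families are declared as TokenFamily attributes on
# the LexerToken subclass, then referenced by the unparsing-oriented
# add_spacing/add_newline_after calls. All names below are illustrative, not
# taken from the snippet above.
from langkit.lexer import (Lexer, LexerToken, Pattern, TokenFamily, WithText,
                           WithTrivia)


class MiniToken(LexerToken):
    Ident = WithText()
    Number = WithText()
    Comment = WithTrivia()

    Alphanumericals = TokenFamily(Ident, Number)
    Comments = TokenFamily(Comment)


mini_lexer = Lexer(MiniToken)
mini_lexer.add_rules(
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), MiniToken.Ident),
    (Pattern('[0-9]+'), MiniToken.Number),
    (Pattern('#.*'), MiniToken.Comment),
)

# When unparsing, emit a space between two consecutive alphanumerical tokens
# and a line break after each comment, mirroring the rules above.
mini_lexer.add_spacing((MiniToken.Alphanumericals, MiniToken.Alphanumericals))
mini_lexer.add_newline_after(MiniToken.Comment)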
gpr_lexer.add_rules(
    (Pattern(r"[ \t\r\n]+"), Token.Whitespace),
    (Pattern(r"--(.?)+"), Token.Comment),

    (NoCaseLit("all"), Token.All),
    (NoCaseLit("abstract"), Token.Abstract),
    (NoCaseLit("at"), Token.At),
    (NoCaseLit("case"), Token.Case),
    (NoCaseLit("end"), Token.End),
    (NoCaseLit("for"), Token.For),
    (NoCaseLit("is"), Token.Is),
    (NoCaseLit("limited"), Token.Limited),
    (NoCaseLit("private"), Token.Private),
    (NoCaseLit("null"), Token.Null),
    (NoCaseLit("others"), Token.Others),
    (NoCaseLit("package"), Token.Package),
    (NoCaseLit("renames"), Token.Renames),
    (NoCaseLit("type"), Token.Type),
    (NoCaseLit("use"), Token.Use),
    (NoCaseLit("pragma"), Token.Pragma),
    (NoCaseLit("when"), Token.When),
    (NoCaseLit("with"), Token.With),
    (NoCaseLit("extends"), Token.Extends),

    (Literal("("), Token.ParOpen),
    (Literal(")"), Token.ParClose),
    (Literal(";"), Token.Semicolon),
    (Literal(":"), Token.Colon),
    (Literal(","), Token.Comma),
    (Literal("."), Token.Dot),
    (Literal("&"), Token.Amp),
    (Literal("'"), Token.Tick),
    (Literal("|"), Token.Pipe),
    (Literal(":="), Token.Assign),
    (Literal("=>"), Token.Arrow),

    (Pattern("{integer}"), Token.Number),
    (Pattern(r"[_a-zA-Z][_a-zA-Z0-9]*"), Token.Identifier),
    (Pattern("{p_string}"), Token.String),
)
from langkit.dsl import ASTNode
from langkit.lexer import Ignore, Lexer, LexerToken, Pattern, WithText
from langkit.parsers import Grammar

from utils import emit_and_print_errors


class BaseToken(LexerToken):
    Example = WithText()
    Whitespace = Ignore()


class FooNode(ASTNode):
    pass


class Example(FooNode):
    token_node = True


foo_lexer = Lexer(BaseToken)
foo_lexer.add_rules(
    (Pattern('[ \t]+'), BaseToken.Whitespace),
    (Pattern('example'), BaseToken.Example),
)

g = Grammar('main_rule')
g.add_rules(main_rule=Example(BaseToken.Example))
emit_and_print_errors(g, foo_lexer, generate_unparser=True)
print('Done')
lkt_lexer.add_rules(
    # Whitespace & EOF
    (Pattern(r"[ \t\r\n\f]+"), Ignore()),

    # Operators
    (Literal('!'), Token.ExclMark),
    (Literal(':'), Token.Colon),
    (Literal('?'), Token.IntMark),
    (Literal('/'), Token.Div),
    (Literal('*'), Token.Times),
    (Literal('+'), Token.Plus),
    (Literal('|'), Token.Pipe),
    (Literal('<-'), Token.LeftArrow),
    (Literal('.'), Token.Dot),
    (Literal('('), Token.LPar),
    (Literal(')'), Token.RPar),
    (Literal('['), Token.LBrack),
    (Literal(']'), Token.RBrack),
    (Literal('{'), Token.LBrace),
    (Literal('}'), Token.RBrace),
    (Literal('|>'), Token.Comb),
    (Literal(','), Token.Comma),
    (Literal('@'), Token.At),
    (Literal('list+'), Token.ListPlus),
    (Literal('list*'), Token.ListStar),
    (Literal('='), Token.Equal),

    # Keywords
    (Literal('grammar'), Token.GrammarKw),
    (Literal('or'), Token.OrKw),
    (Literal('class'), Token.ClassKw),
    (Literal('fun'), Token.FunKw),
    (Literal('public'), Token.PublicKw),
    (Literal('private'), Token.PrivateKw),
    (Literal('null'), Token.NullKw),

    # Identifiers
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),

    # Strings
    (Pattern('{STRING_SQ}|{STRING_DBQ}'), Token.String),
)
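# The '{STRING_SQ}' and '{STRING_DBQ}' references above are named patterns,
# which must have been registered on the lexer (via add_patterns) before the
# rules that use them are added. A sketch, with illustrative rather than
# authoritative regexps:
#
#   lkt_lexer.add_patterns(
#       ('STRING_SQ', r"'(\\.|[^'\\])*'"),
#       ('STRING_DBQ', r'"(\\.|[^"\\])*"'),
#   )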
    LPar = WithText()
    RPar = WithText()
    LBrace = WithText()
    RBrace = WithText()
    Equal = WithText()
    Plus = WithText()
    Number = WithText()
    Identifier = WithSymbol()


foo_lexer = Lexer(Token)
foo_lexer.add_rules(
    (Pattern(r'[ \n\r\t]+'), Ignore()),
    (Eof(), Token.Termination),
    (Literal('def'), Token.Def),
    (Literal('error'), Token.Error),
    (Literal('example'), Token.Example),
    (Literal('null'), Token.Null),
    (Literal(','), Token.Comma),
    (Literal('.'), Token.Dot),
    (Literal('('), Token.LPar),
    (Literal(')'), Token.RPar),
    (Literal('{'), Token.LBrace),
    (Literal('}'), Token.RBrace),
    (Literal('='), Token.Equal),
    (Literal('+'), Token.Plus),
    (Pattern('[0-9]+'), Token.Number),
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),
)
def create_lexer(ctx, lkt_units):
    """
    Create and populate a lexer from a Lktlang unit.

    :param list[liblktlang.AnalysisUnit] lkt_units: Non-empty list of analysis
        units where to look for the lexer.
    :rtype: langkit.lexer.Lexer
    """
    import liblktlang

    # Look for the LexerDecl node in top-level lists
    full_lexer = find_toplevel_decl(ctx, lkt_units, liblktlang.LexerDecl,
                                    'lexer')
    with ctx.lkt_context(full_lexer):
        lexer_annot = parse_annotations(ctx, lexer_annotations, full_lexer)

    patterns = {}
    """
    Mapping from pattern names to the corresponding regular expression.

    :type: dict[names.Name, str]
    """

    token_family_sets = {}
    """
    Mapping from token family names to the corresponding sets of tokens that
    belong to this family.

    :type: dict[names.Name, set[TokenAction]]
    """

    token_families = {}
    """
    Mapping from token family names to the corresponding token families. We
    build this late, once we know all tokens and all families.

    :type: dict[names.Name, TokenFamily]
    """

    tokens = {}
    """
    Mapping from token names to the corresponding tokens.

    :type: dict[names.Name, Token]
    """

    rules = []
    pre_rules = []
    """
    Lists of regular and pre lexing rules for this lexer.

    :type: list[(langkit.lexer.Matcher, langkit.lexer.Action)]
    """

    newline_after = []
    """
    List of tokens after which we must introduce a newline during unparsing.

    :type: list[Token]
    """

    def ignore_constructor(start_ignore_layout, end_ignore_layout):
        """
        Adapter to build an Ignore instance with the same API as WithText
        constructors.
        """
        del start_ignore_layout, end_ignore_layout
        return Ignore()

    def process_family(f):
        """
        Process a LexerFamilyDecl node. Register the token family and process
        the rules it contains.

        :type f: liblktlang.LexerFamilyDecl
        """
        with ctx.lkt_context(f):
            # Create the token family, if needed
            name = names.Name.from_lower(text_as_str(f.f_syn_name))
            token_set = token_family_sets.setdefault(name, set())

            for r in f.f_rules:
                check_source_language(
                    isinstance(r.f_decl, liblktlang.GrammarRuleDecl),
                    'Only lexer rules allowed in family blocks')
                process_token_rule(r, token_set)

    def process_token_rule(r, token_set=None):
        """
        Process the full declaration of a GrammarRuleDecl node: create the
        token it declares and lower the optional associated lexing rule.

        :param liblktlang.FullDecl r: Full declaration for the GrammarRuleDecl
            to process.
        :param None|set[TokenAction] token_set: If this declaration appears in
            the context of a token family, this adds the new token to this
            set. Must be left to None otherwise.
        """
        with ctx.lkt_context(r):
            rule_annot = parse_annotations(ctx, token_annotations, r)

            # Gather token action info from the annotations. If absent,
            # fallback to WithText.
            token_cons = None
            start_ignore_layout = False
            end_ignore_layout = False
            if 'ignore' in rule_annot:
                token_cons = ignore_constructor
            for name in ('text', 'trivia', 'symbol'):
                try:
                    start_ignore_layout, end_ignore_layout = rule_annot[name]
                except KeyError:
                    continue
                check_source_language(token_cons is None,
                                      'At most one token action allowed')
                token_cons = token_cls_map[name]
            is_pre = rule_annot.get('pre_rule', False)
            if token_cons is None:
                token_cons = WithText

            # Create the token and register it where needed: the global token
            # mapping, its token family (if any) and the "newline_after" group
            # if the corresponding annotation is present.
            token_lower_name = text_as_str(r.f_decl.f_syn_name)
            token_name = names.Name.from_lower(token_lower_name)

            check_source_language(
                token_lower_name not in ('termination', 'lexing_failure'),
                '{} is a reserved token name'.format(token_lower_name))
            check_source_language(token_name not in tokens,
                                  'Duplicate token name')

            token = token_cons(start_ignore_layout, end_ignore_layout)
            tokens[token_name] = token
            if token_set is not None:
                token_set.add(token)
            if 'newline_after' in rule_annot:
                newline_after.append(token)

            # Lower the lexing rule, if present
            matcher_expr = r.f_decl.f_expr
            if matcher_expr is not None:
                rule = (lower_matcher(matcher_expr), token)
                if is_pre:
                    pre_rules.append(rule)
                else:
                    rules.append(rule)

    def process_pattern(full_decl):
        """
        Process a pattern declaration.

        :param liblktlang.FullDecl full_decl: Full declaration for the ValDecl
            to process.
        """
        parse_annotations(ctx, [], full_decl)
        decl = full_decl.f_decl
        lower_name = text_as_str(decl.f_syn_name)
        name = names.Name.from_lower(lower_name)

        with ctx.lkt_context(decl):
            check_source_language(name not in patterns,
                                  'Duplicate pattern name')
            check_source_language(decl.f_decl_type is None,
                                  'Patterns must have automatic types in'
                                  ' lexers')
            check_source_language(
                isinstance(decl.f_val, liblktlang.StringLit)
                and decl.f_val.p_is_regexp_literal,
                'Pattern string literal expected')
            # TODO: use StringLit.p_denoted_value when properly implemented
            patterns[name] = pattern_as_str(decl.f_val)

    def lower_matcher(expr):
        """
        Lower a token matcher to our internals.

        :type expr: liblktlang.GrammarExpr
        :rtype: langkit.lexer.Matcher
        """
        with ctx.lkt_context(expr):
            if isinstance(expr, liblktlang.TokenLit):
                return Literal(json.loads(text_as_str(expr)))
            elif isinstance(expr, liblktlang.TokenNoCaseLit):
                return NoCaseLit(json.loads(text_as_str(expr)))
            elif isinstance(expr, liblktlang.TokenPatternLit):
                return Pattern(pattern_as_str(expr))
            else:
                check_source_language(False, 'Invalid lexing expression')

    def lower_token_ref(ref):
        """
        Return the Token that `ref` refers to.

        :type ref: liblktlang.RefId
        :rtype: Token
        """
        with ctx.lkt_context(ref):
            token_name = names.Name.from_lower(text_as_str(ref))
            check_source_language(
                token_name in tokens,
                'Unknown token: {}'.format(token_name.lower))
            return tokens[token_name]

    def lower_family_ref(ref):
        """
        Return the TokenFamily that `ref` refers to.

        :type ref: liblktlang.RefId
        :rtype: TokenFamily
        """
        with ctx.lkt_context(ref):
            name_lower = text_as_str(ref)
            name = names.Name.from_lower(name_lower)
            check_source_language(
                name in token_families,
                'Unknown token family: {}'.format(name_lower))
            return token_families[name]

    def lower_case_alt(alt):
        """
        Lower the alternative of a case lexing rule.

        :type alt: liblktlang.BaseLexerCaseRuleAlt
        :rtype: Alt
        """
        prev_token_cond = None
        if isinstance(alt, liblktlang.LexerCaseRuleCondAlt):
            prev_token_cond = [lower_token_ref(ref)
                               for ref in alt.f_cond_exprs]
        return Alt(prev_token_cond=prev_token_cond,
                   send=lower_token_ref(alt.f_send.f_sent),
                   match_size=int(alt.f_send.f_match_size.text))

    # Go through all rules to register tokens, their token families and lexing
    # rules.
    for full_decl in full_lexer.f_decl.f_rules:
        with ctx.lkt_context(full_decl):
            if isinstance(full_decl, liblktlang.LexerFamilyDecl):
                # This is a family block: go through all declarations inside
                # it.
                process_family(full_decl)

            elif isinstance(full_decl, liblktlang.FullDecl):
                # There can be various types of declarations in lexers...
                decl = full_decl.f_decl

                if isinstance(decl, liblktlang.GrammarRuleDecl):
                    # Here, we have a token declaration, potentially
                    # associated with a lexing rule.
                    process_token_rule(full_decl)

                elif isinstance(decl, liblktlang.ValDecl):
                    # This is the declaration of a pattern
                    process_pattern(full_decl)

                else:
                    check_source_language(False,
                                          'Unexpected declaration in lexer')

            elif isinstance(full_decl, liblktlang.LexerCaseRule):
                syn_alts = list(full_decl.f_alts)

                # This is a rule for conditional lexing: lower its matcher and
                # its alternative rules.
                matcher = lower_matcher(full_decl.f_expr)
                check_source_language(
                    len(syn_alts) == 2
                    and isinstance(syn_alts[0],
                                   liblktlang.LexerCaseRuleCondAlt)
                    and isinstance(syn_alts[1],
                                   liblktlang.LexerCaseRuleDefaultAlt),
                    'Invalid case rule topology')
                rules.append(Case(matcher,
                                  lower_case_alt(syn_alts[0]),
                                  lower_case_alt(syn_alts[1])))

            else:
                # The grammar should make the following dead code
                assert False, 'Invalid lexer rule: {}'.format(full_decl)

    # Create the LexerToken subclass to define all tokens and token families
    items = {}
    for name, token in tokens.items():
        items[name.camel] = token
    for name, token_set in token_family_sets.items():
        tf = TokenFamily(*list(token_set))
        token_families[name] = tf
        items[name.camel] = tf
    token_class = type('Token', (LexerToken, ), items)

    # Create the Lexer instance and register all patterns and lexing rules
    result = Lexer(token_class,
                   'track_indent' in lexer_annot,
                   pre_rules)
    for name, regexp in patterns.items():
        result.add_patterns((name.lower, regexp))
    result.add_rules(*rules)

    # Register spacing/newline rules
    for tf1, tf2 in lexer_annot.get('spacing', []):
        result.add_spacing((lower_family_ref(tf1), lower_family_ref(tf2)))
    result.add_newline_after(*newline_after)

    return result
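# Usage sketch for create_lexer (hedged: assumes a configured CompileCtx in
# `ctx`; the Lkt source file name is hypothetical):
#
#   import liblktlang
#
#   lkt_ctx = liblktlang.AnalysisContext()
#   units = [lkt_ctx.get_from_file('my_language.lkt')]
#   lexer = create_lexer(ctx, units)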
    Colon = WithText()

    # Comment
    Comment = WithTrivia()

    # Numeric
    Numeral = WithText()

    # String
    StringLiteral = WithText()


rflx_lexer = Lexer(Token)

rflx_lexer.add_rules(
    (Pattern(r"[ \t\r\n]+"), Ignore()),
    (Pattern(r"--.*"), Token.Comment),
)

# Hack to support keywords that equal attributes
# Inspired by Libadalang grammar (ada/language/lexer.py)
rflx_lexer.add_rules(*[
    Case(
        Literal(text),
        Alt(
            prev_token_cond=(Token.Tick, ),
            send=token,
            match_size=len(text),
        ),
        Alt(send=Token.UnqualifiedIdentifier, match_size=len(text)),
    )
    for text, token in [
        ("First", Token.First),
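# Effect of the Case rules built above, taking "First" as an example (the
# inputs are illustrative): in "Message'First" the previous token is a Tick,
# so the conditional alternative fires and "First" is sent as Token.First; in
# any other context the default alternative sends Token.UnqualifiedIdentifier
# for the same five characters.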
    LBrace = WithText()
    RBrace = WithText()
    Plus = WithText()
    Number = WithText()
    Identifier = WithSymbol()


foo_lexer = Lexer(Token, track_indent=True)
foo_lexer.add_rules(
    (Pattern(r'[ \r\t]+'), Ignore()),
    (Eof(), Token.Termination),
    (Literal("example"), Token.Example),
    (Literal("null"), Token.Null),
    (Literal(','), Token.Comma),
    (Literal('('), Token.LPar),
    (Literal(')'), Token.RPar),
    (Literal('{'), Token.LBrace),
    (Literal('}'), Token.RBrace),
    (Literal('+'), Token.Plus),
    (Pattern('[0-9]+'), Token.Number),
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),
)
L = foo_lexer

Diagnostics.set_lang_source_dir(os.path.abspath(__file__))


@root_grammar_class()
class FooNode(ASTNode):
    pass
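# Note on track_indent=True above: langkit then tracks line indentation itself
# and synthesizes layout tokens during lexing, which is presumably why the
# Ignore() rule skips ' ', '\r' and '\t' but deliberately leaves '\n' out of
# the pattern.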
lkt_lexer.add_rules(
    # Whitespace & EOF
    (Pattern(r"[ \t\r\n\f]+"), Ignore()),

    # Operators
    (Literal('!'), Token.ExclMark),
    (Literal(';'), Token.Semicolon),
    (Literal(':'), Token.Colon),
    (Literal('?'), Token.IntMark),
    (Literal('/'), Token.Div),
    (Literal('*'), Token.Times),
    (Literal('+'), Token.Plus),
    (Literal('&'), Token.Amp),
    (Literal('-'), Token.Minus),
    (Literal('|'), Token.Pipe),
    (Literal('<-'), Token.LeftArrow),
    (Literal('->'), Token.RightArrow),
    (Literal('.'), Token.Dot),
    (Literal('('), Token.LPar),
    (Literal(')'), Token.RPar),
    (Literal('['), Token.LBrack),
    (Literal(']'), Token.RBrack),
    (Literal('{'), Token.LBrace),
    (Literal('}'), Token.RBrace),
    (Literal('|>'), Token.Comb),
    (Literal(','), Token.Comma),
    (Literal('@'), Token.At),
    (Literal('=>'), Token.FatRightArrow),
    (Literal('='), Token.Equal),
    (Literal('<='), Token.LTE),
    (Literal('>='), Token.GTE),
    (Literal('<'), Token.LT),
    (Literal('>'), Token.GT),
    (Literal('%'), Token.Percent),

    # Keywords
    (Literal('lexer'), Token.LexerKw),
    (Literal('grammar'), Token.GrammarKw),
    (Literal('class'), Token.ClassKw),
    (Literal('struct'), Token.StructKw),
    (Literal('fun'), Token.FunKw),
    (Literal('public'), Token.PublicKw),
    (Literal('private'), Token.PrivateKw),
    (Literal('null'), Token.NullKw),
    (Literal('is'), Token.IsKw),
    (Literal('val'), Token.ValKw),
    (Literal('if'), Token.IfKw),
    (Literal('elif'), Token.ElifKw),
    (Literal('else'), Token.ElseKw),
    (Literal('then'), Token.ThenKw),
    (Literal('and'), Token.AndKw),
    (Literal('or'), Token.OrKw),
    (Literal('not'), Token.NotKw),
    (Literal('bind'), Token.BindKw),
    (Literal('match'), Token.MatchKw),
    (Literal('case'), Token.CaseKw),
    (Literal('raise'), Token.RaiseKw),
    (Literal('try'), Token.TryKw),
    (Literal('enum'), Token.EnumKw),
    (Literal('generic'), Token.GenericKw),
    (Literal('discard'), Token.DiscardKw),
    (Literal('import'), Token.ImportKw),
    (Literal('implements'), Token.ImplementsKw),
    (Literal('trait'), Token.TraitKw),

    # Identifiers
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),

    # Numbers
    (Pattern('[0-9]+'), Token.Number),
    (Pattern('[0-9]+b'), Token.BigNumber),

    # Strings & chars
    (Pattern('{STRING_LIT}'), Token.String),
    (Pattern('[a-zA-Z]{STRING_LIT}'), Token.PString),
    (Pattern('{CHAR_LIT}'), Token.Char),

    # Comments
    (Pattern(r"#(.?)+"), Token.Comment),
    (Pattern(r"##(.?)+"), Token.DocComment),
)
from langkit.dsl import ASTNode
from langkit.lexer import (Ignore, Lexer, LexerToken, Literal, Pattern,
                           WithText, WithTrivia)
from langkit.parsers import Grammar, List

from utils import build_and_run


class Token(LexerToken):
    Example = WithText()
    Comment = WithTrivia()


foo_lexer = Lexer(Token)
foo_lexer.add_rules(
    (Pattern(r'[ \n\r\t]+'), Ignore()),
    (Literal('example'), Token.Example),
    (Pattern('#(.?)+'), Token.Comment),
)


class FooNode(ASTNode):
    pass


class Example(FooNode):
    pass


g = Grammar('main_rule')
g.add_rules(main_rule=List(Example('example')))
build_and_run(g, lexer=foo_lexer, py_script='main.py', ada_main='main.adb')
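# For reference, a sketch of what main.py could do with the generated Python
# binding to observe the Comment trivia (the "libfoolang" library name and the
# buffer contents are assumptions):
#
#   import libfoolang
#
#   ctx = libfoolang.AnalysisContext()
#   u = ctx.get_from_buffer('main.txt', b'example # comment\nexample')
#   for token in u.iter_tokens():
#       print(token.kind, repr(token.text))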
class Token(LexerToken):
    Ident = WithSymbol()
    Colon = WithText()
    Arrow = WithText()
    Equal = WithText()
    ParOpen = WithText()
    ParClose = WithText()
    Backslash = WithText()
    Dot = WithText()
    Comment = WithTrivia()
    Whitespace = WithTrivia()
    Newlines = WithText()


dependz_lexer = Lexer(Token)
dependz_lexer.add_rules(
    # Blanks and trivia
    (Pattern(r"[ \r\t]+"), Token.Whitespace),
    (Pattern(r"[\n]+"), Token.Newlines),
    (Pattern(r"#(.?)+"), Token.Comment),

    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Ident),
    (Literal(':'), Token.Colon),
    (Literal('->'), Token.Arrow),
    (Literal('='), Token.Equal),
    (Literal('('), Token.ParOpen),
    (Literal(')'), Token.ParClose),
    (Literal('\\'), Token.Backslash),
    (Literal('.'), Token.Dot),
)
class Token(LexerToken):
    Def = WithText()
    LPar = WithText()
    RPar = WithText()
    Comma = WithText()
    Identifier = WithSymbol()
    Comment = WithTrivia()


foo_lexer = Lexer(Token, track_indent=True)
foo_lexer.add_rules(
    (Pattern(r'[ \r\t]+'), Ignore()),
    (Literal('def'), Token.Def),
    (Literal(','), Token.Comma),
    (Literal('('), Token.LPar),
    (Literal(')'), Token.RPar),
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),
    (Pattern('#.*'), Token.Comment),
)
L = foo_lexer


class FooNode(ASTNode):
    pass


class Def(FooNode):
    name = Field()
    stmts = Field()
ada_lexer.add_rules(
    (Pattern(r"[ \t\r\n]+"), Ignore()),
    (Pattern(r"--(.?)+"), Token.Comment),

    (NoCase("abort"), Token.Abort),
    (NoCase("else"), Token.Else),
    (NoCase("new"), Token.New),
    (NoCase("return"), Token.Return),
    (NoCase("abs"), Token.Abs),
    (NoCase("elsif"), Token.Elsif),
    (NoCase("not"), Token.Not),
    (NoCase("reverse"), Token.Reverse),
    (NoCase("abstract"), Token.Abstract),
    (NoCase("end"), Token.End),
    (NoCase("null"), Token.Null),
    (NoCase("accept"), Token.Accept),
    (NoCase("entry"), Token.Entry),
    (NoCase("select"), Token.Select),
    (NoCase("access"), Token.Access),
    (NoCase("exception"), Token.Exception),
    (NoCase("of"), Token.Of),
    (NoCase("separate"), Token.Separate),
    (NoCase("aliased"), Token.Aliased),
    (NoCase("exit"), Token.Exit),
    (NoCase("or"), Token.Or),
    (NoCase("some"), Token.Some),
    (NoCase("all"), Token.All),
    (NoCase("others"), Token.Others),
    (NoCase("subtype"), Token.Subtype),
    (NoCase("and"), Token.And),
    (NoCase("for"), Token.For),
    (NoCase("out"), Token.Out),
    (NoCase("synchronized"), Token.Synchronized),
    (NoCase("array"), Token.Array),
    (NoCase("function"), Token.Function),
    (NoCase("overriding"), Token.Overriding),
    (NoCase("at"), Token.At),
    (NoCase("tagged"), Token.Tagged),
    (NoCase("generic"), Token.Generic),
    (NoCase("package"), Token.Package),
    (NoCase("task"), Token.Task),
    (NoCase("begin"), Token.Begin),
    (NoCase("goto"), Token.Goto),
    (NoCase("pragma"), Token.Pragma),
    (NoCase("terminate"), Token.Terminate),
    (NoCase("body"), Token.Body),
    (NoCase("private"), Token.Private),
    (NoCase("then"), Token.Then),
    (NoCase("if"), Token.If),
    (NoCase("procedure"), Token.Procedure),
    (NoCase("type"), Token.Type),
    (NoCase("case"), Token.Case),
    (NoCase("in"), Token.In),
    (NoCase("protected"), Token.Protected),
    (NoCase("constant"), Token.Constant),
    (NoCase("interface"), Token.Interface),
    (NoCase("is"), Token.Is),
    (NoCase("raise"), Token.Raise),
    (NoCase("use"), Token.Use),
    (NoCase("declare"), Token.Declare),
    (NoCase("range"), Token.Range),
    (NoCase("delay"), Token.Delay),
    (NoCase("until"), Token.Until),
    (NoCase("limited"), Token.Limited),
    (NoCase("record"), Token.Record),
    (NoCase("when"), Token.When),
    (NoCase("delta"), Token.Delta),
    (NoCase("loop"), Token.Loop),
    (NoCase("rem"), Token.Rem),
    (NoCase("while"), Token.While),
    (NoCase("digits"), Token.Digits),
    (NoCase("renames"), Token.Renames),
    (NoCase("with"), Token.With),
    (NoCase("do"), Token.Do),
    (NoCase("mod"), Token.Mod),
    (NoCase("requeue"), Token.Requeue),
    (NoCase("xor"), Token.Xor),

    (Literal("("), Token.ParOpen),
    (Literal(")"), Token.ParClose),
    (Literal(";"), Token.Semicolon),
    (Literal(":"), Token.Colon),
    (Literal(","), Token.Comma),
    (Literal(".."), Token.Doubledot),
    (Literal(":="), Token.Assign),
    (Literal("."), Token.Dot),
    (Literal("<>"), Token.Diamond),
    (Literal("<="), Token.Lte),
    (Literal(">="), Token.Gte),
    (Literal("=>"), Token.Arrow),
    (Literal("="), Token.Equal),
    (Literal("<"), Token.Lt),
    (Literal(">"), Token.Gt),
    (Literal("+"), Token.Plus),
    (Literal("-"), Token.Minus),
    (Literal("**"), Token.Power),
    (Literal("*"), Token.Mult),
    (Literal("&"), Token.Amp),
    (Literal("/="), Token.Notequal),
    (Literal("/"), Token.Divide),
    (Literal("'"), Token.Tick),
    (Literal("|"), Token.Pipe),

    (ada_lexer.patterns.integer_literal, Token.Integer),
    (ada_lexer.patterns.decimal_literal, Token.Decimal),
    (ada_lexer.patterns.based_integer_literal, Token.Integer),
    (ada_lexer.patterns.based_decimal_literal, Token.Decimal),
    (ada_lexer.patterns.identifier, Token.Identifier),

    (Pattern(r"<<{ws}({identifier})?{ws}>>"), Token.Label),
    (ada_lexer.patterns.p_string, Token.String),
    (ada_lexer.patterns.p_percent_string, Token.String),

    Case(Pattern("'.'"),
         Alt(prev_token_cond=(Token.Identifier, Token.All),
             send=Token.Tick,
             match_size=1),
         Alt(send=Token.Char, match_size=3)),
)
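# Worked example for the Case rule above (inputs illustrative): in
# "A'Class'('b')", the quote after "Class" follows an Identifier token, so
# only the Tick is sent (match_size of 1) and "'b'" is later lexed as a
# 3-character Char; in "X := 'a';", no identifier (or "all") precedes the
# quote, so "'a'" is sent directly as a Char.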
lkql_lexer.add_rules(
    (Pattern(r"[ \t\n\r]"), Token.Whitespace),
    (Literal("."), Token.Dot),
    (Literal("?."), Token.QuestionDot),
    (Literal("?["), Token.QuestionBrack),
    (Literal("?"), Token.Question),
    (Literal(","), Token.Coma),
    (Literal(";"), Token.SemiCol),
    (Literal(":"), Token.Colon),
    (Literal("_"), Token.UnderScore),
    (Literal("="), Token.Eq),
    (Literal("=="), Token.EqEq),
    (Literal("!="), Token.Neq),
    (Literal("!!"), Token.ExclExcl),
    (Literal("<"), Token.Lt),
    (Literal("<="), Token.LEq),
    (Literal(">"), Token.Gt),
    (Literal(">="), Token.GEq),
    (Literal("and"), Token.And),
    (Literal("or"), Token.Or),
    (Literal("+"), Token.Plus),
    (Literal("-"), Token.Minus),
    (Literal("*"), Token.Mul),
    (Literal("/"), Token.Div),
    (Literal("&"), Token.Amp),
    (Literal("("), Token.LPar),
    (Literal(")"), Token.RPar),
    (Literal("{"), Token.LCurl),
    (Literal("}"), Token.RCurl),
    (Literal("["), Token.LBrack),
    (Literal("]"), Token.RBrack),
    (Literal("@"), Token.At),
    (Pattern(r"\|\"(.?)+"), Token.SubBlockLiteral),
    (Literal("|"), Token.Pipe),
    (Literal("<-"), Token.LArrow),
    (Literal("=>"), Token.BigRArrow),
    (Literal("<>"), Token.Box),
    (Literal("let"), Token.Let),
    (Literal("select"), Token.SelectTok),
    (Literal("from"), Token.FromTok),
    (Literal("when"), Token.When),
    (Literal("val"), Token.Val),
    (Literal("fun"), Token.Fun),
    (Literal("import"), Token.Import),
    (Literal("selector"), Token.Selector),
    (Literal("match"), Token.Match),
    (Literal("rec"), Token.Rec),
    (Literal("for"), Token.For),
    (Literal("skip"), Token.Skip),
    (Literal("is"), Token.Is),
    (Literal("in"), Token.In),
    (Literal("true"), Token.TrueLit),
    (Literal("false"), Token.FalseLit),
    (Literal("if"), Token.If),
    (Literal("else"), Token.Else),
    (Literal("then"), Token.Then),
    (Literal("not"), Token.Not),
    (Literal("null"), Token.Null),
    (Pattern("[0-9]+"), Token.Integer),
    (Pattern("[a-z][A-Za-z0-9_]*"), Token.Identifier),
    # Escape the dot so that only a literal ".list" suffix is accepted
    (Pattern(r"[A-Z][A-Za-z_]*(\.list)?"), Token.KindName),
    (Pattern(r'"(\\.|[^"])*"'), Token.String),
    (Pattern(r"#(.?)+"), Token.Comment),
)
(Literal(">>"), Token.LabelEnd), (Literal("@"), Token.Target), # Literals (Pattern('{integer_literal}'), Token.Integer), (Pattern('{decimal_literal}'), Token.Decimal), (Pattern('{based_integer_literal}'), Token.Integer), (Pattern('{based_decimal_literal}'), Token.Decimal), (Pattern('{p_string}'), Token.String), (Pattern('{p_percent_string}'), Token.String), # Identifiers (Pattern('{identifier}'), Token.Identifier), (Pattern("'{bracket_char}'"), Token.Char), # Attribute vs character literal quirk: A character literal is match via # '.'. However, this sequence of characters can happen in other cases, like # a qualified expression with a char as parameter: A'Class'('b'). In those # cases, we need to send the tick token, rather than the char token. Case( Pattern("'.'"), Alt(prev_token_cond=(Token.Identifier, ), send=Token.Tick, match_size=1), Alt(send=Token.Char, match_size=3)), ] ada_lexer.add_rules(*rules) ada_lexer.add_spacing((Token.Alphanumericals, Token.Alphanumericals)) ada_lexer.add_newline_after(Token.Comment, Token.PrepLine)
kconfig_lexer.add_rules(
    (Pattern(r"[ \t\r\n]+"), Ignore()),
    (Pattern(r"#.*"), Ignore()),

    # Keywords
    (Literal("config"), Token.Config),
    (Literal("menuconfig"), Token.Menuconfig),
    (Literal("choice"), Token.Choice),
    (Literal("endchoice"), Token.Endchoice),
    (Literal("comment"), Token.Comment),
    (Literal("menu"), Token.Menu),
    (Literal("endmenu"), Token.Endmenu),
    (Literal("if"), Token.If),
    (Literal("endif"), Token.Endif),
    (Literal("source"), Token.Source),
    (Literal("mainmenu"), Token.Mainmenu),
    (Literal("depends"), Token.Depends),
    (Literal("on"), Token.On),
    (Literal("help"), Token.Help),
    (Literal("--help--"), Token.Help),
    (Literal("prompt"), Token.Prompt),
    (Literal("default"), Token.Default),
    (Literal("select"), Token.Select),
    (Literal("imply"), Token.Imply),
    (Literal("range"), Token.Range),
    (Literal("visible"), Token.Visible),
    (Literal("option"), Token.Option),

    # Options
    (Literal("defconfig_list"), Token.OptDefConfigList),
    (Literal("modules"), Token.OptModules),
    (Literal("env"), Token.OptEnv),
    (Literal("allnoconfig_y"), Token.OptAllNoConfY),

    # Types
    (Literal("tristate"), Token.Tristate),
    (Literal("bool"), Token.Bool),
    (Literal("def_tristate"), Token.DefTristate),
    (Literal("def_bool"), Token.DefBool),
    (Literal("int"), Token.Int),
    (Literal("hex"), Token.Hex),
    (Literal("string"), Token.StringType),

    (Literal("="), Token.Equal),
    (Literal("!="), Token.Different),
    (Literal("("), Token.LPar),
    (Literal(")"), Token.RPar),
    (Literal("!"), Token.Not),
    (Literal("&&"), Token.And),
    (Literal("||"), Token.Or),

    (Literal("y"), Token.Yes),
    (Literal("n"), Token.No),
    (Literal("m"), Token.Module),

    (Pattern(r"[a-zA-Z][a-zA-Z0-9_]*"), Token.Identifier),
    (Pattern(r"[0-9]+"), Token.Number),
    # Hex literals need the full hexadecimal digit range, not just [0-9]
    (Pattern(r"0x[0-9a-fA-F]+"), Token.HexNumber),
    (Pattern(r'\"(\"\"|(\[\"([0-9A-F][0-9A-F]){2,4}\"\])|[^\n\"])*\"'),
     Token.String),
)
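# Note on rule priority: a keyword literal such as "config" and the Identifier
# pattern can match the same input with the same length. Langkit resolves such
# ties in favor of the rule declared first, which is presumably why every
# keyword rule above is listed before the Identifier rule (the other lexers in
# this collection follow the same convention).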