Example #1
File: test.py Project: yakobowski/langkit
def run(token_cls):
    print('== {} =='.format(token_cls.__name__))

    class FooNode(ASTNode):
        pass

    class Identifier(FooNode):
        token_node = True

    class Number(FooNode):
        token_node = True

    foo_lexer = Lexer(token_cls)
    foo_lexer.add_rules(
        (Pattern('[0-9]+'), token_cls.Number),
        (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), token_cls.Identifier),
    )

    emit_and_print_errors(lkt_file='foo.lkt',
                          lexer=foo_lexer,
                          generate_unparser=True)

    BaseToken.Number.name = None
    BaseToken.Identifier.name = None
    print('')
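This snippet omits its test harness. Below is a minimal sketch of how `run` might be driven, assuming `BaseToken` is a `LexerToken` subclass declaring the `Number` and `Identifier` actions used above (everything not taken from the snippet is an assumption):

# Hypothetical driver for the snippet above; BaseToken and the imports are
# assumptions inferred from how the snippet uses them.
from langkit.lexer import LexerToken, WithSymbol, WithText


class BaseToken(LexerToken):
    Number = WithText()
    Identifier = WithSymbol()


run(BaseToken)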
Example #2
python_lexer.add_rules(
    (Pattern('(u|U)?(r|R)?'
             '({MLSTRING_SQ}|{MLSTRING_DBQ}'
             '|{STRING_SQ}|{STRING_DBQ})'), Token.String),
    (Pattern(r'[ \r\t]+'), Ignore()),
    (Pattern(r"#(.?)+"), Token.Comment),
    (Literal('>>='), Token.RshAssign),
    (Literal('is'), Token.Is),
    (Literal('=='), Token.Equals),
    (Literal('def'), Token.Def),
    (Literal('<='), Token.Lte),
    (Literal('raise'), Token.Raise),
    (Literal('%'), Token.Mod),
    (Literal('yield'), Token.Yield),
    (Literal('^='), Token.XorAssign),
    (Literal('as'), Token.As),
    (Literal('lambda'), Token.Lambda),
    (Literal('`'), Token.Backtick),
    (Literal('try'), Token.Try),
    (Literal('/'), Token.Divide),
    (Literal('~'), Token.Invert),
    (Literal('return'), Token.Return),
    (Literal('assert'), Token.Assert),
    (Literal('^'), Token.Xor),
    (Literal('break'), Token.Break),
    (Literal(']'), Token.Rbrack),
    (Literal('**='), Token.PowerAssign),
    (Literal('import'), Token.Import),
    (Literal('exec'), Token.Exec),
    (Literal(','), Token.Comma),
    (Literal('('), Token.LPar),
    (Literal('.'), Token.Dot),
    (Literal('>='), Token.Gte),
    (Literal('//='), Token.FloordivAssign),
    (Literal('*'), Token.Multiply),
    (Literal('/='), Token.DivAssign),
    (Literal('@'), Token.At),
    (Literal('='), Token.Assign),
    (Literal('//'), Token.Floordiv),
    (Literal('!='), Token.Notequal),
    (Literal('*='), Token.MultAssign),
    (Literal('%='), Token.ModAssign),
    (Literal('>'), Token.Gt),
    (Literal('**'), Token.Power),
    (Literal('&'), Token.Amp),
    (Literal('not'), Token.Not),
    (Literal(':'), Token.Colon),
    (Literal('<>'), Token.Diamond),
    (Literal('in'), Token.In),
    (Literal('{'), Token.LCurl),
    (Literal('class'), Token.Class),
    (Literal('|='), Token.OrAssign),
    (Literal('elif'), Token.Elif),
    (Literal('and'), Token.And),
    (Literal(';'), Token.Semicolon),
    (Literal('+='), Token.AddAsign),
    (Literal('print'), Token.Print),
    (Literal('<<'), Token.Lsh),
    (Literal('continue'), Token.Continue),
    (Literal('while'), Token.While),
    (Literal('except'), Token.Except),
    (Literal('if'), Token.If),
    (Literal('else'), Token.Else),
    (Literal('del'), Token.Del),
    (Literal('-='), Token.MinusAssign),
    (Literal('or'), Token.Or),
    (Literal('-'), Token.Minus),
    (Literal('['), Token.Lbrack),
    (Literal('&='), Token.AndAssign),
    (Literal(')'), Token.RPar),
    (Literal('global'), Token.Global),
    (Literal('for'), Token.For),
    (Literal('from'), Token.From),
    (Literal('>>'), Token.Rsh),
    (Literal('finally'), Token.Finally),
    (Literal('pass'), Token.Pass),
    (Literal('<<='), Token.LshAssign),
    (Literal('|'), Token.BinOr),
    (Literal('}'), Token.Rcurl),
    (Literal('with'), Token.With),
    (Literal('+'), Token.Plus),
    (Literal('<'), Token.Lt),
    (Pattern('[0-9]+'), Token.Number),
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),
)
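Note that every keyword and operator above is registered as a Literal while the catch-all identifier pattern comes last. A stripped-down sketch of that keyword-before-identifier layout (MiniToken is illustrative, not part of the snippet):

# Minimal sketch; assumes only the langkit.lexer API shown elsewhere on this
# page.
from langkit.lexer import (Lexer, LexerToken, Literal, Pattern, WithSymbol,
                           WithText)


class MiniToken(LexerToken):
    Def = WithText()
    Identifier = WithSymbol()


mini_lexer = Lexer(MiniToken)
mini_lexer.add_rules(
    # Keyword literal listed before the identifier pattern, mirroring the
    # ordering of python_lexer above.
    (Literal('def'), MiniToken.Def),
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), MiniToken.Identifier),
)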
Example #3
                              RBrace, Equal, Plus)
    Comments = TokenFamily(Comment)


foo_lexer = Lexer(Token)
foo_lexer.add_rules(
    (Pattern(r'[ \n\r\t]+'), Token.Whitespace),
    (Literal('def'), Token.Def),
    (Literal('var'), Token.Var),
    (Literal('error'), Token.Error),
    (Literal('example'), Token.Example),
    (Literal('null'), Token.Null),
    (Literal(','), Token.Comma),
    (Literal('.'), Token.Dot),
    (Literal(';'), Token.Semicolon),
    (Literal('('), Token.LPar),
    (Literal(')'), Token.RPar),
    (Literal('{'), Token.LBrace),
    (Literal('}'), Token.RBrace),
    (Literal('='), Token.Equal),
    (Literal('+'), Token.Plus),
    (Literal('-'), Token.Minus),
    (Literal('<'), Token.LessThan),
    (Pattern('[0-9]+'), Token.Number),
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),
    (Pattern(r'\"[^\"]*\"'), Token.String),
    (Pattern('#(.?)+'), Token.Comment),
)
foo_lexer.add_spacing((Token.Alphanumericals, Token.Alphanumericals))
foo_lexer.add_newline_after(Token.Comment)
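Token.Alphanumericals is presumably a TokenFamily declared in the truncated part of this snippet, like Comments above. A sketch of how such a family and the spacing rule fit together (all names illustrative):

# Sketch only; assumes TokenFamily and add_spacing behave as their uses on
# this page suggest (spacing required between two alphanumeric tokens when
# unparsing).
from langkit.lexer import (Lexer, LexerToken, TokenFamily, WithSymbol,
                           WithText)


class T(LexerToken):
    Def = WithText()
    Identifier = WithSymbol()
    Alphanumericals = TokenFamily(Def, Identifier)


lex = Lexer(T)
lex.add_spacing((T.Alphanumericals, T.Alphanumericals))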
Example #4
gpr_lexer.add_rules(
    (Pattern(r"[ \t\r\n]+"), Token.Whitespace),
    (Pattern(r"--(.?)+"), Token.Comment),
    (NoCaseLit("all"), Token.All),
    (NoCaseLit("abstract"), Token.Abstract),
    (NoCaseLit("at"), Token.At),
    (NoCaseLit("case"), Token.Case),
    (NoCaseLit("end"), Token.End),
    (NoCaseLit("for"), Token.For),
    (NoCaseLit("is"), Token.Is),
    (NoCaseLit("limited"), Token.Limited),
    (NoCaseLit("private"), Token.Private),
    (NoCaseLit("null"), Token.Null),
    (NoCaseLit("others"), Token.Others),
    (NoCaseLit("package"), Token.Package),
    (NoCaseLit("renames"), Token.Renames),
    (NoCaseLit("type"), Token.Type),
    (NoCaseLit("use"), Token.Use),
    (NoCaseLit("pragma"), Token.Pragma),
    (NoCaseLit("when"), Token.When),
    (NoCaseLit("with"), Token.With),
    (NoCaseLit("extends"), Token.Extends),
    (Literal("("), Token.ParOpen),
    (Literal(")"), Token.ParClose),
    (Literal(";"), Token.Semicolon),
    (Literal(":"), Token.Colon),
    (Literal(","), Token.Comma),
    (Literal("."), Token.Dot),
    (Literal("&"), Token.Amp),
    (Literal("'"), Token.Tick),
    (Literal("|"), Token.Pipe),
    (Literal(":="), Token.Assign),
    (Literal("=>"), Token.Arrow),
    (Pattern("{integer}"), Token.Number),
    (Pattern(r"[_a-zA-Z][_a-zA-Z0-9]*"), Token.Identifier),
    (Pattern("{p_string}"), Token.String),
)
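The {integer} and {p_string} placeholders refer to named patterns, which Example #8 below registers through Lexer.add_patterns. Presumably gpr_lexer does the same before these rules, along the lines of the following (the regular expressions are placeholders, not the real GPR definitions):

# Hypothetical pattern registration; the actual regexps for 'integer' and
# 'p_string' are not shown in this snippet.
gpr_lexer.add_patterns(('integer', '[0-9]+'))
gpr_lexer.add_patterns(('p_string', '"[^"]*"'))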
Example #5
File: test.py Project: shintakezou/langkit
from langkit.dsl import ASTNode
from langkit.lexer import Ignore, Lexer, LexerToken, Pattern, WithText
from langkit.parsers import Grammar

from utils import emit_and_print_errors


class BaseToken(LexerToken):
    Example = WithText()
    Whitespace = Ignore()


class FooNode(ASTNode):
    pass


class Example(FooNode):
    token_node = True


foo_lexer = Lexer(BaseToken)
foo_lexer.add_rules((Pattern('[ \t]+'), BaseToken.Whitespace),
                    (Pattern('example'), BaseToken.Example))

g = Grammar('main_rule')
g.add_rules(main_rule=Example(BaseToken.Example))
emit_and_print_errors(g, foo_lexer, generate_unparser=True)

print('Done')
Example #6
File: lexer.py Project: nyulacska/langkit
lkt_lexer.add_rules(
    # Whitespace & EOF
    (Pattern(r"[ \t\r\n\f]+"), Ignore()),

    # Operators
    (Literal('!'), Token.ExclMark),
    (Literal(':'), Token.Colon),
    (Literal('?'), Token.IntMark),
    (Literal('/'), Token.Div),
    (Literal('*'), Token.Times),
    (Literal('+'), Token.Plus),
    (Literal('|'), Token.Pipe),
    (Literal('<-'), Token.LeftArrow),
    (Literal('.'), Token.Dot),
    (Literal('('), Token.LPar),
    (Literal(')'), Token.RPar),
    (Literal('['), Token.LBrack),
    (Literal(']'), Token.RBrack),
    (Literal('{'), Token.LBrace),
    (Literal('}'), Token.RBrace),
    (Literal('|>'), Token.Comb),
    (Literal(','), Token.Comma),
    (Literal('@'), Token.At),
    (Literal('list+'), Token.ListPlus),
    (Literal('list*'), Token.ListStar),
    (Literal('='), Token.Equal),

    # Keywords
    (Literal('grammar'), Token.GrammarKw),
    (Literal('or'), Token.OrKw),
    (Literal('class'), Token.ClassKw),
    (Literal('fun'), Token.FunKw),
    (Literal('public'), Token.PublicKw),
    (Literal('private'), Token.PrivateKw),
    (Literal('null'), Token.NullKw),

    # Identifiers
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),

    # Strings
    (Pattern('{STRING_SQ}|{STRING_DBQ}'), Token.String),
)
Example #7
    LPar = WithText()
    RPar = WithText()
    LBrace = WithText()
    RBrace = WithText()
    Equal = WithText()
    Plus = WithText()

    Number = WithText()
    Identifier = WithSymbol()


foo_lexer = Lexer(Token)
foo_lexer.add_rules(
    (Pattern(r'[ \n\r\t]+'), Ignore()),
    (Eof(), Token.Termination),
    (Literal('def'), Token.Def),
    (Literal('error'), Token.Error),
    (Literal('example'), Token.Example),
    (Literal('null'), Token.Null),
    (Literal(','), Token.Comma),
    (Literal('.'), Token.Dot),
    (Literal('('), Token.LPar),
    (Literal(')'), Token.RPar),
    (Literal('{'), Token.LBrace),
    (Literal('}'), Token.RBrace),
    (Literal('='), Token.Equal),
    (Literal('+'), Token.Plus),
    (Pattern('[0-9]+'), Token.Number),
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),
)
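Unlike most examples here, this one matches end-of-input explicitly with Eof(). Termination is presumably predefined on LexerToken rather than declared by the user: Example #8 below rejects termination and lexing_failure as reserved token names. A minimal sketch of the same pattern:

# Sketch; assumes Eof is exported by langkit.lexer and that Termination is a
# built-in member of LexerToken, as the reserved-name check in Example #8
# suggests.
from langkit.lexer import Eof, Ignore, Lexer, LexerToken, Pattern


class MiniToken(LexerToken):
    pass


lex = Lexer(MiniToken)
lex.add_rules(
    (Pattern(r'[ \n\r\t]+'), Ignore()),
    (Eof(), MiniToken.Termination),
)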
Example #8
def create_lexer(ctx, lkt_units):
    """
    Create and populate a lexer from a Lktlang unit.

    :param list[liblktlang.AnalysisUnit] lkt_units: Non-empty list of analysis
        units in which to look for the lexer.
    :rtype: langkit.lexer.Lexer
    """
    import liblktlang

    # Look for the LexerDecl node in top-level lists
    full_lexer = find_toplevel_decl(ctx, lkt_units, liblktlang.LexerDecl,
                                    'lexer')
    with ctx.lkt_context(full_lexer):
        lexer_annot = parse_annotations(ctx, lexer_annotations, full_lexer)

    patterns = {}
    """
    Mapping from pattern names to the corresponding regular expression.

    :type: dict[names.Name, str]
    """

    token_family_sets = {}
    """
    Mapping from token family names to the corresponding sets of tokens that
    belong to this family.

    :type: dict[names.Name, set[TokenAction]]
    """

    token_families = {}
    """
    Mapping from token family names to the corresponding token families.  We
    build this late, once we know all tokens and all families.

    :type: dict[names.Name, TokenFamily]
    """

    tokens = {}
    """
    Mapping from token names to the corresponding tokens.

    :type: dict[names.Name, Token]
    """

    rules = []
    pre_rules = []
    """
    Lists of regular lexing rules and pre-rules for this lexer.

    :type: list[(langkit.lexer.Matcher, langkit.lexer.Action)]
    """

    newline_after = []
    """
    List of tokens after which we must introduce a newline during unparsing.

    :type: list[Token]
    """
    def ignore_constructor(start_ignore_layout, end_ignore_layout):
        """
        Adapter to build an Ignore instance with the same API as WithText
        constructors.
        """
        del start_ignore_layout, end_ignore_layout
        return Ignore()

    def process_family(f):
        """
        Process a LexerFamilyDecl node. Register the token family and process
        the rules it contains.

        :type f: liblktlang.LexerFamilyDecl
        """
        with ctx.lkt_context(f):
            # Create the token family, if needed
            name = names.Name.from_lower(text_as_str(f.f_syn_name))
            token_set = token_family_sets.setdefault(name, set())

            for r in f.f_rules:
                check_source_language(
                    isinstance(r.f_decl, liblktlang.GrammarRuleDecl),
                    'Only lexer rules allowed in family blocks')
                process_token_rule(r, token_set)

    def process_token_rule(r, token_set=None):
        """
        Process the full declaration of a GrammarRuleDecl node: create the
        token it declares and lower the optional associated lexing rule.

        :param liblktlang.FullDecl r: Full declaration for the GrammarRuleDecl
            to process.
        :param None|set[TokenAction] token_set: If this declaration appears in
            the context of a token family, the new token is added to this set.
            Must be left as None otherwise.
        """
        with ctx.lkt_context(r):
            rule_annot = parse_annotations(ctx, token_annotations, r)

            # Gather token action info from the annotations. If absent,
            # fall back to WithText.
            token_cons = None
            start_ignore_layout = False
            end_ignore_layout = False
            if 'ignore' in rule_annot:
                token_cons = ignore_constructor
            for name in ('text', 'trivia', 'symbol'):
                try:
                    start_ignore_layout, end_ignore_layout = rule_annot[name]
                except KeyError:
                    continue

                check_source_language(token_cons is None,
                                      'At most one token action allowed')
                token_cons = token_cls_map[name]
            is_pre = rule_annot.get('pre_rule', False)
            if token_cons is None:
                token_cons = WithText

            # Create the token and register it where needed: the global token
            # mapping, its token family (if any) and the "newline_after" group
            # if the corresponding annotation is present.
            token_lower_name = text_as_str(r.f_decl.f_syn_name)
            token_name = names.Name.from_lower(token_lower_name)

            check_source_language(
                token_lower_name not in ('termination', 'lexing_failure'),
                '{} is a reserved token name'.format(token_lower_name))
            check_source_language(token_name not in tokens,
                                  'Duplicate token name')

            token = token_cons(start_ignore_layout, end_ignore_layout)
            tokens[token_name] = token
            if token_set is not None:
                token_set.add(token)
            if 'newline_after' in rule_annot:
                newline_after.append(token)

            # Lower the lexing rule, if present
            matcher_expr = r.f_decl.f_expr
            if matcher_expr is not None:
                rule = (lower_matcher(matcher_expr), token)
                if is_pre:
                    pre_rules.append(rule)
                else:
                    rules.append(rule)

    def process_pattern(full_decl):
        """
        Process a pattern declaration.

        :param liblktlang.FullDecl full_decl: Full declaration for the ValDecl
            to
            process.
        """
        parse_annotations(ctx, [], full_decl)
        decl = full_decl.f_decl
        lower_name = text_as_str(decl.f_syn_name)
        name = names.Name.from_lower(lower_name)

        with ctx.lkt_context(decl):
            check_source_language(name not in patterns,
                                  'Duplicate pattern name')
            check_source_language(
                decl.f_decl_type is None,
                'Patterns must have automatic types in'
                ' lexers')
            check_source_language(
                isinstance(decl.f_val, liblktlang.StringLit)
                and decl.f_val.p_is_regexp_literal,
                'Pattern string literal expected')
            # TODO: use StringLit.p_denoted_value when properly implemented
            patterns[name] = pattern_as_str(decl.f_val)

    def lower_matcher(expr):
        """
        Lower a token matcher to our internals.

        :type expr: liblktlang.GrammarExpr
        :rtype: langkit.lexer.Matcher
        """
        with ctx.lkt_context(expr):
            if isinstance(expr, liblktlang.TokenLit):
                return Literal(json.loads(text_as_str(expr)))
            elif isinstance(expr, liblktlang.TokenNoCaseLit):
                return NoCaseLit(json.loads(text_as_str(expr)))
            elif isinstance(expr, liblktlang.TokenPatternLit):
                return Pattern(pattern_as_str(expr))
            else:
                check_source_language(False, 'Invalid lexing expression')

    def lower_token_ref(ref):
        """
        Return the Token that `ref` refers to.

        :type ref: liblktlang.RefId
        :rtype: Token
        """
        with ctx.lkt_context(ref):
            token_name = names.Name.from_lower(text_as_str(ref))
            check_source_language(token_name in tokens,
                                  'Unknown token: {}'.format(token_name.lower))
            return tokens[token_name]

    def lower_family_ref(ref):
        """
        Return the TokenFamily that `ref` refers to.

        :type ref: liblktlang.RefId
        :rtype: TokenFamily
        """
        with ctx.lkt_context(ref):
            name_lower = text_as_str(ref)
            name = names.Name.from_lower(name_lower)
            check_source_language(
                name in token_families,
                'Unknown token family: {}'.format(name_lower))
            return token_families[name]

    def lower_case_alt(alt):
        """
        Lower the alternative of a case lexing rule.

        :type alt: liblktlang.BaseLexerCaseRuleAlt
        :rtype: Alt
        """
        prev_token_cond = None
        if isinstance(alt, liblktlang.LexerCaseRuleCondAlt):
            prev_token_cond = [
                lower_token_ref(ref) for ref in alt.f_cond_exprs
            ]
        return Alt(prev_token_cond=prev_token_cond,
                   send=lower_token_ref(alt.f_send.f_sent),
                   match_size=int(alt.f_send.f_match_size.text))

    # Go through all rules to register tokens, their token families and lexing
    # rules.
    for full_decl in full_lexer.f_decl.f_rules:
        with ctx.lkt_context(full_decl):
            if isinstance(full_decl, liblktlang.LexerFamilyDecl):
                # This is a family block: go through all declarations inside it
                process_family(full_decl)

            elif isinstance(full_decl, liblktlang.FullDecl):
                # There can be various types of declarations in lexers...
                decl = full_decl.f_decl

                if isinstance(decl, liblktlang.GrammarRuleDecl):
                    # Here, we have a token declaration, potentially associated
                    # with a lexing rule.
                    process_token_rule(full_decl)

                elif isinstance(decl, liblktlang.ValDecl):
                    # This is the declaration of a pattern
                    process_pattern(full_decl)

                else:
                    check_source_language(False,
                                          'Unexpected declaration in lexer')

            elif isinstance(full_decl, liblktlang.LexerCaseRule):
                syn_alts = list(full_decl.f_alts)

                # This is a rule for conditional lexing: lower its matcher and
                # its alternative rules.
                matcher = lower_matcher(full_decl.f_expr)
                check_source_language(
                    len(syn_alts) == 2 and isinstance(
                        syn_alts[0], liblktlang.LexerCaseRuleCondAlt)
                    and isinstance(syn_alts[1],
                                   liblktlang.LexerCaseRuleDefaultAlt),
                    'Invalid case rule topology')
                rules.append(
                    Case(matcher, lower_case_alt(syn_alts[0]),
                         lower_case_alt(syn_alts[1])))

            else:
                # The grammar should make this branch dead code
                assert False, 'Invalid lexer rule: {}'.format(full_decl)

    # Create the LexerToken subclass to define all tokens and token families
    items = {}
    for name, token in tokens.items():
        items[name.camel] = token
    for name, token_set in token_family_sets.items():
        tf = TokenFamily(*list(token_set))
        token_families[name] = tf
        items[name.camel] = tf
    token_class = type('Token', (LexerToken, ), items)

    # Create the Lexer instance and register all patterns and lexing rules
    result = Lexer(token_class, 'track_indent' in lexer_annot, pre_rules)
    for name, regexp in patterns.items():
        result.add_patterns((name.lower, regexp))
    result.add_rules(*rules)

    # Register spacing/newline rules
    for tf1, tf2 in lexer_annot.get('spacing', []):
        result.add_spacing((lower_family_ref(tf1), lower_family_ref(tf2)))
    result.add_newline_after(*newline_after)

    return result
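The last steps of create_lexer assemble the LexerToken subclass dynamically with type(). A small sketch of the equivalence (token name illustrative):

# Building the token class dynamically...
from langkit.lexer import LexerToken, WithText

items = {'Example': WithText()}
DynToken = type('Token', (LexerToken,), items)


# ...is the programmatic equivalent of writing it out by hand:
class Token(LexerToken):
    Example = WithText()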
Example #9
    Colon = WithText()

    # Comment
    Comment = WithTrivia()

    # Numeric
    Numeral = WithText()

    # String
    StringLiteral = WithText()


rflx_lexer = Lexer(Token)

rflx_lexer.add_rules(
    (Pattern(r"[ \t\r\n]+"), Ignore()),
    (Pattern(r"--.*"), Token.Comment),
)

# Hack to support keywords that equal attributes
# Inspired by Libadalang grammar (ada/language/lexer.py)
rflx_lexer.add_rules(*[
    Case(
        Literal(text),
        Alt(
            prev_token_cond=(Token.Tick, ),
            send=token,
            match_size=len(text),
        ),
        Alt(send=Token.UnqualifiedIdentifier, match_size=len(text)),
    ) for text, token in [
        ("First", Token.First),
Example #10
    LBrace = WithText()
    RBrace = WithText()
    Plus = WithText()

    Number = WithText()
    Identifier = WithSymbol()


foo_lexer = Lexer(Token, track_indent=True)
foo_lexer.add_rules(
    (Pattern(r'[ \r\t]+'), Ignore()),
    (Eof(), Token.Termination),
    (Literal("example"), Token.Example),
    (Literal("null"), Token.Null),
    (Literal(','), Token.Comma),
    (Literal('('), Token.LPar),
    (Literal(')'), Token.RPar),
    (Literal('{'), Token.LBrace),
    (Literal('}'), Token.RBrace),
    (Literal('+'), Token.Plus),
    (Pattern('[0-9]+'), Token.Number),
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),
)
L = foo_lexer

Diagnostics.set_lang_source_dir(os.path.abspath(__file__))


@root_grammar_class()
class FooNode(ASTNode):
    pass
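Note that with track_indent=True the ignored-blanks pattern is [ \r\t]+, without \n: presumably newlines must stay visible to the indentation tracker. Lexers on this page that do not track indentation fold newlines into the ignored whitespace instead, e.g. (sketch reusing the Token class above; the track_indent default is an assumption):

plain_lexer = Lexer(Token)  # track_indent presumably defaults to False
plain_lexer.add_rules(
    (Pattern(r'[ \n\r\t]+'), Ignore()),  # newlines ignored, as in Example #12
)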
Example #11
File: lexer.py Project: pmderodat/langkit
lkt_lexer.add_rules(
    # Whitespace & EOF
    (Pattern(r"[ \t\r\n\f]+"), Ignore()),

    # Operators
    (Literal('!'), Token.ExclMark),
    (Literal(';'), Token.Semicolon),
    (Literal(':'), Token.Colon),
    (Literal('?'), Token.IntMark),
    (Literal('/'), Token.Div),
    (Literal('*'), Token.Times),
    (Literal('+'), Token.Plus),
    (Literal('&'), Token.Amp),
    (Literal('-'), Token.Minus),
    (Literal('|'), Token.Pipe),
    (Literal('<-'), Token.LeftArrow),
    (Literal('->'), Token.RightArrow),
    (Literal('.'), Token.Dot),
    (Literal('('), Token.LPar),
    (Literal(')'), Token.RPar),
    (Literal('['), Token.LBrack),
    (Literal(']'), Token.RBrack),
    (Literal('{'), Token.LBrace),
    (Literal('}'), Token.RBrace),
    (Literal('|>'), Token.Comb),
    (Literal(','), Token.Comma),
    (Literal('@'), Token.At),
    (Literal('=>'), Token.FatRightArrow),
    (Literal('='), Token.Equal),
    (Literal('<='), Token.LTE),
    (Literal('>='), Token.GTE),
    (Literal('<'), Token.LT),
    (Literal('>'), Token.GT),
    (Literal('%'), Token.Percent),

    # Keywords
    (Literal('lexer'), Token.LexerKw),
    (Literal('grammar'), Token.GrammarKw),
    (Literal('class'), Token.ClassKw),
    (Literal('struct'), Token.StructKw),
    (Literal('fun'), Token.FunKw),
    (Literal('public'), Token.PublicKw),
    (Literal('private'), Token.PrivateKw),
    (Literal('null'), Token.NullKw),
    (Literal('is'), Token.IsKw),
    (Literal('val'), Token.ValKw),
    (Literal('if'), Token.IfKw),
    (Literal('elif'), Token.ElifKw),
    (Literal('else'), Token.ElseKw),
    (Literal('then'), Token.ThenKw),
    (Literal('and'), Token.AndKw),
    (Literal('or'), Token.OrKw),
    (Literal('not'), Token.NotKw),
    (Literal('bind'), Token.BindKw),
    (Literal('match'), Token.MatchKw),
    (Literal('case'), Token.CaseKw),
    (Literal('raise'), Token.RaiseKw),
    (Literal('try'), Token.TryKw),
    (Literal('enum'), Token.EnumKw),
    (Literal('generic'), Token.GenericKw),
    (Literal('discard'), Token.DiscardKw),
    (Literal('import'), Token.ImportKw),
    (Literal('implements'), Token.ImplementsKw),
    (Literal('trait'), Token.TraitKw),

    # Identifiers
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),

    # Numbers
    (Pattern('[0-9]+'), Token.Number),
    (Pattern('[0-9]+b'), Token.BigNumber),

    # Strings & chars
    (Pattern('{STRING_LIT}'), Token.String),
    (Pattern('[a-zA-Z]{STRING_LIT}'), Token.PString),
    (Pattern('{CHAR_LIT}'), Token.Char),

    # Comments
    (Pattern(r"#(.?)+"), Token.Comment),
    (Pattern(r"##(.?)+"), Token.DocComment),
)
Example #12
File: test.py Project: shintakezou/langkit
from langkit.dsl import ASTNode
from langkit.lexer import (Ignore, Lexer, LexerToken, Literal, Pattern,
                           WithText, WithTrivia)
from langkit.parsers import Grammar, List

from utils import build_and_run


class Token(LexerToken):
    Example = WithText()
    Comment = WithTrivia()


foo_lexer = Lexer(Token)
foo_lexer.add_rules(
    (Pattern(r'[ \n\r\t]+'), Ignore()),
    (Literal('example'), Token.Example),
    (Pattern('#(.?)+'), Token.Comment),
)


class FooNode(ASTNode):
    pass


class Example(FooNode):
    pass


g = Grammar('main_rule')
g.add_rules(main_rule=List(Example('example')))
build_and_run(g, lexer=foo_lexer, py_script='main.py', ada_main='main.adb')
Example #13
File: lexer.py Project: Roldak/Dependz
class Token(LexerToken):
    Ident = WithSymbol()

    Colon = WithText()
    Arrow = WithText()
    Equal = WithText()
    ParOpen = WithText()
    ParClose = WithText()
    Backslash = WithText()
    Dot = WithText()

    Comment = WithTrivia()
    Whitespace = WithTrivia()
    Newlines = WithText()


dependz_lexer = Lexer(Token)

dependz_lexer.add_rules(
    # Blanks and trivia
    (Pattern(r"[ \r\t]+"), Token.Whitespace),
    (Pattern(r"[\n]+"), Token.Newlines),
    (Pattern(r"#(.?)+"), Token.Comment),
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Ident),
    (Literal(':'), Token.Colon),
    (Literal('->'), Token.Arrow),
    (Literal('='), Token.Equal),
    (Literal('('), Token.ParOpen),
    (Literal(')'), Token.ParClose),
    (Literal('\\'), Token.Backslash),
    (Literal('.'), Token.Dot))
Example #14
File: test.py Project: nyulacska/langkit
class Token(LexerToken):
    Def = WithText()
    LPar = WithText()
    RPar = WithText()
    Comma = WithText()
    Identifier = WithSymbol()
    Comment = WithTrivia()


foo_lexer = Lexer(Token, track_indent=True)
foo_lexer.add_rules(
    (Pattern(r'[ \r\t]+'), Ignore()),
    (Literal('def'), Token.Def),
    (Literal(','), Token.Comma),
    (Literal('('), Token.LPar),
    (Literal(')'), Token.RPar),
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier),
    (Pattern('#.*'), Token.Comment),
)
L = foo_lexer


class FooNode(ASTNode):
    pass


class Def(FooNode):
    name = Field()
    stmts = Field()
Example #15
File: lexer.py Project: AdaCore/libadalang
ada_lexer.add_rules(
    (Pattern(r"[ \t\r\n]+"),                    Ignore()),
    (Pattern(r"--(.?)+"),                       Token.Comment),
    (NoCase("abort"),                           Token.Abort),
    (NoCase("else"),                            Token.Else),
    (NoCase("new"),                             Token.New),
    (NoCase("return"),                          Token.Return),
    (NoCase("abs"),                             Token.Abs),
    (NoCase("elsif"),                           Token.Elsif),
    (NoCase("not"),                             Token.Not),
    (NoCase("reverse"),                         Token.Reverse),
    (NoCase("abstract"),                        Token.Abstract),
    (NoCase("end"),                             Token.End),
    (NoCase("null"),                            Token.Null),
    (NoCase("accept"),                          Token.Accept),
    (NoCase("entry"),                           Token.Entry),
    (NoCase("select"),                          Token.Select),
    (NoCase("access"),                          Token.Access),
    (NoCase("exception"),                       Token.Exception),
    (NoCase("of"),                              Token.Of),
    (NoCase("separate"),                        Token.Separate),
    (NoCase("aliased"),                         Token.Aliased),
    (NoCase("exit"),                            Token.Exit),
    (NoCase("or"),                              Token.Or),
    (NoCase("some"),                            Token.Some),
    (NoCase("all"),                             Token.All),
    (NoCase("others"),                          Token.Others),
    (NoCase("subtype"),                         Token.Subtype),
    (NoCase("and"),                             Token.And),
    (NoCase("for"),                             Token.For),
    (NoCase("out"),                             Token.Out),
    (NoCase("synchronized"),                    Token.Synchronized),
    (NoCase("array"),                           Token.Array),
    (NoCase("function"),                        Token.Function),
    (NoCase("overriding"),                      Token.Overriding),
    (NoCase("at"),                              Token.At),
    (NoCase("tagged"),                          Token.Tagged),
    (NoCase("generic"),                         Token.Generic),
    (NoCase("package"),                         Token.Package),
    (NoCase("task"),                            Token.Task),
    (NoCase("begin"),                           Token.Begin),
    (NoCase("goto"),                            Token.Goto),
    (NoCase("pragma"),                          Token.Pragma),
    (NoCase("terminate"),                       Token.Terminate),
    (NoCase("body"),                            Token.Body),
    (NoCase("private"),                         Token.Private),
    (NoCase("then"),                            Token.Then),
    (NoCase("if"),                              Token.If),
    (NoCase("procedure"),                       Token.Procedure),
    (NoCase("type"),                            Token.Type),
    (NoCase("case"),                            Token.Case),
    (NoCase("in"),                              Token.In),
    (NoCase("protected"),                       Token.Protected),
    (NoCase("constant"),                        Token.Constant),
    (NoCase("interface"),                       Token.Interface),
    (NoCase("is"),                              Token.Is),
    (NoCase("raise"),                           Token.Raise),
    (NoCase("use"),                             Token.Use),
    (NoCase("declare"),                         Token.Declare),
    (NoCase("range"),                           Token.Range),
    (NoCase("delay"),                           Token.Delay),
    (NoCase("until"),                           Token.Until),
    (NoCase("limited"),                         Token.Limited),
    (NoCase("record"),                          Token.Record),
    (NoCase("when"),                            Token.When),
    (NoCase("delta"),                           Token.Delta),
    (NoCase("loop"),                            Token.Loop),
    (NoCase("rem"),                             Token.Rem),
    (NoCase("while"),                           Token.While),
    (NoCase("digits"),                          Token.Digits),
    (NoCase("renames"),                         Token.Renames),
    (NoCase("with"),                            Token.With),
    (NoCase("do"),                              Token.Do),
    (NoCase("mod"),                             Token.Mod),
    (NoCase("requeue"),                         Token.Requeue),
    (NoCase("xor"),                             Token.Xor),

    (Literal("("),                              Token.ParOpen),
    (Literal(")"),                              Token.ParClose),
    (Literal(";"),                              Token.Semicolon),
    (Literal(":"),                              Token.Colon),
    (Literal(","),                              Token.Comma),
    (Literal(".."),                             Token.Doubledot),
    (Literal(":="),                             Token.Assign),
    (Literal("."),                              Token.Dot),
    (Literal("<>"),                             Token.Diamond),
    (Literal("<="),                             Token.Lte),
    (Literal(">="),                             Token.Gte),
    (Literal("=>"),                             Token.Arrow),
    (Literal("="),                              Token.Equal),
    (Literal("<"),                              Token.Lt),
    (Literal(">"),                              Token.Gt),
    (Literal("+"),                              Token.Plus),
    (Literal("-"),                              Token.Minus),
    (Literal("**"),                             Token.Power),
    (Literal("*"),                              Token.Mult),
    (Literal("&"),                              Token.Amp),
    (Literal("/="),                             Token.Notequal),
    (Literal("/"),                              Token.Divide),
    (Literal("'"),                              Token.Tick),
    (Literal("|"),                              Token.Pipe),

    (ada_lexer.patterns.integer_literal,        Token.Integer),
    (ada_lexer.patterns.decimal_literal,        Token.Decimal),
    (ada_lexer.patterns.based_integer_literal,  Token.Integer),
    (ada_lexer.patterns.based_decimal_literal,  Token.Decimal),

    (ada_lexer.patterns.identifier,             Token.Identifier),

    (Pattern(r"<<{ws}({identifier})?{ws}>>"),   Token.Label),

    (ada_lexer.patterns.p_string,               Token.String),
    (ada_lexer.patterns.p_percent_string,       Token.String),

    Case(Pattern("'.'"),
         Alt(prev_token_cond=(Token.Identifier, Token.All),
             send=Token.Tick,
             match_size=1),
         Alt(send=Token.Char, match_size=3)),
)
Example #16
lkql_lexer.add_rules(
    (Pattern(r"[ \t\n\r]"), Token.Whitespace), (Literal("."), Token.Dot),
    (Literal("?."), Token.QuestionDot), (Literal("?["), Token.QuestionBrack),
    (Literal("?"), Token.Question), (Literal(","), Token.Coma),
    (Literal(";"), Token.SemiCol), (Literal(":"), Token.Colon),
    (Literal("_"), Token.UnderScore), (Literal("="), Token.Eq),
    (Literal("=="), Token.EqEq), (Literal("!="), Token.Neq),
    (Literal("!!"), Token.ExclExcl), (Literal("<"), Token.Lt),
    (Literal("<="), Token.LEq), (Literal(">"), Token.Gt),
    (Literal(">="), Token.GEq), (Literal("and"), Token.And),
    (Literal("or"), Token.Or), (Literal("+"), Token.Plus),
    (Literal("-"), Token.Minus), (Literal("*"), Token.Mul),
    (Literal("/"), Token.Div), (Literal("&"), Token.Amp),
    (Literal("("), Token.LPar), (Literal(")"), Token.RPar),
    (Literal("{"), Token.LCurl), (Literal("}"), Token.RCurl),
    (Literal("["), Token.LBrack), (Literal("]"), Token.RBrack),
    (Literal("@"), Token.At), (Pattern(r"\|\"(.?)+"), Token.SubBlockLiteral),
    (Literal("|"), Token.Pipe), (Literal("<-"), Token.LArrow),
    (Literal("=>"), Token.BigRArrow), (Literal("<>"), Token.Box),
    (Literal("let"), Token.Let), (Literal("select"), Token.SelectTok),
    (Literal("from"), Token.FromTok), (Literal("when"), Token.When),
    (Literal("val"), Token.Val), (Literal("fun"), Token.Fun),
    (Literal("import"), Token.Import), (Literal("selector"), Token.Selector),
    (Literal("match"), Token.Match), (Literal("rec"), Token.Rec),
    (Literal("for"), Token.For), (Literal("skip"), Token.Skip),
    (Literal("is"), Token.Is), (Literal("in"), Token.In),
    (Literal("true"), Token.TrueLit), (Literal("false"), Token.FalseLit),
    (Literal("if"), Token.If), (Literal("else"), Token.Else),
    (Literal("then"), Token.Then), (Literal("not"), Token.Not),
    (Literal("null"), Token.Null), (Pattern("[0-9]+"), Token.Integer),
    (Pattern("[a-z][A-Za-z0-9_]*"),
     Token.Identifier), (Pattern("[A-Z][A-Za-z_]*(.list)?"), Token.KindName),
    (Pattern(r'"(\\.|[^"])*"'), Token.String), (Pattern(r"#(.?)+"),
                                                Token.Comment))
Example #17
    (Literal(">>"), Token.LabelEnd),
    (Literal("@"), Token.Target),

    # Literals
    (Pattern('{integer_literal}'), Token.Integer),
    (Pattern('{decimal_literal}'), Token.Decimal),
    (Pattern('{based_integer_literal}'), Token.Integer),
    (Pattern('{based_decimal_literal}'), Token.Decimal),
    (Pattern('{p_string}'), Token.String),
    (Pattern('{p_percent_string}'), Token.String),

    # Identifiers
    (Pattern('{identifier}'), Token.Identifier),
    (Pattern("'{bracket_char}'"), Token.Char),

    # Attribute vs character literal quirk: a character literal is matched via
    # '.'. However, this sequence of characters can appear in other cases, like
    # a qualified expression with a char as parameter: A'Class'('b'). In those
    # cases, we need to send the tick token rather than the char token.
    Case(
        Pattern("'.'"),
        Alt(prev_token_cond=(Token.Identifier, ),
            send=Token.Tick,
            match_size=1), Alt(send=Token.Char, match_size=3)),
]

ada_lexer.add_rules(*rules)

ada_lexer.add_spacing((Token.Alphanumericals, Token.Alphanumericals))
ada_lexer.add_newline_after(Token.Comment, Token.PrepLine)
Example #18
kconfig_lexer.add_rules(
    (Pattern(r"[ \t\r\n]+"), Ignore()),
    (Pattern(r"#.*"), Ignore()),

    # Keywords
    (Literal("config"), Token.Config),
    (Literal("menuconfig"), Token.Menuconfig),
    (Literal("choice"), Token.Choice),
    (Literal("endchoice"), Token.Endchoice),
    (Literal("comment"), Token.Comment),
    (Literal("menu"), Token.Menu),
    (Literal("endmenu"), Token.Endmenu),
    (Literal("if"), Token.If),
    (Literal("endif"), Token.Endif),
    (Literal("source"), Token.Source),
    (Literal("mainmenu"), Token.Mainmenu),
    (Literal("depends"), Token.Depends),
    (Literal("on"), Token.On),
    (Literal("help"), Token.Help),
    (Literal("--help--"), Token.Help),
    (Literal("prompt"), Token.Prompt),
    (Literal("default"), Token.Default),
    (Literal("select"), Token.Select),
    (Literal("imply"), Token.Imply),
    (Literal("range"), Token.Range),
    (Literal("visible"), Token.Visible),
    (Literal("option"), Token.Option),

    # Options
    (Literal("defconfig_list"), Token.OptDefConfigList),
    (Literal("modules"), Token.OptModules),
    (Literal("env"), Token.OptEnv),
    (Literal("allnoconfig_y"), Token.OptAllNoConfY),

    # Types
    (Literal("tristate"), Token.Tristate),
    (Literal("bool"), Token.Bool),
    (Literal("def_tristate"), Token.DefTristate),
    (Literal("def_bool"), Token.DefBool),
    (Literal("int"), Token.Int),
    (Literal("hex"), Token.Hex),
    (Literal("string"), Token.StringType),
    (Literal("="), Token.Equal),
    (Literal("!="), Token.Different),
    (Literal("("), Token.LPar),
    (Literal(")"), Token.RPar),
    (Literal("!"), Token.Not),
    (Literal("&&"), Token.And),
    (Literal("||"), Token.Or),
    (Literal("y"), Token.Yes),
    (Literal("n"), Token.No),
    (Literal("m"), Token.Module),
    (Pattern(r"[a-zA-Z][a-zA-Z0-9_]*"), Token.Identifier),
    (Pattern(r"[0-9]+"), Token.Number),
    (Pattern(r"0x[0-9]+"), Token.HexNumber),
    (Pattern(r'\"(\"\"|(\[\"([0-9A-F][0-9A-F]){2,4}\"\])|[^\n\"])*\"'),
     Token.String),
)