# Problem: DICT_KEY might be a different state, to accept either a bare word # foo, or an expression (X=a+2), which is allowed in shell. Python doesn't # allowed unquoted words, but we want to. # TODO: There are 4 shared groups here. I think you should test if that # structure should be preserved through re2c. Do a benchmark. # # If a group has no matches, then return Id.Unknown_Tok? And then you can # chain the groups in order. It might make sense to experiment with the order # too. _SIGNIFICANT_SPACE = R(r'[ \t\r]+', Id.WS_Space) _BACKSLASH = [ R(r'\\[^\n\0]', Id.Lit_EscapedChar), C('\\\n', Id.Ignored_LineCont), ] VAR_NAME_RE = r'[a-zA-Z_][a-zA-Z0-9_]*' # All Kind.VSub _VARS = [ # Unbraced variables R(r'\$' + VAR_NAME_RE, Id.VSub_DollarName), R(r'\$[0-9]', Id.VSub_Number), C(r'$!', Id.VSub_Bang), C(r'$@', Id.VSub_At), C(r'$#', Id.VSub_Pound), C(r'$$', Id.VSub_Dollar), C(r'$*', Id.VSub_Star), C(r'$-', Id.VSub_Hyphen),
import errno
import os
import re
import sys

from core.util import log
from frontend.lexer import C, R

# Ordered token rules, each pairing a pattern with a string token name.
# NOTE(review): judging by the patterns (PyMethodDef, PyDoc_STR) this is
# presumably for scanning method tables in C extension source -- confirm.
# R(...) presumably wraps a regex and C(...) a constant string; both come from
# frontend.lexer, which is not visible here.
C_DEF = [
    # '#' followed by '.*'.
    R(r'#.*', 'Comment'),
    # One or more spaces, tabs, or newlines.
    R(r'[ \t\n]+', 'Whitespace'),
    # This could be more space-insensitive.
    # One capture group: the text between 'PyMethodDef ' and '[] = '.
    R(r'static.*PyMethodDef (.*)\[\] = ', 'BeginDef'),
    C(r'{', 'LBrace'),
    C(r'}', 'RBrace'),
    C(r',', 'Comma'),
    C(r';', 'Semi'),
    # Double-quoted text with no embedded double quote; the group captures
    # the text between the quotes.
    R(r'"([^"]*)"', 'Str'),
    C(r'FILE', 'FILE'),
    C(r'PyDoc_STR(', 'LDocStr'),
    C(r')', 'RDocStr'),
    # One or more characters other than comma, closing brace, or newline.
    R(r'[^,}\n]+', 'Opaque'),
]


# NOTE: This is copied from osh/match.py because we don't have 're' there.
def _CompileAll(pat_list):
    # Accumulator, starts empty.
    result = []
    # Each pat_list entry is unpacked as a 3-tuple (is_regex, pat, token_id).
    # NOTE(review): the loop body and the rest of the function are outside the
    # visible excerpt.
    for is_regex, pat, token_id in pat_list:
# VS -- a single state here? Or switches into expression state, because } # is an operator # Problem: DICT_KEY might be a different state, to accept either a bare word # foo, or an expression (X=a+2), which is allowed in shell. Python doesn't # allowed unquoted words, but we want to. # TODO: There are 4 shared groups here. I think you should test if that # structure should be preserved through re2c. Do a benchmark. # # If a group has no matches, then return Id.Unknown_Tok? And then you can # chain the groups in order. It might make sense to experiment with the order # too. _BACKSLASH = [ R(r'\\[^\n\0]', Id.Lit_EscapedChar), C('\\\n', Id.Ignored_LineCont), ] VAR_NAME_RE = r'[a-zA-Z_][a-zA-Z0-9_]*' # All Kind.VSub _VARS = [ # Unbraced variables R(r'\$' + VAR_NAME_RE, Id.VSub_DollarName), R(r'\$[0-9]', Id.VSub_Number), C(r'$!', Id.VSub_Bang), C(r'$@', Id.VSub_At), C(r'$#', Id.VSub_Pound), C(r'$$', Id.VSub_Dollar), C(r'$*', Id.VSub_Star), C(r'$-', Id.VSub_Hyphen),
"""

from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.types_asdl import lex_mode_e
from core.meta import ID_SPEC
from frontend.lexer import C, R

# NOTE(review): the triple quote at the top of this excerpt closes a docstring
# that was opened outside the visible range.

# See unit tests in frontend/match_test.py.
# We need the [^\0]* because the re2c translation assumes it's anchored like $.
#
# Pattern shape: '#!', then '.*', then 'sh', then one of space / tab / CR /
# LF, then any run of non-NUL characters.
SHOULD_HIJACK_RE = r'#!.*sh[ \t\r\n][^\0]*'

# NOTE(review): presumably R(...) wraps a regex pattern and C(...) a constant
# (literal) string, each paired with the token Id to emit -- confirm in
# frontend.lexer, which is not visible here.

# One or more spaces, tabs, or carriage returns.  Newline is not part of the
# character class.
_SIGNIFICANT_SPACE = R(r'[ \t\r]+', Id.WS_Space)

# Rules for input that starts with a backslash.
_BACKSLASH = [
    # Backslash followed by any single character except newline or NUL.
    R(r'\\[^\n\0]', Id.Lit_EscapedChar),
    # Not a raw string: the pattern is the two characters backslash + newline.
    C('\\\n', Id.Ignored_LineCont),
]

# A letter or underscore, followed by any number of letters, digits, or
# underscores.
VAR_NAME_RE = r'[a-zA-Z_][a-zA-Z0-9_]*'

# All Kind.VSub
_VARS = [
    # Unbraced variables
    # '$' followed by a name matching VAR_NAME_RE.
    R(r'\$' + VAR_NAME_RE, Id.VSub_DollarName),
    # '$' followed by exactly one decimal digit.
    R(r'\$[0-9]', Id.VSub_Number),
    # '$' followed by one punctuation character.  These use C rather than R,
    # so '$' and '*' are presumably matched literally -- confirm.
    C(r'$!', Id.VSub_Bang),
    C(r'$@', Id.VSub_At),
    C(r'$#', Id.VSub_Pound),
    C(r'$$', Id.VSub_Dollar),
    C(r'$*', Id.VSub_Star),
    C(r'$-', Id.VSub_Hyphen),
    # NOTE(review): the list is not closed within the visible excerpt; any
    # remaining entries and the closing bracket are outside this view.