Ejemplo n.º 1
0
Archivo: lex.py Proyecto: mrshu/oil
# Problem: DICT_KEY might be a different state, to accept either a bare word
# foo, or an expression (X=a+2), which is allowed in shell.  Python doesn't
# allow unquoted words, but we want to.

# TODO: There are 4 shared groups here.  I think you should test if that
# structure should be preserved through re2c.  Do a benchmark.
#
# If a group has no matches, then return Id.Unknown_Tok?  And then you can
# chain the groups in order.  It might make sense to experiment with the order
# too.

# NOTE(review): R appears to tag a regular-expression pattern and C a literal
# (constant) string; both pair the pattern with a token Id.  Confirm where R
# and C are defined.

# One or more spaces, tabs, or carriage returns.  Newline is not part of the
# character class, so this rule never matches it.
_SIGNIFICANT_SPACE = R(r'[ \t\r]+', Id.WS_Space)

# Rules for input that starts with a backslash.
_BACKSLASH = [
    # A backslash followed by any single character other than newline or NUL.
    R(r'\\[^\n\0]', Id.Lit_EscapedChar),
    # A backslash immediately followed by a newline.  The string is not raw,
    # so '\\\n' is exactly those two characters.
    C('\\\n', Id.Ignored_LineCont),
]

# Regex fragment for a name: a letter or underscore, followed by any number of
# letters, digits, or underscores.
VAR_NAME_RE = r'[a-zA-Z_][a-zA-Z0-9_]*'

# All Kind.VSub
_VARS = [
    # Unbraced variables
    R(r'\$' + VAR_NAME_RE, Id.VSub_DollarName),
    R(r'\$[0-9]', Id.VSub_Number),
    C(r'$!', Id.VSub_Bang),
    C(r'$@', Id.VSub_At),
    C(r'$#', Id.VSub_Pound),
    C(r'$$', Id.VSub_Dollar),
    C(r'$*', Id.VSub_Star),
    C(r'$-', Id.VSub_Hyphen),
Ejemplo n.º 2
0
import errno
import os
import re
import sys

from core.util import log
from frontend.lexer import C, R

# Token patterns for C source text containing PyMethodDef tables.  Each entry
# pairs a pattern with a token name given as a plain string.
# NOTE(review): R appears to mark a regular expression and C a literal string
# (for example 'PyDoc_STR(' would not compile as a regex) -- confirm in
# frontend/lexer.py.  How overlapping patterns are resolved (list order vs.
# longest match) depends on the matcher, which is not shown here.
C_DEF = [
    # '#' through the end of the line.
    R(r'#.*', 'Comment'),
    # Runs of spaces, tabs, and newlines.
    R(r'[ \t\n]+', 'Whitespace'),

    # This could be more space-insensitive.
    # The capture group is the text between 'PyMethodDef ' and '[] = '.
    R(r'static.*PyMethodDef (.*)\[\] = ', 'BeginDef'),
    C(r'{', 'LBrace'),
    C(r'}', 'RBrace'),
    C(r',', 'Comma'),
    C(r';', 'Semi'),
    # A double-quoted string with no embedded double quote; the capture group
    # is the text between the quotes.  Backslash escapes are not recognized.
    R(r'"([^"]*)"', 'Str'),
    C(r'FILE', 'FILE'),
    C(r'PyDoc_STR(', 'LDocStr'),
    # Any ')' produces this token, not only one that closes PyDoc_STR(.
    C(r')', 'RDocStr'),
    # One or more characters other than comma, '}', or newline.
    R(r'[^,}\n]+', 'Opaque'),
]


# NOTE: This is copied from osh/match.py because we don't have 're' there.
def _CompileAll(pat_list):
    result = []
    for is_regex, pat, token_id in pat_list:
Ejemplo n.º 3
0
# VS    -- a single state here?  Or switches into expression state, because }
#          is an operator
# Problem: DICT_KEY might be a different state, to accept either a bare word
# foo, or an expression (X=a+2), which is allowed in shell.  Python doesn't
# allow unquoted words, but we want to.

# TODO: There are 4 shared groups here.  I think you should test if that
# structure should be preserved through re2c.  Do a benchmark.
#
# If a group has no matches, then return Id.Unknown_Tok?  And then you can
# chain the groups in order.  It might make sense to experiment with the order
# too.

# Rules for input that starts with a backslash.
# NOTE(review): R appears to tag a regular-expression pattern and C a literal
# (constant) string; both pair the pattern with a token Id.  Confirm where R
# and C are defined.
_BACKSLASH = [
    # A backslash followed by any single character other than newline or NUL.
    R(r'\\[^\n\0]', Id.Lit_EscapedChar),
    # A backslash immediately followed by a newline.  The string is not raw,
    # so '\\\n' is exactly those two characters.
    C('\\\n', Id.Ignored_LineCont),
]

# Regex fragment for a name: a letter or underscore, followed by any number of
# letters, digits, or underscores.
VAR_NAME_RE = r'[a-zA-Z_][a-zA-Z0-9_]*'

# All Kind.VSub
_VARS = [
    # Unbraced variables
    R(r'\$' + VAR_NAME_RE, Id.VSub_DollarName),
    R(r'\$[0-9]', Id.VSub_Number),
    C(r'$!', Id.VSub_Bang),
    C(r'$@', Id.VSub_At),
    C(r'$#', Id.VSub_Pound),
    C(r'$$', Id.VSub_Dollar),
    C(r'$*', Id.VSub_Star),
    C(r'$-', Id.VSub_Hyphen),
Ejemplo n.º 4
0
"""

from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.types_asdl import lex_mode_e
from core.meta import ID_SPEC
from frontend.lexer import C, R

# See unit tests in frontend/match_test.py.
# We need the [^\0]* because the re2c translation assumes it's anchored like $.
# The pattern reads: '#!', any characters, 'sh', one whitespace character
# (space, tab, CR, or LF), then any run of non-NUL characters.
SHOULD_HIJACK_RE = r'#!.*sh[ \t\r\n][^\0]*'

# NOTE(review): R appears to tag a regular-expression pattern and C a literal
# (constant) string; both pair the pattern with a token Id.  Confirm in
# frontend/lexer.py.

# One or more spaces, tabs, or carriage returns.  Newline is not part of the
# character class, so this rule never matches it.
_SIGNIFICANT_SPACE = R(r'[ \t\r]+', Id.WS_Space)

# Rules for input that starts with a backslash.
_BACKSLASH = [
    # A backslash followed by any single character other than newline or NUL.
    R(r'\\[^\n\0]', Id.Lit_EscapedChar),
    # A backslash immediately followed by a newline.  The string is not raw,
    # so '\\\n' is exactly those two characters.
    C('\\\n', Id.Ignored_LineCont),
]

# Regex fragment for a name: a letter or underscore, followed by any number of
# letters, digits, or underscores.
VAR_NAME_RE = r'[a-zA-Z_][a-zA-Z0-9_]*'

# All Kind.VSub
_VARS = [
    # Unbraced variables
    R(r'\$' + VAR_NAME_RE, Id.VSub_DollarName),
    R(r'\$[0-9]', Id.VSub_Number),
    C(r'$!', Id.VSub_Bang),
    C(r'$@', Id.VSub_At),
    C(r'$#', Id.VSub_Pound),
    C(r'$$', Id.VSub_Dollar),
    C(r'$*', Id.VSub_Star),
    C(r'$-', Id.VSub_Hyphen),