Esempio n. 1
0
#!/usr/bin/python
"""
parse_cpython.py
"""
from __future__ import print_function

import errno
import os
import re
import sys

from core.util import log
from frontend.lexer import C, R

# Token rules, apparently for lexing C source that declares PyMethodDef tables.
# Each entry pairs a pattern with a token-type name.
# NOTE(review): R and C come from frontend.lexer, which is not shown here.  R
# looks like a regex rule and C a literal-string rule (the C entries below
# contain unescaped parentheses) -- confirm.
C_DEF = [
    # A '#' plus whatever `.*` matches after it.
    # NOTE(review): in C source '#' starts a preprocessor directive; presumably
    # these are labeled Comment so they can be skipped -- confirm.
    R(r'#.*', 'Comment'),
    # Runs of spaces, tabs, and newlines.
    R(r'[ \t\n]+', 'Whitespace'),

    # This could be more space-insensitive.
    # Start of a method table: `static ... PyMethodDef NAME[] = `.  Group 1
    # captures NAME.
    R(r'static.*PyMethodDef (.*)\[\] = ', 'BeginDef'),
    # Single-character punctuation.
    C(r'{', 'LBrace'),
    C(r'}', 'RBrace'),
    C(r',', 'Comma'),
    C(r';', 'Semi'),
    # Double-quoted string; group 1 is the text between the quotes.  The body
    # is `[^"]*`, so the match ends at the first '"' and a backslash-escaped
    # quote gets no special treatment.
    R(r'"([^"]*)"', 'Str'),
    C(r'FILE', 'FILE'),
    # Opening and closing of a PyDoc_STR(...) wrapper.  The closing rule labels
    # a bare ')' as RDocStr; it does not check that 'PyDoc_STR(' preceded it.
    C(r'PyDoc_STR(', 'LDocStr'),
    C(r')', 'RDocStr'),
    # Any run of characters other than ',', '}', and newline.
    # NOTE(review): listed last, presumably as a fallback for text the rules
    # above do not recognize -- confirm how the lexer resolves overlaps.
    R(r'[^,}\n]+', 'Opaque'),
]
Esempio n. 2
0
File: lex.py Progetto: mrshu/oil
# EXPRESSION (takes place of ARITH, VSub_ArgUnquoted, VSub_ArgDQ)
# SQ  RAW_SQ  DQ  RAW_DQ
# VS    -- a single state here?  Or switches into expression state, because }
#          is an operator
# Problem: DICT_KEY might be a different state, to accept either a bare word
# foo, or an expression (X=a+2), which is allowed in shell.  Python doesn't
# allow unquoted words, but we want to.

# TODO: There are 4 shared groups here.  I think you should test if that
# structure should be preserved through re2c.  Do a benchmark.
#
# If a group has no matches, then return Id.Unknown_Tok?  And then you can
# chain the groups in order.  It might make sense to experiment with the order
# too.

# One or more spaces, tabs, or carriage returns, tagged Id.WS_Space.  Newline
# is not in the character class, so this rule never matches it.
_SIGNIFICANT_SPACE = R(r'[ \t\r]+', Id.WS_Space)

# Rules for input that starts with a backslash.
_BACKSLASH = [
    # A backslash followed by any one character other than newline or NUL.
    R(r'\\[^\n\0]', Id.Lit_EscapedChar),
    # Not a raw string: this is the two-character sequence backslash + newline
    # (a line continuation, going by the Id name).
    C('\\\n', Id.Ignored_LineCont),
]

# Regex source for a name: an ASCII letter or underscore followed by any number
# of ASCII letters, digits, or underscores.  Kept as a plain string so it can
# be concatenated into larger patterns (e.g. r'\$' + VAR_NAME_RE in _VARS).
VAR_NAME_RE = r'[a-zA-Z_][a-zA-Z0-9_]*'

# All Kind.VSub
_VARS = [
    # Unbraced variables
    R(r'\$' + VAR_NAME_RE, Id.VSub_DollarName),
    R(r'\$[0-9]', Id.VSub_Number),
    C(r'$!', Id.VSub_Bang),
    C(r'$@', Id.VSub_At),
Esempio n. 3
0
# SQ  RAW_SQ  DQ  RAW_DQ
# VS    -- a single state here?  Or switches into expression state, because }
#          is an operator
# Problem: DICT_KEY might be a different state, to accept either a bare word
# foo, or an expression (X=a+2), which is allowed in shell.  Python doesn't
# allow unquoted words, but we want to.

# TODO: There are 4 shared groups here.  I think you should test if that
# structure should be preserved through re2c.  Do a benchmark.
#
# If a group has no matches, then return Id.Unknown_Tok?  And then you can
# chain the groups in order.  It might make sense to experiment with the order
# too.

# Rules for input that starts with a backslash.
_BACKSLASH = [
    # A backslash followed by any one character other than newline or NUL.
    R(r'\\[^\n\0]', Id.Lit_EscapedChar),
    # Not a raw string: this is the two-character sequence backslash + newline
    # (a line continuation, going by the Id name).
    C('\\\n', Id.Ignored_LineCont),
]

# Regex source for a name: an ASCII letter or underscore followed by any number
# of ASCII letters, digits, or underscores.  Kept as a plain string so it can
# be concatenated into larger patterns (e.g. r'\$' + VAR_NAME_RE in _VARS).
VAR_NAME_RE = r'[a-zA-Z_][a-zA-Z0-9_]*'

# All Kind.VSub
_VARS = [
    # Unbraced variables
    R(r'\$' + VAR_NAME_RE, Id.VSub_DollarName),
    R(r'\$[0-9]', Id.VSub_Number),
    C(r'$!', Id.VSub_Bang),
    C(r'$@', Id.VSub_At),
    C(r'$#', Id.VSub_Pound),
    C(r'$$', Id.VSub_Dollar),
    C(r'$*', Id.VSub_Star),