Example #1
File: lex.py Project: klauer/pypdb
def main():
    import logging
    lexer = lex.lex(debug=1, optimize=0, debuglog=logging.getLogger(__name__))
    lexer._file = '<???>'
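    # runmain() tokenizes the file named on the command line (or stdin) and prints each token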
    lex.runmain(lexer=lexer)
    if lex.lexer.current_state() != 'INITIAL':
        raise DBSyntaxError("open macro at EOF", lexer._file, -1)
Example #2
    def lexer_output_to_dict(self):
        with open(os.path.join("php_functions", "php_function_list.json"),
                  "r") as f:
            php_functions = json.load(f)
        with open(self.input_file, "r") as fin:
            # Redirect stdout so lex.runmain()'s token dump lands in a file
            old_stdout = sys.stdout
            sys.stdout = open("lexer_output.txt", "w")
            lex.runmain(lexer=phplex.full_lexer, data=fin.read().rstrip())
            sys.stdout.close()
            sys.stdout = old_stdout
        linepos = 0
        lineno = 0
        lexer_dict = {}
        with open("lexer_output.txt", "r") as f:
            for line in f.readlines():
                toks = line.split(",")
                linepos = int(toks[-1].rstrip(")\n"))
                lineno = int(toks[-2])
                toktype = toks[0].lstrip("(")
                tokvalue = self.find_tok_value(line)
                if toktype not in lexer_dict:
                    lexer_dict[toktype] = {}
                    lexer_dict[toktype][tokvalue] = [(lineno, linepos)]
                elif tokvalue not in lexer_dict[toktype]:
                    lexer_dict[toktype][tokvalue] = [(lineno, linepos)]
                else:
                    lexer_dict[toktype][tokvalue].append((lineno, linepos))
        self.find_all_functions(lexer_dict, php_functions)
        number_of_lines = int(lineno)
        number_of_chars = int(linepos)
        return lexer_dict, number_of_lines, number_of_chars
Example #3
def getAST():
	lexer = lex.lex()
	lexer.indents = []
	lexer.indents.append(0)
	lexer.paren_stack = []
	lexer.curr_indent = 0
	lexer.token_ = lexer.token
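	# Keep the stock token() as token_ and route calls through token_override(),
	# which can use the indent/paren state set up above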
	lexer.token = (lambda: token_override(lexer))
	lexer.begin('indent')
	yacc.yacc(debug=1)
	file = sys.argv[1]
	stream = open(file)
	contents = stream.read()
	lex.runmain(lexer)
	ast = yacc.parse(contents, lexer)
	return ast
Example #4
    def run_token_match(self, parser, token_data):
        from ply.lex import runmain  #@UnresolvedImport

        original_stdout = sys.stdout
        stdout_stringio = StringIO()

        sys.stdout = stdout_stringio

        try:
            runmain(lexer=parser.lexer, data=token_data)

            captured_io = stdout_stringio.getvalue()
            return captured_io

        finally:
            sys.stdout = original_stdout
Example #5
    def run_token_match(self,
                        parser,
                        token_data):
        from ply.lex import runmain #@UnresolvedImport

        original_stdout = sys.stdout
        stdout_stringio = StringIO()

        sys.stdout = stdout_stringio

        try:
            runmain(lexer = parser.lexer, data = token_data)

            captured_io = stdout_stringio.getvalue()
            return captured_io

        finally:
            sys.stdout = original_stdout
Example #6
import ply.lex as lex

tokens = (
    'H_EDIT_DESCRIPTOR',
    )

# Tokens
t_ignore = " \t\n"

def t_H_EDIT_DESCRIPTOR(t):
    r"\d+H.*"                     # This grabs all of the remaining text
    i = t.value.index('H')
    n = int(t.value[:i])
    
    # Adjust the tokenizing position
    t.lexer.lexpos -= len(t.value) - (i+1+n)
    t.value = t.value[i+1:i+1+n]
    return t                                  
    
def t_error(t):
    print "Illegal character '%s'" % t.value[0]
    t.lexer.skip(1)
    
# Build the lexer
lex.lex()
lex.runmain(data="3Habc 10Habcdefghij 2Hxy")



Example #7
t_TYPE = r'Int|Float|Double'
t_ARROW = r'->'
t_COMMA = r','
t_TRIPLEDOT = r'\.\.\.'


def t_NUMBER(t):
    r'\d+'
    t.value = int(t.value)
    return t


def t_ID(t):
    r'[a-zA-Z][a-zA-Z0-9]*'
    t.type = reserved.get(t.value, 'ID')
    return t


def t_singleLine(t):
    r'//.*\n'


def t_multiLine(t):
    r'/\*(.|\n)*?\*/'


lexer = lex.lex(debug=1)

if (__name__ == "__main__"):
    lex.runmain()
Example #8
                else:
                    # Rewrite close tag as a semicolon.
                    t.type = 'SEMI'
                    break

            t = self.lexer.token()

        self.last_token = t
        return t

    # Iterator interface
    def __iter__(self):
        return self

    def next(self):
        t = self.token()
        if t is None:
            raise StopIteration
        return t

    __next__ = next

full_lexer = lex.lex()
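# Wrap the raw lexer so tokens the parser never sees are filtered out or rewritten (e.g. a close tag becomes SEMI)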
lexer = FilteredLexer(full_lexer)

full_tokens = tokens
tokens = [token for token in tokens if token not in unparsed]

if __name__ == "__main__":
    lex.runmain(full_lexer)
Example #9

def t_DOCSTRINGOPEN(t):
    r'/\*\*[ ]+'
    return t

#t_COMMENTOPEN      = r'/\*'
t_COMMENTCLOSE     = r'\*/'

 
# Preprocessor directive (ignored)
def t_preprocessor(t):
    r'\#(.)*?\n'
    t.lexer.lineno += 1

    
def t_error(t):
    print "Illegal character %s" % repr(t.value[0])
    t.lexer.skip(1)


    
lexer = lex.lex(debug=False)
if __name__ == "__main__":
    lex.runmain(lexer)

    



Example #10
t_INTERVAL = 'interval'

# Delimiters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACE = r'\['
t_RBRACE = r'\]'
t_COMMA = r','
t_SEMICOLON = r';'

# Non-emitting
t_ignore = (' \t\n\r')


def t_comment(t):
    r'\#[^\n]*'
    pass


def t_error(t):
    print("Illegal character '{}'".format(t), file=sys.stderr)
    sys.exit(-1)


# Create lexer on call and import
function_lexer = lex.lex()  #, optimize=1) #used when stable

# On call run as a util, taking in text and printing the lexed version
if __name__ == "__main__":
    lex.runmain(function_lexer)
Example #11
t_EQUALS  = r'='
t_LPAREN  = r'\('
t_RPAREN  = r'\)'
t_NAME    = r'[a-zA-Z_][a-zA-Z0-9_]*'

def t_NUMBER(t):
    r'\d+'
    try:
        t.value = int(t.value)
    except ValueError:
        print("Integer value too large %s" % t.value)
        t.value = 0
    return t

t_ignore = " \t"

def t_newline(t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")
    
def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)
    
# Build the lexer
lex.lex(optimize=1,lextab="opt2tab")
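# optimize=1 skips most rule validation and caches the generated tables in opt2tab.py for faster startup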
lex.runmain(data="3+4")



Example #12
t_ignore = " \t"


# Comments
def t_comment(t):
    r'/\*'
    t.lexer.begin('comment')
    print "Entering comment state"


def t_comment_body_part(t):
    r'(.|\n)*\*/'
    print "comment body", t
    t.lexer.begin('INITIAL')


def t_error(t):
    pass


t_comment_error = t_error
t_comment_ignore = t_ignore

import sys

lex.lex()

data = "3 + 4 /* This is a comment */ + 10"

lex.runmain(data=data)
Example #13
    @_lex.TOKEN(num + r'%')
    def t_PERCENTAGE(self, t): 
        return t
    
    t_NUMBER       = num
    
    @_lex.TOKEN(U + R + L + r'\(' + w + r_or(string, url) + w + r'\)')
    def t_URI(self, t):
        return t
    
    @_lex.TOKEN(ident + r'\(')
    def t_FUNCTION(self, t): 
        return t
    
    def t_error(self, t):
        print "Illegal token '%s'" % t.value[0]
        t.lexer.skip(1)
    


def lex(**kw):
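    # Always build the lexer from a csslexer instance, overriding any object=/module= argument the caller passed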
    if 'object' in kw: del kw['object']
    kw['module'] = csslexer()
    if 'reflags' not in kw:
        kw['reflags'] = 0
    kw['reflags'] |= re.UNICODE | re.IGNORECASE
    return _lex.lex(**kw)

if '__main__' == __name__:
    _lex.runmain(lexer=lex())
Example #14
    def __init__(self, filename):
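        # Build the lexer from this object's token rules and re-export its input()/token()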
        self.lex = lex.lex(module=self)
        self.input = self.lex.input
        self.token = self.lex.token

        # For tracking current file/line position
        self.filename = filename
        self.line_offset = 0

        self.filenames = []
        self._filenames_set = set()

        if self.filename:
            self.filenames.append(filename)
            self._filenames_set.add(filename)

        # Doxygen comments
        self.doxygenCommentCache = ""

    def current_location(self):
        return self.filename, self.lex.lineno - self.line_offset

    def get_doxygen(self):
        doxygen = self.doxygenCommentCache
        self.doxygenCommentCache = ""
        return doxygen


if __name__ == "__main__":
    lex.runmain(lexer=Lexer(None))
Example #15
    return t


t_OP_GE = r'>='
t_OP_LE = r'<='
t_OP_EQ = r'=='
t_OP_NE = r'!='


def t_ID(t):
    r'[a-zA-Z_]+[\da-zA-Z_]*'
    t.type = reserved.get(t.value, 'ID')
    return t


t_ignore = ' \t\v\f'


def t_error(t):
    raise SyntaxError('Error at line %d, position %d' % (t.lineno, t.lexpos))


lexer = lex.lex()


#
# Scripting part
#
if __name__ == '__main__':
    lex.runmain()  # pragma: no cover
Example #16
    t_EQ = r'=='

    def t_error(self, t):
        print("Illegal character '%s'" % t.value[0])
        t.lexer.skip(1)

    def build(self, **kwargs):
        self.lexer = lex.lex(module=self, **kwargs)

    # Test it output
    def test(self, data):
        self.lexer.input(data)
        while True:
            tok = self.lexer.token()
            if not tok:
                break
            print(tok)


# Build the lexer and try it out
cpp_scanner = MyLexer()
cpp_scanner.build()
if __name__ == "__main__":
    if (len(sys.argv) == 2):
        filename = sys.argv[1]
        a = open(filename)
        data = a.read()
        cpp_scanner.test(data)
    else:
        lex.runmain(cpp_scanner.lexer)
Example #17
            if tok.type not in self._discard_types:
                tok.location = (self.filename, tok.lineno - self.line_offset)
                break

        return tok

    def token_if(self, *types):
        tok = self.token(eof_ok=True)
        if tok is None:
            return None
        if tok.type not in types:
            # put it back on the left in case it was retrieved
            # from the lookahead buffer
            self.lookahead.appendleft(tok)
            return None
        return tok

    def return_token(self, tok):
        self.lookahead.appendleft(tok)

    def return_tokens(self, toks):
        self.lookahead.extendleft(reversed(toks))


if __name__ == "__main__":
    try:
        lex.runmain(lexer=Lexer(None))
    except EOFError:
        pass
Example #18
import ply.lex as lex

tokens = (
    'H_EDIT_DESCRIPTOR',
    )

# Tokens
t_ignore = " \t\n"

def t_H_EDIT_DESCRIPTOR(t):
    r"\d+H.*"                     # This grabs all of the remaining text
    i = t.value.index('H')
    n = int(t.value[:i])
    
    # Adjust the tokenizing position
    t.lexer.lexpos -= len(t.value) - (i+1+n)
    t.value = t.value[i+1:i+1+n]
    return t                                  
    
def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)
    
# Build the lexer
lex.lex()
lex.runmain(data="3Habc 10Habcdefghij 2Hxy")



Example #19
    def autotest(self):
        lex.runmain()
Example #20
t_COMMA     = r','
t_SEMICOLON = r';'




# Non-emitting
t_ignore    = (' \t\n\r')

def t_comment(t):
    r'\#[^\n]*'
    pass

def t_error(t):
    print("Illegal character '{}'".format(t), file=sys.stderr)
    sys.exit(-1)








# Create lexer on call and import
function_lexer = lex.lex() #, optimize=1) #used when stable

# On call run as a util, taking in text and printing the lexed version
if __name__ == "__main__":
    lex.runmain(function_lexer)
Example #21
    return t


# Newlines
def t_NEWLINE(t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")


# Comments
def t_comment(t):
    r'/\*(.|\n)*?\*/'
    t.lexer.lineno += t.value.count('\n')


# Preprocessor directive (ignored)
def t_preprocessor(t):
    r'\#(.)*?\n'
    t.lexer.lineno += 1


# Error handling
def t_error(t):
    print("Illegal character %s" % repr(t.value[0]))
    t.lexer.skip(1)


lexical_analyzer = lex.lex()
if __name__ == "__main__":
    lex.runmain(lexical_analyzer)
Example #22
        self.data = data

        return self._lexer.input(data)

    def token(self):
        tok = self._lexer.token()
        if tok is not None:
            # Tokens without a processing function don't set this themselves
            tok.lexer = self._lexer
            # Wrap the token up to present useful data when in the parsing stage
            tok.value = RdlToken(tok)
        return tok


    def __init__(self):
        # hw and sw are properties, but they are already keywords, which take precedence
        self.keywords.update({prop.name: 'PROPNAME' for prop in properties if prop.name not in self.keywords})

        self.tokens = ['VNUM', 'NUM', 'STRING', 'ID', 'DEREF', 'INC', 'MOD', 'LSQ', 'RSQ', 'RBRACE', 'LBRACE',
                       'COLON', 'COMMA', 'DOT',
                       #'OR',
                       'AT', 'SEMI', 'EQ']
        self.tokens += list(OrderedDict.fromkeys(self.keywords.values()))

        self._lexer = lex.lex(object=self)
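        # (object=self tells PLY to collect the t_* rules and token list from this instance)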
        self.lex_errors = 0
        self.data = None

if __name__ == "__main__":
    lex.runmain(RdlLexer())
Example #23
# lex_many_tokens.py
#
# Test lex's ability to handle a large number of tokens (beyond the
# 100-group limit of the re module)

import sys
if ".." not in sys.path:
    sys.path.insert(0, "..")

import ply.lex as lex

tokens = ["TOK%d" % i for i in range(1000)]

for tok in tokens:
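    # In Python 3, exec() needs an explicit globals() so each generated t_TOKn rule
    # lands at module level, where lex() can find it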
    if sys.version_info[0] < 3:
        exec("t_%s = '%s:'" % (tok, tok))
    else:
        exec("t_%s = '%s:'" % (tok, tok), globals())

t_ignore = " \t"


def t_error(t):
    pass

lex.lex(optimize=1, lextab="manytab")
lex.runmain(data="TOK34: TOK143: TOK269: TOK372: TOK452: TOK561: TOK999:")
Example #24
import sys
if "../.." not in sys.path: sys.path.insert(0,"../..")
from gSLLexer import *
import ply.lex as lex

code = """var
a:numerico
"""
gLexer = gSLLexer()
lex.runmain(data = code)
Example #25
# lex_many_tokens.py
#
# Test lex's ability to handle a large number of tokens (beyond the
# 100-group limit of the re module)

import sys
if ".." not in sys.path: sys.path.insert(0, "..")

import ply.lex as lex

tokens = ["TOK%d" % i for i in range(1000)]

for tok in tokens:
    if sys.version_info[0] < 3:
        exec("t_%s = '%s:'" % (tok, tok))
    else:
        exec("t_%s = '%s:'" % (tok, tok), globals())

t_ignore = " \t"


def t_error(t):
    pass


lex.lex()
lex.runmain(data="TOK34: TOK143: TOK269: TOK372: TOK452: TOK561: TOK999:")
Example #26
def run_on_argv1():
    lex.runmain(full_lexer)
Example #27
	t.type = rdict.get(t.value, "IDENTIFIER")
	return t

def t_NEWLINE(t):
	r'\n+'
	t.lexer.lineno += t.value.count("\n")

def t_comment(t):
	r'/\*(.|\n)*?\*/|//(.)*?\n'
	t.lexer.lineno += t.value.count('\n')

def t_preprocessor(t):
	r'\#(.)*?\n'
	t.lexer.lineno += 1

def t_error(t):
	print("Error : %s" % str(t.value[0]))
	t.lexer.skip(1)

lexer = lex.lex()
if __name__ == "__main__":
	if len(sys.argv) < 2:
		print("{token type, token name, line number, index relative to start of input}")
		lex.runmain(lexer)
	else:
		fo = open(sys.argv[1], "r")
		data = fo.read()
		fo.close()
		print("{token type, token name, line number, index relative to start of input}")
		lex.runmain(lexer, data)
Example #28
# lex_module.py
#

import sys
if ".." not in sys.path: sys.path.insert(0,"..")

import ply.lex as lex
import lex_module_import
lex.lex(module=lex_module_import)
lex.runmain(data="3+4")
Example #29
def run_on_argv1():
    lex.runmain(full_lexer)
Example #30
def t_SIMB(t):
    r'[></a-zA-Z_+=\*\-][></a-zA-Z0-9_+\*\-=]*'
    #print ('In t_SIMB',t)
    t.type = reserved.get(t.value,'SIMB')    # Check for reserved words
    return t

def t_TEXT(t):
    r'\'[a-zA-Z0-9_+\*\- :,\.\\[\];=()\"$]*\''
    #print ('In t_Text',t)
    t.type = reserved.get(t.value,'TEXT')    # Check for reserved words
    return t

# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)

# A string containing ignored characters (spaces and tabs)
t_ignore  = ' \t'

# Error handling rule
def t_error(t):
    print "Illegal character '%s'" % t.value[0]
    t.lexer.skip(1)

# Build the lexer
lex.lex()

if __name__ == '__main__':
    lex.runmain()
Example #31
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'\d+'

t_ignore = " \t"

# Comments
def t_comment(t):
    r'/\*'
    t.lexer.begin('comment')
    print "Entering comment state"

def t_comment_body_part(t):
    r'(.|\n)*\*/'
    print "comment body", t
    t.lexer.begin('INITIAL')

def t_error(t):
    pass

t_comment_error = t_error
t_comment_ignore = t_ignore

import sys

lex.lex()

data = "3 + 4 /* This is a comment */ + 10"

lex.runmain(data=data)
Example #32
    t_ignore_COMMENT = r'//.*'

    # t_MODIFIERBACK    = r'%'
    # t_MODIFIERDEBUG   = r'\#'
    # t_MODIFIERROOT    = r'!'
    # t_MODIFIERDISABLE = r'\*'

    t_ignore = " \t"

    def t_comment(self, t):
        r'/\*(.|\n)*?\*/'
        t.lexer.lineno += t.value.count('\n')

    def t_ID(self, t):
        r'[$]?[a-zA-Z_][a-zA-Z_0-9]*'
        t.type = reserved.get(t.value, 'ID')  # Check for reserved words
        return t

    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += t.value.count("\n")

    def t_error(self, t):
        error("Illegal character '%s'" % t.value[0])
        t.lexer.skip(1)


#lexer = lex.lex()
if __name__ == "__main__":
    lex.runmain(lexer)
Example #33
    @_lex.TOKEN(num + r'%')
    def t_PERCENTAGE(self, t):
        return t

    t_NUMBER = num

    @_lex.TOKEN(U + R + L + r'\(' + w + r_or(string, url) + w + r'\)')
    def t_URI(self, t):
        return t

    @_lex.TOKEN(ident + r'\(')
    def t_FUNCTION(self, t):
        return t

    def t_error(self, t):
        print "Illegal token '%s'" % t.value[0]
        t.lexer.skip(1)


def lex(**kw):
    if 'object' in kw: del kw['object']
    kw['module'] = csslexer()
    if 'reflags' not in kw:
        kw['reflags'] = 0
    kw['reflags'] |= re.UNICODE | re.IGNORECASE
    return _lex.lex(**kw)


if '__main__' == __name__:
    _lex.runmain(lexer=lex())