class Lexer:
    """Splits an input program into a stream of tokens."""

    # (name, pattern) pairs, tried by the generated lexer in this order.
    _TOKEN_RULES = (
        ('PRINT', r'print'),
        ('OPEN_PAREN', r'\('),
        ('CLOSE_PAREN', r'\)'),
        ('SEMI_COLON', r'\;'),
        ('ADD', r'\+'),
        ('SUB', r'\-'),
        ('NUMBER', r'\d+'),
    )

    def __init__(self):
        self.lexer = LexerGenerator()

    def __add_tokens(self):
        # Register every rule, then discard whitespace between tokens.
        for token_name, pattern in self._TOKEN_RULES:
            self.lexer.add(token_name, pattern)
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register all token rules and return the built lexer."""
        self.__add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for a lambda-calculus language with arithmetic."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register token rules; earlier rules win, so order matters."""
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Lambda
        self.lexer.add('LAMBDA', r'λ')
        # BUG FIX: DOT was r'.', which matches ANY character — it would
        # swallow digits, letters and operators before their own rules ran.
        # A literal dot must be escaped.
        self.lexer.add('DOT', r'\.')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Single lowercase letter variable
        self.lexer.add('VAR', r'[a-z]')
        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        # BUG FIX: DIV was r'\*' (a copy-paste of MUL), so '/' could never
        # be tokenized.  Division is a literal slash.
        self.lexer.add('DIV', r'\/')
        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for the whole/frac/word/bool teaching language."""

    def __init__(self):
        self.lg = LexerGenerator()

    def add_tokens(self):
        """Register every token rule; registration order is match priority."""
        # Literals.
        # BUG FIX: FRAC's dot was unescaped ('-?\d+.\d+' also matched e.g.
        # '12x34'); a decimal point must be r'\.'.
        self.lg.add('FRAC', '-?\d+\.\d+')
        self.lg.add('WHOLE', '-?\d+')
        # BUG FIX: the string alternatives used '.?' (at most ONE character),
        # so only empty/one-character strings could match; '.*?' accepts any
        # length (non-greedy).  NOTE(review): without DOTALL the triple-quote
        # form still cannot span newlines — confirm intended.
        self.lg.add('WORD', '(""".*?""")|(".*?")|(\'.*?\')')
        self.lg.add('BOOL', 'true(?!\w)|false(?!\w)')
        # Type keywords ('(?!\w)' stops them matching identifier prefixes).
        self.lg.add('K_WHOLE', 'whole(?!\w)')
        self.lg.add('K_FRAC', 'frac(?!\w)')
        self.lg.add('K_WORD', 'word(?!\w)')
        self.lg.add('K_BOOL', 'bool(?!\w)')
        self.lg.add('K_LETTER', 'letter(?!\w)')
        # Control-flow and logical keywords.
        self.lg.add('IF', 'if(?!\w)')
        self.lg.add('OTHER', 'other(?!\w)')
        self.lg.add('OTIF', 'otif(?!\w)')
        self.lg.add('LOOP', 'loop(?!\w)')
        self.lg.add('LOOPTILL', 'looptill(?!\w)')
        self.lg.add('ENDNOW', 'endnow(?!\w)')
        self.lg.add('RESUME', 'resume(?!\w)')
        self.lg.add('AND', 'and(?!\w)')
        self.lg.add('OR', 'or(?!\w)')
        self.lg.add('NOT', 'not(?!\w)')
        self.lg.add('FUNCTION', 'func(?!\w)')
        # BUG FIX: IDENTIFIER required at least TWO characters (the second
        # class used '+'), so single-letter names like 'x' never matched.
        self.lg.add('IDENTIFIER', "[a-zA-Z_][a-zA-Z0-9_]*")
        # Comparison operators: two-character forms before their prefixes.
        self.lg.add('EQUAL', '\==')
        self.lg.add('NOT_EQUAL', '\!=')
        self.lg.add('GREATER_EQUAL', '\>=')
        self.lg.add('LESS_EQUAL', '\<=')
        self.lg.add('GREATER_THAN', '\>')
        self.lg.add('LESS_THAN', '\<')
        self.lg.add('ASSIGNMENT', '\=')
        # Punctuation.
        self.lg.add('LEFT_BRACKET', '\[')
        self.lg.add('RIGHT_BRACKET', '\]')
        self.lg.add('LEFT_PARAN', '\{')
        self.lg.add('RIGHT_PARAN', '\}')
        self.lg.add('COMMA', '\,')
        self.lg.add('DOT', '\.')
        self.lg.add('COLON', '\:')
        # Arithmetic operators.
        self.lg.add('PLUS', '\+')
        self.lg.add('MINUS', '\-')
        self.lg.add('MUL', '\*')
        self.lg.add('DIV', '\/')
        self.lg.add('MOD', '\%')
        # NOTE(review): names look swapped — RIGHT_BRACES matches '(' and
        # LEFT_BRACES matches ')'.  Left unchanged because the parser
        # references these names; fix both sides together if confirmed.
        self.lg.add('RIGHT_BRACES', '\(')
        self.lg.add('LEFT_BRACES', '\)')
        self.lg.add('NEWLINE', '\n')
        # ignore whitespace (newlines stay — they are NEWLINE tokens)
        self.lg.ignore('[ \t\r\f\v]+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self.add_tokens()
        return self.lg.build()
def Lexer():
    """Build and return the lexer for the two-letter-keyword mini language."""
    generator = LexerGenerator()
    # (token name, pattern) in matching-priority order: keywords first,
    # punctuation and operators next, then the generic catch-alls.
    rules = (
        ('WHILE', r'wh'),
        ('PRINTF', r'pf'),
        ('IF', r'if'),
        ('ELSE', r'el'),
        ('MAIN', r'mn'),
        ('RETURN', r'rt'),
        ('LEFT_PAREN', r'\('),
        ('RIGHT_PAREN', r'\)'),
        ('SEMI_COLON', r'\;'),
        ('COMMA', r'\,'),
        ('PLUS', r'\+'),
        ('MINUS', r'\-'),
        ('MULT', r'\*'),
        ('DIV', r'\/'),
        ('RIGHT_BRACKETS', r'\}'),
        ('LEFT_BRACKETS', r'\{'),
        ('EQUAL', r'='),
        ('E_EQUAL', r'sm'),
        ('BT', r'bt'),
        ('LT', r'lt'),
        ('OR', r'or'),
        ('AND', r'and'),
        ('NOT', r'not'),
        ('INT', r'\d+'),
        ('IDENTIFIER', "[a-zA-Z_][a-zA-Z0-9_]*"),
    )
    for token_name, pattern in rules:
        generator.add(token_name, pattern)
    generator.ignore('\s+')
    return generator.build()
class Lexer:
    """Lazy-building lexer for a propositional-logic language."""

    # Rules in the original registration (priority) order.
    _RULES = (
        ('->', r'\-\>'),
        ('<->', r'\<\-\>'),
        ('+', r'\+'),
        ('*', r'\*'),
        ('!', r'\!'),
        ('(', r'\('),
        (')', r'\)'),
        ('false', r'false'),
        ('true', r'true'),
        ('xor', r'xor'),
        ('LITERAL', r'[A-Za-z]+'),
    )

    def __init__(self):
        self.lexer = LexerGenerator()
        self.build = None  # built lexer, created on first use

    def _add_tokens(self):
        for token_name, pattern in self._RULES:
            self.lexer.add(token_name, pattern)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules, build the lexer, and return self (fluent)."""
        self._add_tokens()
        self.build = self.lexer.build()
        return self

    def lex(self, input):
        """Tokenize *input*, building the lexer on first call."""
        if self.build is None:
            self.get_lexer()
        return self.build.lex(input)
class Lexer():
    """Lexer for a small French-keyword language (montrer = print)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Print keyword
        self.lexer.add('MON', r'montrer')
        # Parentheses
        self.lexer.add('PARENTESE1', r'\(')
        self.lexer.add('PARENTESE2', r'\)')
        # Statement terminator
        self.lexer.add('POINT_VERG', r'\;')
        # Additive operators
        self.lexer.add('PLUS', r'\+')
        self.lexer.add('MOINS', r'\-')
        # Integer literal (registered before TERM, which also matches digits)
        self.lexer.add('NOMBRE', r'\d+')
        # Skip whitespace between tokens
        self.lexer.ignore('\s+')
        # Multiplicative operators
        self.lexer.add('FOIS', r'\*')
        self.lexer.add('DIVI', r'\/')
        # Alphanumeric word / identifier
        self.lexer.add('TERM', r'[a-zA-Z0-9]+')
        # One or more '=' signs
        self.lexer.add('EGAL', r'\=+')
        # Misc punctuation
        self.lexer.add('QUOTE', r'\'')
        self.lexer.add('VERGULE', r',')
        self.lexer.add('DOLLAR', r'\$')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for a print/val assignment language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Keywords
        self.lexer.add('PRINT', r'print')
        self.lexer.add('VAL', r'val')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Semi Colon / assignment
        self.lexer.add('SEMI_COLON', r'\;')
        self.lexer.add('EQUAL', r'\=')
        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        # BUG FIX: DIV was r'\\' — an escaped BACKSLASH — so the division
        # operator '/' could never be tokenized.
        self.lexer.add('DIV', r'\/')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # BUG FIX: TEXT was r'[A-Za-z_0-9]*', which matches the EMPTY
        # string; a zero-width rule makes no progress on unmatched input.
        self.lexer.add('TEXT', r'[A-Za-z_0-9]+')
        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer(object):
    """Lexer for a small SQL-like document query language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # NOTE(review): rply matches rules in registration order, and CHAR
        # (r'\w') precedes every keyword below, so 'SELECT' currently lexes
        # as a run of CHAR tokens — confirm whether CHAR/STRING/ID are
        # really meant to outrank the keywords.
        self.lexer.add("CHAR", r'\w')
        self.lexer.add("NUMBER", r'\d+')
        # BUG FIX: STRING was r'\w*', which matches the EMPTY string; a
        # zero-width lexer rule makes no progress on unmatched input.
        self.lexer.add("STRING", r'\w+')
        self.lexer.add("ID", r'\w+')
        self.lexer.add("SELECT", r'SELECT')
        self.lexer.add("ASTERISK", r'\*')
        self.lexer.add("FROM", r'FROM')
        self.lexer.add("WHERE", r'WHERE')
        self.lexer.add("PATH", r'PATH')
        self.lexer.add("OPEN_CLAUSE", r'\(')
        self.lexer.add("CLOSE_CLAUSE", r'\)')
        self.lexer.add("CITE", r'\'')
        self.lexer.add("INSERT", r'INSERT')
        self.lexer.add("INTO", r'INTO')
        self.lexer.add("CREATE", r'CREATE')
        self.lexer.add("DOCUMENT", r'DOCUMENT')
        self.lexer.add("AT", r'AT')
        self.lexer.add("IS", r'IS')
        self.lexer.add("COMMA", r'\,')
        self.lexer.add("TEMPLATE", r'TEMPLATE')
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        # BUG FIX: this was a bare `_add_tokens()` call, which raises
        # NameError — the method must be looked up on self.
        self._add_tokens()
        return self.lexer.build()
class Lexer:
    """Lexer for a puts/eval toy language with fun-blocks."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Keywords
        self.lexer.add('PUTS', r'puts')
        self.lexer.add('EVAL', r'eval')
        # Parenthesis
        self.lexer.add('(', r'\(')
        self.lexer.add(')', r'\)')
        # Semi Colon
        self.lexer.add(';', r'\;')
        # Operators
        self.lexer.add('+', r'\+')
        self.lexer.add('-', r'\-')
        self.lexer.add('*', r'\*')
        self.lexer.add('/', r'\/')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # String
        self.lexer.add('STRING', r'\".*\"')
        # Function braces
        self.lexer.add('{', r'\{')
        # BUG FIX: the closing-brace rule was registered under the name '{'
        # (a copy-paste of the line above), so '}' produced a '{' token and
        # no '}' token name existed for the parser to reference.
        self.lexer.add('}', r'\}')
        self.lexer.add('FUNCTION', r'fun')
        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer:
    """Lexer for a printf-style toy language with string literals."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        rules = (
            ('PRINT', r'printf'),      # print keyword
            ('OPEN_PAREN', r'\('),
            ('CLOSE_PAREN', r'\)'),
            ('SEMI_COLON', r'\;'),
            ('SUM', r'\+'),
            ('SUB', r'\-'),
            ('MUL', r'\*'),
            ('DIV', r'\/'),
            ('NUMBER', r'\d+'),
            # A raw QUOTE rule used to exist here; STRING superseded it.
            ('STRING', r'"[^"]*"'),    # double-quoted, no embedded quotes
        )
        for token_name, pattern in rules:
            self.lexer.add(token_name, pattern)
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for an image-layout language (position/scale/move/...)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Image file name with a known extension.
        # BUG FIX: the old pattern embedded the GLOBAL flag '(?i)' in the
        # middle of the regex, which raises "global flags not at the start
        # of the expression" on Python 3.11+.  The scoped group '(?i:...)'
        # applies case-insensitivity to the extension only.
        self.lexer.add('IMAGE', r'[^\s]+\.(?i:jpg|png|gif|bmp|jpeg)')
        # Keywords
        self.lexer.add('POSITION', r'position')
        self.lexer.add('SCALE', r'scale')
        self.lexer.add('MOVE', r'move')
        self.lexer.add('DIMENSIONS', r'dimensions')
        self.lexer.add('TOTAL', r'total')
        self.lexer.add('PRINT', r'print')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Comma separator
        self.lexer.add('COMMA', r'\,')
        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for a print/arithmetic language with identifiers."""

    # Priority-ordered rules: PRINT precedes IDENTIFIER so the keyword wins.
    _RULES = (
        ('PRINT', r'print'),
        ('OPEN_PAREN', r'\('),
        ('CLOSE_PAREN', r'\)'),
        ('SEMI_COLON', r'\;'),
        ('SUM', r'\+'),
        ('SUB', r'\-'),
        ('MUL', r'\*'),
        ('DIV', r'\/'),
        ('NUMBER', r'\d+'),
        ('IDENTIFIER', "[a-zA-Z_][a-zA-Z0-9_]*"),
    )

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        for token_name, pattern in self._RULES:
            self.lexer.add(token_name, pattern)
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for the print/sum/sub grammar."""

    def __init__(self):
        self.lexer = LexerGenerator()

    # Function created accordingly to the programming language's grammar
    def _add_tokens(self):
        token_rules = (
            ('PRINT', r'print'),     # print keyword
            ('OPEN_PAREN', r'\('),
            ('CLOSE_PAREN', r'\)'),
            ('SEMI_COLON', r'\;'),
            ('SUM', r'\+'),
            ('SUB', r'\-'),
            ('NUMBER', r'\d+'),
        )
        for token_name, pattern in token_rules:
            self.lexer.add(token_name, pattern)
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the grammar's tokens and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for a Vietnamese-keyword language (in_ra = print)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Print keyword
        self.lexer.add('IN_RA', r'in_ra')
        # Parenthesis
        self.lexer.add('MO_NGOAC_TRON', r'\(')
        self.lexer.add('DONG_NGOAC_TRON', r'\)')
        # Statements end at a newline rather than ';'
        # self.lexer.add('HET_DONG', r'\;')
        self.lexer.add('HET_DONG', r'(\n)|(\r\n)')
        # Arithmetic operators
        self.lexer.add('CONG', r'\+')
        self.lexer.add('TRU', r'\-')
        self.lexer.add('NHAN', r'\*')
        self.lexer.add('CHIA', r'\/')
        # Comparison operators ('==' and '!=' precede their prefixes)
        self.lexer.add('BANG', r'\=\=')
        self.lexer.add('LON_HON', r'\>')
        # BUG FIX: the token name was 'NHO_HON)' — the stray ')' in the
        # NAME meant the parser's 'NHO_HON' reference could never match.
        self.lexer.add('NHO_HON', r'\<')
        self.lexer.add('KHAC', r'\!\=')
        # Integer literal
        self.lexer.add('SO_NGUYEN', r'\d+')
        # Ignore horizontal whitespace (newlines stay — they are HET_DONG)
        self.lexer.ignore(r'(^\s+)|( )+|\t+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer:
    """Lexer with parentheses, braces, brackets and basic arithmetic."""

    _RULES = (
        ('PRINT', r'print'),
        ('OPEN_PAREN', r'\('),
        ('CLOSE_PAREN', r'\)'),
        ('OPEN_BRACE', r'{'),
        ('CLOSE_BRACE', r'}'),
        ('OPEN_BRAKET', r'\['),
        ('CLOSE_BRAKET', r'\]'),
        ('SUM', r'\+'),
        ('SUB', r'-'),
        ('MUL', r'\*'),
        ('DIV', r'/'),
        ('NUMBER', r'\d+'),
        ('SEMI_COLON', r'\;'),
    )

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        for token_name, pattern in self._RULES:
            self.lexer.add(token_name, pattern)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register every rule and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer with keywords, comparison operators and identifiers."""

    _RULES = (
        # Keywords (registered before ID so they win)
        ('PRINT', r'print'),
        ('IF', r'if'),
        ('ELSE', 'else'),
        # Parentheses
        ('OPEN_PAREN', r'\('),
        ('CLOSE_PAREN', r'\)'),
        # Separators
        ('SEMI_COLON', r'\;'),
        ('COLON', r'\:'),
        # Operators
        ('SUM', r'\+'),
        ('SUB', r'\-'),
        ('MUL', r'\*'),
        ('DIV', r'/'),
        ('MORE', r'\>'),
        ('LESS', r'\<'),
        ('EQUALS', r'\='),
        # Literals and identifiers
        ('NUMBER', r'\d+'),
        ('ID', r'\w+'),
    )

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        for token_name, pattern in self._RULES:
            self.lexer.add(token_name, pattern)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lex():
    """Lexer for a minimal print/arithmetic language."""

    def __init__(self):
        self.lex = LexerGenerator()

    def _add_tokens(self):
        rules = (
            ('PRINT', r'print'),     # print keyword
            ('LEFT_PAREN', r'\('),
            ('RIGHT_PAREN', r'\)'),
            ('EOL', r'\;'),          # end of statement
            ('SUM', r'\+'),
            ('SUB', r'\-'),
            ('MUL', r'\*'),
            ('DIV', r'\/'),
            ('NUMBER', r'\d+'),
        )
        for token_name, pattern in rules:
            self.lex.add(token_name, pattern)
        self.lex.ignore('\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lex.build()
def test_position(self):
    """Token source positions track line and column across newlines."""
    lg = LexerGenerator()
    lg.add("NUMBER", r"\d+")
    lg.add("PLUS", r"\+")
    lg.ignore(r"\s+")
    lexer = lg.build()

    def check(source, expected_positions):
        stream = lexer.lex(source)
        for lineno, colno in expected_positions:
            token = stream.next()
            assert token.source_pos.lineno == lineno
            assert token.source_pos.colno == colno
        with raises(StopIteration):
            stream.next()

    check("2 + 3", [(1, 1), (1, 3), (1, 5)])
    check("2 +\n 37", [(1, 1), (1, 3), (2, 5)])
class Lexer:
    """Lexer for a language with variables, printing and arithmetic."""

    # Priority-ordered rules: keywords before the VAR catch-all.
    _RULES = (
        ('PRINT', r'print'),
        ('ASSIGN', r'Var'),
        ('VAR', r'[a-zA-Z]+'),
        ('SEMI_COLON', r'\;'),
        ('COMMA', r','),
        ('SUB', r'\-'),
        ('SUM', r'\+'),
        ('MUL', r'\*'),
        ('DIV', r'\/'),
        ('VALUE_SETTER', r'='),
        ('NUMBER', r'\d+'),
        ('OPEN_PAREN', r'\('),
        ('CLOSE_PAREN', r'\)'),
    )

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        for token_name, pattern in self._RULES:
            self.lexer.add(token_name, pattern)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for a single-letter-keyword C-like language.

    BUG FIX (rule ordering): rply matches rules in registration order with
    first-match-wins, so a multi-character operator must be registered
    before any rule matching its first character.  Previously DIV ('/')
    preceded OR ('//') and NOT ('!') preceded DIFF ('!='), which made OR
    and DIFF unreachable ('//' lexed as DIV DIV and '!=' as NOT ASSIGN).
    """

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Parenthesis / braces
        self.lexer.add('OPEN_PAR', r'\(')
        self.lexer.add('CLOSE_PAR', r'\)')
        self.lexer.add('OPEN_KEY', r'\{')
        self.lexer.add('CLOSE_KEY', r'\}')
        # Separators
        self.lexer.add('SEMI_COLON', r'\;')
        self.lexer.add('COMMA', r'\,')
        self.lexer.add('QUOTE', r'\"')
        self.lexer.add('DOTS', r'\:')
        # Boolean operators ('//' before '/', '!=' before '!')
        self.lexer.add('OR', r'\//')
        self.lexer.add('AND', r'\&&')
        self.lexer.add('DIFF', r'\!=')
        self.lexer.add('NOT', r'\!')
        # Arithmetic operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'-')
        self.lexer.add('MULT', r'\*')
        self.lexer.add('DIV', r'/')
        # Relational operators ('==' already precedes '=')
        self.lexer.add('BIGGER_THAN', r'\>')
        self.lexer.add('SMALLER_THAN', r'\<')
        self.lexer.add('EQUAL_TO', r'\==')
        self.lexer.add('ASSIGN', r'=')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Types
        self.lexer.add('INT', r'IN')
        self.lexer.add('CHAR', r'CH')
        self.lexer.add('VOID', r'VO')
        # NOTE(review): the single-letter keywords below also match the
        # first letter of longer identifiers (e.g. 'Ix' -> IF + ...);
        # confirm whether they should require word boundaries.
        # While
        self.lexer.add('WHILE', r'W')
        # If - else
        self.lexer.add('IF', r'I')
        self.lexer.add('ELSE', r'E')
        # Print
        self.lexer.add('PRINT', r'P')
        # Scanf
        self.lexer.add('SCANF', r'S')
        # Function
        self.lexer.add('FUNC', r'F')
        # Main
        self.lexer.add('MAIN', r'M')
        # Identifier
        self.lexer.add('IDENTIFIER', "[a-zA-Z_][a-zA-Z0-9_]*")
        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer:
    """Builds a lexer from TokenEnum and pre-cleans source text."""

    def __init__(self):
        self.lg = LexerGenerator()
        self._build_lex_rules()

    def _build_lex_rules(self):
        # One lexer rule per enum member: name -> token name, value -> regex.
        for enum in TokenEnum:
            self.lg.add(enum.name, enum.value)
        # Whitespace between tokens is discarded.
        self.lg.ignore(AppConstant.SPACE_REGEX)

    def build_lexer(self):
        # Return the built rply lexer.
        return self.lg.build()

    def clean_source(self, source_code):
        # Strip comments, then empty lines, from the raw source text.
        # NOTE(review): both loops assume the span to delete is in capture
        # group 1 of the regex — confirm against AppConstant's definitions.
        comment = re.search(AppConstant.COMMENT_REGEX, source_code)
        while comment is not None:
            # Delete one comment per iteration, re-searching the shrunken text.
            start, end = comment.span(1)
            assert start >= 0 and end >= 0
            source_code = source_code[0:start] + source_code[end:]
            comment = re.search(AppConstant.COMMENT_REGEX, source_code)
        empty_line = re.search(AppConstant.EMPTY_LINE_REGEX, source_code)
        while empty_line is not None:
            start, end = empty_line.span(1)
            assert start >= 0 and end >= 0
            source_code = source_code[0:start] + source_code[end:]
            empty_line = re.search(AppConstant.EMPTY_LINE_REGEX, source_code)
        return source_code
def lexer_from_mapping(mapping):
    """Build a lexer whose escaped special characters come from *mapping*."""
    lg = LexerGenerator()
    # Slash-delimited data takes priority over everything else
    lg.add("DATA", r'/.+?/')
    # One token per special character, matched in its backslash-escaped form
    for char in mapping.keys():
        lg.add(char, r"\\" + char)
    # Fixed operators / punctuation
    for token_name, pattern in (
        ("TYPE", r':'),
        ("AND", r'\&'),
        ("OR", r'\|'),
        ("L_PAREN", r'\('),
        ("R_PAREN", r'\)'),
        ("EQUAL", r'='),
        ("CHILD", r'>'),
        ("PARENT", r'<'),
        ("NOT", r'!'),
    ):
        lg.add(token_name, pattern)
    # Anything that is not an operator or an escaped special char is data
    excluded_chars = r'^<>=&|():!' + "".join(r"\\" + c for c in mapping.keys())
    lg.add("DATA", "[{excluded}]+".format(excluded=excluded_chars))
    lg.ignore(r'\s+')
    lexer = lg.build()
    return lexer
class Lexer():
    """Lexer for a Portuguese-keyword print/arithmetic language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        rules = (
            ('ESCREVA', r'escreva'),    # print keyword
            ('APAR', r'\('),            # open paren
            ('FPAR', r'\)'),            # close paren
            ('PONTO_VIRGULA', r'\;'),   # semicolon
            ('SOMA', r'\+'),
            ('SUB', r'\-'),
            ('MUL', r'\*'),
            ('DIV', r'\/'),
            ('NUMERO', r'\d+'),
        )
        for token_name, pattern in rules:
            self.lexer.add(token_name, pattern)
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for a print-and-arithmetic toy language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Print
        self.lexer.add('PRINT', r'print')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Semi Colon
        self.lexer.add('SEMI_COLON', r'\;')
        # Operators
        self.lexer.add('PLUS', r'\+')
        self.lexer.add('MINUS', r'\-')
        # BUG FIX: DIVIDE was r'\\' — an escaped BACKSLASH — so the
        # division operator '/' could never be tokenized.
        self.lexer.add('DIVIDE', r'\/')
        self.lexer.add('MULTIPLY', r'\*')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for a small Spanish-keyword language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Keywords (capitalized, so they never collide with identifiers)
        self.lexer.add('BEGIN', r'Begin')
        self.lexer.add('NUCLEO', r'Nucleo')
        self.lexer.add('SI', r'Si')
        self.lexer.add('IMPRIMIR', r'Imprimir')
        self.lexer.add('FIN', r'Fin')
        # Parentheses
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Identifier: lowercase word optionally followed by digits
        self.lexer.add('IDENTIFICADOR', r'[a-z]+[0-9]*')
        # Assignment
        self.lexer.add('IGUAL', r'\=')
        # self.lexer.add('COMPARACION', r'\=\='))
        self.lexer.add('MAS', r'\+')
        # Integer literal
        self.lexer.add('INT', r'\d+')
        # Double-quoted string
        self.lexer.add('STRING', r'".*"')
        # self.lexer.add('STRING', r'^\"[a-z]+\"$')
        # Skip whitespace
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
def get_lexer():
    """Build a lexer from the module-level RULES table."""
    generator = LexerGenerator()
    for token_name, pattern in RULES:
        generator.add(token_name, pattern)
    generator.ignore('\s+')
    return generator.build()
class Lexer():
    """Lexer for a tiny C-like subset (#include, main, printf)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Program skeleton
        self.lexer.add('INCLUDE', r"#include ")
        self.lexer.add('MAIN', r"main\(\)")
        self.lexer.add('PRINT', r"printf")
        self.lexer.add('NUMBER', r'\d+')
        # Known header names
        self.lexer.add('LIBstdio.h', r"<stdio.h>")
        # Arithmetic operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        # Punctuation
        self.lexer.add('START', '{')
        self.lexer.add('FINISH', '}')
        self.lexer.add('OPEN_PARENS', r'\(')
        self.lexer.add('CLOSE_PARENS', r'\)')
        self.lexer.add('SEMI_COLON', r'\;')
        # Double-quoted string literal (non-greedy)
        self.lexer.add('STR', r'"(.*?)"')
        # Whitespace is not significant
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
def tokenize():
    """Build the lexer for the Portuguese-keyword toy language.

    rply tries rules in registration order with first-match-wins, so a
    multi-character operator must be registered before any one-character
    prefix of it.

    BUG FIX: EQUAL ('=') was registered before EQUALS ('=='), which made
    '==' lex as two EQUAL tokens and left EQUALS unreachable.  EQUALS now
    comes first (GE '>=' and LE '<=' were already correctly placed before
    GREATER and LESS).
    """
    lg = LexerGenerator()
    lg.add('NUMBER', r'\d+')
    lg.add('PLUS', r'\+')
    lg.add('MINUS', r'-')
    lg.add('MUL', r'\*')
    lg.add('DIV', r'/')
    lg.add('OPEN_PAR', r'\(')
    lg.add('CLOSE_PAR', r'\)')
    lg.add('OPEN_BLOCK', r'\{')
    lg.add('CLOSE_BLOCK', r'\}')
    lg.add('COMMA', r',')
    lg.add('IMPRIME', r'imprime')
    lg.add('ESCANEIA', r'escaneia')
    lg.add('CMD_END', r';')
    lg.add('GE', r'>=')
    lg.add('LE', r'<=')
    lg.add('EQUALS', r'==')
    lg.add('EQUAL', r'=')
    lg.add('GREATER', r'>')
    lg.add('LESS', r'<')
    lg.add('ENQUANTO', r'enquanto')
    # NOTE(review): single-letter keywords like E (r'e') also match the
    # first letter of longer identifiers (e.g. 'ex' -> E + IDENTIFIER);
    # confirm whether keyword rules should require word boundaries.
    lg.add('E', r'e')
    lg.add('OU', r'ou')
    lg.add('SENAO', r'senao')
    lg.add('NAO', r'nao')
    lg.add('SE', r'se')
    lg.add('IDENTIFIER', "[a-zA-Z_][a-zA-Z0-9_]*")
    lg.ignore('\s+')
    return lg.build()
def test_ignore_recursion(self):
    """A huge run of ignored input must not blow the stack."""
    generator = LexerGenerator()
    generator.ignore(r"\s")
    lexer = generator.build()
    tokens = list(lexer.lex(" " * 2000))
    assert tokens == []
class Lexer:
    """Lexer for the file/vector charting mini-language."""

    # (token, pattern) pairs in priority order.
    _RULES = (
        ('FILE', r'fileinitializer'),
        ('VARIABLE_FILE', r'[w]\d+'),
        # TODO: change v1 and f1 to varV1 and varF1
        ('VARIABLE_VECTOR', r'[v]\d+'),
        ('TIME_STAMP', r'timestamp'),
        ('VECTOR', r'vector'),
        ('ERROR', r'error'),
        ('2D_CHART', r'2dchart'),
        ('PRINT', r'print'),
        ('OPEN_PAREN', r'\('),
        ('CLOSE_PAREN', r'\)'),
        ('OPEN_SQUARE_PAREN', r'\['),
        ('CLOSE_SQUARE_PAREN', r'\]'),
        ('EQUAL', r'\='),
        ('SEMI_COLON', r'\;'),
        ('SUM', r'\+'),
        ('SUB', r'\-'),
        ('NUMBER', r'\d+'),
        # Quoted text with any symbols inside
        ('STRING', r'".+"'),
    )

    def __init__(self):
        super().__init__()
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        for token_name, pattern in self._RULES:
            self.lexer.add(token_name, pattern)
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register every rule and build the lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for a Portuguese-named C-like language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Type / print keywords (registered before IDENT so they win).
        self.lexer.add('PRINT', r'print')
        self.lexer.add('INT', r'int')
        self.lexer.add('CHAR', r'char')
        self.lexer.add('FLOAT', r'float')
        # Comparison operators: two-character forms first so '==', '>=' and
        # '<=' are not split by the one-character rules below.
        self.lexer.add('IGUAL', r'\==')
        self.lexer.add('MAIOROUIGUAL', r'\>=')
        self.lexer.add('MENOROUIGUAL', r'\<=')
        self.lexer.add('MENOR', r'\<')
        self.lexer.add('MAIOR', r'\>')
        self.lexer.add('DIFERENTE', r'\!=')
        # Logical operators
        self.lexer.add('AND', r'\&')
        self.lexer.add('OR', r'\|')
        self.lexer.add('NOT', r'\!')
        # Arithmetic operators and assignment
        self.lexer.add('SOMA', r'\+')
        self.lexer.add('SUBTRACAO', '-')
        self.lexer.add('MULTIPLICACAO', r'\*')
        self.lexer.add('DIVISAO', r'\/')
        self.lexer.add('RESTO', r'mod')
        self.lexer.add('ATRIBUICAO', r'\=')
        # self.lexer.add('ASPASSIMPLES', '\'')
        # self.lexer.add('ASPASDUPLAS', '"')
        # self.lexer.add('PONTO', '\.')
        # self.lexer.add('VIRGULA', '\,')
        # self.lexer.add('DOISPONTOS', r'\:')
        self.lexer.add('PONTOEVIRGULA', r'\;')
        self.lexer.add('ABREPARENTESES', r'\(')
        self.lexer.add('FECHAPARENTESES', r'\)')
        # self.lexer.add('ABRECHAVES', r'\{')
        # self.lexer.add('FECHACHAVES', r'\}')
        # Block delimiters and control-flow keywords
        self.lexer.add('INICIOBLOCO', r'begin')
        self.lexer.add('FIMBLOCO', r'end')
        self.lexer.add('FOR', r'for')
        self.lexer.add('IF', r'if')
        self.lexer.add('ELSE', r'else')
        self.lexer.add('BREAK', r'break')
        self.lexer.add('WHILE', r'while')
        self.lexer.add('READ', r'read')
        # Literals: REAL precedes NUMERO so '1.5' is not split at the dot.
        self.lexer.add('REAL', r'-?\d+[.]\d+')
        self.lexer.add('NUMERO', r'-?\d+')
        self.lexer.add('BOOLEANO', "true(?!\w)|false(?!\w)")
        self.lexer.add('STRING', r'"(?:\.|(\\\")|[^\""\n])*"')
        self.lexer.add('IDENT', r'[a-zA-Z][a-zA-Z0-9]*')
        self.lexer.add('CARACTER', r'\'[a-zA-Z]\'')
        # self.lexer.add('STRING', r'[a-zA-Z][a-zA-Z]+}')
        # self.lexer.add('IDENT', r'[a-zA-Z][a-zA-Z0-9]*')
        # self.lexer.add('CARACTER', r'[a-zA-Z]')
        # Ignore whitespace
        self.lexer.ignore('[\s\t \r\f\v]+')

    def get_lexer(self):
        # Register every rule and build the lexer.
        self._add_tokens()
        return self.lexer.build()
def test_regex_flags_ignore(self):
    """re flags passed to ignore() apply, so DOTALL eats the whole input."""
    generator = LexerGenerator()
    generator.add("ALL", r".*", re.DOTALL)
    generator.ignore(r".*", re.DOTALL)
    lexer = generator.build()
    stream = lexer.lex("test\ndotall")
    with raises(StopIteration):
        stream.next()
def construct_lexer():
    """Build the language's lexer.

    BUG FIXES (rply is first-match-wins in registration order):
    * NAME was r'[a-zA-Z0-9_]*', which matches the EMPTY string — a
      zero-width rule that makes no progress; it now requires >= 1 char.
    * NAME was registered before every keyword and word operator
      ('if', 'and', ...), so those rules could never match; keywords now
      come first.
    * Multi-character operators ('**', '->', '=>', '<=', '>=', '==',
      '!=', '...') were registered after single-character rules matching
      their first character, making them unreachable; they now precede
      their prefixes.
    """
    lg = LexerGenerator()
    # Literals
    lg.add('NUMBER', r'\d+(\.\d+)?')
    lg.add('STRING', r'\".*?\"')
    # Grouping tokens
    lg.add('OPEN_PAREN', r'\(')
    lg.add('CLOSE_PAREN', r'\)')
    lg.add('INDEX_OPEN', r'\[')
    lg.add('INDEX_CLOSE', r'\]')
    lg.add('COMMA', ',')
    # Keywords and word operators (before NAME so they can match).
    # NOTE(review): these still match as prefixes of longer names
    # (e.g. 'iffy' -> IF + NAME); add word boundaries if undesired.
    lg.add('IF', 'if')
    lg.add('ELSE', 'else')
    lg.add('DO', 'do')
    lg.add('END', 'end')
    lg.add('DEF', 'def')
    lg.add('LET', 'let')
    lg.add('WHILE', 'while')
    lg.add('FOR', 'for')
    lg.add('AND', 'and')
    lg.add('OR', 'or')
    lg.add('NOT', 'not')
    lg.add('XOR', 'xor')
    lg.add('IN', 'in')
    # Multi-character operators before their single-character prefixes
    lg.add('RANGE', r'\.\.\.')
    lg.add('EXPONENTIATION', r'\*\*')
    lg.add('SINGLE_ARROW', '->')
    lg.add('DOUBLE_ARROW', '=>')
    lg.add('LE', '<=')
    lg.add('GE', '>=')
    lg.add('EQ', '==')
    lg.add('NE', '!=')
    # Single-character operators
    lg.add('ADD', r'\+')
    lg.add('SUBTRACT', r'-')
    lg.add('MULTIPLY', r'\*')
    lg.add('DIVIDE', '/')
    lg.add('SELF_APPLY', '!')
    lg.add('DOT', r'\.')
    lg.add('GT', '>')
    lg.add('LT', '<')
    # Identifiers (catch-all; must stay last)
    lg.add('NAME', r'[a-zA-Z0-9_]+')
    # Whitespace
    lg.ignore(r"\s+")
    return lg.build()
def test_repr(self):
    """The stream keeps a printable str() before and during iteration."""
    generator = LexerGenerator()
    generator.add("NUMBER", r"\d+")
    generator.add("PLUS", r"\+")
    generator.ignore(r"\s+")
    stream = generator.build().lex("2 + 3")

    assert str(stream) is not None
    token = stream.next()
    assert (token.name, token.value) == ("NUMBER", "2")
    assert str(stream) is not None
    token = stream.next()
    assert token.name == "PLUS"
def test_ignore(self):
    """Whitespace is dropped; positions still index the original string."""
    generator = LexerGenerator()
    generator.add("NUMBER", r"\d+")
    generator.add("PLUS", r"\+")
    generator.ignore(r"\s+")
    stream = generator.build().lex("2 + 3")

    for expected_name, expected_value in (("NUMBER", "2"), ("PLUS", "+")):
        token = stream.next()
        assert token.name == expected_name
        assert token.value == expected_value
    token = stream.next()
    assert token.name == "NUMBER"
    assert token.value == "3"
    assert token.source_pos.idx == 4
    with raises(StopIteration):
        stream.next()
def test_states(self):
    """Rules can move the lexer between named states via to_state."""
    generator = LexerGenerator(initial_state="scalar")
    generator.add("NUMBER", r"\d+")
    generator.add("PLUS", r"\+")
    generator.ignore(r"\s+")
    # '[' enters the vector state; ']' returns to scalar.
    generator.add("OPEN_BRACKET", r"\[", to_state="vector")
    generator.add("PLUS", r"\+", state="vector")
    generator.add("NUMBER", r"\d+", state="vector")
    generator.add("NEW_LINE", r"\n+", state="vector")
    generator.add("CLOSE_BRACKET", r"\]", state="vector", to_state="scalar")
    generator.ignore(r" +", state="vector")
    stream = generator.build().lex("2 + [ 3 + 4 \n\n 5 + 6 ] + 7")

    expected = [
        ("NUMBER", "2", "scalar"),
        ("PLUS", "+", "scalar"),
        ("OPEN_BRACKET", "[", "scalar"),
        ("NUMBER", "3", "vector"),
        ("PLUS", "+", "vector"),
        ("NUMBER", "4", "vector"),
        ("NEW_LINE", "\n\n", "vector"),
        ("NUMBER", "5", "vector"),
        ("PLUS", "+", "vector"),
        ("NUMBER", "6", "vector"),
        ("CLOSE_BRACKET", "]", "vector"),
        ("PLUS", "+", "scalar"),
        ("NUMBER", "7", "scalar"),
    ]
    for (name, value, state), token in zip(expected, stream):
        assert token.name == name
        assert token.value == value
        assert token.state == state
# Punctuation and operator token rules (rply matches in registration order).
lg.add('{', '\{')
lg.add('}', '\}')
lg.add('|', '\|')
lg.add(',', ',')
lg.add('DOT', '\.')
lg.add('COLON', ':')
lg.add('MINUS', '-')
lg.add('MUL', '\*')
lg.add('DIV', '/')
lg.add('MOD', '%')
lg.add('(', '\(')
lg.add(')', '\)')
lg.add('NEWLINE', '\n')
# ignore whitespace
lg.ignore('[ \t\r\f\v]+')

lexer = lg.build()


def lex(source):
    # Pre-process *source* before lexing: strip '#' comments.
    comments = r'(#.*)(?:\n|\Z)'   # group 1 = the comment text (newline kept)
    multiline = r'([\s]+)(?:\n)'   # NOTE(review): unused in the visible part
                                   # of this function — presumably consumed
                                   # further down; confirm.
    comment = re.search(comments, source)
    # Delete one comment per iteration; span(1) excludes the trailing
    # newline, so line structure is preserved.
    while comment is not None:
        start, end = comment.span(1)
        assert start >= 0 and end >= 0
        source = source[0:start] + source[end:]  # remove string part that was a comment
        comment = re.search(comments, source)
from datetime import datetime

from rply import LexerGenerator, ParserGenerator

ISO8601_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

lg = LexerGenerator()
# Whitespace and "# ..." comments never reach the parser.
lg.ignore(r"\s+")
lg.ignore(r"\# .*")
# Punctuation first, then literals (most specific first), then bare keys.
for _name, _pattern in (
    ("COLON", r":"),
    ("LCURLY", r"\{"),
    ("RCURLY", r"\}"),
    ("LBRACKET", r"\["),
    ("RBRACKET", r"\]"),
    ("COMMA", r","),
    ("EQUALS", r"="),
    ("BOOLEAN", r"true|false"),
    ("DATETIME", r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z"),
    ("FLOAT", r"-?\d+\.\d+"),
    ("INTEGER", r"-?\d+"),
    ("STRING", r'"(\\"|[^"])*"'),
    ("KEY", r"[a-zA-Z_][a-zA-Z0-9_#\?\.]*"),
):
    lg.add(_name, _pattern)

lexer = lg.build()

pg = ParserGenerator([rule.name for rule in lg.rules], cache_id="libtoml")


@pg.production("main : statements")
def main(p):
    """Top-level rule: the document is its statement list."""
    return p[0]
from rply import LexerGenerator

lg = LexerGenerator()
# Movement primitives first, then program-structure tokens.
for _name, _pattern in (
    ("STEP", r"s"),
    ("TURN_LEFT", r"l"),
    ("TURN_RIGHT", r"r"),
    ("FUNC", r"a|b|c|d|e|f|g|h|i|j|k|m|n|o|p|q|t|u|v|w|x|y|z"),
    ("COLON", r"\:"),
    ("NEWLINE", r"\n+ *\n*"),
    ("NAME", r"[A-Z]"),
    ("NUMBER", r"\d+"),
    ("PLUS", r"\+"),
    ("MINUS", r"\-"),
    ("(", r"\("),
    (")", r"\)"),
    (",", r"\,"),
):
    lg.add(_name, _pattern)
# Spaces and '#' comments are skipped.
lg.ignore(r" +")
lg.ignore(r"\#.*")

# Every declared rule name, for the parser's token list.
TOKENS = [r.name for r in lg.rules]

lexer = lg.build()
from rply import LexerGenerator

lg = LexerGenerator()
# Only three token kinds: the two parens and an identifier catch-all.
lg.add("LPAREN", r"\(")
lg.add("RPAREN", r"\)")
# lg.add('LBRACKET', r'\[')
# lg.add('RBRACKET', r'\]')
# Anything that is not a bracket, brace, whitespace or '#' is a symbol.
lg.add("IDENTIFIER", r"[^()\[\]{}\s#]+")
# '#' comments (to end of line) and whitespace are discarded.
lg.ignore(r"#.*(?=\r|\n|$)")
lg.ignore(r"\s+")

lexer = lg.build()
# Grouping
lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('LBRACE', r'\{')
lg.add('RBRACE', r'\}')
# Identifiers
lg.add('ID', r'[a-zA-Z_][a-zA-Z_0-9]*')
# Literals
lg.add('NUMBER', r'\d+')
#lg.add('QUOTE', r'\"')
lg.ignore(r' ')
lg.ignore(r'\n')
lg.ignore(r'\t')
lg.ignore(r'\#.*')

lexer = lg.build()


def lex(text):
    """Yield every token in *text*.

    BUG FIX: the old loop called ``stream.next()`` until it returned
    ``None``, but rply's LexerStream signals exhaustion by RAISING
    ``StopIteration`` rather than returning ``None``.  Under PEP 479
    (Python 3.7+) a StopIteration escaping a generator becomes a
    ``RuntimeError``, so the old code crashed at end of input.  Iterating
    the stream directly handles termination correctly.
    """
    for tok in lexer.lex(text):
        yield tok
lg.add("ovec", r"\[") lg.add("cvec", r"\]") lg.add("oset", r"#{") lg.add("colon", r":") lg.add("char_nl", r"\\newline") lg.add("char_tab", r"\\tab") lg.add("char_return", r"\\return") lg.add("char_space", r"\\space") lg.add("char", r"\\.") lg.add("ns_symbol", NS_SYMBOL) lg.add("symbol", SYMBOL_RE) lg.add("string", r'"(\\\^.|\\.|[^\"])*"') lg.add("ns_tag", "#" + NS_SYMBOL) lg.add("tag", "#" + SYMBOL_RE) lg.ignore(r"[\s,\n]+") lg.ignore(r";.*\n") lexer = lg.build() pg = ParserGenerator(["boolean", "nil", "float", "number", "olist", "clist", "omap", "cmap", "ovec", "cvec", "oset", "colon", "char_nl", "char_tab", "char_return", "char_space", "char", "symbol", "ns_symbol", "string", "tag", "ns_tag"]) class Char(TaggedValue): def __init__(self, rep): TaggedValue.__init__(self, 'char', rep) NL = Char('\n') TAB = Char('\t')
lg.add("PLUS", r"\+") lg.add("MINUS", r"-") # expo lg.add("POWER",r"\^") # mul div lg.add("MUL", r"\*") lg.add("DIV", r"/") lg.add("MOD",r"%") # () lg.add("LPAREN",r"\(") lg.add("RPAREN",r"\)") #other = ? lg.add("EQU",r"=") # question put print lg.add("QPUT",r"\?") lg.add("DOT",r"\.") lg.add("COMMA",r",") lg.add("MICRO", r"&") # function #procedure lg.ignore(r"\s+") lg.ignore( r"\s*\*[^\n]*") lg.ignore( r"&&[^\n]*") lexer = lg.build() if __name__=='__main__': print [i.name for i in lg.rules] for i in lexer.lex("12.2+0x12f+0.2+12if+ 'IF' \"HELLO\" <> # != ** ^ ! if ass"): print i for i in lexer.lex("[z]"): print i
def __init__(self):
    """Build this instance's lexer and parser for a shell-like grammar."""
    lg = LexerGenerator()
    # (token name, regex) pairs; NAME and PATH each appear twice — rply
    # tries rules in this order, first match wins.
    tokens = [
        ("PROTO", r"[a-zA-Z]+://[^ ]+"),
        ("INT", r"\d+"),
        ("STRING", r"'[^']+'|\"[^\"]+\""),
        ("NAME", r"--colors=always"),
        ("PATH", r"([a-zA-Z0-9/._-]|\\ )+"),
        ("PATH", r"~([a-zA-Z0-9/._-]|\\ )*"),
        ("NAME", r"([a-zA-Z0-9_-]|\\ )+"),
        ("SEMICOLON", r";"),
        ("ENDL", r"\r?\n"),
    ]
    for token in tokens:
        lg.add(*token)
    lg.ignore(r"[ ]+")
    pg = ParserGenerator([x[0] for x in tokens])

    @pg.production("main : statements")
    def main(args):
        # A program is its list of statements.
        return args[0]

    @pg.production("statements : statement")
    def statements_one(args):
        # A single statement wrapped in a statement node.
        expression, = args
        return {
            "type": "statement",
            "content": expression,
        }

    @pg.production("statements : statement separator statements")
    def statements_many(args):
        # Right-recursive statement list: statement ; statements.
        statement, separtor, statements = args
        return {
            "type": "statement_infix_operator",
            "content": {
                "left": {
                    "type": "statement",
                    "content": statement,
                },
                "right": statements,
                "operator": separtor,
            }
        }

    @pg.production("separator : SEMICOLON")
    @pg.production("separator : ENDL")
    def separator(args):
        # don't care
        return args[0].value

    @pg.production("statement : atom")
    def expression_one(args):
        atom, = args
        return [atom]

    @pg.production("statement : atom atoms")
    def expression_many(args):
        atom, atoms = args
        return [atom] + atoms

    @pg.production("atoms : atom")
    def atoms_one(args):
        atom, = args
        return [atom]

    @pg.production("atoms : atom atoms")
    def atoms_many(args):
        atom, atoms = args
        return [atom] + atoms

    @pg.production("atom : NAME")
    @pg.production("atom : INT")
    @pg.production("atom : STRING")
    @pg.production("atom : PATH")
    @pg.production("atom : PROTO")
    def atom(args):
        # Terminal atoms reduce to their lexeme string.
        name, = args
        return name.value

    # Expose the generators and their built artifacts on the instance.
    self.pg = pg
    self.lg = lg
    self.lexer = self.lg.build()
    self.parser = self.pg.build()
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# >>
# LTPyB, 2016
# <<

from rply import LexerGenerator

lg = LexerGenerator()

# BUG FIX: rply tries rules in registration order (first match wins, not
# longest match).  FLOAT must come before INTEGER, otherwise "1.5" lexed
# as INTEGER '1' with a dangling '.5'; likewise OP_EQUAL must come before
# OP_ASSIGNMENT so "==" is not split into two '=' tokens.
lg.add('FLOAT', r'\-?\d+\.\d+')
lg.add('INTEGER', r'\-?\d+')
lg.add('OP_EQUAL', r'==')
lg.add('OP_ASSIGNMENT', r'=')

lg.ignore(r'\s+')    # ignore whitespace
# ignore comments.  NOTE(review): requires a trailing newline, so a
# comment on the file's last line is not ignored — confirm intended.
lg.ignore(r'#.*\n')

lexer = lg.build()
def __init__(self):
    """Compile the module-level ``grammar`` table into an rply scanner."""
    generator = LexerGenerator()
    for rule in grammar:
        # rule[0] is the token name, rule[1] its regex pattern.
        generator.add(rule[0], rule[1])
    generator.ignore(r'\s+')  # whitespace carries no meaning
    self._scanner = generator.build()
from rply import LexerGenerator

lg = LexerGenerator()

lg.add('NUMBER', r'\d+(\.\d+)?')
lg.add('PLUS', r'\+')
lg.add('MINUS', r'-')
lg.add('MUL', r'\*')
lg.add('DIV', r'/')
lg.add('OPEN_PARENS', r'\(')
lg.add('CLOSE_PARENS', r'\)')
lg.add('EQUALS', r'=')
# SYMBOL must not absorb operator / paren / equals characters.
# The previous pattern, r'[^\s0-9][^\s]*', was greedy over any
# non-whitespace, so "a+b" or "x=5" lexed as ONE SYMBOL token and the
# operator tokens above never fired unless the input was space
# separated.  Excluding the operator characters fixes that while still
# allowing any other non-space symbol name (not starting with a digit).
lg.add('SYMBOL', r'[^\s0-9+\-*/()=][^\s+\-*/()=]*')

lg.ignore(r'\s+')

lexer = lg.build()
""":mod:`stencil_lang.matrix.lexer` -- Matrix scanner
"""

from rply import LexerGenerator

from stencil_lang.matrix.tokens import TOKENS, IGNORES

lg = LexerGenerator()

# Register every (name, pattern) rule declared by the tokens module.
# (.iteritems() kept: this is a Python 2 / RPython codebase.)
for name, pattern in TOKENS.iteritems():
    lg.add(name, pattern)

# Patterns the scanner silently skips.
for pattern in IGNORES:
    lg.ignore(pattern)

# This has to be called outside a function because the parser must be generated
# in Python during translation, not in RPython during runtime.
_lexer = lg.build()
"""This interpreter's lexer instance."""


def lex(text):
    """Scan text using the generated lexer.

    :param text: text to lex
    :type text: :class:`str`
    :return: parsed stream
    :rtype: :class:`rply.lexer.LexerStream`
    """
    return _lexer.lex(text)
# Reader macros.  UNQUOTESPLICE ("~@") must be registered before
# UNQUOTE ("~"): rply tries rules in insertion order, so the longer
# prefix has to win.  `lg` and `end_quote` are defined earlier in this
# module (outside this view).
lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
lg.add('UNQUOTE', r'~%s' % end_quote)
lg.add('HASHBANG', r'#!.*[^\r\n]')  # shebang line, e.g. "#!/usr/bin/env hy"
lg.add('HASHREADER', r'#.')         # '#' dispatch plus one following character

# A regexp which matches incomplete strings, used to support
# multi-line strings in the interpreter
partial_string = r'''(?x)
    (?:u|r|ur|ru)? # prefix
    " # start string
    (?:
        | [^"\\] # non-quote or backslash
        | \\. # or escaped single character
        | \\x[0-9a-fA-F]{2} # or escaped raw character
        | \\u[0-9a-fA-F]{4} # or unicode escape
        | \\U[0-9a-fA-F]{8} # or long unicode escape
    )* # one or more times
'''

# A complete STRING is the partial pattern plus the closing quote.
# STRING is registered first, so PARTIAL_STRING only fires when the
# closing quote is missing (an unterminated, possibly multi-line string).
lg.add('STRING', r'%s"' % partial_string)
lg.add('PARTIAL_STRING', partial_string)

# Identifier: anything that is not a delimiter, quote, whitespace or ';'.
lg.add('IDENTIFIER', r'[^()\[\]{}\'"\s;]+')

# Line comments: the lookahead (instead of consuming the newline)
# also matches a comment on the very last line with no trailing newline.
lg.ignore(r';.*(?=\r|\n|$)')
lg.ignore(r'\s+')

lexer = lg.build()
from rply import LexerGenerator lexer_gen = LexerGenerator() lexer_gen.ignore(r"([\s\f\t\n\r\v]+)|#.*$") def get_tokens(): return [ # Keywords ("IF", r"if\b"), ("PRINT", r"print\b"), ("FN", r"fn\b"), ("WHILE", r"while\b"), ("RETURN", r"return\b"), ("LET", r"let\b"), ("BREAK", r"break\b"), ("CONTINUE", r"continue\b"), ("ASYNC", r"async\b"), # Channel Operators ("CHAN_OUT", r"<:"), ("CHAN_IN", r"<-"), # Arithmetic Operators ("MUL", r"\*"), ("DIV", r"/"), ("MOD", r"%"), ("PLUS", r"\+"), ("MINUS", r"-"), # Logical Operators ("AND", r"and\b"), ("OR", r"or\b"),
from rply import ParserGenerator, LexerGenerator
from rply.token import BaseBox
import AST, Visitor, Environment, sys

lg = LexerGenerator()

# skip these
lg.ignore(r" ")
lg.ignore(r"\r")
lg.ignore(r"\t")
lg.ignore(r"\n")
# Line comments: "//" through end of line.  The previous pattern here,
# '// (~["\n","\r"])* ("\r")? "\n"', was JavaCC grammar syntax pasted in
# verbatim; as a Python regex it never matched a real "//" comment, so
# comments caused lexing errors.  The trailing newline itself is already
# handled by the ignore rules above.
lg.ignore(r"//[^\r\n]*")

# built in functions
# NOTE: longer names are registered before their prefixes where it
# matters (LISTP before LIST), since rply tries rules in insertion order.
lg.add("FIRST", r"first")
lg.add("REST", r"rest")
lg.add("INSERT", r"insert")
lg.add("LISTP", r"listp")
lg.add("LIST", r"list")
lg.add("EMPTYP", r"emptyp")
lg.add("PAIRP", r"pairp")
lg.add("EQUALP", r"equalp")
lg.add("LENGTH", r"length")
lg.add("NUMBERP", r"numberp")
lg.add("EXIT", r"exit")

# keywords
lg.add("DEFUNC", r"defunc")
lg.add("DEFCLASS", r"defclass")
lg.add("METHOD", r"method")
lg.add("FUNC", r"func")
from rply import LexerGenerator

lg = LexerGenerator()

lg.add("PLUS", r"\+")
lg.add("MINUS", r"-")
# Fixed: the MUL and DIV regexes were swapped (MUL matched "/" and DIV
# matched "*"), so every multiplication token actually carried a slash
# and vice versa.  MUL now matches "*" and DIV matches "/".
lg.add("MUL", r"\*")
lg.add("DIV", r"/")
lg.add("NUMBER", r"\d+")

lg.ignore(r"\s+")

lexer = lg.build()
import re import itertools from collections import deque from rply import ParserGenerator, LexerGenerator from graphextractor.rfc3987 import UrlPattern from graphextractor.flattened import flattened __all__ = ['TweetLexer', 'TweetParser'] lex = LexerGenerator() lex.ignore(ur'(?:[,;\s]+|\band\b|\bor\b)+') lex.add(u'URL', UrlPattern) lex.add(u'BTHASH', ur'#betterthan') lex.add(u'IBTHASH', ur'#isbetterthan') lex.add(u'HASHTAG', ur'#[a-zA-Z0-9_]+') lex.add(u'MENTION', ur'@[a-zA-Z0-9_]+') lex.add(u'FOR', ur'(for|FOR|For)') lex.add(u'WORD', ur'[\w]+') pg = ParserGenerator([u'URL', u'BTHASH', u'IBTHASH', u'HASHTAG', u'MENTION', u'FOR', u'WORD' ], cache_id=u'graphextractor.tweetparser') @pg.production("betterthan : words URL bthash URL topics words") def betterthan(p):
# `lg` and `end_quote` are defined earlier in this module (outside this view).
lg.add('RCURLY', r'\}')
lg.add('QUOTE', r'\'%s' % end_quote)
lg.add('QUASIQUOTE', r'`%s' % end_quote)
# UNQUOTESPLICE ("~@") must come before UNQUOTE ("~"): rply tries rules
# in insertion order, so the longer prefix has to win.
lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
lg.add('UNQUOTE', r'~%s' % end_quote)
lg.add('HASHBANG', r'#!.*[^\r\n]')  # shebang line
lg.add('STRING', r'''(?x)
    (?:u|r|ur|ru)? # prefix
    " # start string
    (?:
        | [^"\\] # non-quote or backslash
        | \\. # or escaped single character
        | \\x[0-9a-fA-F]{2} # or escaped raw character
        | \\u[0-9a-fA-F]{4} # or unicode escape
        | \\U[0-9a-fA-F]{8} # or long unicode escape
    )* # one or more times
    " # end string
''')
lg.add('IDENTIFIER', r'[^()\[\]{}\'"\s;]+')

# Line comments.  The previous pattern, r';.*[\r\n]+', REQUIRED at least
# one newline after the comment, so a comment on the final line of input
# (no trailing newline) raised a LexingError.  Using a lookahead instead
# of consuming the newline also works at end-of-input; the newline itself
# is then swallowed by the \s+ ignore rule below.
lg.ignore(r';.*(?=\r|\n|$)')
lg.ignore(r'\s+')

lexer = lg.build()
lg = LexerGenerator()

# Characters allowed in a symbol; may not START with a digit, ':' or '#'
# but may contain them after the first character.
SYMBOL_RE = r"[<>\.\*\/\+\!\-\_\?\$%&=a-zA-Z][<>\.\*\+\!\-\_\?\$%&=a-zA-Z0-9:#]*"

# rply tries rules in insertion order: the literal keywords come before
# the catch-all symbol rule, and float before number so "1.5" is not
# split into "1" + ".5".
# NOTE(review): "nil"/"true"/"false" have no word-boundary guard, so an
# input like "nile" would lex as nil + symbol "e" — presumably accepted
# for this toy lisp; confirm before tightening.
lg.add("nil", r"nil")
lg.add("true", r"true")
lg.add("false", r"false")
lg.add("float", r"\d+\.\d+")
lg.add("number", r"\d+")
lg.add("olist", r"\(")   # open list
lg.add("clist", r"\)")   # close list
lg.add("symbol", SYMBOL_RE)
# Double-quoted string with backslash escapes (incl. "\^." control escapes).
lg.add("string", r'"(\\\^.|\\.|[^\"])*"')

lg.ignore(r"[\s,\r\n\t]+")  # whitespace; commas are whitespace, lisp-style
lg.ignore(r";.*\n")         # line comments

lexer = lg.build()

pg = ParserGenerator(["true", "false", "nil", "float", "number",
                      "olist", "clist", "symbol", "string"])


class State(object):
    # Empty parser state object passed through parsing.
    def __init__(self):
        pass


class ValueList(BaseBox):
    # BaseBox wrapper holding a list of parsed values (rply requires all
    # production results to share a BaseBox-derived type).
    def __init__(self, value):
        self.value = value
# `lg` is defined earlier in this module (outside this view).
# Two-character operators must be registered before their one-character
# prefixes (OPLEQ '<=' before OPLT '<', OPEQ '==' before EQUALS '=',
# OPNEQ '!=' before BANG '!'): rply tries rules in insertion order.
lg.add('OPLEQ', r'<=')
lg.add('OPGEQ', r'>=')
lg.add('OPEQ', r'==')
lg.add('OPNEQ', r'!=')
lg.add('OPLT', r'<')
lg.add('OPGT', r'>')
lg.add('BANG', r'!')
lg.add('EQUALS', r'=')
lg.add('SEMI', r';')
lg.add('AT', r'@')
lg.add('AMP', r'\&')
lg.add('BACKSLASH', r'\\')
# One NEWLINE token per run of line breaks, absorbing the indentation
# (tabs/spaces) that follows each break.
lg.add('NEWLINE', r'(?:(?:\r?\n)[\t ]*)+')

lg.ignore(r'[ \t\f\v]+')  # intra-line whitespace (newlines are significant)
lg.ignore(r'#.*(?:\n|\r|\r\n|\n\r|$)')  # comment

# Secondary keyword lexer.  All patterns are anchored with ^...$ so each
# only matches a whole input string — presumably this lexer is applied to
# individual words/identifier tokens, not to full source text (TODO
# confirm against the caller, which is outside this view).
klg = LexerGenerator()
klg.add('IMPORT', r'^import$')
klg.add('MODULE', r'^module$')
klg.add('REQUIRE', r'^require$')
klg.add('EXPORT', r'^export$')
klg.add('VAR', r'^var$')
klg.add('LET', r'^let$')
klg.add('DEF', r'^def$')
klg.add('DEFM', r'^defm$')
klg.add('FN', r'^fn$')
klg.add('TRUE', r'^True$')
klg.add('FALSE', r'^False$')
klg.add('DOC', r'^doc:$')
'MINUS': r'-', 'MUL': r'\*', 'NUMBER_SEP': r'/', 'EXPR_OPEN': r'\(', 'EXPR_CLOSE': r'\)', 'AND': r'&', 'OR': r'\|', 'NOT': r'!', 'EQ': r'\?\s*=', 'GT': r'>', 'LT': r'<', 'BOWL': r':', 'BOWL_OPEN': r'{', 'BOWL_CLOSE': r'}', 'NOODLE_OPEN': r'\[', 'NOODLE_SEP': r';', 'NOODLE_CLOSE': r'\]', 'ASSIGN': r'=', 'DENO': r'\^', 'MEM': r'@', } lg = LexerGenerator() for name, regex in op_map.items(): lg.add(name, regex) lg.ignore('\s+') lg.ignore('~\s*#((?!#~).)*#\s*~') lexer = lg.build()
"""Lexer""" from rply import LexerGenerator from .tokens import TOKENS, IGNORES lg = LexerGenerator() for name, rule in TOKENS.iteritems(): lg.add(name, rule) for rule in IGNORES: lg.ignore(*rule) # This has to be called outside a function because the parser must be generated # in Python during translation, not in RPython during runtime. lexer = lg.build() def lex(text): """Scan text using the generated lexer. :param text: text to lex :type text: :class:`str` :return: parsed stream :rtype: :class:`rply.lexer.LexerStream` """