def test_ignore_recursion(self):
    """A very long run of ignored-only input must lex cleanly (no recursion blowup)."""
    generator = LexerGenerator()
    generator.ignore(r"\s")
    lexer = generator.build()
    assert list(lexer.lex(" " * 2000)) == []
def test_arithmetic(self):
    """End-to-end lexer + parser test: parse and evaluate "3*4+5".

    Also exercises the precedence table (later entries bind tighter, so
    TIMES binds more tightly than PLUS).
    """
    lg = LexerGenerator()
    lg.add("NUMBER", r"\d+")
    lg.add("PLUS", r"\+")
    lg.add("TIMES", r"\*")
    pg = ParserGenerator(["NUMBER", "PLUS", "TIMES"], precedence=[
        ("left", ["PLUS"]),
        ("left", ["TIMES"]),
    ])

    @pg.production("main : expr")
    def main(p):
        return p[0]

    @pg.production("expr : expr PLUS expr")
    @pg.production("expr : expr TIMES expr")
    def expr_binop(p):
        # Dispatch on the operator token's text; operands are BoxInt nodes.
        return BoxInt({
            "+": operator.add,
            "*": operator.mul
        }[p[1].getstr()](p[0].getint(), p[2].getint()))

    @pg.production("expr : NUMBER")
    def expr_num(p):
        return BoxInt(int(p[0].getstr()))

    lexer = lg.build()
    parser = pg.build()
    # The parse result is a truthy BoxInt; the exact value is not asserted.
    assert parser.parse(lexer.lex("3*4+5"))
def test_regex_flags(self):
    """re flags passed to add() are honoured: DOTALL lets '.' span newlines."""
    generator = LexerGenerator()
    generator.add("ALL", r".*", re.DOTALL)
    lexer = generator.build()
    tokens = lexer.lex("test\ndotall")
    token = tokens.next()
    assert token.source_pos.lineno == 1
    assert token.source_pos.colno == 1
    assert token.getstr() == "test\ndotall"
    with raises(StopIteration):
        tokens.next()
def test_position(self):
    """lineno/colno are tracked correctly across ignored whitespace and newlines."""
    generator = LexerGenerator()
    generator.add("NUMBER", r"\d+")
    generator.add("PLUS", r"\+")
    generator.ignore(r"\s+")
    lexer = generator.build()

    # Each case: source text and the (lineno, colno) of each emitted token.
    cases = [
        ("2 + 3", [(1, 1), (1, 3), (1, 5)]),
        ("2 +\n 37", [(1, 1), (1, 3), (2, 5)]),
    ]
    for source, positions in cases:
        stream = lexer.lex(source)
        for expected_lineno, expected_colno in positions:
            token = stream.next()
            assert token.source_pos.lineno == expected_lineno
            assert token.source_pos.colno == expected_colno
        with raises(StopIteration):
            stream.next()
def test_basic_lexer(self):
    """The lexer works inside the translated harness (self.run passes only ints)."""
    generator = LexerGenerator()
    generator.add("NUMBER", r"\d+")
    generator.add("PLUS", r"\+")
    lexer = generator.build()

    def f(n):
        # Lex "n+n+n" and sum the three numbers; negative return values
        # encode which check failed.
        tokens = lexer.lex("%d+%d+%d" % (n, n, n))
        idx = 0
        total = 0
        while idx < 5:
            token = tokens.next()
            if idx % 2 == 0:
                if token.name != "NUMBER":
                    return -1
                total += int(token.value)
            else:
                if token.name != "PLUS":
                    return -2
                if token.value != "+":
                    return -3
            idx += 1
        # Exhausted streams of this style yield None at the end.
        if tokens.next() is not None:
            return -4
        return total

    assert self.run(f, [14]) == 42
def test_repr(self):
    """str() of a lexer stream works both before and in the middle of iteration."""
    generator = LexerGenerator()
    generator.add("NUMBER", r"\d+")
    generator.add("PLUS", r"\+")
    generator.ignore(r"\s+")
    stream = generator.build().lex("2 + 3")
    assert str(stream) is not None
    token = stream.next()
    assert token.name == "NUMBER"
    assert token.value == "2"
    assert str(stream) is not None
    token = stream.next()
    assert token.name == "PLUS"
def test_regex_flags_ignore(self):
    """re flags are honoured for ignore() rules too: everything is skipped."""
    generator = LexerGenerator()
    generator.add("ALL", r".*", re.DOTALL)
    generator.ignore(r".*", re.DOTALL)
    stream = generator.build().lex("test\ndotall")
    with raises(StopIteration):
        stream.next()
def test_error(self):
    """Unlexable input raises LexingError whose repr carries the source position."""
    generator = LexerGenerator()
    generator.add("NUMBER", r"\d+")
    generator.add("PLUS", r"\+")
    stream = generator.build().lex('fail')
    with raises(LexingError) as excinfo:
        stream.next()
    assert 'SourcePosition(' in repr(excinfo.value)
def test_ignore(self):
    """Ignored whitespace is skipped but still counted in token idx offsets."""
    generator = LexerGenerator()
    generator.add("NUMBER", r"\d+")
    generator.add("PLUS", r"\+")
    generator.ignore(r"\s+")
    stream = generator.build().lex("2 + 3")
    for expected_name, expected_value in [("NUMBER", "2"), ("PLUS", "+"), ("NUMBER", "3")]:
        token = stream.next()
        assert token.name == expected_name
        assert token.value == expected_value
    # The last token ("3") sits at absolute index 4 of the source string.
    assert token.source_pos.idx == 4
    with raises(StopIteration):
        stream.next()
def test_newline_position(self):
    """colno resets to 1 on the line following a NEWLINE token."""
    generator = LexerGenerator()
    generator.add("NEWLINE", r"\n")
    generator.add("SPACE", r" ")
    stream = generator.build().lex(" \n ")
    for expected_lineno, expected_colno in [(1, 1), (1, 2), (2, 1)]:
        token = stream.next()
        assert token.source_pos.lineno == expected_lineno
        assert token.source_pos.colno == expected_colno
def test_simple(self):
    """Basic tokenization of "2+3"; the exhausted stream returns None."""
    generator = LexerGenerator()
    generator.add("NUMBER", r"\d+")
    generator.add("PLUS", r"\+")
    stream = generator.build().lex("2+3")
    for expected_name, expected_value in [("NUMBER", "2"), ("PLUS", "+"), ("NUMBER", "3")]:
        token = stream.next()
        assert token.name == expected_name
        assert token.value == expected_value
    # The last token ("3") sits at absolute index 2 of the source string.
    assert token.source_pos.idx == 2
    # This stream style signals exhaustion with None rather than StopIteration.
    assert stream.next() is None
def __init__(self):
    # Generator that token rules are registered on; calling build() on it
    # later yields the actual lexer.
    self.lexer = LexerGenerator()
from rply import LexerGenerator

lg = LexerGenerator()

# Single-letter movement tokens.
lg.add("STEP", r"s")
lg.add("TURN_LEFT", r"l")
lg.add("TURN_RIGHT", r"r")
# Any lower-case letter not claimed above names a function.
lg.add("FUNC", r"a|b|c|d|e|f|g|h|i|j|k|m|n|o|p|q|t|u|v|w|x|y|z")
lg.add("COLON", r"\:")
lg.add("NEWLINE", r"\n+ *\n*")
lg.add("NAME", r"[A-Z]")
lg.add("NUMBER", r"\d+")
lg.add("PLUS", r"\+")
lg.add("MINUS", r"\-")
# Punctuation tokens are named after the literal character.
lg.add("(", r"\(")
lg.add(")", r"\)")
lg.add(",", r"\,")
lg.ignore(r" +")    # spaces between tokens
lg.ignore(r"\#.*")  # '#' comments to end of line
# NOTE(review): lg.rules is an internal attribute of LexerGenerator; this
# presumably relies on each rule object exposing .name — confirm against
# the rply version in use.
TOKENS = [r.name for r in lg.rules]
lexer = lg.build()
'PARAM_NAME': r'((in|out|inout)) (\D\w+) ((int|varchar|double|bit|boolean))', 'L_PAREN': r'\(+', 'R_PAREN': r'\)+', 'COMMA': ',+', 'SELECT': r'(select)+', 'FROM': r'(from)+', 'ALL': r'(\*)+', 'UPDATE': r'(update)+', 'VALUES': r'(values)+', 'SEMI': r';+', 'WHERE': r'(where)+', 'SET': r'(set)+', 'OPERATOR': r'(\=|\<\=|\>\=|\>|\<|\<\>|\!\=)+', 'ARITHMETICAL': r'(\+|\-|\/)+', 'SINGLE_QUOTE': '\'+', 'LT': '<+', 'NUMBER': r'\d+', 'ALIAS': r'(as)+', 'OBJECT_NAME': r'[a-z_]+', } lg = LexerGenerator() lg.ignore(r'\s') for tok_name, tok_val in TOKENS.items(): lg.add(tok_name, tok_val)
from rply import ParserGenerator, LexerGenerator

lg = LexerGenerator()

# Numbers: the integer part and the decimal part are separate tokens.
lg.add("INTEGER", r"-?0|([1-9][0-9]*)")
lg.add("DECIMAL", r"\.[0-9]+")
_id = r"[A-Za-z][A-Za-z0-9_]*"
# KEYWORD must precede IDENTIFIER: 'foo:' is a keyword, bare 'foo' an identifier.
lg.add("KEYWORD", _id + r":")
lg.add("IDENTIFIER", _id)
lg.add("SYMBOL", r":" + _id)
_comment = r"[ \t]*#[^\n]*"
lg.add("COMMENT", _comment)
# Bracketing tokens absorb adjacent layout whitespace.
lg.add("LPAREN", r"\([ \t\n]*")
lg.add("RPAREN", r"[ \t\n]*\)")
# TODO: Maybe clear this up to be prettier.
lg.add("PREFACE", r"<[A-Za-z0-9_:@, \t\n]+>[ \t\n]*")
lg.add("LBRACK", r"{[ \t\n]*")
lg.add("RBRACK", r"[ \t\n]*}")
lg.add("VERT", r"\|[ \t\n]*")
lg.add("LSQ", r"\[[ \t\n]*")
lg.add("RSQ", r"[ \t\n]*\]")
# Line continuation: backslash (optionally followed by a comment) + newline.
lg.add("CONT", r"[ \t]+\\(" + _comment + r")?\n[ \t]*")
lg.add("SWS", r"[ \t]+")  # significant whitespace
lg.add("COMMA", ",[ \t\n]*")
lg.add("TERMINAL", r"[ \t]*\n[ \t\n]*")  # statement terminator: newline run
# TODO: Improve string parsing (no escape sequences supported yet).
lg.add("STRING", r"\"[^\"]*\"")
# TODO: Finalize operators
lg.add("OPERATOR", r"[+\-=*/\^]")
def lex(): lg = LexerGenerator() # build up a set of token names and regexes they match lg.add('WHITESPACE', r'[ ]+') lg.add('INTEGER', r'-?\d+') lg.add('IF', r'if(?!\w)') lg.add('ELSE', r'else(?!\w)') lg.add('WHILE', r'while(?!\w)') lg.add('FOR', 'for i in range') lg.add('FUNCTION', r'def(?!\w)') lg.add('COLON', ':') lg.add('OPENPAREN', '\(') lg.add('CLOSEPAREN', '\)') lg.add('NEWLINE', r'\n') lg.add('IMPORT', 'from karel import \*') lg.add('BEGIN', 'begin_karel_program') lg.add('END', 'end_karel_program') lg.add('NOT', r'not(?!\w)') #commands lg.add('MOVE', 'move') lg.add('LEFTTURN', 'turn_left') lg.add('PUTBEEPER', 'put_beeper') lg.add('PICKBEEPER', 'pick_beeper') #conditions lg.add('FACENORTH', 'facing_north') lg.add('FACESOUTH', 'facing_south') lg.add('FACEWEST', 'facing_west') lg.add('FACEEAST', 'facing_east') lg.add('FRONTCLEAR', 'front_is_clear') lg.add('LEFTCLEAR', 'left_is_clear') lg.add('RIGHTCLEAR', 'right_is_clear') lg.add('PRESENT', 'beepers_present') lg.add('INBAG', 'beepers_in_bag') lg.add('NOTCHECK', 'not') lg.add('IDENTIFIER', '[a-zA-Z_][a-zA-Z0-9_]*') # ignore whitespace lg.ignore('#.*\n') lg.ignore('"""(.|\n)*?"""') lg.ignore("'''(.|\n)*?'''") lexer = lg.build() return lexer
class Lexer():
    """Registers the language's token rules and builds the rply lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Print
        self.lexer.add('PRINT', r'print')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Semi Colon
        self.lexer.add('SEMI_COLON', r'\;')
        # Operators
        self.lexer.add('PLUS', r'\+')
        self.lexer.add('MINUS', r'\-')
        # BUG FIX: the pattern was r'\\' (a literal backslash), so the
        # division operator '/' could never be lexed.
        self.lexer.add('DIVIDE', r'/')
        self.lexer.add('MULTIPLY', r'\*')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Ignore spaces
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register all token rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Registers token rules (Arduino-like DSL) and builds the rply lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Comma
        self.lexer.add('COMMA', r'\,')
        # Point
        self.lexer.add('POINT', r'\.')
        # Semi colon
        self.lexer.add('SEMI_COLON', r'\;')
        # Equal symbol
        self.lexer.add('EQUAL', r'\=')
        # Declare
        self.lexer.add('DECLARE', r'DECLARE')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Ignore spaces
        self.lexer.ignore('\s+')
        # Keywords (functions / statements)
        self.lexer.add('IMPORT', r'IMPORT')
        self.lexer.add('ENDDO', r'Enddo')
        self.lexer.add('CALL', r'Call')
        self.lexer.add('INCLI', r'Inclination')
        self.lexer.add('OBJ', r'Object')
        self.lexer.add('SOUND', r'Sound')
        self.lexer.add('INC', r'Inc')
        self.lexer.add('DEC', r'Dec')
        self.lexer.add('DOW', r'Dow')
        self.lexer.add('BRIGHT', r'Brightness')
        self.lexer.add('VIB', r'Vibration')
        self.lexer.add('MOV', r'Move')
        self.lexer.add('TEMP', r'Temperature')
        # Comment marker
        self.lexer.add('COMMENT', r'//')
        self.lexer.add('TIMES', r'Times')
        # For cycle
        self.lexer.add('FOR', r'For')
        self.lexer.add('FEND', r'Fend')
        # Case statement
        self.lexer.add('CASE', r'CASE')
        self.lexer.add('WHEN', r'WHEN')
        self.lexer.add('THEN', r'THEN')
        self.lexer.add('ELSE', r'ELSE')
        self.lexer.add('END_CASE', r'END CASE')
        # Procedure / program structure
        self.lexer.add('PROCEDURE', r'Procedure')
        self.lexer.add('BEGIN', r'begin')
        self.lexer.add('END', r'end')
        self.lexer.add('MAIN', r'Main')
        # Text
        # BUG FIX: the pattern was '[a-zA-Z0-9/]*', which matches the empty
        # string; a zero-width rule makes the lexer loop without consuming
        # input on any otherwise-unmatched character.
        self.lexer.add('TEXT', '[a-zA-Z0-9/]+')

    def get_lexer(self):
        """Register all token rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
# -*- coding:utf-8 -*- from rply import LexerGenerator from rply.token import BaseBox lg = LexerGenerator() # Add takes a rule name, and a regular expression that defines the rule. #lg.add("COMMENT", r"\s*\*[^\n]*") # ([0-9]+)|([0-9]*\.[0-9]+)|(0x[0-9A-Fa-f]+) lg.add("DATE", r"0d[0-9]{8}") lg.add("NUMBER", r"(0x[0-9A-Fa-f]+)|([0-9]*\.[0-9]+)|([0-9]+)") #if lg.add("IF",r"if|IF") #lg.add("THEN",r"then|THEN") lg.add("ELSE",r"ELSE|else") lg.add("ELSEIF","ELSEIF|elseif") lg.add("ENDIF","endif|ENDIF") # do lg.add("DO", "do|DO") # do while lg.add("WHILE",r"while|WHILE") # end do lg.add("ENDDO",r"ENDDO|enddo") # do case lg.add("CASE",r"case|CASE") lg.add("ENDCASE", r"ENDCASE|endcase") # otherwise lg.add("OTHERWISE",r"otherwise|OTHERWISE") # exit lg.add("EXIT",r"exit|EXIT") # for, for each lg.add("FOR",r"for|FOR") lg.add("TO", r"to|TO")
class Lexer:
    """Tokenizes a source string; rules are registered in match-priority order."""

    def __init__(self, code):
        self.code = code
        self.lg = LexerGenerator()
        self.lg.ignore(r'\s+')
        self.lg.add('COMMENT', r';')
        self.lg.add('STRING', r'".*"')
        self.lg.add('STRING', r'\'.*\'')
        self.lg.add('IF', r'if')
        self.lg.add('ELSE', r'else')
        self.lg.add('LPAREN', r'\(')
        self.lg.add('RPAREN', r'\)')
        self.lg.add('LBRACE', r'\{')
        self.lg.add('RBRACE', r'\}')
        self.lg.add('IS_EQUAL_TO', r'==')
        self.lg.add('EQUAL', r'=')
        self.lg.add('GREATER_EQUAL', r'>=')
        self.lg.add('LESSER_EQUAL', r'<=')
        self.lg.add('LESSER', r'<')
        self.lg.add('GREATER', r'>')
        # BUG FIX: the PLUS and MINUS patterns were swapped ('+' was lexed
        # as MINUS and '-' as PLUS).
        self.lg.add('PLUS', r'\+')
        self.lg.add('MINUS', r'-')
        self.lg.add('COMMA', r',')
        self.lg.add('NUMBER', r'\d+')
        self.lg.add('PRINT', r'print')
        self.lg.add('NAME', r'[a-zA-Z_][a-zA-Z0-9_]*')
        self.lexer = self.lg.build()

    def lex(self):
        """Tokenize self.code and return the full list of tokens."""
        tokens = []
        stream = self.lexer.lex(self.code)
        while True:
            try:
                tokens.append(stream.next())
            except StopIteration:
                break
        return tokens
class Lexer():
    """Token definitions for the toy language; get_lexer() builds the rply lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # (name, pattern) pairs in priority order — rply matches the first
        # rule whose regex succeeds.
        rules = (
            ('OUT', r'out'),              # print statement
            ('IF', r'if'),
            ('OPEN_PAREN', r'\('),
            ('CLOSE_PAREN', r'\)'),
            ('OPEN_BRACE', r'\{'),
            ('CLOSE_BRACE', r'\}'),
            ('SEMI_COLON', r'\;'),
            ('SUM', r'\+'),
            ('SUB', r'\-'),
            ('MULTIPLY', r'\*'),
            ('NUMBER', r'\d+'),
        )
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        # Whitespace carries no meaning.
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Token definitions; keywords come in upper- and lower-case spellings."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # (name, pattern) pairs in priority order — rply matches the first
        # rule that succeeds, so these preserve the original registration order.
        rules = (
            ('NUMBER', r'\d+'),
            # Arithmetic operators
            ('PLUS', r'\+'),
            ('MINUS', r'-'),
            ('MUL', r'\*'),
            ('DIV', r'/'),
            # Comparison operators
            ('BIGGER', r'\>'),
            ('SMALLER', r'\<'),
            ('EQUAL', r'\='),
            ('DIFF', r'\!='),
            ('OPEN_PARENS', r'\('),
            ('CLOSE_PARENS', r'\)'),
            ('OPEN_BRACKETS', r'\{'),
            ('CLOSE_BRACKETS', r'\}'),
            ('SEMI_COLON', r'\;'),
            ('QUOTE', r'\"'),
            # Variables
            ('ATTRIBUTION', r':='),
            ('VAR', r'var'),
            # Keywords: both spellings map onto the same token name.
            ('ELSE', r'SENAO'),
            ('ELSE', r'senao'),
            ('IF', r'SE'),
            ('IF', r'se'),
            ('PRINT', r'PRINT'),
            ('PRINT', r'print'),
            ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9]*'),
        )
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer:
    """Lexer for the Turkish-keyword language; build() returns the rply lexer."""

    def __init__(self):
        self.lg = LexerGenerator()
        self.lg.ignore(r"\s+")
        self.lg.ignore(r"//.*")  # line comments
        # Keywords
        self.lg.add("PRINT", r"yazdır")
        self.lg.add("LOOP", r"tekrar")
        self.lg.add("READ", r"oku")
        self.lg.add("IF", r"ise")
        self.lg.add("ELSE", r"değil")
        # Multi-character operators must precede their single-char prefixes.
        self.lg.add("==", r"==")
        self.lg.add("!=", r"!=")
        self.lg.add("<=", r"<=")
        self.lg.add(">=", r">=")
        self.lg.add(">", r">")
        self.lg.add("<", r"<")
        self.lg.add("+=", r"\+=")
        self.lg.add("-=", r"\-=")
        self.lg.add("=", r"=")
        # BUG FIX: the pattern was r"'.*'" — '.' also matches the quote
        # character, so two strings on one line ('a' + 'b') were greedily
        # merged into a single STRING token. A negated class stops at the
        # first closing quote.
        self.lg.add("STRING", r"'[^']*'")
        self.lg.add("FLOAT", r"\d+(\.\d+)")  # r"[-]?\d+(\.\d+)"
        self.lg.add("INTEGER", r"\d+")  # [-]?\d+
        self.lg.add("BOOLEAN", r"(doğru|yanlış)")
        self.lg.add("ADD", r"\+")
        self.lg.add("SUB", r"-")
        self.lg.add("MUL", r"\*")
        self.lg.add("DIV", r"\/")
        self.lg.add("MOD", r"\%")
        self.lg.add("(", r"\(")
        self.lg.add(")", r"\)")
        self.lg.add("[", r"\[")
        self.lg.add("]", r"\]")
        self.lg.add("{", r"\{")
        self.lg.add("}", r"\}")
        self.lg.add(",", r",")
        self.lg.add("IDENTIFIER", r"[_\w]*[_\w0-9]+")

    def build(self):
        """Return the built rply lexer."""
        return self.lg.build()
class Lexer():
    """Registers Pascal-like token rules; get_lexer() builds the rply lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Keywords — registered before ID so reserved words win over
        # identifiers (rply matches rules in registration order).
        self.lexer.add('PRINT', r'print')
        self.lexer.add('PROGRAM', r'program')
        self.lexer.add('TYPE', r'type')
        self.lexer.add('VAR', r'var')
        self.lexer.add('ARRAY', r'array')
        self.lexer.add('OF', r'of')
        self.lexer.add('FOR', r'for')
        self.lexer.add('TO', r'to')
        self.lexer.add('IF', r'if')
        self.lexer.add('ELSE', r'else')
        self.lexer.add('THEN', r'then')
        self.lexer.add('AND', r'and')
        self.lexer.add('OR', r'or')
        self.lexer.add('NOT', r'not')
        self.lexer.add('BEGIN', r'begin')
        self.lexer.add('END', r'end')
        # NOTE(review): ITYPE/FTYPE/BTYPE are reused below for numeric
        # literals too — the keyword and the literal share one token name;
        # presumably the parser relies on that. Confirm it is intended.
        self.lexer.add('ITYPE', r'integer')
        self.lexer.add('FTYPE', r'double')
        self.lexer.add('DO', r'do')
        self.lexer.add('BTYPE', r'boolean')
        self.lexer.add('FUNCTION', r'function')
        self.lexer.add('BOOLEAN', r'(true|false)')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Brackets
        self.lexer.add('OPEN_BRACKET', r'\[')
        self.lexer.add('CLOSE_BRACKET', r'\]')
        # Semi Colon
        self.lexer.add('SEMI_COLON', r'\;')
        # Assignment — ':=' must precede the bare ':'.
        self.lexer.add('ASSIGNMENT', r'\:\=')
        self.lexer.add('COLON', r'\:')
        # Delimiters — '..' must precede the bare '.'.
        self.lexer.add('COMMA', r'\,')
        self.lexer.add('RANGE', r'\.\.')
        self.lexer.add('DOT', r'\.')
        # Operators
        self.lexer.add('EQUAL', r'\=')
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'\/')
        self.lexer.add('POINTER', r'\^')
        # '<>' must precede '<' and '>'.
        self.lexer.add('NEQUAL', r'\<\>')
        self.lexer.add('LESS', r'\<')
        self.lexer.add('MORE', r'\>')
        # Numeric literals: float form first so "1.5" is not split.
        self.lexer.add('FTYPE', r'[0-9]+\.[0-9]+')
        self.lexer.add('ITYPE', r'\d+')
        # Identifiers: catch-all, registered last.
        self.lexer.add('ID', r'\w+')
        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and return the built rply lexer."""
        self._add_tokens()
        return self.lexer.build()
from rply import LexerGenerator

lg = LexerGenerator()
# Numbers may carry an optional decimal part.
lg.add('NUMBER', r'\d+(\.\d+)?')
lg.add('PLUS', r'\+')
lg.add('MINUS', r'-')
lg.add('MUL', r'\*')
lg.add('DIV', r'/')
lg.add('OPEN_PARENS', r'\(')
lg.add('CLOSE_PARENS', r'\)')
lg.add('EQUALS', r'=')
# Catch-all symbol: any non-space run not starting with a digit. The rules
# above win only at the start of a token (rply tries rules in add order).
# NOTE(review): a run like "a+b" lexes as ONE SYMBOL token because the
# operators are embedded in the [^\s]* tail — confirm this is intended.
lg.add('SYMBOL', r'[^\s0-9][^\s]*')
lg.ignore(r'\s+')

lexer = lg.build()
class Lexer():
    """Registers token rules for the language; get_lexer() builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Keywords use (?!\w) so they do not match identifier prefixes.
        self.lexer.add('DATA_TYPE', r'(?:double|int|str|bool)(?!\w)')
        self.lexer.add('DOUBLE', r'-?\d+\.\d+')
        self.lexer.add('INTEGER', r'-?\d+')
        self.lexer.add('STRING', r'(?<=")(.+)(?=")')
        self.lexer.add('BOOL', r"true(?!\w)|false(?!\w)")
        self.lexer.add('IF', r'if(?!\w)')
        self.lexer.add('ELSE', r'else(?!\w)')
        self.lexer.add('AND', r"&&")
        self.lexer.add('OR', r"\|\|")
        self.lexer.add('VAR', r'var(?!\w)')
        self.lexer.add('FUNCTION', r'func(?!\w)')
        self.lexer.add('DELETE', r'delete(?!\w)')
        self.lexer.add('CAST', r'cast(?!\w)')
        self.lexer.add('PRINT', r'print(?!\w)')
        self.lexer.add('LOOP', r'loop(?!\w)')
        self.lexer.add('USING', r'using(?!\w)')
        # BUG FIX: the pattern was r'[a-zA-Z_]+[a-zA-Z0-9_]+', which requires
        # at least two characters, so single-letter identifiers like 'x'
        # failed to lex.
        self.lexer.add('IDENTIFIER', r'[a-zA-Z_][a-zA-Z0-9_]*')
        # Multi-character operators precede their single-char prefixes.
        self.lexer.add('DOUBLE_EQUAL', r'==')
        self.lexer.add('NOT_EQUAL', r'!=')
        # BUG FIX: NOT used to be registered before NOT_EQUAL, so "!=" always
        # lexed as NOT followed by EQUAL and NOT_EQUAL was unreachable.
        self.lexer.add('NOT', r"!")
        self.lexer.add('GTE', r'>=')
        self.lexer.add('LTE', r'<=')
        self.lexer.add('GT', r'>')
        self.lexer.add('LT', r'<')
        self.lexer.add('EQUAL', r'=')
        self.lexer.add('LBRACKET', r'\[')
        self.lexer.add('RBRACKET', r'\]')
        self.lexer.add('LBRACE', r'\{')
        self.lexer.add('RBRACE', r'\}')
        self.lexer.add('PIPE', r'\|')
        self.lexer.add('COMMA', r'\,')
        self.lexer.add('SEMICOLON', r';')
        self.lexer.add('DOT', r'\.')
        self.lexer.add('COLON', r'\:')
        self.lexer.add('PLUS', r'\+')
        self.lexer.add('MINUS', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'\/')
        self.lexer.add('MOD', r'\%')
        self.lexer.add('LPAREN', r'\(')
        self.lexer.add('RPAREN', r'\)')
        self.lexer.add('DQUOTE', r'"')
        # ignore whitespace
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register all token rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
#! /usr/bin/env python
# -*- coding: utf-8 -*-

# >>
# LTPyB, 2016
# <<

from rply import LexerGenerator

lg = LexerGenerator()

# rply matches rules in registration order, so longer patterns must come
# before any rule that matches their prefix.
# BUG FIX: FLOAT must precede INTEGER, otherwise "1.5" lexed as INTEGER "1"
# and then failed at the dot.
lg.add('FLOAT', r'\-?\d+\.\d+')
lg.add('INTEGER', r'\-?\d+')
# BUG FIX: OP_EQUAL must precede OP_ASSIGNMENT, otherwise "==" lexed as two
# OP_ASSIGNMENT tokens and OP_EQUAL was unreachable.
lg.add('OP_EQUAL', r'==')
lg.add('OP_ASSIGNMENT', r'=')

lg.ignore(r'\s+')    # ignore whitespace
lg.ignore(r'#.*\n')  # ignore comments

lexer = lg.build()
# Copyright 2017 the authors. # This file is part of Hy, which is free software licensed under the Expat # license. See the LICENSE. from rply import LexerGenerator lg = LexerGenerator() # A regexp for something that should end a quoting/unquoting operator # i.e. a space or a closing brace/paren/curly end_quote = r'(?![\s\)\]\}])' identifier = r'[^()\[\]{}\'"\s;]+' lg.add('LPAREN', r'\(') lg.add('RPAREN', r'\)') lg.add('LBRACKET', r'\[') lg.add('RBRACKET', r'\]') lg.add('LCURLY', r'\{') lg.add('RCURLY', r'\}') lg.add('HLCURLY', r'#\{') lg.add('QUOTE', r'\'%s' % end_quote) lg.add('QUASIQUOTE', r'`%s' % end_quote) lg.add('UNQUOTESPLICE', r'~@%s' % end_quote) lg.add('UNQUOTE', r'~%s' % end_quote) lg.add('HASHSTARS', r'#\*+') lg.add('HASHOTHER', r'#%s' % identifier) # A regexp which matches incomplete strings, used to support
lexer = lg.build() parser = pg.build() class BoxInt(BaseBox): def __init__(self, value): self.value = value def getint(self): return self.value ''' from rply import ParserGenerator, LexerGenerator from rply.token import BaseBox lexgen = LexerGenerator() lexgen.add('AND', r"(and)") lexgen.add('WITHOUT', r"(without)") lexgen.add('DIVIDE', r"(divide)") lexgen.add('MULTIPLY', ) keywords = { "return": Keyword("RETURN", "RETURN", EXPR_MID), "if": Keyword("IF", "IF_MOD", EXPR_BEG), "unless": Keyword("UNLESS", "UNLESS_MOD", EXPR_BEG), "then": Keyword("THEN", "THEN", EXPR_BEG), "elsif": Keyword("ELSIF", "ELSIF", EXPR_BEG), "else": Keyword("ELSE", "ELSE", EXPR_BEG),
class Lexer():
    """Registers every token of the language; get_lexer() builds the rply lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Various types (\b guards keep them from matching inside identifiers)
        self.lexer.add('INTTYPE', r'\b[u]{0,1}int(8|16|32|64|)\b')
        self.lexer.add('FLOATTYPE', r'\bfloat(32|64|)\b')
        self.lexer.add('VOID', r'\bvoid\b')
        self.lexer.add('STRINGTYPE', r'\bString\b')
        self.lexer.add('BOOL', r'\bbool\b')
        # Other reserved words
        self.lexer.add('IF', r'\bif\b')
        self.lexer.add('ELSE', r'\belse\b')
        self.lexer.add('WHILE', r'\bwhile\b')
        self.lexer.add('FOR', r'\bfor\b')
        self.lexer.add('CLASS', r'\bclass\b')
        self.lexer.add('OPERATOR', r'\boperator\b')
        self.lexer.add('MUTABLE', r'\bmutable\b')
        self.lexer.add('PUBLIC', r'\bpublic\b')
        self.lexer.add('PRIVATE', r'\bprivate\b')
        self.lexer.add('IMPORT', r'\bimport\b')
        self.lexer.add('RETURN', r'\breturn\b')
        self.lexer.add('EXTENDS', r'\bextends\b')
        self.lexer.add('NEW', r'\bnew\b')
        # Name of function or variable
        self.lexer.add('NAME', r'[a-zA-Z_$][a-zA-Z_$0-9]*')
        # Numbers: float form first so "1.5" is not split.
        self.lexer.add('DOUBLE', r'\d+[.]\d+')
        self.lexer.add('INTEGER', r'\d+')
        # Parenthesis, curly braces, and brackets
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('OPEN_CURLY', r'\{')
        self.lexer.add('CLOSE_CURLY', r'\}')
        self.lexer.add('OPEN_BRACKET', r'\[')
        self.lexer.add('CLOSE_BRACKET', r'\]')
        # Syntax helpers
        self.lexer.add('SEMICOLON', r'\;')
        self.lexer.add('COMMA', r'\,')
        self.lexer.add('CHAR', r"\'([^\\\n\r]|\\[rnft\'])\'")
        self.lexer.add('STRING', r'\"[^"]*\"')
        # Operators. rply picks the FIRST rule that matches, so multi-character
        # operators must be registered before any operator that is a prefix of
        # them.
        # BUG FIX: '>>', '<<', '>>=' and '<<=' used to be registered AFTER the
        # single-character '>' and '<', which made them unreachable ('>>'
        # lexed as two '>' tokens).
        self.lexer.add('!=', '!=')
        self.lexer.add('==', '==')
        self.lexer.add('>>=', '>>=')
        self.lexer.add('<<=', '<<=')
        self.lexer.add('>>', '[>][>]')
        self.lexer.add('<<', '[<][<]')
        self.lexer.add('>=', '>=')
        self.lexer.add('<=', '<=')
        self.lexer.add('++', '[+][+]')
        self.lexer.add('--', '[-][-]')
        self.lexer.add('+=', '[+]=')
        self.lexer.add('-=', '[-]=')
        self.lexer.add('*=', '[*]=')
        self.lexer.add('/=', '[/]=')
        self.lexer.add('>', '[>]')
        self.lexer.add('<', '[<]')
        self.lexer.add('^^', r'\^\^')
        self.lexer.add('&&', r'\&\&')
        self.lexer.add('||', r'\|\|')
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'\/')
        self.lexer.add('XOR', r'\^')
        self.lexer.add('AND', r'\&')
        self.lexer.add('OR', r'\|')
        self.lexer.add('EQ', r'\=')
        self.lexer.add('MOD', r'\%')
        self.lexer.add('MEMBER', r'\.')
        self.lexer.add('NOT', r'\!')
        # The rest
        self.lexer.add('COLON', r'\:')
        # Ignore comments (NOTE(review): the block-comment pattern is greedy
        # and single-line only — confirm multi-line comments are out of scope).
        self.lexer.ignore(r'\/\*.*\*\/')
        self.lexer.ignore(r'\/\/.*(\n|\r)')
        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register all token rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
# Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. from rply import LexerGenerator lg = LexerGenerator() # A regexp for something that should end a quoting/unquoting operator # i.e. a space or a closing brace/paren/curly end_quote = r'(?![\s\)\]\}])' lg.add('LPAREN', r'\(') lg.add('RPAREN', r'\)') lg.add('LBRACKET', r'\[') lg.add('RBRACKET', r'\]') lg.add('LCURLY', r'\{') lg.add('RCURLY', r'\}') lg.add('QUOTE', r'\'%s' % end_quote) lg.add('QUASIQUOTE', r'`%s' % end_quote) lg.add('UNQUOTESPLICE', r'~@%s' % end_quote) lg.add('UNQUOTE', r'~%s' % end_quote)
class Lexer():
    """Token definitions (Spanish keywords); get_lexer() builds the rply lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # (name, pattern) pairs in priority order; rply matches the first
        # rule that succeeds.
        rules = (
            ('BEGIN', r'Begin'),
            ('NUCLEO', r'Nucleo'),
            ('SI', r'Si'),                       # "if"
            ('IMPRIMIR', r'Imprimir'),           # "print"
            ('FIN', r'Fin'),                     # "end"
            ('OPEN_PAREN', r'\('),
            ('CLOSE_PAREN', r'\)'),
            # Identifiers are lower-case letters optionally followed by digits.
            ('IDENTIFICADOR', r'[a-z]+[0-9]*'),
            ('IGUAL', r'\='),                    # assignment
            ('MAS', r'\+'),                      # plus
            ('INT', r'\d+'),
            ('STRING', r'".*"'),
        )
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        # Whitespace carries no meaning.
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Registers token rules for the language; get_lexer() builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        self.lexer.add('LITERAL', r'"[^\"]*"')
        # Keywords — registered before ID so reserved words win.
        self.lexer.add('AND', r'and')
        self.lexer.add('NOT', r'not')
        self.lexer.add('OR', r'or')
        self.lexer.add('IF', r'if')
        self.lexer.add('THEN', r'then')
        self.lexer.add('ELSE', r'else')
        self.lexer.add('WHILE', r'while')
        self.lexer.add('DO', r'do')
        self.lexer.add('BREAK', r'break')
        self.lexer.add('CONTINUE', r'continue')
        self.lexer.add('BEGIN', r'\{')
        self.lexer.add('END', r'\}')
        self.lexer.add('INTEGER', r'integer')
        self.lexer.add('FLOAT', r'float')
        self.lexer.add('FUNCTION', r'function')
        self.lexer.add('VAR', r'var')
        self.lexer.add('PROGRAM', r'program')
        # Multi-character operators precede their single-char prefixes.
        self.lexer.add('LEQUAL', r'\<=')
        self.lexer.add('GEQUAL', r'\>=')
        self.lexer.add('NOT_EQUAL', r'\!=')
        self.lexer.add('EQUALS', r'\:=')
        self.lexer.add('PRINT', r'print')
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('SEMI_COLON', r'\;')
        self.lexer.add('COLON', r'\:')
        self.lexer.add('COMMA', r'\,')
        self.lexer.add('EQUAL', r'\=')
        self.lexer.add('GTHAN', r'\>')
        self.lexer.add('LTHAN', r'\<')
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'\/')
        self.lexer.add('NUMBER', r'[0-9]+(\.[0-9]+)?')
        # BUG FIX: the pattern was r'[a-zA-Z]*', which matches the empty
        # string; a zero-width rule makes the lexer loop without consuming
        # input on any otherwise-unmatched character.
        self.lexer.add('ID', r'[a-zA-Z]+')
        # Comments are delimited by '#...#'.
        self.lexer.ignore(r'#[^\#]*#')
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register all token rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
def lexer_from_mapping(mapping):
    """Build an rply lexer for the query language.

    ``mapping`` maps single special characters to their meaning; each key
    becomes its own token, matched in backslash-escaped form.
    """
    lg = LexerGenerator()
    # Data escaped with forward slashes, e.g. /some text/.
    lg.add("DATA", r'/.+?/')
    # Add the special characters from the mapping.
    for char in mapping.keys():
        lg.add(char, r"\\" + char)
    # Normal tokens.
    lg.add("TYPE", r':')
    lg.add("AND", r'\&')
    lg.add("OR", r'\|')
    lg.add("L_PAREN", r'\(')
    lg.add("R_PAREN", r'\)')
    lg.add("EQUAL", r'=')
    lg.add("CHILD", r'>')
    lg.add("PARENT", r'<')
    lg.add("NOT", r'!')
    # Everything else is data: a negated character class that excludes the
    # operator characters and every mapped special character.
    # BUG FIX: whitespace was not excluded, so this catch-all swallowed the
    # spaces between adjacent data words before the ignore rule could apply
    # ("foo bar" lexed as one DATA token).
    excluded_chars = r'^\s<>=&|():!'
    for char in mapping.keys():
        excluded_chars += r"\\" + char
    lg.add("DATA", "[{excluded}]+".format(excluded=excluded_chars))
    lg.ignore(r'\s+')
    lexer = lg.build()
    return lexer
def __init__(self):
    """Build the scanner from the module-level ``grammar`` table.

    Each entry of ``grammar`` is used as a (token name, regex) pair
    (only indices 0 and 1 are read here).
    """
    _lg = LexerGenerator()
    for r in grammar:
        _lg.add(r[0], r[1])
    # Whitespace carries no meaning.
    _lg.ignore(r'\s+')
    self._scanner = _lg.build()
class Lexer():
    """Lexer for the toy calculator (Portuguese keywords)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # (name, pattern) pairs in priority order; rply matches the first rule.
        rules = (
            ('IMPRIMA', r'mostra_ai'),   # print-statement keyword
            ('ABRE_PAR', r'\('),         # open paren
            ('FECHA_PAR', r'\)'),        # close paren
            ('PONTO_VIRGULA', r'\;'),    # semicolon
            ('SOMA', r'\+'),             # addition
            ('SUB', r'\-'),              # subtraction
            ('NUM', r'\d+'),
            ('MULT', r'\*'),             # multiplication
            ('DIV', r'\/'),              # division
            ('POT', r'\^'),              # exponentiation
            ('REST', r'\%'),             # remainder
        )
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        # Whitespace carries no meaning.
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and return the built rply lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Token definitions for the print/number mini-language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # (name, pattern) pairs in priority order; rply matches the first rule.
        rules = (
            ('PRINT', r'print'),
            ('OPEN_PAREN', r'\('),
            ('CLOSE_PAREN', r'\)'),
            ('SEMI_COLON', r'\;'),
            ('SUM', r'\+'),
            ('SUB', r'\-'),
            # Numbers: optional sign, optional decimal part.
            ('NUMBER', r'[-+]?[0-9]*\.?[0-9]+'),
        )
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        # Whitespace carries no meaning.
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and return the built rply lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Registers token rules for the language; get_lexer() builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Keywords
        self.lexer.add('PRINT', r'print')
        self.lexer.add('VAL', r'val')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Semi colon / assignment
        self.lexer.add('SEMI_COLON', r'\;')
        self.lexer.add('EQUAL', r'\=')
        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        # BUG FIX: the pattern was r'\\' (a literal backslash), so the
        # division operator '/' could never be lexed.
        self.lexer.add('DIV', r'/')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # BUG FIX: the pattern was r'[A-Za-z_0-9]*', which matches the empty
        # string; a zero-width rule makes the lexer loop without consuming
        # input on any otherwise-unmatched character.
        self.lexer.add('TEXT', r'[A-Za-z_0-9]+')
        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register all token rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
'MINUS': r'-', 'MUL': r'\*', 'NUMBER_SEP': r'/', 'EXPR_OPEN': r'\(', 'EXPR_CLOSE': r'\)', 'AND': r'&', 'OR': r'\|', 'NOT': r'!', 'EQ': r'\?\s*=', 'GT': r'>', 'LT': r'<', 'BOWL': r':', 'BOWL_OPEN': r'{', 'BOWL_CLOSE': r'}', 'NOODLE_OPEN': r'\[', 'NOODLE_SEP': r';', 'NOODLE_CLOSE': r'\]', 'ASSIGN': r'=', 'DENO': r'\^', 'MEM': r'@', } lg = LexerGenerator() for name, regex in op_map.items(): lg.add(name, regex) lg.ignore('\s+') lg.ignore('~\s*#((?!#~).)*#\s*~') lexer = lg.build()
class Syntax:
    """Token definitions for the language; Build() returns the rply lexer."""

    def __init__(self) -> None:
        self.lg = LexerGenerator()

    def Build(self):
        """Register all rules (registration order = match priority) and build."""
        self.lg.add(";", ";")
        self.lg.add(".", r"\.")
        self.lg.add(",", ",")
        self.lg.add("(", r"\(")
        self.lg.add(")", r"\)")
        self.lg.add("{", r"\{")
        self.lg.add("}", r"\}")
        self.lg.add("[", r"\[")
        self.lg.add("]", r"\]")
        self.lg.add("=", r"\=")
        self.lg.add("->", r"\-\>")
        self.lg.add("*", r"\*")
        # Double-quoted string with backslash escapes and escaped newlines.
        self.lg.add("STRING", '["]([^"\\\n]|\\.|\\\n)*["]')
        self.lg.add("&", r"\&")
        # BUG FIX: a second, identical rule for "*" used to be registered
        # here — it was unreachable because the earlier "*" rule always
        # matched first, so it has been removed.
        self.lg.add("@", r"\@")
        self.lg.add("NUMBER", "[-]*[0-9]+")
        # Keyword rules deliberately include the trailing space.
        self.lg.add("STRUCT", "struct ")
        self.lg.add("FN", "fn ")
        self.lg.add("RETURN", "return ")
        self.lg.add("TO", "to ")
        self.lg.add("IDENTIFIER", r"[_\w][_\w0-9]*")
        self.lg.ignore(r"\s+")
        return self.lg.build()
def test_states(self):
    """State-bound rules only apply in their state; to_state switches the lexer."""
    generator = LexerGenerator(initial_state="scalar")
    generator.add("NUMBER", r"\d+")
    generator.add("PLUS", r"\+")
    generator.ignore(r"\s+")
    generator.add("OPEN_BRACKET", r"\[", to_state="vector")
    generator.add("PLUS", r"\+", state="vector")
    generator.add("NUMBER", r"\d+", state="vector")
    generator.add("NEW_LINE", r"\n+", state="vector")
    generator.add("CLOSE_BRACKET", r"\]", state="vector", to_state="scalar")
    generator.ignore(r" +", state="vector")
    lexer = generator.build()
    stream = lexer.lex("2 + [ 3 + 4 \n\n 5 + 6 ] + 7")

    # Expected (name, value, state) for every token, in order.
    expected = [
        ("NUMBER", "2", "scalar"),
        ("PLUS", "+", "scalar"),
        ("OPEN_BRACKET", "[", "scalar"),
        ("NUMBER", "3", "vector"),
        ("PLUS", "+", "vector"),
        ("NUMBER", "4", "vector"),
        ("NEW_LINE", "\n\n", "vector"),
        ("NUMBER", "5", "vector"),
        ("PLUS", "+", "vector"),
        ("NUMBER", "6", "vector"),
        ("CLOSE_BRACKET", "]", "vector"),
        ("PLUS", "+", "scalar"),
        ("NUMBER", "7", "scalar"),
    ]
    for (name, value, state), token in zip(expected, stream):
        assert token.name == name
        assert token.value == value
        assert token.state == state
from rply import LexerGenerator

# Lexer generator for a tiny calculator; token names are Chinese
# (数 = number, 加 = plus, 减 = minus).
分词器母机 = LexerGenerator()
分词器母机.add('数', r'\d+')
分词器母机.add('加', r'\+')
分词器母机.add('减', r'-')
分词器 = 分词器母机.build()

# Demo: print every token of "1+1-1".
for 词 in 分词器.lex('1+1-1'):
    print(词)
from rply import ParserGenerator, LexerGenerator
import box

lg = LexerGenerator()
# S-expression surface syntax: parens, quote, and bare atoms.
lg.add("LPAREN", r"\(")
lg.add("RPAREN", r"\)")
lg.add("QUOTE", r"'")
lg.add("ATOM", r"[^\s()]+")  # anything that is not whitespace or a paren
lg.ignore(r"\s+")

pg = ParserGenerator(["QUOTE", "LPAREN", "RPAREN", "ATOM"], precedence=[],
                     cache_id="wasp")

@pg.error
def error_handler(token):
    """Raise ValueError naming the unexpected token (with position if known)."""
    type = token.gettokentype()
    pos = token.getsourcepos()
    if pos is None:
        raise ValueError("unexpected %s" % type)
    else:
        raise ValueError("unexpected %s at (%s, %s)" % (type, pos.lineno, pos.colno))

@pg.production("main : sexpr")
def main(p):
    return p[0]
# # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. from rply import LexerGenerator lg = LexerGenerator() # A regexp for something that should end a quoting/unquoting operator # i.e. a space or a closing brace/paren/curly end_quote = r'(?![\s\)\]\}])' lg.add('LPAREN', r'\(') lg.add('RPAREN', r'\)') lg.add('LBRACKET', r'\[') lg.add('RBRACKET', r'\]') lg.add('LCURLY', r'\{') lg.add('RCURLY', r'\}') lg.add('QUOTE', r'\'%s' % end_quote) lg.add('QUASIQUOTE', r'`%s' % end_quote)
def __init__(self):
    """Build and store the lexer and parser for the shell-like grammar."""
    lg = LexerGenerator()
    # (name, pattern) pairs; rules are tried in order, so the more specific
    # ones (PROTO, the literal "--colors=always" NAME) come first.
    token_defs = [
        ("PROTO", r"[a-zA-Z]+://[^ ]+"),
        ("INT", r"\d+"),
        ("STRING", r"'[^']+'|\"[^\"]+\""),
        ("NAME", r"--colors=always"),
        ("PATH", r"([a-zA-Z0-9/._-]|\\ )+"),
        ("PATH", r"~([a-zA-Z0-9/._-]|\\ )*"),
        ("NAME", r"([a-zA-Z0-9_-]|\\ )+"),
        ("SEMICOLON", r";"),
        ("ENDL", r"\r?\n"),
    ]
    for name, pattern in token_defs:
        lg.add(name, pattern)
    lg.ignore(r"[ ]+")

    pg = ParserGenerator([x[0] for x in token_defs])

    @pg.production("main : statements")
    def main(args):
        statements, = args
        return statements

    @pg.production("statements : statement")
    def statements_one(args):
        stmt, = args
        return {
            "type": "statement",
            "content": stmt,
        }

    @pg.production("statements : statement separator statements")
    def statements_many(args):
        # A separator chains a statement onto the remaining statements.
        stmt, sep, rest = args
        return {
            "type": "statement_infix_operator",
            "content": {
                "left": {
                    "type": "statement",
                    "content": stmt,
                },
                "right": rest,
                "operator": sep,
            }
        }

    @pg.production("separator : SEMICOLON")
    @pg.production("separator : ENDL")
    def separator(args):
        # Only the separator's text matters, not which token kind it was.
        return args[0].value

    @pg.production("statement : atom")
    def expression_one(args):
        head, = args
        return [head]

    @pg.production("statement : atom atoms")
    def expression_many(args):
        head, tail = args
        return [head] + tail

    @pg.production("atoms : atom")
    def atoms_one(args):
        head, = args
        return [head]

    @pg.production("atoms : atom atoms")
    def atoms_many(args):
        head, tail = args
        return [head] + tail

    @pg.production("atom : NAME")
    @pg.production("atom : INT")
    @pg.production("atom : STRING")
    @pg.production("atom : PATH")
    @pg.production("atom : PROTO")
    def atom(args):
        token, = args
        return token.value

    self.pg = pg
    self.lg = lg
    self.lexer = self.lg.build()
    self.parser = self.pg.build()
import re
import itertools
from collections import deque
from rply import ParserGenerator, LexerGenerator
from graphextractor.rfc3987 import UrlPattern
from graphextractor.flattened import flattened

__all__ = ['TweetLexer', 'TweetParser']

# Tweet tokenizer (Python 2: note the ``ur''`` raw-unicode literals).
lex = LexerGenerator()
# Skip separators: commas/semicolons/whitespace and the words "and"/"or".
lex.ignore(ur'(?:[,;\s]+|\band\b|\bor\b)+')
lex.add(u'URL', UrlPattern)
# Order matters: the specific "#betterthan"/"#isbetterthan" hashtags must be
# registered before the generic HASHTAG rule, which would otherwise match them.
lex.add(u'BTHASH', ur'#betterthan')
lex.add(u'IBTHASH', ur'#isbetterthan')
lex.add(u'HASHTAG', ur'#[a-zA-Z0-9_]+')
lex.add(u'MENTION', ur'@[a-zA-Z0-9_]+')
lex.add(u'FOR', ur'(for|FOR|For)')
lex.add(u'WORD', ur'[\w]+')

pg = ParserGenerator([u'URL', u'BTHASH', u'IBTHASH', u'HASHTAG', u'MENTION',
                      u'FOR', u'WORD'
                      ], cache_id=u'graphextractor.tweetparser')


# NOTE: the body of this production continues past this view.
@pg.production("betterthan : words URL bthash URL topics words")
def betterthan(p):
from rply import LexerGenerator

# Tokenizer for simple integer arithmetic.
lg = LexerGenerator()
lg.add("PLUS", r"\+")
lg.add("MINUS", r"-")
# FIX: the MUL and DIV patterns were swapped — MUL matched "/" and DIV
# matched "*".
lg.add("MUL", r"\*")
lg.add("DIV", r"/")
lg.add("NUMBER", r"\d+")
lg.ignore(r"\s+")

lexer = lg.build()
class Lexer():
    """Lexer for the boxplot query mini-language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules; order determines matching priority."""
        # Keywords (must precede the catch-all STR rule).
        self.lexer.add('SELECT', r'select')
        self.lexer.add('FROM', r'from')
        self.lexer.add('WHERE', r'where')
        # Separator
        self.lexer.add('SEP', r',')
        self.lexer.add('BOXPLOT', r'boxplot')
        # Numbers.
        # FIX: NUM used to be r'\d' and was registered *after* STR; since
        # r'\w+' also matches digits, NUM was unreachable and numbers lexed
        # as STR (and only one digit at a time had it ever matched).
        self.lexer.add('NUM', r'\d+')
        # Alphanumeric strings (column/table names).
        self.lexer.add('STR', r'\w+')
        # Comparison operators.
        self.lexer.add('GRT', r'>')
        self.lexer.add('LSS', r'<')
        self.lexer.add('EQ', r'=')
        # Skip whitespace.
        self.lexer.ignore(r'\s+')
        # Skip "#" line comments.
        # FIX: was '#(.)*\n', which required a trailing newline and failed
        # on a comment that ends the input.
        self.lexer.ignore(r'#[^\n]*')
        # Skip /* ... */ block comments.
        # FIX: the inline (?s) flag must appear at the start of the pattern
        # (it is an error elsewhere on Python 3.11+), and ".*" must be
        # non-greedy so two separate comments are not merged into one.
        self.lexer.ignore(r'(?s)/\*.*?\*/')

    def get_lexer(self):
        """Register the rules and return the built rply lexer."""
        self._add_tokens()
        return self.lexer.build()
from rply import LexerGenerator

lg = LexerGenerator()
# "..." must be registered before any rule that could match "." pieces.
lg.add("ELLIPSIS", r"\.\.\.")
lg.add("NUMBER", r"\d+")
lg.add("EQUALS", r"=")
lg.add("WORD", r"[a-z]+")
lg.ignore(r"\s+")  # Ignore whitespace
# Ignore comments.
# FIX: was r"#.*\n", which required a trailing newline and raised a
# LexingError when a comment ended the input without one.  "#.*" stops at
# the newline itself, which \s+ then consumes.
lg.ignore(r"#.*")
lexer = lg.build()
class Lexer():
    """Lexer for a small C-like language with single-letter keywords."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register token rules; earlier rules win, so multi-character
        operators must precede their one-character prefixes."""
        # Parenthesis
        self.lexer.add('OPEN_PAR', r'\(')
        self.lexer.add('CLOSE_PAR', r'\)')
        self.lexer.add('OPEN_KEY', r'\{')
        self.lexer.add('CLOSE_KEY', r'\}')
        # Punctuation
        self.lexer.add('SEMI_COLON', r';')
        self.lexer.add('COMMA', r',')
        self.lexer.add('QUOTE', r'"')
        self.lexer.add('DOTS', r':')
        # Boolean operators.
        # FIX: OR ("//") was registered after DIV ("/"), so "//" lexed as
        # two DIV tokens and OR was unreachable.
        self.lexer.add('OR', r'//')
        self.lexer.add('AND', r'&&')
        # Relational operators.
        # FIX: DIFF ("!=") was registered after NOT ("!"), so "!=" lexed as
        # NOT followed by ASSIGN and DIFF was unreachable.
        self.lexer.add('EQUAL_TO', r'==')
        self.lexer.add('DIFF', r'!=')
        self.lexer.add('NOT', r'!')
        self.lexer.add('BIGGER_THAN', r'>')
        self.lexer.add('SMALLER_THAN', r'<')
        self.lexer.add('ASSIGN', r'=')
        # Arithmetic operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'-')
        self.lexer.add('MULT', r'\*')
        self.lexer.add('DIV', r'/')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Type and statement keywords (one/two uppercase letters by design).
        self.lexer.add('INT', r'IN')
        self.lexer.add('CHAR', r'CH')
        self.lexer.add('VOID', r'VO')
        self.lexer.add('WHILE', r'W')
        self.lexer.add('IF', r'I')
        self.lexer.add('ELSE', r'E')
        self.lexer.add('PRINT', r'P')
        self.lexer.add('SCANF', r'S')
        self.lexer.add('FUNC', r'F')
        self.lexer.add('MAIN', r'M')
        # Identifier
        self.lexer.add('IDENTIFIER', r"[a-zA-Z_][a-zA-Z0-9_]*")
        # Ignore whitespace
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built rply lexer."""
        self._add_tokens()
        return self.lexer.build()
qualified = WORD , OP , string ;
unqualified = string ;
quick = QUICK , string ;
variable = DOLLAR , string ;
combination = query , [ ( AND | OR ) ] , query ;
not = NOT | MINUS ;
inverted_query = NOT , query ;
subquery = LPAREN , query , RPAREN ;
string = ( WORD | STRING ) ;
"""

# Token rules for the grammar above.  Order matters: AND/OR/NOT must be
# registered before WORD, which would otherwise match them.
lg = LexerGenerator()
lg.add('AND', r'AND')
lg.add('OR', r'OR')
lg.add('NOT', r'NOT')
# A bare word: the first char excludes operator/quote/quick-search leaders.
lg.add('WORD', r'[^:"\'()\s=~<>\-#@/$][^:)\s=~<>]*')
# Single- or double-quoted string with backslash escapes.
lg.add('STRING', r'([\'"])(?:(?!\1|\\).|\\.)*\1')
lg.add('MINUS', r'-')
lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('OP', r'[:=<>~]')
lg.add('QUICK', r'[#@/]')
lg.add('DOLLAR', r'\$')
lg.ignore(r'\s+')

# NOTE(review): relies on the lexer generator's ``rules`` attribute to list
# the token names; the ParserGenerator call continues past this view.
pg = ParserGenerator([rule.name for rule in lg.rules],
from rply import LexerGenerator

lexer_gen = LexerGenerator()
# Skip runs of whitespace and "#" line comments.
# NOTE(review): without re.MULTILINE, "$" only matches at the end of the
# input, so a "#" comment on any line except the last fails to match and
# the lexer errors on the "#".  r"([\s\f\t\n\r\v]+)|#[^\n]*" would avoid
# the anchor entirely — TODO confirm and fix.
lexer_gen.ignore(r"([\s\f\t\n\r\v]+)|#.*$")


def get_tokens():
    # (NAME, regex) pairs, tried in order; \b keeps keywords from matching
    # identifier prefixes.  (The list continues past this view.)
    return [
        # Keywords
        ("IF", r"if\b"),
        ("PRINT", r"print\b"),
        ("FN", r"fn\b"),
        ("WHILE", r"while\b"),
        ("RETURN", r"return\b"),
        ("LET", r"let\b"),
        ("BREAK", r"break\b"),
        ("CONTINUE", r"continue\b"),
        ("ASYNC", r"async\b"),
        # Channel Operators
        ("CHAN_OUT", r"<:"),
        ("CHAN_IN", r"<-"),
        # Arithmetic Operators
        ("MUL", r"\*"),
        ("DIV", r"/"),
        ("MOD", r"%"),
        ("PLUS", r"\+"),
        ("MINUS", r"-"),
        # Logical Operators
        ("AND", r"and\b"),
        ("OR", r"or\b"),
def __init__(self) -> None:
    """Create an empty lexer generator (no token rules registered yet);
    rules are presumably added elsewhere before build() — TODO confirm."""
    self.lg = LexerGenerator()
""":mod:`stencil_lang.matrix.lexer` -- Matrix scanner """ from rply import LexerGenerator from stencil_lang.matrix.tokens import TOKENS, IGNORES lg = LexerGenerator() for rule_name, regex in TOKENS.iteritems(): lg.add(rule_name, regex) for regex in IGNORES: lg.ignore(regex) # This has to be called outside a function because the parser must be generated # in Python during translation, not in RPython during runtime. _lexer = lg.build() """This intepreter's lexer instance.""" def lex(text): """Scan text using the generated lexer. :param text: text to lex :type text: :class:`str` :return: parsed stream :rtype: :class:`rply.lexer.LexerStream` """ return _lexer.lex(text)
class Lexer():
    """Lexer for the import/let mini-language (with amino-acid literals)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules; order determines matching priority."""
        # Reserved Words
        self.lexer.add('FROM', r'from')
        self.lexer.add('IMPORT', r'import')
        self.lexer.add('AS', r'as\ ')  # NOTE: requires a trailing space
        self.lexer.add('LET', r'let')
        # Other characters
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('OPEN_CURLY_BRACKET', r'\{')
        self.lexer.add('CLOSE_CURLY_BRACKET', r'\}')
        self.lexer.add('OPEN_BRACKET', r'\[')
        self.lexer.add('CLOSE_BRACKET', r'\]')
        self.lexer.add('FORWARD_SLASH', r'\/')
        self.lexer.add('COLON', r'\:')
        self.lexer.add('SEMICOLON', r'\;')
        self.lexer.add('COMMA', r'\,')
        self.lexer.add('PERIOD', r'\.')
        self.lexer.add('EQUAL', r'=')
        self.lexer.add('TILDE', r'~')
        self.lexer.add('EXCLAMATION', r'!')
        # Values
        self.lexer.add('NUMBER', r'-?\d+')
        self.lexer.add('AMINO_ACID_SEQUENCE', r'\$[ACDEFGHIKLMNPQRSTVWY\*]+')
        self.lexer.add('IDENTIFIER', r'\w[\w\d\_\-]*')
        # Ignore spaces.
        # FIX: raw string — '\s' in a plain literal is an invalid escape
        # sequence (SyntaxWarning on modern Python).
        self.lexer.ignore(r'\s+')
        # Comments - Ignore multiline comments in c syntax
        # Example: /* This is a comment! */
        self.lexer.ignore(r'/\*([\s\S]*?)\*/\s*')
        # Comments - Ignore remainder of line starting with "#".
        # FIX: was r'#.*\n', which required a trailing newline and raised a
        # LexingError on a comment that ends the file.
        self.lexer.ignore(r'#[^\n]*')

    def get_lexer(self):
        """Register the rules and return the built rply lexer."""
        self._add_tokens()
        return self.lexer.build()
from __future__ import print_function
import re
import ast
import collections
from transit.transit_types import Keyword, Symbol, TaggedValue, List, Vector
import transit.transit_types
# Singletons for transit's boolean values.
transit_true = transit.transit_types.true
transit_false = transit.transit_types.false
from rply import ParserGenerator, LexerGenerator

lg = LexerGenerator()

# Regex for an EDN-style symbol, and a namespace-qualified "ns/name" form.
SYMBOL_RE = r"[\.\*\+\!\-\_\?\$%&=a-zA-Z][\.\*\+\!\-\_\?\$%&=a-zA-Z0-9:#]*"
NS_SYMBOL = SYMBOL_RE + "/" + SYMBOL_RE

# Literal scalars.  "float" is registered before "number" so "1.5" is not
# consumed as the integer "1".
lg.add("boolean", r"(true|false)")
lg.add("nil", r"nil")
lg.add("float", r"\d+\.\d+")
lg.add("number", r"[-+]?\d+")
# Collection delimiters (the rule list continues past this view).
lg.add("olist", r"\(")
lg.add("clist", r"\)")
lg.add("omap", r"{")
lg.add("cmap", r"}")
lg.add("ovec", r"\[")
lg.add("cvec", r"\]")
lg.add("oset", r"#{")
lg.add("colon", r":")
from rply import LexerGenerator

lg = LexerGenerator()
lg.add('number', r'\-?[0-9]+')
lg.add('add', r'\+')
lg.add('sub', r'\-')
# Two-character comparisons, tried before the lone '=' below.
lg.add('ioperator', r'(==|\>=|\<=|\!=|>|<)')
lg.add('equals', r'\=')
# Keywords (registered before 'variablenam', which would otherwise match them).
lg.add('private', r'local')
lg.add('if', r'if')
lg.add('for', r'for')
lg.add('def', r'def')
lg.add('class', r'class')
lg.add('new', r'new')
lg.add('leftbracket', r'\{')
lg.add('rightbracket', r'\}')
lg.add('end', r'end')
# NOTE(review): matches "(" through the next ")" or whitespace run — the
# intended grouping is unclear; left as-is.
lg.add('parenth', r'\(.*?(\)|\s)+')
lg.add('variablenam', r'[A-Za-z0-9_.:]+')
# String literal.
# FIX: was r'".+"' — the greedy ".+" merged adjacent literals
# ('"a" "b"' lexed as one token) and the empty string "" never matched.
lg.add('string', r'"[^"]*"')
lg.ignore(r'\s+')

l = lg.build()
from rply import LexerGenerator

lg = LexerGenerator()

# (name, pattern) pairs, registered in order.  Square/curly brackets are
# excluded from identifiers but have no token rules of their own yet.
for rule_name, rule_pattern in (
    ("LPAREN", r"\("),
    ("RPAREN", r"\)"),
    ("IDENTIFIER", r"[^()\[\]{}\s#]+"),
):
    lg.add(rule_name, rule_pattern)

# "#" comments run up to — but not through — the line end (the lookahead
# leaves the newline for the whitespace rule), and also match at EOF.
lg.ignore(r"#.*(?=\r|\n|$)")
lg.ignore(r"\s+")

lexer = lg.build()
# Copyright 2019 the authors. # This file is part of Hy, which is free software licensed under the Expat # license. See the LICENSE. from rply import LexerGenerator lg = LexerGenerator() # A regexp for something that should end a quoting/unquoting operator # i.e. a space or a closing brace/paren/curly end_quote_set = r'\s\)\]\}' end_quote = r'(?![%s])' % end_quote_set identifier = r'[^()\[\]{}\'"\s;]+' lg.add('LPAREN', r'\(') lg.add('RPAREN', r'\)') lg.add('LBRACKET', r'\[') lg.add('RBRACKET', r'\]') lg.add('LCURLY', r'\{') lg.add('RCURLY', r'\}') lg.add('HLCURLY', r'#\{') lg.add('QUOTE', r'\'%s' % end_quote) lg.add('QUASIQUOTE', r'`%s' % end_quote) lg.add('UNQUOTESPLICE', r'~@%s' % end_quote) lg.add('UNQUOTE', r'~%s' % end_quote) lg.add('ANNOTATION', r'\^(?![=%s])' % end_quote_set) lg.add('DISCARD', r'#_') lg.add('HASHSTARS', r'#\*+') lg.add( 'BRACKETSTRING', r'''(?x)