def lexer():
    """Build and return the rply lexer for this grammar.

    Token precedence follows registration order: multi-character operators
    (e.g. COMBINE '|=' / '&=') and qualified names (CNAME) are registered
    before their single-character / unqualified counterparts so the longer
    form wins, and DOCUMENTATION ('##...') is tried before COMMENT ('#...').
    """
    spec = [
        ('LPAREN', r'\('),
        ('RPAREN', r'\)'),
        ('LBRACE', r'{'),
        ('RBRACE', r'}'),
        ('LBRACKET', r'\['),
        ('RBRACKET', r'\]'),
        ('COMBINE', r'\|=|&='),
        ('EQUAL', r'='),
        ('PIPE', r'[|]'),
        ('COMMA', r','),
        ('AMP', r'&'),
        ('MINUS', r'[-]'),
        ('STAR', r'[*]'),
        ('PLUS', r'[+]'),
        ('QMARK', r'[?]'),
        ('CNAME', r'%s:(%s|\*)' % (NCNAME, NCNAME)),
        ('QID', r'\\%s' % NCNAME),
        ('ID', NCNAME),
        ('LITERAL', r'".*?"'),
        ('DOCUMENTATION', r'##.*'),
        ('COMMENT', r'#.*'),
        ('TILDE', r'~'),
    ]
    generator = rply.LexerGenerator()
    for token_name, pattern in spec:
        generator.add(token_name, pattern)
    # Whitespace is not significant between tokens.
    generator.ignore(r'\s+')
    return generator.build()
def _pass1(self, source):
    """First assembler pass: resolve label addresses and strip label text.

    Builds a throwaway lexer that recognizes label definitions, label uses,
    and every opcode pattern, then walks *source* once while tracking the
    running address.  When a label definition is reached, every use of that
    label in *source* is replaced with the current address (as a hex
    literal) and the definition itself is deleted.

    Returns the rewritten source string.

    NOTE(review): the LABEL_DEF/LABEL_USE regexes use '+' on the second
    character class, so single-character label names cannot match — confirm
    whether that is intentional.
    """
    LabelLexer = rply.LexerGenerator()
    # A definition is NAME followed by the def marker; a use is the use
    # marker followed by NAME.
    LabelLexer.add("LABEL_DEF", "[_A-Za-z][_A-Za-z0-9]+" + self.LABEL_DEF_CHAR)
    LabelLexer.add("LABEL_USE", self.LABEL_USE_CHAR + "[_A-Za-z][_A-Za-z0-9]+")
    # Skip ';' line comments and whitespace.
    LabelLexer.ignore(";(.+)?\n")
    LabelLexer.ignore("\s+")
    for op in self.opcodes:
        asr = op[self.AS_REGEX]
        # Extra "<ALIAS>_ARG_REM" rules match the opcode with its argument
        # placeholder removed; used below only for address accounting.
        for arg in self.is_args:
            if arg in asr:
                LabelLexer.add(op[self.ALIAS] + "_ARG_REM", asr.replace(arg, ""))
        # Expand the generic placeholders into concrete sub-regexes.
        for tr in self.as_g_tr:
            asr = asr.replace(tr, self.as_g_tr[tr])
        LabelLexer.add(op[self.ALIAS], asr)
    cur_addr = 0
    for tok in LabelLexer.build().lex(source):
        if tok.name == 'LABEL_DEF':
            # tok.value[:-1] drops the trailing def marker to recover the
            # bare label name; rewrite every use site to the current
            # address, then erase the definition.
            source = source.replace(self.LABEL_USE_CHAR + tok.value[:-1], hex(cur_addr))
            source = source.replace(tok.value, '')
        elif tok.name == 'LABEL_USE':
            # Uses are rewritten when their definition is reached; nothing
            # to do here.
            continue
        elif '_ARG_REM' in tok.name:
            # Opcode matched without its argument: advance by the opcode
            # word plus its declared argument count.
            cur_addr += self.num_args[self.aliases.index(
                tok.name.replace('_ARG_REM', ''))] + 1
        else:
            # Plain opcode match: same accounting, keyed by its alias.
            cur_addr += self.num_args[self.aliases.index(tok.name)] + 1
    return source
def __init__(self):
    """Assemble the lexer from the declarative syntax-object table."""
    generator = rply.LexerGenerator()
    # Each syntax object contributes one (name, regex) lexer rule, in
    # table order.
    for syntax in pydice_syntax_objects.LEXER_SYNTAX:
        generator.add(syntax.get_token_name(), syntax.get_token_regex())
    # Ignore spaces
    generator.ignore(r"\s+")
    self._lexer = generator.build()
def create_lexer():
    """Build the language lexer.

    Returns:
        tuple: ``(lexer, token_names)`` — the built rply lexer and the list
        of token names in registration order (rply tries rules in that
        order, so multi-char operators precede their single-char prefixes
        and keywords precede SYMBOL).
    """
    lg = rply.LexerGenerator()
    lg.ignore(r"\s+")
    tokens = [
        ("LPAREN", r"\("),
        ("RPAREN", r"\)"),
        ("LBRACE", r"\{"),
        ("RBRACE", r"\}"),
        ("EQ", r"=="),
        ("NE", r"!="),
        ("LE", r"<="),
        ("GE", r">="),
        ("LT", r"<"),
        ("GT", r">"),
        ("DEFINE", r":="),
        ("COMMA", r","),
        ("SC", r";"),
        ("COLON", r":"),
        ("ASSIGN", r"="),
        ("NOT", r"!"),
        ("AND", r"&&"),
        ("OR", r"\|\|"),
        ("ADD", r"\+"),
        ("SUB", r"-"),
        ("MUL", r"\*"),
        ("DIV", r"/"),
        ("POW", r"\^"),
        ("MOD", r"%"),
        ("VALUE_FLOAT", r"\d+\.\d+|\d+\.|\.\d+"),
        ("VALUE_INT", r"\d+"),
        ("VALUE_STR", r"\"(.*?)\""),
        # Keywords carry a trailing \b so identifiers that merely START
        # with a keyword ("iffy", "format", "printer", "intx", "truely")
        # fall through to SYMBOL instead of mis-lexing as keyword+SYMBOL.
        ("TRUE", r"true\b"),
        ("FALSE", r"false\b"),
        ("IF", r"if\b"),
        ("ELSE", r"else\b"),
        ("FOR", r"for\b"),
        ("WHILE", r"while\b"),
        ("FN", r"fn\b"),
        # ("RETURN", r"return"),
        # ("BREAK", r"break"),
        ("INT", r"int\b"),
        ("FLOAT", r"float\b"),
        ("STR", r"str\b"),
        ("BOOL", r"bool\b"),
        ("CAST", r"cast\b"),
        ("PRINTLN", r"println\b"),
        ("PRINT", r"print\b"),
        ("SYMBOL", r"[a-zA-Z_][a-zA-Z0-9_]*"),
    ]
    for name, pattern in tokens:
        lg.add(name, pattern)
    token_names = [name for name, _ in tokens]
    return lg.build(), token_names
def create_lexer():
    """Return a lexer for ternary/boolean/arithmetic expressions over `n`.

    Rules are tried in table order, so two-character operators ('||', '&&',
    '==', '!=', '<=', '>=') are matched before their one-character prefixes
    ('!', '<', '>').
    """
    table = (
        ('IF', r'[?]'),
        ('ELSE', r':'),
        ('OR', r'[|][|]'),
        ('AND', r'[&][&]'),
        ('EQ', r'[!=]='),
        ('CMP', r'[<>]=?'),
        ('ADDSUB', r'[+-]'),
        ('MULDIV', r'[*/%]'),
        ('NOT', r'!'),
        ('LPAR', r'[(]'),
        ('RPAR', r'[)]'),
        ('VAR', r'n'),
        ('INT', r'[0-9]+'),
    )
    gen = rply.LexerGenerator()
    for tok_name, regex in table:
        gen.add(tok_name, regex)
    # Spaces and tabs separate tokens but are otherwise meaningless.
    gen.ignore(r'[ \t]+')
    return gen.build()
def lexer():
    """Build and return the rply lexer for this language.

    Registration order is precedence: 'ARROW'/'IADD'/comparisons come
    before their single-character prefixes, both quote styles map to STR,
    and keywords (BOOL, NONE) precede NAME.

    Fixes applied:
    - NUM pattern was ``[-+?[0-9]*\\.?[0-9]+`` — a malformed character
      class (containing '?' and '[' and allowing repeated signs); the
      intended pattern is ``[-+]?[0-9]*\\.?[0-9]+``.
    - BOOL/NONE gained ``\\b`` so names like "Truex" or "Nones" lex as
      NAME instead of keyword + NAME.
    - All escape-bearing patterns are raw strings (non-raw '\\+' etc. is a
      SyntaxWarning on modern Python); runtime values are unchanged.
    """
    lg = rply.LexerGenerator()
    lg.add('ARROW', r'->')
    lg.add('IADD', r'\+=')
    lg.add('EQ', r'==')
    lg.add('NE', r'!=')
    lg.add('GE', r'>=')
    lg.add('LE', r'<=')
    lg.add('LBRA', r'\[')
    lg.add('RBRA', r'\]')
    lg.add('PLUS', r'\+')
    lg.add('MINUS', r'-')
    lg.add('MUL', r'\*')
    lg.add('DIV', r'/')
    lg.add('LACC', r'{')
    lg.add('RACC', r'}')
    lg.add('LT', r'<')
    lg.add('GT', r'>')
    lg.add('DOT', r'\.')
    lg.add('AMP', r'&')
    lg.add('DOLLAR', r'\$')
    lg.add('PIPE', r'\|')
    lg.add('CARET', r'\^')
    lg.add('TILDE', r'~')
    lg.add('MOD', r'%')
    lg.add('LPAR', r'\(')
    lg.add('RPAR', r'\)')
    lg.add('ASGT', r'=')
    lg.add('COMMA', r',')
    lg.add('COLON', r':')
    lg.add('QM', r'\?')
    lg.add('STR', r"'(.*?)'")
    lg.add('STR', r'"(.*?)"')
    lg.add('BOOL', r'True\b|False\b')
    lg.add('NONE', r'None\b')
    lg.add('NAME', r'[a-zA-Z_][a-zA-Z0-9_]*')
    # Optional sign, optional integer part, optional dot, mandatory digits.
    # Note: '-'/'+'/'?'/'[' still lex as their own tokens first because
    # those rules are registered earlier.
    lg.add('NUM', r'[-+]?[0-9]*\.?[0-9]+')
    lg.add('NL', r'\n')
    lg.add('COM', r'#(.*)')
    lg.add('TABS', r'\t+')
    # Only spaces are ignored; newlines and tabs are significant tokens.
    lg.ignore(r' +')
    return lg.build()
def _pass2(self, source):
    """Second assembler pass: encode opcodes and arguments into ``self.ram``.

    Lexes *source* with one rule per opcode; for each matched opcode it
    appends the opcode's alias index (as bare hex, '0x' prefix stripped)
    to ``self.ram``, followed by any argument value captured by the
    argument regexes.

    NOTE(review): the original docstring said "removes labels from source",
    but this pass never modifies *source* — label removal happens in
    ``_pass1``.  It also returns nothing; its effect is mutating
    ``self.ram``.
    """
    ArgLexer = rply.LexerGenerator()
    # Skip ';' line comments and whitespace, matching _pass1's lexer.
    ArgLexer.ignore(";(.+)?\n")
    ArgLexer.ignore("\s+")
    # Pre-compile a matcher for each placeholder that denotes an argument.
    recargs = []
    for tr in self.as_g_tr:
        if tr in self.is_args:
            recargs.append(re.compile(self.as_g_tr[tr]))
    # One lexer rule per opcode, with generic placeholders expanded to
    # their concrete sub-regexes.
    for op in self.opcodes:
        asr = op[self.AS_REGEX]
        for tr in self.as_g_tr:
            asr = asr.replace(tr, self.as_g_tr[tr])
        ArgLexer.add(op[self.ALIAS], asr)
    for tok in ArgLexer.build().lex(source):
        narg = self.num_args[self.aliases.index(tok.name)]
        # Opcode word: index of the alias, rendered as hex without '0x'.
        self.ram.append(hex(self.aliases.index(tok.name))[2:])
        if narg:
            for arg in recargs:
                m = arg.findall(tok.value)
                if m:
                    # m[0][2:] drops the first two characters of the
                    # captured argument — presumably a '0x'-style prefix;
                    # TODO confirm against the as_g_tr patterns.
                    self.ram.append(m[0][2:])
import rply

lexer = rply.LexerGenerator()

# Token table; rply tries rules in registration (insertion) order.
# NOTE: the original dict listed 'var' twice — first with an empty pattern,
# then with the quoted-identifier pattern.  Python keeps the FIRST key's
# position and the LAST value, so 'var' effectively sat at this slot with
# the quoted-identifier regex; that single live entry is kept below and the
# dead duplicate removed.
tokens = {
    'num': r'\d+',
    'add': r'\+',
    'sub': r'\-',
    'equ': r'\=',
    'var': r'"[a-zA-Z][a-zA-Z0-9]*"',
    'end': r'\;',
    'fac': r'\!',
    'mul': r'\*',
    'open-paren': r'\(',
    'close-paren': r'\)',
    'open-brace': r'\{',
    'close-brace': r'\}',
}
for name, pattern in tokens.items():
    lexer.add(name, pattern)

lexer.ignore(r'\s+')
# NOTE(review): ignoring ';' shadows the 'end' token above — rply checks
# ignore rules before token rules, so 'end' can never be emitted.  Kept
# as-is to preserve existing behavior; confirm intent.
lexer.ignore(r'\;')
lexer.ignore(r'\n')  # redundant with \s+ above; harmless

l = lexer.build()
def __init__(self):
    """Create the two lexer generators; built lexers start out unset."""
    # One generator per lexing stage.
    self.flg = rply.LexerGenerator()
    self.slg = rply.LexerGenerator()
    # Built-lexer slots: None until assigned elsewhere — TODO confirm
    # where the build happens.
    self._fl = self._sl = None