def __init__(self): """Create a ply lexer.""" self.lexer = lex.lex(module=self, debug=False, errorlog=lex.NullLogger()) self.lineno = 1
def __init__(self):
    self.lex = lex.lex(
        module=self,
        debug=False,
        reflags=(re.UNICODE | re.MULTILINE),
        errorlog=lex.NullLogger(),
    )
def build(self, pattern, **kwargs):
    """Create a lexer."""
    self.lexer = lex.lex(module=self, errorlog=lex.NullLogger(), **kwargs)
    # self.lexer.input(pattern)
    self.storeLexTokenList()
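# A minimal, self-contained sketch of how a class-based lexer like the ones
# above is typically built and driven. Everything here (class name, token
# set, input string) is illustrative and not taken from the original sources.
import ply.lex as lex

class NumberLexer(object):
    tokens = ('NUMBER',)
    t_ignore = ' \t'

    def t_NUMBER(self, t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_error(self, t):
        t.lexer.skip(1)

    def build(self, **kwargs):
        # NullLogger suppresses PLY's construction-time warnings and errors.
        self.lexer = lex.lex(module=self, errorlog=lex.NullLogger(), **kwargs)
        return self.lexer

lexer = NumberLexer().build()
lexer.input("12 34")
for tok in lexer:
    print(tok.type, tok.value)  # NUMBER 12, then NUMBER 34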
def __init__(self, skip=False, debug=0, log=lex.NullLogger()):
    self.skip = skip
    self.errors = []  # error list
    self.log_tmp = LogTemplate()
    self.lexer = lex.lex(module=self, reflags=re.MULTILINE, debug=debug, debuglog=log)
def create_lexer(nets={}):
    global reserved
    reserved.update(nets)
    global tokens
    tokens = re_tokens + list(reserved.values())

    t_SEMI = r';'
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_COMMA = r','
    t_DOT = r'\.'
    t_LSQUARE = r'\['
    t_RSQUARE = r'\]'
    t_LCURLY = r'\{'
    t_RCURLY = r'\}'
    t_COLON = r':'
    t_EQ = r'\='
    t_BASE = r'\'[bBoOdDhH]'
    t_SFLOAT = r'[\+-]?[\d_A-Fa-f]+\.[\d_A-Fa-f]+'
    t_SIGN = r'[\+-]'
    t_UNSIGNED = r'[\d_A-Fa-fXxZz]+'

    # NOTE: functionally defined tokens are added first,
    # in the same order in which they are defined.
    def t_ID(t):
        r'[a-zA-Z_][\w$]*|\\[\S]+'
        t.type = reserved.get(t.value, 'ID')  # check for reserved words
        return t

    t_ignore = " \t"

    def t_ignore_COMMENT(t):
        r'//.*\n'
        t.lexer.lineno += 1

    # Define a rule to track line numbers (\n tokens are otherwise discarded).
    def t_newline(t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Error handling rule.
    def t_error(t):
        print("Illegal character '%s'" % t.value[0])
        t.lexer.skip(1)

    return lex.lex(errorlog=lex.NullLogger())
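# Hedged usage sketch for the factory above: create_lexer() returns a plain
# PLY lexer object, so a caller can feed it text and pull tokens one at a
# time. The input string here is illustrative only.
lexer = create_lexer()
lexer.input("foo = bar[3:0];\n")
while True:
    tok = lexer.token()  # returns None once the input is exhausted
    if tok is None:
        break
    print(tok.type, tok.value, tok.lineno)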
def c_lexer(): t_EQ = r"==" def t_COMMENT(t): r"\#.*" def t_STRING(t): r"\"[_A-Za-z0-9 \t\n!@#$%^&*()_+;:'<>,./?=]+\"" t.value = str(t.value)[1:-1] return t def t_BOOL(t): r"(true|false)" if t.value == "true": t.value = True if t.value == "false": t.value = False return t def t_ID(t): r"[a-zA-Z_]+[a-zA-Z_0-9]*" if t.value in reserved: t.type = reserved[t.value] else: t.type = "NAME" return t def t_FLOAT(t): r"\d+\.\d+" t.value = float(t.value) return t def t_INT(t): r"\d+" t.value = int(t.value) return t t_ignore = ' \t' def t_error(t): print("Coperr.UnexpectedCharacter {}".format(t.value[0])) t.lexer.skip(1) return lex.lex(errorlog=lex.NullLogger() ) # errorlog=lex.NullLogger() to not show warnings
def reset(self):
    if debug.logger & debug.flagLexer:
        logger = debug.logger.getCurrentLogger()
    else:
        logger = lex.NullLogger()
    if debug.logger & debug.flagGrammar:
        debuglogger = debug.logger.getCurrentLogger()
    else:
        debuglogger = None
    self.lexer = lex.lex(module=self, reflags=re.DOTALL,
                         outputdir=self._tempdir,
                         debuglog=debuglogger, errorlog=logger)
def syntax_parse(argdata, inputfilename, bsvdefines):
    global globalfilename
    globalfilename = inputfilename
    data = preprocess(argdata + '\n', bsvdefines)
    lexer = lex.lex(errorlog=lex.NullLogger())
    parserdir = scripthome + '/syntax'
    if not os.path.isdir(parserdir):
        os.makedirs(parserdir)
    if parserdir not in sys.path:
        sys.path.append(parserdir)
    parser = yacc.yacc(optimize=1, errorlog=yacc.NullLogger(),
                       outputdir=parserdir,
                       debugfile=parserdir + '/parser.out')
    if noisyFlag:
        print('Parsing:', inputfilename)
    if parseDebugFlag:
        return parser.parse(data, debug=1)
    return parser.parse(data)
def parse(file, filename, debug=True):
    lexer = lex.lex(debug=True) if debug \
        else lex.lex(debug=False, errorlog=lex.NullLogger())
    if file is not None:
        lexer.input(file.read())
    else:
        with open(filename, 'r') as f:
            lexer.input(f.read())
    setattr(lexer, "filename", filename)
    parser = yacc.yacc(debug=True) if debug \
        else yacc.yacc(debug=False, errorlog=yacc.NullLogger())
    parents = []
    spec = SpecFile(filename)
    spec.parent = None
    parents.append(spec)
    setattr(parser, "parents", parents)
    spec = parser.parse(lexer=lexer, tracking=True)
    return spec
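# Sketch of invoking the parse() helper above, assuming the grammar rules and
# SpecFile live in the same module; the file name is made up. Note that
# tracking=True makes yacc record line/position spans for every production.
with open("example.spec") as f:
    spec = parse(f, "example.spec", debug=False)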
def __init__(self, fp=None, bufsize=65534, verbose=False, **lexer_args):
    """
    Constructor.

    ``fp`` is a ``file`` object (or feed me lines via ``send()``).
    ``bufsize``: read input lines (``fp.readline()``) into a buffer until
    it's over ``bufsize``, then parse the buffer.
    ``lexer_args`` are passed on to the PLY lexer.
    """
    if verbose:
        sys.stdout.write(self.__class__.__name__ + ".init()\n")
    self._fp = fp
    self._bufsize = bufsize
    self._verbose = bool(verbose)
    self.lexer = lex.lex(module=self, errorlog=lex.NullLogger(), **lexer_args)
def create_lexer(pin_map={}):
    global pinmap
    pinmap = pin_map
    global reserved
    reserved = {k: 'PIN' for k in pin_map.keys()}
    global tokens
    tokens = re_tokens + list(reserved.values())
    if 'PIN' not in tokens:
        tokens.append('PIN')

    t_NOT_A = r'\''
    t_NOT_B = r'\!'
    t_XOR = r'\^'
    t_AND = r'\*|\&'
    t_OR = r'\+|\|'
    t_ONE = r'1'
    t_ZERO = r'0'
    t_LPAR = r'\('
    t_RPAR = r'\)'

    def t_ID(t):
        r'[a-zA-Z][a-zA-Z0-9]*|\\\"[0-9][^\"]*\\\"'
        if reserved.get(t.value, 'ID') != 'PIN':
            print("ERROR: unknown Pin", t.value)
            return
        else:
            t.type = 'PIN'
            return t

    t_ignore = " \t\n"

    def t_newline(t):
        r'\n+|\\\n+'
        t.lexer.lineno += len(t.value)

    def t_error(t):
        print("Illegal character '%s' at %d" % (t.value[0], t.lexer.lineno))
        t.lexer.skip(1)

    return lex.lex(errorlog=lex.NullLogger())
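# Illustrative driver for the pin-map factory above: identifiers in pin_map
# become PIN tokens, and anything else is rejected by t_ID. The pin names and
# the boolean expression are assumptions for demonstration only.
lexer = create_lexer(pin_map={'A': 0, 'B': 1})
lexer.input("A * B + !A")
for tok in lexer:
    print(tok.type, tok.value)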
    t.type = reserved.get(t.value, 'NATIVEQUOTE')
    return t

class TokenErrorNode(object):
    def __init__(self, token):
        self.lineno = iu.Location(iu.filename, token.lineno)

def t_error(t):
    raise iu.IvyError(TokenErrorNode(t),
                      "illegal character '{}'".format(t.value[0]))

lexer = lex.lex(errorlog=lex.NullLogger())

class LexerVersion(object):
    """Context manager that sets the lexer based on the given language version."""

    def __init__(self, version):
        self.version = version

    def __enter__(self):
        global reserved
        self.orig_reserved = reserved
        reserved = dict(all_reserved)
        # print("version {}".format(self.version))
        if self.version <= [1, 0]:
            for s in ['state', 'local']:
def __init__(self, skipinvalid=False, debug=0, log=lex.NullLogger()):
    self.skipinvalid = skipinvalid
    self.lexer = lex.lex(module=self, reflags=re.MULTILINE, debug=debug, debuglog=log)
def setUp(self):
    self.my_lexer = PDFLexer()
    self.my_lexer.build(debug=False, errorlog=lex.NullLogger())
def parse_to_ast(s, cpp=False, cpp_flags=[], optimise=False):
    '''Build a lexer and parser for the given grammar and then parse the
    string s.'''
    if isinstance(s, str):
        filename = None
    else:
        # Assume s is a file handle.
        filename = s.name
        s = s.read()

    # Pre-process the source with CPP if requested.
    if cpp:
        toolprefix = os.environ.get('TOOLPREFIX', '')
        p = subprocess.Popen(['%scpp' % toolprefix, '-P'] + cpp_flags,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        if filename is not None:
            s = ('#line 0 "%s"\n' % filename) + s
        stdout, stderr = p.communicate(s)
        if p.returncode != 0:  # pylint: disable=E1101
            raise Exception('CPP failed: %s' % stderr)
        s = stdout

    log.info('Building keyword list...')
    # The ID token rule relies on checking whether the matched token is one
    # of a reserved set of keywords. We need to build that list (in util)
    # before doing any lexing.
    util.reset_keywords()
    util.merge_keywords(IDLKeywords.keywords)
    util.merge_keywords(ADLKeywords.keywords)
    log.info('Done')

    log.info('Building token list...')
    # lex expects a list of valid tokens to be defined in the variable
    # 'tokens'. This is quite annoying because the list of tokens can be
    # deduced automatically (which is what get_tokens() does) and there is no
    # nice way of namespacing tokens. This means that building the parser
    # below will generate spurious warnings about unused tokens if you are
    # only parsing a subset of the CAmkES grammar.
    tokens = util.get_tokens(globals())
    log.info('Done')

    # Lex and Yacc accept a logger to notify the caller of events, but they
    # are really noisy; much more so than is relevant for our purposes.
    # Squash their output unless the user has specifically requested it.
    errorlog = log.log if log.log.getEffectiveLevel() < logging.WARNING \
        else lex.NullLogger()

    # Enable optimised lexer and parser construction if the caller has
    # requested it. See the PLY docs for the exact effect of this.
    optimize = 1 if optimise else 0

    log.info('Building lexer...')
    try:
        lex.lex(errorlog=errorlog, optimize=optimize).filename = filename
    except Exception as inst:
        raise Exception('Failed to build lexer: %s' % str(inst))
    log.info('Done')

    # yacc by default assumes the starting token is the first one it
    # lexically encounters in globals(). This is almost certainly *not* the
    # behaviour we want, so explicitly direct it by specifying the start
    # symbol according to the grammar we are trying to parse.
    start = 'camkes'
    log.info('Building parser...')
    try:
        yacc.yacc(errorlog=errorlog, optimize=optimize)
    except Exception as inst:
        raise Exception('Failed to build parser: %s' % str(inst))
    log.info('Done\n')

    ast = yacc.parse(s)

    # Set the source filename of the AST items if we know it.
    assign_filenames(ast, filename)

    return ast
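# A hedged example of calling parse_to_ast() with C pre-processing enabled;
# the file name and the -D flag are illustrative, not from the original code.
with open('component.camkes') as f:
    ast = parse_to_ast(f, cpp=True, cpp_flags=['-DNDEBUG'])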
def build_lexer():
    # TODO: use file for logging
    # lex_errorlog = ply.lex.PlyLogger(open(os.path.join(USER_CONFIG_DIR, "lex.log"), "w"))
    lex_errorlog = lex.NullLogger()
    lexer = lex.lex(errorlog=lex_errorlog)
    return lexer
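# The TODO above could be addressed with PLY's own PlyLogger, which adapts a
# writable file object to the logger interface lex() expects. The log path is
# an assumption, and errorlog is only consulted while the lexer is built, so
# closing the file afterwards is safe. Module-level token rules are assumed.
import ply.lex as lex

def build_lexer_logged(log_path="lex.log"):
    with open(log_path, "w") as f:
        return lex.lex(errorlog=lex.PlyLogger(f))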
def reset(self):
    # Build the lexer
    self.lexer = lex.lex(module=self, optimize=1,
                         errorlog=lex.NullLogger(), lextab='lextabext')
def build(self, **kwargs):
    self.lexer = lex.lex(module=self, errorlog=lex.NullLogger(), **kwargs)
    self.lexer.eof = (1, 1)
    self.comment_level = 0
    self.string = ""
elif c == "t": c = "\t" new_str += c escaped = 0 else: if c == "\\": escaped = 1 else: new_str += c t.value = new_str return t def t_SYMBOL(t): r'[a-zA-Z_][a-zA-Z0-9_]*' t.type = reserved.get(t.value, 'SYMBOL') return t t_newline = token_newline t_ignore = ' \t' t_error = token_error # Build the lexer lex.lex(reflags=RE_UNICODE, errorlog=lex.NullLogger()) if __name__ == '__main__': lex.runmain()