class BisonParser(object):
    """
    Base parser class.

    Subclass this and provide methods named 'on_TargetName', where
    'TargetName' is the name of each target in your grammar (.y) file.
    Targets without a matching handler are wrapped in an instance of
    ``default_node_class``.
    """

    # ---------------------------------------
    # override these if you need to

    # Command and options for running yacc/bison, except for filename arg
    bisonCmd = ['bison', '-d', '-v', '-t']

    bisonFile = 'tmp.y'
    bisonCFile = 'tmp.tab.c'

    # Name of header file generated by bison cmd.
    bisonHFile = 'tmp.tab.h'

    # C output file from bison gets renamed to this.
    bisonCFile1 = 'tmp.bison.c'

    # Bison-generated header file gets renamed to this.
    bisonHFile1 = 'tokens.h'

    # command and options for running [f]lex, except for filename arg.
    flexCmd = ['flex', ]
    flexFile = 'tmp.l'
    flexCFile = 'lex.yy.c'

    # C output file from flex gets renamed to this.
    flexCFile1 = 'tmp.lex.c'

    # CFLAGS added before all command line arguments.
    cflags_pre = ['-fPIC']

    # CFLAGS added after all command line arguments.
    cflags_post = ['-O3', '-g']

    # Directory used to store the generated / compiled files.
    buildDirectory = './'

    # Add debugging symbols to the binary files.
    debugSymbols = 1

    # Enable verbose debug message sent to stdout.
    verbose = 0

    # Timeout in seconds after which the parser is terminated.
    # TODO: this is currently not implemented.
    timeout = 1

    # Default to sys.stdin.
    file = None

    # Last parsed target, top of parse tree.
    last = None

    # Enable this to keep all temporary engine build files.
    keepfiles = 0

    # When false (the default), report_last_error() re-raises parse errors;
    # when true, errors are printed and run() keeps going.
    interactive = 0

    # Prefix of the shared object / dll file. Defaults to 'modulename-parser'.
    # If the module is executed directly, "__main__" will be used (since that
    # is the "module name", in that case).
    bisonEngineLibName = None

    # Class to use by default for creating new parse nodes. If set to None,
    # BisonNode will be used.
    default_node_class = BisonNode

    # Number of engine errors run() tolerates before re-raising.
    error_threshold = 10

    def __init__(self, **kw):
        """
        Abstract representation of parser

        Keyword arguments:
            - read - a callable accepting an int arg (nbytes) and returning a
              string, default is this class' read() method
            - file - a file object, or string of a pathname to open as a file,
              defaults to sys.stdin. Note that you can leave this blank, and
              pass a file keyword argument to the .run() method.
            - verbose - set to 1 to enable verbose output messages, default 0
            - keepfiles - if non-zero, keeps any files generated in the
              course of building the parser engine; by default, all these
              files get deleted upon a successful engine build
            - interactive - if non-zero, parse errors are printed instead of
              re-raised (see report_last_error()); default is the class
              attribute of the same name
            - defaultNodeClass - the class to use for creating parse nodes,
              default is self.default_node_class (in this base class,
              BisonNode)

        Raises:
            - Exception - if 'file' is a pathname that cannot be opened
        """
        # setup
        read = kw.get('read', None)
        if read:
            self.read = read

        fileobj = kw.get('file', None)
        if fileobj:
            if isinstance(fileobj, str):
                try:
                    fileobj = open(fileobj, 'rb')
                except (IOError, OSError):
                    # fileobj is still the pathname here: open() failed
                    # before the assignment took place.
                    raise Exception('Cannot open input file %s' % fileobj)
            self.file = fileobj
        else:
            self.file = sys.stdin

        nodeClass = kw.get('defaultNodeClass', None)
        if nodeClass:
            # _handle() reads default_node_class; the camelCase attribute is
            # kept as well for code that still looks it up under that name.
            self.default_node_class = nodeClass
            self.defaultNodeClass = nodeClass

        self.verbose = kw.get('verbose', 0)

        if 'keepfiles' in kw:
            self.keepfiles = kw['keepfiles']

        if 'interactive' in kw:
            self.interactive = kw['interactive']

        # if engine lib name not declared, invent one
        if not self.bisonEngineLibName:
            self.bisonEngineLibName = self.__class__.__module__ + '-parser'

        # get an engine
        self.engine = ParserEngine(self)

    def __getitem__(self, idx):
        """Index into the last parsed target (self.last)."""
        return self.last[idx]

    def _handle(self, targetname, option, names, values):
        """
        Callback which receives a target from parser, as a targetname
        and list of term names and values.

        Tries to dispatch to on_TargetName() methods if they exist,
        otherwise wraps the target in a default_node_class object
        (BisonNode when default_node_class is None).

        Returns the handler's result (also stored in self.last). If the
        handler raises, the exception object itself is stored in self.last
        and returned rather than propagated.
        """
        handler = getattr(self, 'on_' + targetname, None)

        if handler:
            if self.verbose:
                try:
                    hdlrline = handler.__code__.co_firstlineno
                except AttributeError:
                    # handler is not a plain function/method (e.g. a class)
                    hdlrline = handler.__init__.__code__.co_firstlineno

                print('BisonParser._handle: call handler at line %s with: %s'
                      % (hdlrline, str((targetname, option, names, values))))
            try:
                self.last = handler(target=targetname, option=option,
                                    names=names, values=values)
            except Exception as e:
                print("returning exception", e, targetname, option, names,
                      values)
                self.last = e
                return e
        else:
            if self.verbose:
                print('no handler for %s, using default' % targetname)

            # Honour the documented fallback when the attribute is None.
            cls = self.default_node_class or BisonNode
            self.last = cls(target=targetname, option=option, names=names,
                            values=values)

        # assumedly the last thing parsed is at the top of the tree
        return self.last

    def handle_timeout(self, signum, frame):
        """Signal-handler style callback: always raises TimeoutError."""
        raise TimeoutError('Computation exceeded timeout limit.')

    def reset(self):
        """Reset the underlying parser engine."""
        self.engine.reset()

    def run(self, **kw):
        """
        Runs the parser, and returns the top-most parse target.

        Keywords:
            - file - either a string, comprising a file to open and read
              input from, or a Python file object
            - read - a callable to use instead of self.read for this run
            - debug - enables garrulous parser debugging output, default 0

        The engine is run repeatedly until the input file is closed. Each
        engine error is passed to report_last_error(); once more than
        error_threshold errors have occurred the error is re-raised.
        self.file and self.read are restored on exit, even on error.

        Raises:
            - Exception - if 'file' is a pathname that cannot be opened
        """
        if self.verbose:
            print('Parser.run: calling engine')

        # grab keywords
        fileobj = kw.get('file', self.file)
        filename = None
        opened_here = False

        if isinstance(fileobj, str):
            filename = fileobj
            try:
                fileobj = open(filename, 'rb')
            except (IOError, OSError):
                raise Exception('Cannot open input file "%s"' % filename)
            opened_here = True

        read = kw.get('read', self.read)
        debug = kw.get('debug', 0)

        # back up existing attribs
        oldfile = self.file
        oldread = self.read

        try:
            # plug in new ones, if given (file objects are accepted as-is)
            if fileobj is not None:
                self.file = fileobj
            if read:
                self.read = read

            if self.verbose and self.file.closed:
                print('Parser.run(): self.file', self.file, 'is closed')

            error_count = 0

            # TODO: add option to fail on first error.
            while not self.file.closed:
                # do the parsing job, spew if error
                self.last = None
                self.engine.reset()

                try:
                    self.engine.runEngine(debug)
                except Exception as e:
                    error_count += 1

                    if error_count > self.error_threshold:
                        raise

                    self.report_last_error(filename, e)

                if self.verbose:
                    print('Parser.run: back from engine')

                # Optional subclass hook: may replace the parse result.
                if hasattr(self, 'hook_run'):
                    self.last = self.hook_run(filename, self.last)

                if self.verbose and not self.file.closed:
                    print('last:', self.last)

            if self.verbose:
                print('last:', self.last)
        finally:
            # restore old values, also when an error propagates
            self.file = oldfile
            self.read = oldread

            # do not leak a file that this call opened itself
            if opened_here and not fileobj.closed:
                fileobj.close()

        if self.verbose:
            print('------------------ result=', self.last)

        # TODO: return last result (see while loop):
        # return self.last[:-1]
        return self.last

    def read(self, nbytes):
        """
        Override this in your subclass, if you desire.

        Arguments:
            - nbytes - the maximum length of the string which you may return.
              DO NOT return a string longer than this, or else Bad Things will
              happen.

        Returns one line (at most nbytes long) read from self.file.
        """
        # default to stdin
        if self.verbose:
            print('Parser.read: want %s bytes' % nbytes)

        data = self.file.readline(nbytes)

        if self.verbose:
            print('Parser.read: got %s bytes' % len(data))
            print(data)

        return data

    def report_last_error(self, filename, error):
        """
        Report a raised exception. Must be called from within the 'except'
        block handling that exception.

        Depending on the mode in which the parser is running, it will:

         - re-raise the exception currently being handled
           (interactive=False, the default).

         - print the traceback to stderr and 'ERROR: <error>' to stdout
           (verbose=True; interactive=True).

         - print only 'ERROR: <error>' to stdout
           (verbose=False; interactive=True).

        Arguments:
            - filename - name of the input file, or None; currently unused
            - error - the exception instance being reported
        """
        # TODO: filename/line specific messages are not implemented.
        if not self.interactive:
            raise

        if self.verbose:
            traceback.print_exc()

        print('ERROR:', error)

    def report_syntax_error(self, msg, yytext, first_line, first_col,
                            last_line, last_col):
        """
        Raise a BisonSyntaxError describing a syntax error at the given
        source location. Newlines in yytext are shown escaped.
        """
        yytext = yytext.replace('\n', '\\n')
        args = (first_line, first_col, last_line, last_col, msg, yytext)
        raise BisonSyntaxError('%d.%d-%d.%d: "%s" near "%s".' % args, args)
class BisonParser(object):
    """
    Base parser class.

    Subclass this and provide methods named 'on_TargetName', where
    'TargetName' is the name of each target in your grammar (.y) file.
    Targets without a matching handler are wrapped in an instance of
    ``default_node_class``.
    """

    # ---------------------------------------
    # override these if you need to

    # Command and options for running yacc/bison, except for filename arg
    bisonCmd = ['bison', '-d', '-v', '-t']

    bisonFile = 'tmp.y'
    bisonCFile = 'tmp.tab.c'

    # Name of header file generated by bison cmd.
    bisonHFile = 'tmp.tab.h'

    # C output file from bison gets renamed to this.
    bisonCFile1 = 'tmp.bison.c'

    # Bison-generated header file gets renamed to this.
    bisonHFile1 = 'tokens.h'

    # command and options for running [f]lex, except for filename arg.
    flexCmd = ['flex', ]
    flexFile = 'tmp.l'
    flexCFile = 'lex.yy.c'

    # C output file from flex gets renamed to this.
    flexCFile1 = 'tmp.lex.c'

    # CFLAGS added before all command line arguments.
    cflags_pre = ['-fPIC']

    # CFLAGS added after all command line arguments.
    cflags_post = ['-O3', '-g']

    # Directory used to store the generated / compiled files.
    buildDirectory = './'

    # Add debugging symbols to the binary files.
    debugSymbols = 1

    # Enable verbose debug message sent to stdout.
    verbose = 0

    # Timeout in seconds after which the parser is terminated.
    # TODO: this is currently not implemented.
    timeout = 1

    # Default to sys.stdin.
    file = None

    # Last parsed target, top of parse tree.
    last = None

    # Enable this to keep all temporary engine build files.
    keepfiles = 0

    # When false (the default), report_last_error() re-raises parse errors;
    # when true, errors are printed and run() keeps going.
    interactive = 0

    # Prefix of the shared object / dll file. Defaults to 'modulename-parser'.
    # If the module is executed directly, "__main__" will be used (since that
    # is the "module name", in that case).
    bisonEngineLibName = None

    # Class to use by default for creating new parse nodes. If set to None,
    # BisonNode will be used.
    default_node_class = BisonNode

    # Number of engine errors run() tolerates before re-raising.
    error_threshold = 10

    def __init__(self, **kw):
        """
        Abstract representation of parser

        Keyword arguments:
            - read - a callable accepting an int arg (nbytes) and returning a
              string, default is this class' read() method
            - file - a file object, or string of a pathname to open as a file,
              defaults to sys.stdin. Note that you can leave this blank, and
              pass a file keyword argument to the .run() method.
            - verbose - set to 1 to enable verbose output messages, default 0
            - keepfiles - if non-zero, keeps any files generated in the
              course of building the parser engine; by default, all these
              files get deleted upon a successful engine build
            - interactive - if non-zero, parse errors are printed instead of
              re-raised (see report_last_error()); default is the class
              attribute of the same name
            - defaultNodeClass - the class to use for creating parse nodes,
              default is self.default_node_class (in this base class,
              BisonNode)

        Raises:
            - Exception - if 'file' is a pathname that cannot be opened
        """
        # setup
        read = kw.get('read', None)
        if read:
            self.read = read

        fileobj = kw.get('file', None)
        if fileobj:
            if isinstance(fileobj, str):
                try:
                    fileobj = open(fileobj, 'rb')
                except (IOError, OSError):
                    # fileobj is still the pathname here: open() failed
                    # before the assignment took place.
                    raise Exception('Cannot open input file %s' % fileobj)
            self.file = fileobj
        else:
            self.file = sys.stdin

        nodeClass = kw.get('defaultNodeClass', None)
        if nodeClass:
            # _handle() reads default_node_class; the camelCase attribute is
            # kept as well for code that still looks it up under that name.
            self.default_node_class = nodeClass
            self.defaultNodeClass = nodeClass

        self.verbose = kw.get('verbose', 0)

        if 'keepfiles' in kw:
            self.keepfiles = kw['keepfiles']

        if 'interactive' in kw:
            self.interactive = kw['interactive']

        # if engine lib name not declared, invent one
        if not self.bisonEngineLibName:
            self.bisonEngineLibName = self.__class__.__module__ + '-parser'

        # get an engine
        self.engine = ParserEngine(self)

    def __getitem__(self, idx):
        """Index into the last parsed target (self.last)."""
        return self.last[idx]

    def _handle(self, targetname, option, names, values):
        """
        Callback which receives a target from parser, as a targetname
        and list of term names and values.

        Tries to dispatch to on_TargetName() methods if they exist,
        otherwise wraps the target in a default_node_class object
        (BisonNode when default_node_class is None).

        Returns the handler's result (also stored in self.last). If the
        handler raises, the exception object itself is stored in self.last
        and returned rather than propagated.
        """
        handler = getattr(self, 'on_' + targetname, None)

        if handler:
            if self.verbose:
                try:
                    hdlrline = handler.__code__.co_firstlineno
                except AttributeError:
                    # handler is not a plain function/method (e.g. a class)
                    hdlrline = handler.__init__.__code__.co_firstlineno

                print('BisonParser._handle: call handler at line %s with: %s'
                      % (hdlrline, str((targetname, option, names, values))))
            try:
                self.last = handler(target=targetname, option=option,
                                    names=names, values=values)
            except Exception as e:
                print("returning exception", e, targetname, option, names,
                      values)
                self.last = e
                return e
        else:
            if self.verbose:
                print('no handler for %s, using default' % targetname)

            # Honour the documented fallback when the attribute is None.
            cls = self.default_node_class or BisonNode
            self.last = cls(target=targetname, option=option, names=names,
                            values=values)

        # assumedly the last thing parsed is at the top of the tree
        return self.last

    def handle_timeout(self, signum, frame):
        """Signal-handler style callback: always raises TimeoutError."""
        raise TimeoutError('Computation exceeded timeout limit.')

    def reset(self):
        """Reset the underlying parser engine."""
        self.engine.reset()

    def run(self, **kw):
        """
        Runs the parser, and returns the top-most parse target.

        Keywords:
            - file - either a string, comprising a file to open and read
              input from, or a Python file object
            - read - a callable to use instead of self.read for this run
            - debug - enables garrulous parser debugging output, default 0

        The engine is run repeatedly until the input file is closed. Each
        engine error is passed to report_last_error(); once more than
        error_threshold errors have occurred the error is re-raised.
        self.file and self.read are restored on exit, even on error.

        Raises:
            - Exception - if 'file' is a pathname that cannot be opened
        """
        if self.verbose:
            print('Parser.run: calling engine')

        # grab keywords
        fileobj = kw.get('file', self.file)
        filename = None
        opened_here = False

        if isinstance(fileobj, str):
            filename = fileobj
            try:
                fileobj = open(filename, 'rb')
            except (IOError, OSError):
                raise Exception('Cannot open input file "%s"' % filename)
            opened_here = True

        read = kw.get('read', self.read)
        debug = kw.get('debug', 0)

        # back up existing attribs
        oldfile = self.file
        oldread = self.read

        try:
            # plug in new ones, if given (file objects are accepted as-is)
            if fileobj is not None:
                self.file = fileobj
            if read:
                self.read = read

            if self.verbose and self.file.closed:
                print('Parser.run(): self.file', self.file, 'is closed')

            error_count = 0

            # TODO: add option to fail on first error.
            while not self.file.closed:
                # do the parsing job, spew if error
                self.last = None
                self.engine.reset()

                try:
                    self.engine.runEngine(debug)
                except Exception as e:
                    error_count += 1

                    if error_count > self.error_threshold:
                        raise

                    self.report_last_error(filename, e)

                if self.verbose:
                    print('Parser.run: back from engine')

                # Optional subclass hook: may replace the parse result.
                if hasattr(self, 'hook_run'):
                    self.last = self.hook_run(filename, self.last)

                if self.verbose and not self.file.closed:
                    print('last:', self.last)

            if self.verbose:
                print('last:', self.last)
        finally:
            # restore old values, also when an error propagates
            self.file = oldfile
            self.read = oldread

            # do not leak a file that this call opened itself
            if opened_here and not fileobj.closed:
                fileobj.close()

        if self.verbose:
            print('------------------ result=', self.last)

        # TODO: return last result (see while loop):
        # return self.last[:-1]
        return self.last

    def read(self, nbytes):
        """
        Override this in your subclass, if you desire.

        Arguments:
            - nbytes - the maximum length of the string which you may return.
              DO NOT return a string longer than this, or else Bad Things will
              happen.

        Returns one line (at most nbytes long) read from self.file.
        """
        # default to stdin
        if self.verbose:
            print('Parser.read: want %s bytes' % nbytes)

        data = self.file.readline(nbytes)

        if self.verbose:
            print('Parser.read: got %s bytes' % len(data))
            print(data)

        return data

    def report_last_error(self, filename, error):
        """
        Report a raised exception. Must be called from within the 'except'
        block handling that exception.

        Depending on the mode in which the parser is running, it will:

         - re-raise the exception currently being handled
           (interactive=False, the default).

         - print the traceback to stderr and 'ERROR: <error>' to stdout
           (verbose=True; interactive=True).

         - print only 'ERROR: <error>' to stdout
           (verbose=False; interactive=True).

        Arguments:
            - filename - name of the input file, or None; currently unused
            - error - the exception instance being reported
        """
        # TODO: filename/line specific messages are not implemented.
        if not self.interactive:
            raise

        if self.verbose:
            traceback.print_exc()

        print('ERROR:', error)

    def report_syntax_error(self, msg, yytext, first_line, first_col,
                            last_line, last_col):
        """
        Raise a BisonSyntaxError describing a syntax error at the given
        source location. Newlines in yytext are shown escaped.
        """
        yytext = yytext.replace('\n', '\\n')
        args = (first_line, first_col, last_line, last_col, msg, yytext)
        raise BisonSyntaxError('%d.%d-%d.%d: "%s" near "%s".' % args, args)
class BisonParser(object):
    """
    Base parser class

    You should subclass this, and provide a bunch of methods called
    'on_TargetName', where 'TargetName' is the name of each target in
    your grammar (.y) file.
    """
    #@    @+others
    #@+node:attributes
    # ---------------------------------------
    # override these if you need to

    # command and options for running yacc/bison, except for filename arg
    bisonCmd = ["bison", "-d", "-v", '-t']

    bisonFile = "tmp.y"
    bisonCFile = "tmp.tab.c"
    bisonHFile = "tmp.tab.h"  # name of header file generated by bison cmd

    bisonCFile1 = "tmp.bison.c"  # c output file from bison gets renamed to this
    bisonHFile1 = "tokens.h"  # bison-generated header file gets renamed to this

    flexCmd = ["flex", ]  # command and options for running [f]lex, except for filename arg
    flexFile = "tmp.l"
    flexCFile = "lex.yy.c"

    flexCFile1 = "tmp.lex.c"  # c output file from lex gets renamed to this

    verbose = 0

    file = None  # default to sys.stdin

    last = None  # last parsed target, top of parse tree

    lasterror = None  # gets set if there was an error

    keepfiles = 0  # set to 1 to keep temporary engine build files

    bisonEngineLibName = None  # defaults to 'modulename-parser'

    defaultNodeClass = BisonNode  # class to use by default for creating new parse nodes

    #@-node:attributes
    #@+node:__init__
    def __init__(self, **kw):
        """
        Abstract representation of parser

        Keyword arguments:
            - read - a callable accepting an int arg (nbytes) and returning a string,
              default is this class' read() method
            - file - a file object, or string of a pathname to open as a file, defaults
              to sys.stdin. Note that you can leave this blank, and pass a file keyword
              argument to the .run() method.
            - verbose - set to 1 to enable verbose output messages, default 0
            - keepfiles - if non-zero, keeps any files generated in the
              course of building the parser engine; by default, all these
              files get deleted upon a successful engine build
            - defaultNodeClass - the class to use for creating parse nodes, default
              is self.defaultNodeClass (in this base class, BisonNode)

        Raises:
            - Exception - if 'file' is a pathname that cannot be opened
        """
        # setup
        read = kw.get('read', None)
        if read:
            self.read = read

        fileobj = kw.get('file', None)
        if fileobj:
            if isinstance(fileobj, str):
                try:
                    fileobj = open(fileobj, "rb")
                except (IOError, OSError):
                    # fileobj is still the pathname here: open() failed
                    # before the assignment took place.
                    raise Exception("Cannot open input file %s" % fileobj)
            self.file = fileobj
        else:
            self.file = sys.stdin

        nodeClass = kw.get('defaultNodeClass', None)
        if nodeClass:
            self.defaultNodeClass = nodeClass

        self.verbose = kw.get('verbose', 0)

        if 'keepfiles' in kw:
            self.keepfiles = kw['keepfiles']

        # if engine lib name not declared, invent one
        if not self.bisonEngineLibName:
            self.bisonEngineLibName = self.__class__.__module__ + "-parser"

        # get an engine
        self.engine = ParserEngine(self)

    #@-node:__init__
    #@+node:__getattr__
    def __getitem__(self, idx):
        """Index into the last parsed target (self.last)."""
        return self.last[idx]
    #@-node:__getattr__
    #@+node:_handle
    def _handle(self, targetname, option, names, values):
        """
        Callback which receives a target from parser, as a targetname
        and list of term names and values.

        Tries to dispatch to on_TargetName() methods if they exist,
        otherwise wraps the target in a defaultNodeClass object
        (BisonNode when defaultNodeClass is unset).

        Returns the resulting object, which is also stored in self.last.
        """
        handler = getattr(self, "on_" + targetname, None)
        if handler:
            if self.verbose:
                try:
                    hdlrline = handler.__code__.co_firstlineno
                except AttributeError:
                    # handler is not a plain function/method (e.g. a class)
                    hdlrline = handler.__init__.__code__.co_firstlineno
                print("invoking handler at line %s for %s" % (hdlrline, targetname))
            self.last = handler(target=targetname, option=option, names=names, values=values)
            if self.verbose:
                print("handler for %s returned %s" % (targetname, repr(self.last)))
        else:
            if self.verbose:
                print("no handler for %s, using default" % targetname)
            # Use the configurable node class instead of hard-coding BisonNode.
            cls = self.defaultNodeClass or BisonNode
            self.last = cls(targetname, option=option, names=names, values=values)

        # reset any resulting errors (assume they've been handled)
        #self.lasterror = None

        # assumedly the last thing parsed is at the top of the tree
        return self.last

    #@-node:_handle
    #@+node:run
    def run(self, **kw):
        """
        Runs the parser, and returns the top-most parse target.

        Keywords:
            - file - either a string, comprising a file to open and read input from, or
              a Python file object
            - read - a callable to use instead of self.read for this run
            - debug - enables garrulous parser debugging output, default 0

        self.file and self.read are restored on exit, even on error.

        Raises:
            - Exception - if 'file' is a pathname that cannot be opened
            - ParserSyntaxError - if self.lasterror was set during the run
        """
        if self.verbose:
            print("Parser.run: calling engine")

        # grab keywords
        fileobj = kw.get('file', self.file)
        filename = None
        opened_here = False
        if isinstance(fileobj, str):
            filename = fileobj
            try:
                fileobj = open(filename, "rb")
            except (IOError, OSError):
                raise Exception("Cannot open input file %s" % filename)
            opened_here = True

        read = kw.get('read', self.read)
        debug = kw.get('debug', 0)

        # back up existing attribs
        oldfile = self.file
        oldread = self.read

        try:
            # plug in new ones, if given (file objects are accepted as-is)
            if fileobj is not None:
                self.file = fileobj
            if read:
                self.read = read

            # do the parsing job, spew if error
            self.lasterror = None
            self.engine.runEngine(debug)

            if self.lasterror:
                # lasterror is interpolated as (line, message, token)
                if filename is not None:
                    raise ParserSyntaxError("%s:%d: '%s' near '%s'" % ((filename,) + self.lasterror))
                else:
                    raise ParserSyntaxError("Line %d: '%s' near '%s'" % self.lasterror)
        finally:
            # restore old values, also when an error propagates
            self.file = oldfile
            self.read = oldread

            # do not leak a file that this call opened itself
            if opened_here and not fileobj.closed:
                fileobj.close()

        if self.verbose:
            print("Parser.run: back from engine")
        return self.last

    #@-node:run
    #@+node:read
    def read(self, nbytes):
        """
        Override this in your subclass, if you desire.

        Arguments:
            - nbytes - the maximum length of the string which you may return.
              DO NOT return a string longer than this, or else Bad Things will
              happen.

        Returns one line (at most nbytes long) read from self.file.
        """
        # default to stdin
        if self.verbose:
            print("Parser.read: want %s bytes" % nbytes)
        data = self.file.readline(nbytes)
        if self.verbose:
            print("Parser.read: got %s bytes" % len(data))
        return data

    #@-node:read
    #@+node:_error
    def _error(self, linenum, msg, tok):
        """Print a syntax error message for the given line and token."""
        print("Parser: line %s: syntax error '%s' before '%s'" % (linenum, msg, tok))

    #@-node:_error
    #@+node:error
    def error(self, value):
        """
        Return the result of this method from a handler to notify a syntax error

        Records value in self.lasterror and returns it wrapped in a
        BisonError.
        """
        self.lasterror = value
        return BisonError(value)

    #@-node:error
    #@+node:toxml
    def toxml(self):
        """
        Serialises the parse tree and returns it as a raw xml string
        """
        return self.last.toxml()

    #@-node:toxml
    #@+node:toxmldoc
    def toxmldoc(self):
        """
        Returns an xml.dom.minidom.Document object containing the parse tree
        """
        return self.last.toxmldoc()

    #@-node:toxmldoc
    #@+node:toprettyxml
    def toprettyxml(self):
        """
        Returns a human-readable xml representation of the parse tree
        """
        return self.last.toprettyxml()

    #@-node:toprettyxml
    #@+node:loadxml
    def loadxml(self, raw, namespace=None):
        """
        Loads a parse tree from raw xml text

        Stores it in the '.last' attribute, which is where the root node
        of parsed text gets stored

        Arguments:
            - raw - string containing the raw xml
            - namespace - a dict or module object, where the node classes required for
              reconstituting the parse tree, can be found

        Returns:
            - root node object of reconstituted parse tree
        """
        doc = xml.dom.minidom.parseString(raw)
        tree = self.loadxmldoc(doc, namespace)
        self.last = tree
        return tree

    #@-node:loadxml
    #@+node:loadxmldoc
    def loadxmldoc(self, xmldoc, namespace=None):
        """
        Returns a reconstituted parse tree, loaded from an
        xml.dom.minidom.Document instance

        Arguments:
            - xmldoc - an xml.dom.minidom.Document instance
            - namespace - a dict from which to find the classes needed
              to translate the document into a tree of parse nodes
        """
        return self.loadxmlobj(xmldoc.childNodes[0], namespace)

    #@-node:loadxmldoc
    #@+node:loadxmlobj
    def loadxmlobj(self, xmlobj, namespace=None):
        """
        Returns a node object, being a parse tree, reconstituted from an
        xml.dom.minidom.Element object

        Arguments:
            - xmlobj - an xml.dom.minidom.Element instance
            - namespace - a namespace from which the node classes
              needed for reconstituting the tree, can be found; a module
              is replaced by its __dict__, None by this module's globals

        Returns:
            - the reconstituted node, or None when xmlobj is a bare token

        Raises:
            - Exception - if xmlobj is neither a known node class nor a
              token listed in self.tokens
        """
        # check on namespace
        if isinstance(namespace, types.ModuleType):
            namespace = namespace.__dict__
        elif namespace is None:
            namespace = globals()

        objname = xmlobj.tagName
        classname = objname + "_Node"
        classobj = namespace.get(classname, None)

        # barf if node is not a known parse node or token
        if (not classobj) and objname not in self.tokens:
            raise Exception("Cannot reconstitute %s: can't find required node class or token %s" % (
                objname, classname))

        if classobj:
            nodeobj = classobj()

            # add the attribs
            for k, v in xmlobj.attributes.items():
                setattr(nodeobj, k, v)
        else:
            nodeobj = None

        # now add the children
        for child in xmlobj.childNodes:
            # Text/comment nodes (e.g. pretty-printing whitespace) have no
            # attributes and carry no parse information: skip them.
            if child.nodeType != child.ELEMENT_NODE:
                continue

            childname = child.attributes['target'].value
            if childname + "_Node" in namespace:
                # a node class exists for this child: recurse
                childobj = self.loadxmlobj(child, namespace)
            else:
                # it's a token
                childobj = child.childNodes[0].nodeValue

            nodeobj.names.append(childname)
            nodeobj.values.append(childobj)

        # done
        return nodeobj

    #@-node:loadxmlobj
    #@+node:_globals
    def _globals(self):
        """Return the names defined in this module's global namespace."""
        return globals().keys()