class Parser(PLYParser):
    """PLY-driven parser that re-renders paginated text as pages of a PDF.

    The grammar actions (``p_*`` methods) create one PDF page for every
    ``page``/``addrpage`` that is reduced and keep per-statement bookkeeping
    in ``self.accounts``.  NOTE: PLY reads the grammar from the docstrings
    of the ``p_*`` methods, so those docstrings are code, not documentation.
    """

    # Pages are emitted only while the most recently parsed address block
    # does NOT contain this run of asterisks.
    # NOTE(review): presumably marks a placeholder/separator address -- confirm.
    _SKIP_MARKER = '******************'

    def __init__(self,
                 lex_optimize=True,
                 lextab='pyparser.lextab',
                 yacc_optimize=True,
                 yacctab='pyparser.yacctab',
                 yacc_debug=False):
        """ Create a new parser.

            Some arguments for controlling the debug/optimization
            level of the parser are provided. The defaults are
            tuned for release/performance mode.
            The simple rules for using them are:
            *) When tweaking the lexer/parser, set these to False
            *) When releasing a stable parser, set to True

            lex_optimize:
                Set to False when you're modifying the lexer.
                Otherwise, changes in the lexer won't be used, if
                some lextab.py file exists.
                When releasing with a stable lexer, set to True
                to save the re-generation of the lexer table on
                each run.

            lextab:
                Points to the lex table that's used for optimized
                mode. Only if you're modifying the lexer and want
                some tests to avoid re-generating the table, make
                this point to a local lex table file (that's been
                earlier generated with lex_optimize=True)

            yacc_optimize:
                Set to False when you're modifying the parser.
                Otherwise, changes in the parser won't be used, if
                some parsetab.py file exists.
                When releasing with a stable parser, set to True
                to save the re-generation of the parser table on
                each run.

            yacctab:
                Points to the yacc table that's used for optimized
                mode. Only if you're modifying the parser, make
                this point to a local yacc table file

            yacc_debug:
                Generate a parser.out file that explains how yacc
                built the parsing table from the grammar.
        """
        self.logger = logging.getLogger('parser')

        self.lex = Lexer(error_func=self._lex_error_func,
                         type_lookup_func=self._lex_type_lookup_func)
        self.lex.build(optimize=lex_optimize, lextab=lextab)
        self.tokens = self.lex.tokens

        self.parser = ply.yacc.yacc(module=self,
                                    start='statements',
                                    debug=yacc_debug,
                                    optimize=yacc_optimize,
                                    tabmodule=yacctab)

        # Bookkeeping filled in by the grammar actions:
        #   addresses  - text of every address block reduced so far
        #   accounts   - str(statement number) ->
        #                [page count, [page numbers as str], address text,
        #                 [], None]  (last two slots are not used in this class)
        #   statements - number of statements (address pages) emitted
        #   totalPages - number of PDF pages emitted
        self.addresses = []
        self.accounts = {}
        self.statements = 0
        self.totalPages = 0

        # PDF output objects; pages are appended to pdf_doc as they are parsed.
        self.pdf_doc = PDFDoc()
        self.pdf_doc.InitSecurityHandler()
        self.timesRoman = Font.Create(self.pdf_doc.GetSDFDoc(),
                                      Font.e_times_roman, True)
        self.courierNew = Font.Create(self.pdf_doc.GetSDFDoc(),
                                      Font.e_courier, True)
        self.eb = ElementBuilder()
        self.writer = ElementWriter()

    def parse(self, text, filename='', debuglevel=0):
        """ Parse the input text, building PDF pages as a side effect.

            The pages are appended to ``self.pdf_doc``; use ``savePDF`` to
            write the document out afterwards.

            text:
                A string containing the text to parse

            filename:
                Name of the file being parsed (for meaningful
                error messages)

            debuglevel:
                Debug level to yacc

            Returns ``[]`` when the text is empty or only whitespace,
            otherwise whatever the yacc parser returns (the ``statements``
            action assigns no value).
        """
        self.lex.filename = filename
        self.lex.reset_lineno()
        # NOTE(review): _scope_stack is not read anywhere in this class;
        # presumably consumed by a helper defined elsewhere -- confirm.
        self._scope_stack = [set()]

        self.logger.info("_______________________________________________")
        self.logger.info("parsing input...")
        if not text or text.isspace():
            return []

        result = self.parser.parse(text, lexer=self.lex, debug=debuglevel)
        # Only report completion once the parse has actually finished.
        self.logger.info("_______________________________________________")
        self.logger.info("finished parsing input file...")
        return result

    ######################--   PRIVATE   --######################

    ##
    ## Precedence and associativity of tokens
    ##
    precedence = (
        ('left', 'LINENO'),
        ('left', 'STARTPAGE'),
    )

    def _lex_error_func(self, msg, line, column):
        # Error callback handed to the lexer; forwards to the parse-error path.
        self._parse_error(msg, self._coord(line, column))

    def _lex_type_lookup_func(self, name):
        """ Type-lookup callback handed to the lexer.

            Delegates to ``self._is_type_in_scope``.
            NOTE(review): that helper is not defined in this class;
            presumably provided elsewhere -- confirm.
        """
        return self._is_type_in_scope(name)

    def p_empty(self, p):
        'empty : '
        p[0] = ''

    def p_error(self, p):
        # p is the offending token, or None when input ended unexpectedly.
        if p:
            self._parse_error('before: %s' % p.value, self._coord(p.lineno))
        else:
            self._parse_error('At end of input', '')

    def p_statements(self, p):
        '''statements : statements pagelist
                      | pagelist'''
        pass

    def p_pagelist(self, p):
        '''pagelist : addrpage
                    | page'''
        pass

    def p_addrpage(self, p):
        '''addrpage : STARTPAGE lines address lines'''
        # First page of a statement: it carries the address block that
        # p_address has just appended to self.addresses.
        if self._SKIP_MARKER in self.addresses[-1]:
            return

        self.totalPages += 1
        self.statements += 1
        self.buildNewPage("".join(map(str, p[2:])))

        # add new entry to accts
        self.accounts[str(self.statements)] = [
            1, [str(self.totalPages)], self.addresses[-1], [], None
        ]

    def p_page(self, p):
        '''page : STARTPAGE lines'''
        # Continuation page of the current statement.
        # NOTE: relies on an addrpage having been reduced earlier; with no
        # prior address self.addresses[-1] raises IndexError.
        if self._SKIP_MARKER in self.addresses[-1]:
            return

        self.totalPages += 1
        self.buildNewPage("".join(map(str, p[2:])))

        entry = self.accounts[str(self.statements)]
        entry[0] += 1
        entry[1].append(str(self.totalPages))

    def p_address(self, p):
        '''address : beginaddress lines stopaddress'''
        # Only the lines between the begin/stop markers are remembered.
        self.addresses.append(p[2])
        p[0] = "{0}{1}{2}".format(p[1], p[2], p[3])

    def p_lines(self, p):
        '''lines : lines line
                 | line'''
        if len(p) > 2:
            p[0] = "{0}{1}".format(p[1], p[2])
        else:
            p[0] = p[1]

    def p_line(self, p):
        '''line : linedata LINENO
                | LINENO'''
        # Every LINENO token becomes a newline in the page text.
        if len(p) == 3:
            p[0] = "{0}\n".format(p[1])
        else:
            p[0] = "\n"

    def p_linedata(self, p):
        '''linedata : linedata WHITESPACE
                    | linedata NUMBER
                    | linedata TEXT
                    | empty'''
        if len(p) == 3:
            p[0] = "{0}{1}".format(p[1], p[2])
        else:
            p[0] = "{0}".format(p[1])

    def p_beginaddress(self, p):
        '''beginaddress : linedata STARTADDRESS LINENO
                        | linedata STARTADDRESS linedata LINENO'''
        if len(p) == 4:
            p[0] = "{0}{1}\n".format(p[1], p[2])
        else:
            p[0] = "{0}{1}{2}\n".format(p[1], p[2], p[3])

    def p_stopaddress(self, p):
        '''stopaddress : linedata ENDADDRESS LINENO
                       | linedata ENDADDRESS linedata LINENO'''
        if len(p) == 4:
            p[0] = "{0}{1}\n".format(p[1], p[2])
        else:
            p[0] = "{0}{1}{2}\n".format(p[1], p[2], p[3])

    ######################--   PDF BUILDING   --################

    def buildNewPage(self, pagestr):
        """Create a new page and add the string contents as text elements.

        pagestr:
            Page text; it is split on newlines and each piece is written
            as its own text run followed by a line break.
        """
        self.logger.debug("building page %s...", self.totalPages)
        page = self.pdf_doc.PageCreate(Rect(0, 0, 612, 794))
        self.writer.Begin(page)
        self.eb.Reset()

        # begin writing text elements to the current page
        element = self.eb.CreateTextBegin(self.courierNew, 8)
        # last two numbers are the x, y coords of the text origin, measured
        # from the LOWER-LEFT corner of the page (PDF user space)
        element.SetTextMatrix(1, 0, 0, 1, 30, 750)
        self.writer.WriteElement(element)

        # loop over the split string and write each line
        for item in pagestr.split('\n'):
            element = self.eb.CreateTextRun(item)
            element.GetGState().SetLeading(10)  # Set the spacing between lines
            self.writer.WriteElement(element)
            self.writer.WriteElement(self.eb.CreateTextNewLine())

        self.writer.WriteElement(self.eb.CreateTextEnd())
        self.writer.End()

        # add the page to the document
        self.pdf_doc.PagePushBack(page)

    def savePDF(self, filename):
        """Save the current pdf with the input filename."""
        self.logger.debug("saving %s...", filename)
        self.pdf_doc.Save(filename, SDFDoc.e_compatibility)

    def closePDF(self):
        """Close the current pdf."""
        self.pdf_doc.Close()
class Parser(PLYParser):
    """PLY-driven parser that re-renders paginated text as pages of a PDF.

    The grammar actions (``p_*`` methods) create one PDF page for every
    ``page``/``addrpage`` that is reduced and keep per-statement bookkeeping
    in ``self.accounts``.  NOTE: PLY reads the grammar from the docstrings
    of the ``p_*`` methods, so those docstrings are code, not documentation.
    """

    # Pages are emitted only while the most recently parsed address block
    # does NOT contain this run of asterisks.
    # NOTE(review): presumably marks a placeholder/separator address -- confirm.
    _SKIP_MARKER = '******************'

    def __init__(self,
                 lex_optimize=True,
                 lextab='pyparser.lextab',
                 yacc_optimize=True,
                 yacctab='pyparser.yacctab',
                 yacc_debug=False):
        """ Create a new parser.

            Some arguments for controlling the debug/optimization
            level of the parser are provided. The defaults are
            tuned for release/performance mode.
            The simple rules for using them are:
            *) When tweaking the lexer/parser, set these to False
            *) When releasing a stable parser, set to True

            lex_optimize:
                Set to False when you're modifying the lexer.
                Otherwise, changes in the lexer won't be used, if
                some lextab.py file exists.
                When releasing with a stable lexer, set to True
                to save the re-generation of the lexer table on
                each run.

            lextab:
                Points to the lex table that's used for optimized
                mode. Only if you're modifying the lexer and want
                some tests to avoid re-generating the table, make
                this point to a local lex table file (that's been
                earlier generated with lex_optimize=True)

            yacc_optimize:
                Set to False when you're modifying the parser.
                Otherwise, changes in the parser won't be used, if
                some parsetab.py file exists.
                When releasing with a stable parser, set to True
                to save the re-generation of the parser table on
                each run.

            yacctab:
                Points to the yacc table that's used for optimized
                mode. Only if you're modifying the parser, make
                this point to a local yacc table file

            yacc_debug:
                Generate a parser.out file that explains how yacc
                built the parsing table from the grammar.
        """
        self.logger = logging.getLogger('parser')

        self.lex = Lexer(error_func=self._lex_error_func,
                         type_lookup_func=self._lex_type_lookup_func)
        self.lex.build(optimize=lex_optimize, lextab=lextab)
        self.tokens = self.lex.tokens

        self.parser = ply.yacc.yacc(module=self,
                                    start='statements',
                                    debug=yacc_debug,
                                    optimize=yacc_optimize,
                                    tabmodule=yacctab)

        # Bookkeeping filled in by the grammar actions:
        #   addresses  - text of every address block reduced so far
        #   accounts   - str(statement number) ->
        #                [page count, [page numbers as str], address text,
        #                 [], None]  (last two slots are not used in this class)
        #   statements - number of statements (address pages) emitted
        #   totalPages - number of PDF pages emitted
        self.addresses = []
        self.accounts = {}
        self.statements = 0
        self.totalPages = 0

        # PDF output objects; pages are appended to pdf_doc as they are parsed.
        self.pdf_doc = PDFDoc()
        self.pdf_doc.InitSecurityHandler()
        self.timesRoman = Font.Create(self.pdf_doc.GetSDFDoc(),
                                      Font.e_times_roman, True)
        self.courierNew = Font.Create(self.pdf_doc.GetSDFDoc(),
                                      Font.e_courier, True)
        self.eb = ElementBuilder()
        self.writer = ElementWriter()

    def parse(self, text, filename='', debuglevel=0):
        """ Parse the input text, building PDF pages as a side effect.

            The pages are appended to ``self.pdf_doc``; use ``savePDF`` to
            write the document out afterwards.

            text:
                A string containing the text to parse

            filename:
                Name of the file being parsed (for meaningful
                error messages)

            debuglevel:
                Debug level to yacc

            Returns ``[]`` when the text is empty or only whitespace,
            otherwise whatever the yacc parser returns (the ``statements``
            action assigns no value).
        """
        self.lex.filename = filename
        self.lex.reset_lineno()
        # NOTE(review): _scope_stack is not read anywhere in this class;
        # presumably consumed by a helper defined elsewhere -- confirm.
        self._scope_stack = [set()]

        self.logger.info("_______________________________________________")
        self.logger.info("parsing input...")
        if not text or text.isspace():
            return []

        result = self.parser.parse(text, lexer=self.lex, debug=debuglevel)
        # Only report completion once the parse has actually finished.
        self.logger.info("_______________________________________________")
        self.logger.info("finished parsing input file...")
        return result

    ######################--   PRIVATE   --######################

    ##
    ## Precedence and associativity of tokens
    ##
    precedence = (
        ('left', 'LINENO'),
        ('left', 'STARTPAGE'),
    )

    def _lex_error_func(self, msg, line, column):
        # Error callback handed to the lexer; forwards to the parse-error path.
        self._parse_error(msg, self._coord(line, column))

    def _lex_type_lookup_func(self, name):
        """ Type-lookup callback handed to the lexer.

            Delegates to ``self._is_type_in_scope``.
            NOTE(review): that helper is not defined in this class;
            presumably provided elsewhere -- confirm.
        """
        return self._is_type_in_scope(name)

    def p_empty(self, p):
        'empty : '
        p[0] = ''

    def p_error(self, p):
        # p is the offending token, or None when input ended unexpectedly.
        if p:
            self._parse_error('before: %s' % p.value, self._coord(p.lineno))
        else:
            self._parse_error('At end of input', '')

    def p_statements(self, p):
        '''statements : statements pagelist
                      | pagelist'''
        pass

    def p_pagelist(self, p):
        '''pagelist : addrpage
                    | page'''
        pass

    def p_addrpage(self, p):
        '''addrpage : STARTPAGE lines address lines'''
        # First page of a statement: it carries the address block that
        # p_address has just appended to self.addresses.
        if self._SKIP_MARKER in self.addresses[-1]:
            return

        self.totalPages += 1
        self.statements += 1
        self.buildNewPage("".join(map(str, p[2:])))

        # add new entry to accts
        self.accounts[str(self.statements)] = [
            1, [str(self.totalPages)], self.addresses[-1], [], None
        ]

    def p_page(self, p):
        '''page : STARTPAGE lines'''
        # Continuation page of the current statement.
        # NOTE: relies on an addrpage having been reduced earlier; with no
        # prior address self.addresses[-1] raises IndexError.
        if self._SKIP_MARKER in self.addresses[-1]:
            return

        self.totalPages += 1
        self.buildNewPage("".join(map(str, p[2:])))

        entry = self.accounts[str(self.statements)]
        entry[0] += 1
        entry[1].append(str(self.totalPages))

    def p_address(self, p):
        '''address : beginaddress lines stopaddress'''
        # Only the lines between the begin/stop markers are remembered.
        self.addresses.append(p[2])
        p[0] = "{0}{1}{2}".format(p[1], p[2], p[3])

    def p_lines(self, p):
        '''lines : lines line
                 | line'''
        if len(p) > 2:
            p[0] = "{0}{1}".format(p[1], p[2])
        else:
            p[0] = p[1]

    def p_line(self, p):
        '''line : linedata LINENO
                | LINENO'''
        # Every LINENO token becomes a newline in the page text.
        if len(p) == 3:
            p[0] = "{0}\n".format(p[1])
        else:
            p[0] = "\n"

    def p_linedata(self, p):
        '''linedata : linedata WHITESPACE
                    | linedata NUMBER
                    | linedata TEXT
                    | empty'''
        if len(p) == 3:
            p[0] = "{0}{1}".format(p[1], p[2])
        else:
            p[0] = "{0}".format(p[1])

    def p_beginaddress(self, p):
        '''beginaddress : linedata STARTADDRESS LINENO
                        | linedata STARTADDRESS linedata LINENO'''
        if len(p) == 4:
            p[0] = "{0}{1}\n".format(p[1], p[2])
        else:
            p[0] = "{0}{1}{2}\n".format(p[1], p[2], p[3])

    def p_stopaddress(self, p):
        '''stopaddress : linedata ENDADDRESS LINENO
                       | linedata ENDADDRESS linedata LINENO'''
        if len(p) == 4:
            p[0] = "{0}{1}\n".format(p[1], p[2])
        else:
            p[0] = "{0}{1}{2}\n".format(p[1], p[2], p[3])

    ######################--   PDF BUILDING   --################

    def buildNewPage(self, pagestr):
        """Create a new page and add the string contents as text elements.

        pagestr:
            Page text; it is split on newlines and each piece is written
            as its own text run followed by a line break.
        """
        self.logger.debug("building page %s...", self.totalPages)
        page = self.pdf_doc.PageCreate(Rect(0, 0, 612, 794))
        self.writer.Begin(page)
        self.eb.Reset()

        # begin writing text elements to the current page
        element = self.eb.CreateTextBegin(self.courierNew, 8)
        # last two numbers are the x, y coords of the text origin, measured
        # from the LOWER-LEFT corner of the page (PDF user space)
        element.SetTextMatrix(1, 0, 0, 1, 30, 750)
        self.writer.WriteElement(element)

        # loop over the split string and write each line
        for item in pagestr.split('\n'):
            element = self.eb.CreateTextRun(item)
            element.GetGState().SetLeading(10)  # Set the spacing between lines
            self.writer.WriteElement(element)
            self.writer.WriteElement(self.eb.CreateTextNewLine())

        self.writer.WriteElement(self.eb.CreateTextEnd())
        self.writer.End()

        # add the page to the document
        self.pdf_doc.PagePushBack(page)

    def savePDF(self, filename):
        """Save the current pdf with the input filename."""
        self.logger.debug("saving %s...", filename)
        self.pdf_doc.Save(filename, SDFDoc.e_compatibility)

    def closePDF(self):
        """Close the current pdf."""
        self.pdf_doc.Close()
class BashParser(object):
    """Grammar actions for a small Bash-like language, written for PLY yacc.

    Each ``p_*`` method's docstring is the grammar production PLY reads, and
    its body builds the matching ``AST`` node.  The first production
    (``program``) is the start symbol because no explicit start is declared
    here, so the order of the methods matters.
    """

    def __init__(self):
        # Build the lexer up front so it is ready to be handed to yacc.
        lexer = Lexer()
        lexer.build()
        self.lexer = lexer

    # Token names come straight from the lexer class.
    tokens = Lexer.tokens

    # Lowest precedence first: + and - bind looser than * and /.
    precedence = (
        ("left", 'PLUS', 'MINUS'),
        ("left", 'TIMES', 'DIVIDE'),
    )

    def p_program(self, p):
        """program : instructions"""
        program = AST.Program(p[1])
        p[0] = program
        # The finished tree is printed as soon as the whole program is reduced.
        program.printTree(0)

    def p_instructions(self, p):
        """ instructions : instruction instructions
                         | instruction """
        size = len(p)
        if size == 2:
            p[0] = AST.Instructions(p[1], None)
        elif size == 3:
            p[0] = AST.Instructions(p[1], p[2])

    def p_instruction_assignment(self, p):
        """instruction : identifier ASSIGNMENT expression
                       | identifier ASSIGNMENT double_paren_dollar_prefix_expression """
        target, value = p[1], p[3]
        p[0] = AST.Assignment(target, value)

    def p_instruction_if(self, p):
        """instruction : IF condition SEMICOLON THEN instructions SEMICOLON FI
                       | IF condition SEMICOLON THEN instructions SEMICOLON ELSE instructions SEMICOLON FI """
        size = len(p)
        if size == 8:
            # no else branch
            p[0] = AST.If(p[2], p[5], None)
        elif size == 11:
            p[0] = AST.If(p[2], p[5], p[8])

    def p_instruction_while(self, p):
        '''instruction : WHILE condition SEMICOLON DO instructions DONE '''
        condition, body = p[2], p[5]
        p[0] = AST.While(condition, body)

    def p_instruction_echo(self, p):
        """instruction : ECHO expression """
        p[0] = AST.Echo(p[2])

    def p_instruction_break(self, p):
        """ instruction : BREAK """
        p[0] = AST.Break()

    def p_instruction_continue(self, p):
        """ instruction : CONTINUE """
        p[0] = AST.Continue()

    def p_double_paren_dollar_prefix_expression(self, p):
        """ double_paren_dollar_prefix_expression : DOLLAR DOUBLE_LPAREN expression DOUBLE_RPAREN """
        if len(p) != 5:
            return
        p[0] = AST.DoubleParenArithmetic(p[3])

    def p_double_paren_expression(self, p):
        """ instruction : DOUBLE_LPAREN expression DOUBLE_RPAREN """
        if len(p) != 4:
            return
        p[0] = AST.DoubleParenArithmetic(p[2])

    def p_identifier(self, p):
        """identifier : IDENTIFIER"""
        p[0] = AST.Identifier(p[1])

    def p_comparators(self, p):
        """comparators : TEST_EQ
                       | TEST_NEQ
                       | TEST_LT
                       | TEST_GT
                       | EQUALS
                       | GT
                       | GTE
                       | LT
                       | LTE """
        # The comparator is passed through as the raw token value.
        p[0] = p[1]

    def p_condition(self, p):
        """ condition : expression
                      | DOUBLE_LBRACKET expression DOUBLE_RBRACKET
                      | LBRACKET expression RBRACKET """
        # Bracketed forms keep only the inner expression.
        p[0] = p[2] if len(p) == 4 else p[1]

    def p_expression(self, p):
        """expression : identifier
                      | identifier DECREMENT
                      | identifier INCREMENT
                      | const
                      | expression PLUS expression
                      | expression MINUS expression
                      | expression TIMES expression
                      | expression DIVIDE expression
                      | expression comparators expression
                      | LPAREN expression RPAREN"""
        size = len(p)
        if size == 2:
            # bare identifier or constant
            p[0] = p[1]
        elif size == 4:
            left, middle, right = p[1], p[2], p[3]
            if left == '(' and right == ')':
                # parenthesised expression: unwrap it
                p[0] = middle
            else:
                print("p2 {0}, p1 {1}, p3: {2}".format(middle, left, right))
                p[0] = AST.BinExpr(middle, left, right)
        elif size == 3:
            # postfix --/++ is represented as "identifier -/+ 1"
            suffix = p[2]
            if suffix == '--':
                p[0] = AST.BinExpr('-', p[1], AST.Number(1))
            elif suffix == '++':
                p[0] = AST.BinExpr('+', p[1], AST.Number(1))

    def p_const_number(self, p):
        """const : NUMBER"""
        p[0] = AST.Number(p[1])

    def p_const_string(self, p):
        """const : STRING"""
        p[0] = AST.String(p[1])

    def p_variable(self, p):
        """const : VARIABLE"""
        p[0] = AST.Variable(p[1])

    def p_const_boolean(self, p):
        """const : BOOLEAN"""
        p[0] = AST.Boolean(p[1])

    ###
    # Command Call
    ###
    def p_instruction_command_call(self, p):
        """ instruction : command switch_list argument_list
                        | command argument_list
                        | command switch_list
                        | command """
        switches = None
        arguments = None
        size = len(p)
        if size == 4:
            switches, arguments = p[2], p[3]
        elif size == 3:
            # A single trailing item is either the arguments or the switches.
            trailing = p[2]
            if isinstance(trailing, AST.ArgumentList):
                arguments = trailing
            elif isinstance(trailing, AST.SwitchList):
                switches = trailing
        if size in (2, 3, 4):
            p[0] = AST.CommandCall(p[1], switches, arguments)

    def p_command(self, p):
        """ command : identifier """
        # NOTE(review): p[1] is already the node built by p_identifier, so
        # this wraps an Identifier in another Identifier -- confirm intended.
        p[0] = AST.Identifier(p[1])

    def p_command_switch(self, p):
        """ switch : MINUS identifier """
        p[0] = AST.CommandSwitch(p[2])

    def p_command_switch_list(self, p):
        """ switch_list : switch switch_list"""
        remaining = p[2].switch_list
        p[0] = AST.SwitchList([p[1]] + remaining)

    def p_command_switch_list_first(self, p):
        """ switch_list : switch """
        p[0] = AST.SwitchList([p[1]])

    def p_command_argument(self, p):
        """ argument : const """
        p[0] = p[1]

    def p_command_argument_list(self, p):
        """ argument_list : argument argument_list"""
        remaining = p[2].argument_list
        p[0] = AST.ArgumentList([p[1]] + remaining)

    def p_command_argument_list_first(self, p):
        """ argument_list : argument """
        p[0] = AST.ArgumentList([p[1]])

    def p_error(self, p):
        # p is the offending token (None when input ends unexpectedly).
        message = "Syntax error in input! Token: {0}".format(p)
        print(message)