def clex(filename,ipt): from c_pre import preprocess def errfoo(msg,a,b): import sys print msg sys.exit() def typelookup(namd): return False clex=CLexer(errfoo,typelookup) clex.build() clex.input(ipt,filename) return filename_filter(filename,clex)
def __init__(self, lex_optimize=False, yacc_optimize=False): """ 创建 CParser 对象并初始化。 """ self.clex = CLexer( error_func = self._lex_error_func, on_lbrace_func = self._lex_on_lbrace_func, on_rbrace_func = self._lex_on_rbrace_func, type_lookup_func = self._lex_type_lookup_func) self.clex.build(optimize=lex_optimize) # TODO: 这个好像没有用到 self.tokens = self.clex.tokens # 在此列表中的规则会自动生成一条后缀为 _opt 的规则 # rules_with_opt = [ 'assignment_expression', 'declaration_list', 'expression', 'identifier_list', 'init_declarator_list', 'initializer_list', 'block_item_list', 'type_qualifier_list', 'declaration_specifiers' ] for rule in rules_with_opt: self._create_opt_rule(rule) self.cparser = yacc.yacc(module=self, start='translation_unit_or_empty', debug=True,optimize=yacc_optimize) # 名称范围栈 # 如果 _scope_stack[n][name]为True,则 name 在当前范围内被定义为 type # 如果为 False,则 name 在当前范围内被定义为 identifier # 如果不存在,则 name 未在此范围内定义 # _scope_stack[-1] 表示当前所在的范围 self._scope_stack = [dict()]
class CParser(BaseParser): """ 语法分析器,生成抽象语法树。 调用 parse() 对文本进行语法分析,获得抽象语法树。返回值是一个包含若干个 翻译单元的列表。 Attributes: """ def __init__(self, lex_optimize=False, yacc_optimize=False): """ 创建 CParser 对象并初始化。 """ self.clex = CLexer( error_func = self._lex_error_func, on_lbrace_func = self._lex_on_lbrace_func, on_rbrace_func = self._lex_on_rbrace_func, type_lookup_func = self._lex_type_lookup_func) self.clex.build(optimize=lex_optimize) # TODO: 这个好像没有用到 self.tokens = self.clex.tokens # 在此列表中的规则会自动生成一条后缀为 _opt 的规则 # rules_with_opt = [ 'assignment_expression', 'declaration_list', 'expression', 'identifier_list', 'init_declarator_list', 'initializer_list', 'block_item_list', 'type_qualifier_list', 'declaration_specifiers' ] for rule in rules_with_opt: self._create_opt_rule(rule) self.cparser = yacc.yacc(module=self, start='translation_unit_or_empty', debug=True,optimize=yacc_optimize) # 名称范围栈 # 如果 _scope_stack[n][name]为True,则 name 在当前范围内被定义为 type # 如果为 False,则 name 在当前范围内被定义为 identifier # 如果不存在,则 name 未在此范围内定义 # _scope_stack[-1] 表示当前所在的范围 self._scope_stack = [dict()] def parse(self, text, filename=''): """ 解析C语言代码并生成抽象语法树 """ self.clex.filename = filename return self.cparser.parse(input=text, lexer=self.clex) #################### PRIVATE #################### def _push_scope(self): self._scope_stack.append(dict()) def _pop_scope(self): assert len(self._scope_stack) > 1 self._scope_stack.pop() def _add_typedef_name(self, name, coord): """ 在当前范围内将 name 添加为一个 typedef_name。 """ # 只有 name 在当前范围内没有定义或者已定义为 type 时才可以定义 # 若在此范围内已定义其为 identifier,则不能再定义为 type # if not self._scope_stack[-1].get(name, True): self._parse_error("%r在此范围内已被定义为identifier," "不能再被定义为type" % name, coord) self._scope_stack[-1][name] = True def _add_identifier(self, name, coord): """ 在当前范围内将 name 添加为一个 identifier。 """ # 只有name在当前范围内没有定义或者已定义为identifier时才可以定义 # 若在此范围内已定义其为type,则不能再定义为identifier # if self._scope_stack[-1].get(name, False): self._parse_error("%r在此范围内已被定义为type," "不能再被定义为identifier" % name, coord) self._scope_stack[-1][name] = False def _is_type_in_scope(self, name): """ 判断name是否在 scope 中已被定义为 type """ for scope in reversed(self._scope_stack): # 如果在多个范围内都定义过则根据所在最近的范围判断 in_scope = scope.get(name) if in_scope is not None: return in_scope return False def _lex_on_lbrace_func(self): self._push_scope() def _lex_on_rbrace_func(self): self._pop_scope() def _lex_type_lookup_func(self, name): return self._is_type_in_scope(name) def _lex_error_func(self, msg, line, column): self._parse_error(msg, self._coord(line, column)) def _get_lookahead_token(self): return self.clex.last_token def _type_modify_decl(self, decl, modifier): """ 修改 declaration 的修饰符。 declaration 是一个结点链的结构,TypeDecl 在链的最末端,而修饰它的内容都在 它的前面。 此函数将新的 modifier 插入到原先的 modifiers 末尾,即 TypeDecl 的前面。 NOTE: 函数可能会修改 decl 和 modifier Args: decl: 可能是一个 TypeDecl,也可能是一个已经被修饰的结点链 modifier: 修饰符,如 ArrayDecl, FuncDecl等 Returns: 修改后的 declaration 结点链。 """ modifier_head = modifier modifier_tail = modifier while modifier_tail.type: modifier_tail = modifier_tail.type if isinstance(decl, c_ast.TypeDecl): modifier_tail.type = decl return modifier else: decl_tail = decl while not isinstance(decl_tail.type, c_ast.TypeDecl): decl_tail = decl_tail.type # 把modifier的修饰词插到decl原来的修饰词末尾 # modifier_tail.type = decl_tail.type decl_tail.type = modifier_head return decl def _fix_decl_name_type(self, decl, typename): """ 修正 declaration. 因为 type 是在最外层识别的,所以最内层的 TypeDecl 并没有 type. 同时声明的 name 位于最内层的 TypeDecl 中,因此整个 declaration 没有 name. 这个函数是主要为了解决这两个问题。 Args: decl: 以 Typedef 或 Decl 为首的结点链 typename: 包含一个或多个 type-specifier 结点的列表,可以是一个 IdentifierType,Enum 或 Struct Returns: 修正后的 declaration """ # 获取最内层的 TypeDecl # type = decl.type while not isinstance(type, c_ast.TypeDecl): type = type.type decl.name = type.declname type.quals = decl.quals # 只允许多个 IdentifierType # 或者单个 其它Type # 比如,不允许出现 int enum .. # for tn in typename: if not isinstance(tn, c_ast.IdentifierType): if len(typename) > 1: self._parse_error('不合法的多类型说明', tn.coord) else: type.type = tn return decl if not typename: # 函数声明可以不写 type,缺省值为 int # if isinstance(decl.type, c_ast.FuncDecl): type.type = c_ast.IdentifierType(['int'], coord=decl.coord) else: self._parse_error('声明中缺少类型', decl.coord) else: # 将多个类型名合并成一个 IdentifierType 结点 # type.type = c_ast.IdentifierType( [name for id in typename for name in id.names], coord=typename[0].coord ) return decl def _add_declaration_specifier(self, declspec, newspec, kind, append=False): spec = declspec or dict(qual=[], storage=[], type=[], function=[]) spec[kind].append(newspec) if append else spec[kind].insert(0, newspec) return spec def _build_declarations(self, spec, decls, typedef_namespace=False): """ 构建 declarations. Args: spec: 一个 dict: {qual=[], storage=[], type=[], function=[]} 来自 declaration_specifiers decls: dict 列表:[{decl=, init=}],来自 init_declarator_list typedef_namespace: Returns: 构建好的 declarations 列表,元素为 Typedef 或 Decl. """ is_typedef = 'typedef' in spec['storage'] declarations = [] for decl in decls: assert decl['decl'] is not None if is_typedef: declaration = c_ast.Typedef( name=None, quals=spec['qual'], storage=spec['storage'], type=decl['decl'], coord=decl['decl'].coord ) else: declaration = c_ast.Decl( name=None, quals=spec['qual'], storage=spec['storage'], funcspec=spec['function'], type=decl['decl'], init = decl.get('init'), bitsize=None, # TODO: Parser全部测试完成后删除此参数 coord=decl['decl'].coord ) if isinstance(declaration.type, (c_ast.Struct, c_ast.IdentifierType)): fixed_decl = declaration else: fixed_decl = self._fix_decl_name_type(declaration, spec['type']) # 添加到 scope 中,在语法分析器中使用 # if typedef_namespace: if is_typedef: self._add_typedef_name(fixed_decl.name, fixed_decl.coord) else: self._add_identifier(fixed_decl.name, fixed_decl.coord) declarations.append(fixed_decl) return declarations def _build_function_definition(self, spec, decl, param_decls, body): """ 构建函数定义。 Args: spec: 函数类型说明 decl: 函数声明 param_decls: body: 函数体 Returns: 一个 FuncDef 结点。 """ declaration = self._build_declarations( spec=spec, decls=[dict(decl=decl, init=None)], typedef_namespace=True )[0] return c_ast.FuncDef( decl=declaration, param_decls=param_decls, body=body, coord=decl.coord ) # 规定运算符的优先级和结合性(升序) # 参考 https://zh.cppreference.com/w/c/language/operator_precedence # precedence = ( ('left', 'LOR'), ('left', 'LAND'), ('left', 'OR'), ('left', 'XOR'), ('left', 'AND'), ('left', 'EQ', 'NE'), ('left', 'GT', 'GE', 'LT', 'LE'), ('left', 'RSHIFT', 'LSHIFT'), ('left', 'PLUS', 'MINUS'), ('left', 'TIMES', 'DIVIDE', 'MOD') ) # 文法规则 # ''' # def p_function_specifier(self, p): def p_type_name(self, p): """ type_name : specifier_qualifier_list abstract_declarator_opt """ typename = c_ast.Typename( name='', quals=p[1]['qual'], type=p[2] or c_ast.TypeDecl(None, None, None), coord=self._token_coord(p, 2) ) p[0] = self._fix_decl_name_type(typename, p[1]['type']) def p_abstract_declarator_1(self, p): """ abstract_declarator : pointer """ p[0] = self._type_modify_decl( decl=c_ast.TypeDecl(None, None, None), modifier=p[1] ) def p_abstract_declarator_2(self, p): """ abstract_declarator : pointer direct_abstract_declarator """ p[0] = self._type_modify_decl(p[2], p[1]) def p_abstract_declarator_3(self, p): """ abstract_declarator : direct_abstract_declarator """ p[0] = p[1] def p_direct_abstract_declarator_1(self, p): """ direct_abstract_declarator : LPAREN abstract_declarator RPAREN """ p[0] = p[2] def p_direct_abstract_declarator_2(self, p): """ direct_abstract_declarator : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET """ arr = c_ast.ArrayDecl( type=None, dim=p[3], # dim_quals=[], coord=p[1].coord ) p[0] = self._type_modify_decl(p[1], arr) def p_direct_abstract_declarator_3(self, p): """ direct_abstract_declarator : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET """ quals = p[2] or [] p[0] = c_ast.ArrayDecl( type=c_ast.TypeDecl(None, None, None), dim=p[3], # dim_quals=quals, coord=self._token_coord(p, 1) ) def p_direct_abstract_declarator_4(self, p): """ direct_abstract_declarator : direct_abstract_declarator LBRACKET TIMES RBRACKET """ arr = c_ast.ArrayDecl( type=None, dim=c_ast.ID(p[3], self._token_coord(p, 3)), dim_quals=[], coord=p[1].coord ) p[0] = self._type_modify_decl(decl=p[1], modifier=arr) def p_direct_abstract_declarator_5(self, p): """ direct_abstract_declarator : LBRACKET TIMES RBRACKET """ p[0] = c_ast.ArrayDecl( type=c_ast.TypeDecl(None, None, None), dim=c_ast.ID(p[3], self._token_coord(p, 3)), coord=self._token_coord(p, 1) ) def p_direct_abstract_declarator_6(self, p): """ direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN """ func = c_ast.FuncDecl( args=p[3], type=None, coord=p[1].coord ) p[0] = self._type_modify_decl(p[1], func) def p_direct_abstract_declarator_7(self, p): """ direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN """ p[0] = c_ast.FuncDecl( args=p[2], type=c_ast.TypeDecl(None, None, None), coord=self._token_coord(p, 1) ) ''' ###### external deinitions 部分 ###### def p_translation_unit_or_empty(self, p): """ translation_unit_or_empty : translation_unit | empty """ if p[1] is None: p[0] = c_ast.FileAST([]) else: p[0] = c_ast.FileAST(p[1]) def p_translation_unit(self, p): """ translation_unit : external_declaration | translation_unit external_declaration """ if len(p) == 2: p[0] = p[1] else: p[1].extend(p[2]) p[0] = p[1] def p_external_declaration_1(self, p): """ external_declaration : function_defination """ p[0] = [p[1]] def p_external_declaration_2(self, p): """ external_declaration : declaration """ p[0] = p[1] def p_function_defination(self, p): """ function_defination : declaration_specifiers declarator declaration_list_opt compound_statement """ spec = p[1] p[0] = self._build_function_definition( spec=spec, decl=p[2], param_decls=p[3], body=p[4]) # NOTE: declaration 本身就是列表 # def p_declaration_list(self, p): """ declaration_list : declaration | declaration_list declaration """ p[0] = p[1] if len(p) == 2 else p[1] + p[2] ###### statement 部分 ###### # 不考虑 labeled 语句 # 不考虑 switch 语句 def p_statement(self, p): """ statement : expression_statement | compound_statement | selection_statement | iteration_statement | jump_statement """ p[0] = p[1] def p_compound_statement(self, p): """ compound_statement : brace_open block_item_list_opt brace_close """ p[0] = c_ast.Compound(block_items=p[2], coord=self._token_coord(p, 1)) def p_block_item_list(self, p): """ block_item_list : block_item | block_item_list block_item """ if len(p) == 3: assert p[2] != [None] # TODO: 测试用代码,完成后移除 p[0] = p[1] if (len(p) == 2 or p[2] == [None]) else p[1] + p[2] def p_block_item(self, p): """ block_item : declaration | statement """ p[0] = p[1] if isinstance(p[1], list) else [p[1]] def p_expression_statement(self, p): """ expression_statement : expression_opt SEMI """ if p[1] is None: p[0] = c_ast.EmptyStatement(self._token_coord(p, 2)) else: p[0] = p[1] def p_selection_statement_1(self, p): """ selection_statement : IF LPAREN expression RPAREN statement """ p[0] = c_ast.If(p[3], p[5], None, self._token_coord(p, 1)) def p_selection_statement_2(self, p): """ selection_statement : IF LPAREN expression RPAREN statement ELSE statement """ p[0] = c_ast.If(p[3], p[5], p[7], self._token_coord(p, 1)) def p_iteration_statement_1(self, p): """ iteration_statement : WHILE LPAREN expression RPAREN statement """ p[0] = c_ast.While(p[3], p[5], self._token_coord(p, 1)) def p_iteration_statement_2(self, p): """ iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI """ p[0] = c_ast.DoWhile(p[5], p[2], self._token_coord(p, 1)) def p_iteration_statement_3(self, p): """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement """ p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._token_coord(p, 1)) def p_iteration_statement_4(self, p): """ iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN statement """ p[0] = c_ast.For(c_ast.DeclList(p[3], self._token_coord(p, 1)), p[4], p[6], p[8], self._token_coord(p, 1)) def p_jump_statement_1(self, p): """ jump_statement : BREAK SEMI """ p[0] = c_ast.Break(self._token_coord(p, 1)) def p_jump_statement_2(self, p): """ jump_statement : CONTINUE SEMI """ p[0] = c_ast.Continue(self._token_coord(p, 1)) def p_jump_statement_3(self, p): """ jump_statement : RETURN expression SEMI | RETURN SEMI """ p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._token_coord(p, 1)) ###### declaration 部分 ###### # 不考虑 init_declarator_list 为空的情况 # NOTE: declaration 是一个列表 # def p_declaration(self, p): """ declaration : declaration_specifiers init_declarator_list SEMI """ spec = p[1] decls = self._build_declarations( spec=spec, decls=p[2], typedef_namespace=True) p[0] = decls def p_init_declarator_list(self, p): """ init_declarator_list : init_declarator | init_declarator_list COMMA init_declarator """ p[0] = [p[1]] if len(p) == 2 else p[1] + [p[3]] # init_declarator 是一个 dict # def p_init_declarator(self, p): """ init_declarator : declarator | declarator EQUALS initializer """ p[0] = dict(decl=p[1], init=(p[3] if len(p) == 4 else None)) ###### declarator 部分 ###### def p_declarator(self, p): """ declarator : direct_declarator | pointer direct_declarator """ p[0] = p[1] if len(p) == 2 else self._type_modify_decl(p[2], p[1]) def p_direct_declarator_1(self, p): """ direct_declarator : ID """ p[0] = c_ast.TypeDecl( declname=p[1], type=None, quals=None, coord=self._token_coord(p, 1)) def p_direct_declarator_2(self, p): """ direct_declarator : LPAREN declarator RPAREN """ p[0] = p[2] def p_direct_declarator_3(self, p): """ direct_declarator : direct_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET """ arr = c_ast.ArrayDecl( type=None, dim=p[4], dim_quals=p[3] or [], coord=p[1].coord) p[0] = self._type_modify_decl(decl=p[1], modifier=arr) def p_direct_declarator_4(self, p): """ direct_declarator : direct_declarator LBRACKET STATIC type_qualifier_list assignment_expression RBRACKET | direct_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET """ dim_quals = p[3] + [p[4]] if isinstance(p[3], list) else p[4] + [p[3]] arr = c_ast.ArrayDecl( type=None, dim=p[5], dim_quals=dim_quals, coord=p[1].coord) p[0] = self._type_modify_decl(decl=p[1], modifier=arr) def p_direct_declarator_6(self, p): """ direct_declarator : direct_declarator LPAREN identifier_list_opt RPAREN | direct_declarator LPAREN parameter_type_list RPAREN """ func = c_ast.FuncDecl(args=p[3], type=None, coord=p[1].coord) # TODO: 可以考虑移除此部分 if self._get_lookahead_token().type == "LBRACE": if func.args is not None: for param in func.args.params: self._add_identifier(param.name, param.coord) p[0] = self._type_modify_decl(decl=p[1], modifier=func) def p_identifier_list(self, p): """ identifier_list : identifier | identifier_list COMMA identifier """ if len(p) == 2: p[0] = c_ast.ParamList([p[1]], p[1].coord) else: p[1].params.append(p[3]) p[0] = p[1] # 形参列表,函数声明时使用 # def p_parameter_type_list(self, p): """ parameter_type_list : parameter_list """ p[0] = p[1] def p_parameter_list(self, p): """ parameter_list : parameter_declaration | parameter_list COMMA parameter_declaration """ if len(p) == 2: p[0] = c_ast.ParamList([p[1]], p[1].coord) else: p[1].params.append(p[3]) p[0] = p[1] def p_parameter_declaration_1(self, p): """ parameter_declaration : declaration_specifiers declarator """ spec = p[1] # 形参缺省类型为 int # if not spec['type']: spec['type'] = [c_ast.IdentifierType(['int'], coord=self._token_coord(p, 1))] # _build_declarations() 函数返回一个 list # 而此处只涉及一个 declaration p[0] = self._build_declarations(spec=spec, decls=[dict(decl=p[2])])[0] # TODO # def p_parameter_declaration_2(self, p): # """ parameter_declaration : declaration_specifiers abstract_declarator_opt """ # if not spec['type']: # spec['type'] = [c_ast.IdentifierType( # ['int'], coord=self._token_coord(p, 1))] # # # 需要注意的是,declaration_specifiers 是一个 dict # 而不是像其它的非终结符一样是一个 Node # def p_declaration_specifiers_1(self, p): """ declaration_specifiers : storage_class_specifier declaration_specifiers_opt """ p[0] = self._add_declaration_specifier(p[2], p[1], 'storage') def p_declaration_specifiers_2(self, p): """ declaration_specifiers : type_specifier declaration_specifiers_opt """ p[0] = self._add_declaration_specifier(p[2], p[1], 'type') def p_declaration_specifiers_3(self, p): """ declaration_specifiers : type_qualifier declaration_specifiers_opt """ p[0] = self._add_declaration_specifier(p[2], p[1], 'qual') # 例如 char * const * p,意为 pointer to const pointer to char # char ** const p,意为 const pointer to pointer to char # 所以,最前面的 pointer 应该在最内层 # def p_pointer(self, p): """ pointer : TIMES type_qualifier_list_opt | TIMES type_qualifier_list_opt pointer """ nested = c_ast.PtrDecl( quals=p[2] or [], type=None, coord=self._token_coord(p, 1) ) if len(p) > 3: tail_type = p[3] while tail_type.type is not None: tail_type = tail_type.type tail_type.type = nested p[0] = p[3] else: p[0] = nested def p_type_qualifier_list(self, p): """ type_qualifier_list : type_qualifier | type_qualifier_list type_qualifier """ p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]] def p_type_qualifier(self, p): """ type_qualifier : CONST | VOLATILE """ p[0] = p[1] def p_storage_class_specifier(self, p): """ storage_class_specifier : AUTO | REGISTER | STATIC | EXTERN | TYPEDEF """ p[0] = p[1] # 不考虑 Union # def p_type_specifier(self, p): """ type_specifier : type_specifier_simple | enum_specifier | struct_specifier """ p[0] = p[1] def p_type_specifier_simple(self, p): """ type_specifier_simple : VOID | CHAR | SHORT | INT | LONG | FLOAT | DOUBLE | SIGNED | UNSIGNED | TYPEID """ p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1)) ###### enum 部分 ###### def p_enum_specifier_1(self, p): """ enum_specifier : ENUM ID """ p[0] = c_ast.Enum(p[2], None, self._token_coord(p, 1)) def p_enum_specifier_2(self, p): """ enum_specifier : ENUM brace_open enumerator_list brace_close """ p[0] = c_ast.Enum(None, p[3], self._token_coord(p, 1)) def p_enum_specifier_3(self, p): """ enum_specifier : ENUM ID brace_open enumerator_list brace_close """ p[0] = c_ast.Enum(p[2], p[4], self._token_coord(p, 1)) def p_enumerator_list(self, p): """ enumerator_list : enumerator | enumerator_list COMMA | enumerator_list COMMA enumerator """ if len(p) == 2: p[0] = c_ast.EnumeratorList([p[1]], p[1].coord) elif len(p) == 3: p[0] = p[1] else: p[1].enumerators.append(p[3]) p[0] = p[1] def p_enumerator(self, p): """ enumerator : ID | ID EQUALS constant_expression """ if len(p) == 2: enumerator = c_ast.Enumerator( p[1], None, self._token_coord(p, 1)) else: enumerator = c_ast.Enumerator( p[1], p[3], self._token_coord(p, 1)) # 在当前范围内将其声明为 identifier,防止后续出现重复的名字 self._add_identifier(enumerator.name, enumerator.coord) p[0] = enumerator ###### struct 部分 ###### def p_struct_specifier_1(self, p): """ struct_specifier : STRUCT ID """ p[0] = c_ast.Struct( name=p[2], decls=None, coord=self._token_coord(p, 2) ) def p_struct_specifier_2(self, p): """ struct_specifier : STRUCT brace_open struct_declaration_list brace_close """ p[0] = c_ast.Struct( name=None, decls=p[3], coord=self._token_coord(p, 2) ) def p_struct_specifier_3(self, p): """ struct_specifier : STRUCT ID brace_open struct_declaration_list brace_close """ p[0] = c_ast.Struct( name=p[2], decls=p[4], coord=self._token_coord(p, 2) ) def p_struct_declaration_list(self, p): """ struct_declaration_list : struct_declaration | struct_declaration_list struct_declaration """ if len(p) == 2: p[0] = p[1] or [] else: p[0] = p[1] + (p[2] or []) # 一个 list 或 None # def p_struct_declaration(self, p): """ struct_declaration : specifier_qualifier_list struct_declarator_list SEMI """ spec = p[1] assert 'typedef' not in spec['storage'] # # TODO 这块给写漏了 # specifier-qualifier-list 是一个 dict, # dict 中包含 quals,storage,type 三个 list # specifier-qualifier-list 至少要包含一个 type-specifier # [!] 第一条的意义尚不明确 # def p_specifier_qualifier_list_1(self, p): """ specifier_qualifier_list : specifier_qualifier_list type_specifier """ p[0] = p[1].type.append(p[2]) # p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True) def p_specifier_qualifier_list_2(self, p): """ specifier_qualifier_list : specifier_qualifier_list type_qualifier """ p[0] = p[1].qual.append(p[2]) # p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True) def p_specifier_qualifier_list_3(self, p): """ specifier_qualifier_list : type_specifier """ p[0] = dict(qual=[], sotrage=[], type=[p[1]], function=[]) # p[0] = self._add_declaration_specifier(None, p[1], 'type') def p_specifier_qualifier_list_4(self, p): """ specifier_qualifier_list : type_qualifier_list type_specifier """ p[0] = dict(qual=[p[1]], sotrage=[], type=[p[1]], function=[]) # p[0] = self._add_declaration_specifier( # declspec=dict(qual=p[1], storage=[], type=[], function=[]), # newspec=p[2], # kind='type', # append=True # ) def p_struct_declarator_list(self, p): """ struct_declarator_list : struct_declarator | struct_declarator COMMA struct_declarator """ p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]] # 不考虑 bit fields # def p_struct_declarator(self, p): """ struct_declarator : declarator """ p[0] = p[1] # def p_typedef_name(self, p): # """ typedef_name : TYPEID """ # p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1)) ###### initializer部分 ###### # 不考虑designator # def p_initializer_1(self, p): """ initializer : assignment_expression """ p[0] = p[1] def p_initializer_2(self, p): """ initializer : brace_open initializer_list_opt brace_close | brace_open initializer_list COMMA brace_close """ if p[2] is None: p[0] = c_ast.InitList([], self._token_coord(p, 1)) else: p[0] = p[2] def p_initializer_list(self, p): """ initializer_list : initializer | initializer_list COMMA initializer """ if len(p) == 2: p[0] = c_ast.InitList([p[1]], p[1].coord) else: p[1].exprs.append(p[3]) p[0] = p[1] ###### expression 部分 ###### def p_constant_expression(self, p): """ constant_expression : conditional_expression """ p[0] = p[1] # 表达式 # 例如 a = 1 # 或 a = 1, b = 2, c # def p_expression(self, p): """ expression : assignment_expression | expression COMMA assignment_expression """ if len(p) == 2: p[0] = p[1] else: if not isinstance(p[1], c_ast.ExprList): p[1] = c_ast.ExprList([p[1]], p[1].coord) p[1].exprs.append(p[3]) p[0] = p[1] # 赋值表达式(优先级14) # C语言标准规定,赋值运算符的左运算数必须是一元(第 2 级非转型)表达式。 # def p_assignment_expression(self, p): """ assignment_expression : conditional_expression | unary_expression assignment_operator assignment_expression """ if len(p) == 2: p[0] = p[1] else: p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord) # 赋值运算符 # def p_assignment_operator(self, p): """ assignment_operator : EQUALS | XOREQUAL | TIMESEQUAL | DIVEQUAL | MODEQUAL | PLUSEQUAL | MINUSEQUAL | LSHIFTEQUAL | RSHIFTEQUAL | ANDEQUAL | OREQUAL """ p[0] = p[1] # 三元条件表达式(优先级13) # 例如:a > b ? a : b # def p_conditional_expression(self, p): """ conditional_expression : binary_expression | binary_expression CONDOP expression COLON conditional_expression """ if len(p) == 2: p[0] = p[1] else: p[0] = c_ast.TernaryOp(p[1], p[3], p[5], p[1].coord) # 二元表达式(优先级3-12) # 将优先级3到12的二元表达式的归约都列于此 # 它们在归约时的优先级关系和结合性由前面的 precedence元组规定 # def p_binary_expression(self, p): """ binary_expression : cast_expression | binary_expression TIMES binary_expression | binary_expression DIVIDE binary_expression | binary_expression MOD binary_expression | binary_expression PLUS binary_expression | binary_expression MINUS binary_expression | binary_expression LSHIFT binary_expression | binary_expression RSHIFT binary_expression | binary_expression GT binary_expression | binary_expression GE binary_expression | binary_expression LT binary_expression | binary_expression LE binary_expression | binary_expression EQ binary_expression | binary_expression NE binary_expression | binary_expression AND binary_expression | binary_expression XOR binary_expression | binary_expression OR binary_expression | binary_expression LAND binary_expression | binary_expression LOR binary_expression """ if len(p) == 2: p[0] = p[1] else: p[0] = c_ast.BinaryOp(p[2], p[1], p[3], p[1].coord) # 类型转换表达式(优先级2) # 例如 (int)bar # 由于C语言标准中规定前缀自增与自减的运算数不能是转型 # 所以对转型的归约单独列出 # def p_cast_expression_1(self, p): """ cast_expression : unary_expression """ p[0] = p[1] # # def p_cast_expression_2(self, p): # """ cast_expression : LPAREN type_name RPAREN cast_expression """ # p[0] = c_ast.Cast(p[2], p[4], self._token_coord(p, 1)) # 一元表达式(优先级2) # 包含对前缀自增和自减的归约 # 以及对其它一元运算符的归约 # def p_unary_expression_1(self, p): """ unary_expression : postfix_expression """ p[0] = p[1] def p_unary_expression_2(self, p): """ unary_expression : PLUSPLUS unary_expression | MINUSMINUS unary_expression | unary_operator cast_expression """ p[0] = c_ast.UnaryOp(p[1], p[2], p[2].coord) def p_unary_operator(self, p): """ unary_operator : AND | TIMES | PLUS | MINUS | NOT | LNOT """ p[0] = p[1] # 实参列表,函数调用时使用 # def p_argument_expression_list(self, p): """ argument_expression_list : assignment_expression | argument_expression_list COMMA assignment_expression """ if len(p) == 2: p[0] = c_ast.ExprList([p[1]], p[1].coord) else: p[1].exprs.append(p[3]) p[0] = p[1] # 后缀表达式(优先级1) # 包含对数组引用、函数调用和Struct结构引用 # 以及后缀自增和自减的归约 # def p_postfix_expression_1(self, p): """ postfix_expression : primary_expression """ p[0] = p[1] def p_postfix_expression_2(self, p): """ postfix_expression : postfix_expression LBRACKET expression RBRACKET """ p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord) def p_postfix_expression_3(self, p): """ postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN | postfix_expression LPAREN RPAREN """ p[0] = c_ast.FuncCall(p[1], p[3] if len(p) == 5 else None, p[1].coord) def p_postfix_expression_4(self, p): """ postfix_expression : postfix_expression PERIOD identifier | postfix_expression ARROW identifier """ # field = c_ast.ID(p[3], self._token_coord(p, 3)) # p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord) p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord) def p_postfix_expression_5(self, p): """ postfix_expression : postfix_expression PLUSPLUS | postfix_expression MINUSMINUS """ # 添加前缀p以表示这是后缀的运算符,与上面前缀的进行区分 p[0] = c_ast.UnaryOp('p' + p[2], p[1], p[1].coord) # 基本表达式,优先级最高 # 表达式加括号后优先级也成为最高 # def p_primary_expression_1(self, p): """ primary_expression : identifier | constant | string_literal """ p[0] = p[1] def p_primary_expression_2(self, p): """ primary_expression : LPAREN expression RPAREN """ p[0] = p[2] # def p_offsetof_member_designator def p_identifier(self, p): """ identifier : ID """ p[0] = c_ast.ID(p[1], self._token_coord(p, 1)) # 整数、浮点数和字符常量 # def p_constant_1(self, p): """ constant : INT_CONST """ uCount = 0 lCount = 0 for x in p[1][-3:]: if x in ('l', 'L'): lCount += 1 elif x in ('u', 'U'): uCount += 1 if uCount > 1: raise ValueError('常量尾缀错误,含有多于1个u/U') elif lCount > 2: raise ValueError('常量尾缀错误,含有多余2个l/L') prefix = 'unsigned ' * uCount + 'long ' * lCount p[0] = c_ast.Constant(prefix + 'int', p[1], self._token_coord(p, 1)) def p_constant_2(self, p): """ constant : FLOAT_CONST """ if p[1][-1] in ('f', 'F'): t = 'float' elif p[1][-1] in ('l', 'L'): t = 'long double' else: t = 'double' p[0] = c_ast.Constant(t, p[1], self._token_coord(p, 1)) def p_constant_3(self, p): """ constant : CHAR_CONST """ p[0] = c_ast.Constant('char', p[1], self._token_coord(p, 1)) # 字符串 # def p_string_literal(self, p): """ string_literal : STRING_LITERAL """ p[0] = c_ast.Constant( 'string', p[1].replace('\n', '\\n'), self._token_coord(p, 1)) def p_brace_open(self, p): """ brace_open : LBRACE """ p[0] = p[1] p.set_lineno(0, p.lineno(1)) def p_brace_close(self, p): """ brace_close : RBRACE """ p[0] = p[1] p.set_lineno(0, p.lineno(1)) # 定义空字 ε # def p_empty(self, p): """ empty : """ p[0] = None # 错误处理 # def p_error(self, p): if p: self._parse_error( '在 {} 之前'.format(p.value), self._coord(lineno=p.lineno, column=self.clex.token_column(p)) ) else: self._parse_error('到达文件结尾', self.clex.filename)
def __init__( self, lex_optimize=True, lextab='pycparser.lextab', yacc_optimize=True, yacctab='pycparser.yacctab', yacc_debug=False): """ Create a new CParser. Some arguments for controlling the debug/optimization level of the parser are provided. The defaults are tuned for release/performance mode. The simple rules for using them are: *) When tweaking CParser/CLexer, set these to False *) When releasing a stable parser, set to True lex_optimize: Set to False when you're modifying the lexer. Otherwise, changes in the lexer won't be used, if some lextab.py file exists. When releasing with a stable lexer, set to True to save the re-generation of the lexer table on each run. lextab: Points to the lex table that's used for optimized mode. Only if you're modifying the lexer and want some tests to avoid re-generating the table, make this point to a local lex table file (that's been earlier generated with lex_optimize=True) yacc_optimize: Set to False when you're modifying the parser. Otherwise, changes in the parser won't be used, if some parsetab.py file exists. When releasing with a stable parser, set to True to save the re-generation of the parser table on each run. yacctab: Points to the yacc table that's used for optimized mode. Only if you're modifying the parser, make this point to a local yacc table file yacc_debug: Generate a parser.out file that explains how yacc built the parsing table from the grammar. """ self.clex = CLexer( error_func=self._lex_error_func, type_lookup_func=self._lex_type_lookup_func) self.clex.build( optimize=lex_optimize, lextab=lextab) self.tokens = self.clex.tokens rules_with_opt = [ 'abstract_declarator', 'constant_expression', 'declaration_list', 'declaration_specifiers', 'expression', 'identifier_list', 'init_declarator_list', 'parameter_type_list', 'specifier_qualifier_list', 'statement_list', 'type_qualifier_list', ] for rule in rules_with_opt: self._create_opt_rule(rule) self.cparser = ply.yacc.yacc( module=self, start='translation_unit', debug=yacc_debug, optimize=yacc_optimize, tabmodule=yacctab) # A table of identifiers defined as typedef types during # parsing. # self.typedef_table = set([])
class CParser(PLYParser): def __init__( self, lex_optimize=True, lextab='pycparser.lextab', yacc_optimize=True, yacctab='pycparser.yacctab', yacc_debug=False): """ Create a new CParser. Some arguments for controlling the debug/optimization level of the parser are provided. The defaults are tuned for release/performance mode. The simple rules for using them are: *) When tweaking CParser/CLexer, set these to False *) When releasing a stable parser, set to True lex_optimize: Set to False when you're modifying the lexer. Otherwise, changes in the lexer won't be used, if some lextab.py file exists. When releasing with a stable lexer, set to True to save the re-generation of the lexer table on each run. lextab: Points to the lex table that's used for optimized mode. Only if you're modifying the lexer and want some tests to avoid re-generating the table, make this point to a local lex table file (that's been earlier generated with lex_optimize=True) yacc_optimize: Set to False when you're modifying the parser. Otherwise, changes in the parser won't be used, if some parsetab.py file exists. When releasing with a stable parser, set to True to save the re-generation of the parser table on each run. yacctab: Points to the yacc table that's used for optimized mode. Only if you're modifying the parser, make this point to a local yacc table file yacc_debug: Generate a parser.out file that explains how yacc built the parsing table from the grammar. """ self.clex = CLexer( error_func=self._lex_error_func, type_lookup_func=self._lex_type_lookup_func) self.clex.build( optimize=lex_optimize, lextab=lextab) self.tokens = self.clex.tokens rules_with_opt = [ 'abstract_declarator', 'constant_expression', 'declaration_list', 'declaration_specifiers', 'expression', 'identifier_list', 'init_declarator_list', 'parameter_type_list', 'specifier_qualifier_list', 'statement_list', 'type_qualifier_list', ] for rule in rules_with_opt: self._create_opt_rule(rule) self.cparser = ply.yacc.yacc( module=self, start='translation_unit', debug=yacc_debug, optimize=yacc_optimize, tabmodule=yacctab) # A table of identifiers defined as typedef types during # parsing. # self.typedef_table = set([]) def parse(self, text, filename='', debuglevel=0): """ Parses C code and returns an AST. text: A string containing the C source code filename: Name of the file being parsed (for meaningful error messages) debuglevel: Debug level to yacc """ self.clex.filename = filename self.clex.reset_lineno() self.typedef_table = set([]) return self.cparser.parse(text, lexer=self.clex, debug=debuglevel) ######################-- PRIVATE --###################### def _lex_error_func(self, msg, line, column): self._parse_error(msg, self._coord(line, column)) def _lex_type_lookup_func(self, name): """ Looks up types that were previously defined with typedef. Passed to the lexer for recognizing identifiers that are types. """ return name in self.typedef_table def _add_typedef_type(self, name): """ Adds names that were defined as new types with typedef. """ self.typedef_table.add(name) # To understand what's going on here, read sections A.8.5 and # A.8.6 of K&R2 very carefully. # # A C type consists of a basic type declaration, with a list # of modifiers. For example: # # int *c[5]; # # The basic declaration here is 'int x', and the pointer and # the array are the modifiers. # # Basic declarations are represented by TypeDecl (from module # c_ast) and the modifiers are FuncDecl, PtrDecl and # ArrayDecl. # # The standard states that whenever a new modifier is parsed, # it should be added to the end of the list of modifiers. For # example: # # K&R2 A.8.6.2: Array Declarators # # In a declaration T D where D has the form # D1 [constant-expression-opt] # and the type of the identifier in the declaration T D1 is # "type-modifier T", the type of the # identifier of D is "type-modifier array of T" # # This is what this method does. The declarator it receives # can be a list of declarators ending with TypeDecl. It # tacks the modifier to the end of this list, just before # the TypeDecl. # # Additionally, the modifier may be a list itself. This is # useful for pointers, that can come as a chain from the rule # p_pointer. In this case, the whole modifier list is spliced # into the new location. # def _type_modify_decl(self, decl, modifier): """ Tacks a type modifier on a declarator, and returns the modified declarator. Note: the declarator and modifier may be modified """ #~ print '****' #~ decl.show(offset=3) #~ modifier.show(offset=3) #~ print '****' modifier_head = modifier modifier_tail = modifier # The modifier may be a nested list. Reach its tail. # while modifier_tail.type: modifier_tail = modifier_tail.type # If the decl is a basic type, just tack the modifier onto # it # if isinstance(decl, c_ast.TypeDecl): modifier_tail.type = decl return modifier else: # Otherwise, the decl is a list of modifiers. Reach # its tail and splice the modifier onto the tail, # pointing to the underlying basic type. # decl_tail = decl while not isinstance(decl_tail.type, c_ast.TypeDecl): decl_tail = decl_tail.type modifier_tail.type = decl_tail.type decl_tail.type = modifier_head return decl # Due to the order in which declarators are constructed, # they have to be fixed in order to look like a normal AST. # # When a declaration arrives from syntax construction, it has # these problems: # * The innermost TypeDecl has no type (because the basic # type is only known at the uppermost declaration level) # * The declaration has no variable name, since that is saved # in the innermost TypeDecl # * The typename of the declaration is a list of type # specifiers, and not a node. Here, basic identifier types # should be separated from more complex types like enums # and structs. # # This method fixes these problem. # def _fix_decl_name_type(self, decl, typename): """ Fixes a declaration. Modifies decl. """ # Reach the underlying basic type # type = decl while not isinstance(type, c_ast.TypeDecl): type = type.type decl.name = type.declname type.quals = decl.quals # The typename is a list of types. If any type in this # list isn't a simple string type, it must be the only # type in the list (it's illegal to declare "int enum .." # If all the types are basic, they're collected in the # IdentifierType holder. # for tn in typename: if not isinstance(tn, StringType): if len(typename) > 1: self._parse_error( "Invalid multiple types specified", tn.coord) else: type.type = tn return decl type.type = c_ast.IdentifierType(typename) return decl def _add_declaration_specifier(self, declspec, newspec, kind): """ Declaration specifiers are represented by a dictionary with 3 entries: * qual: a list of type qualifiers * storage: a list of storage type qualifiers * type: a list of type specifiers This method is given a declaration specifier, and a new specifier of a given kind. Returns the declaration specifier, with the new specifier incorporated. """ spec = declspec or dict(qual=[], storage=[], type=[]) spec[kind].append(newspec) return spec def _build_function_definition(self, decl, spec, param_decls, body): """ Builds a function definition. """ declaration = c_ast.Decl( name=None, quals=spec['qual'], storage=spec['storage'], type=decl, init=None, bitsize=None, coord=decl.coord) typename = spec['type'] declaration = self._fix_decl_name_type(declaration, typename) return c_ast.FuncDef( decl=declaration, param_decls=param_decls, body=body, coord=decl.coord) def _select_struct_union_class(self, token): """ Given a token (either STRUCT or UNION), selects the appropriate AST class. """ if token == 'struct': return c_ast.Struct else: return c_ast.Union ## ## Precedence and associativity of operators ## precedence = ( ('left', 'LOR'), ('left', 'LAND'), ('left', 'OR'), ('left', 'XOR'), ('left', 'AND'), ('left', 'EQ', 'NE'), ('left', 'GT', 'GE', 'LT', 'LE'), ('left', 'RSHIFT', 'LSHIFT'), ('left', 'PLUS', 'MINUS'), ('left', 'TIMES', 'DIVIDE', 'MOD') ) ## ## Grammar productions ## Implementation of the BNF defined in K&R2 A.13 ## def p_translation_unit_1(self, p): """ translation_unit : external_declaration """ # Note: external_declaration is already a list # p[0] = c_ast.FileAST(p[1]) def p_translation_unit_2(self, p): """ translation_unit : translation_unit external_declaration """ p[1].ext.extend(p[2]) p[0] = p[1] # Declarations always come as lists (because they can be # several in one line), so we wrap the function definition # into a list as well, to make the return value of # external_declaration homogenous. # def p_external_declaration_1(self, p): """ external_declaration : function_definition """ p[0] = [p[1]] def p_external_declaration_2(self, p): """ external_declaration : declaration """ p[0] = p[1] def p_external_declaration_3(self, p): """ external_declaration : pp_directive """ p[0] = p[1] def p_pp_directive(self, p): """ pp_directive : PPHASH """ self._parse_error('Directives not supported yet', self._coord(p.lineno(1))) # In function definitions, the declarator can be followed by # a declaration list, for old "K&R style" function definitios. # def p_function_definition_1(self, p): """ function_definition : declarator declaration_list_opt compound_statement """ # no declaration specifiers spec = dict(qual=[], storage=[], type=[]) p[0] = self._build_function_definition( decl=p[1], spec=spec, param_decls=p[2], body=p[3]) def p_function_definition_2(self, p): """ function_definition : declaration_specifiers declarator declaration_list_opt compound_statement """ spec = p[1] p[0] = self._build_function_definition( decl=p[2], spec=spec, param_decls=p[3], body=p[4]) def p_statement(self, p): """ statement : labeled_statement | expression_statement | compound_statement | selection_statement | iteration_statement | jump_statement """ p[0] = p[1] # In C, declarations can come several in a line: # int x, *px, romulo = 5; # # However, for the AST, we will split them to separate Decl # nodes. # # This rule splits its declarations and always returns a list # of Decl nodes, even if it's one element long. # def p_decl_body(self, p): """ decl_body : declaration_specifiers init_declarator_list_opt """ spec = p[1] is_typedef = 'typedef' in spec['storage'] decls = [] # p[2] (init_declarator_list_opt) is either a list or None # if p[2] is None: # Then it's a declaration of a struct / enum tag, # without an actual declarator. # type = spec['type'] if len(type) > 1: coord = '?' for t in type: if hasattr(t, 'coord'): coord = t.coord break self._parse_error('Multiple type specifiers with a type tag', coord) decl = c_ast.Decl( name=None, quals=spec['qual'], storage=spec['storage'], type=type[0], init=None, bitsize=None, coord=type[0].coord) decls = [decl] else: for decl, init in p[2] or []: if is_typedef: decl = c_ast.Typedef( name=None, quals=spec['qual'], storage=spec['storage'], type=decl, coord=decl.coord) else: decl = c_ast.Decl( name=None, quals=spec['qual'], storage=spec['storage'], type=decl, init=init, bitsize=None, coord=decl.coord) typename = spec['type'] fixed_decl = self._fix_decl_name_type(decl, typename) # Add the type name defined by typedef to a # symbol table (for usage in the lexer) # if is_typedef: self._add_typedef_type(fixed_decl.name) decls.append(fixed_decl) p[0] = decls # The declaration has been split to a decl_body sub-rule and # SEMI, because having them in a single rule created a problem # for defining typedefs. # # If a typedef line was directly followed by a line using the # type defined with the typedef, the type would not be # recognized. This is because to reduce the declaration rule, # the parser's lookahead asked for the token after SEMI, which # was the type from the next line, and the lexer had no chance # to see the updated type symbol table. # # Splitting solves this problem, because after seeing SEMI, # the parser reduces decl_body, which actually adds the new # type into the table to be seen by the lexer before the next # line is reached. # def p_declaration(self, p): """ declaration : decl_body SEMI """ p[0] = p[1] # Since each declaration is a list of declarations, this # rule will combine all the declarations and return a single # list # def p_declaration_list(self, p): """ declaration_list : declaration | declaration_list declaration """ p[0] = p[1] if len(p) == 2 else p[1] + p[2] def p_declaration_specifiers_1(self, p): """ declaration_specifiers : type_qualifier declaration_specifiers_opt """ p[0] = self._add_declaration_specifier(p[2], p[1], 'qual') def p_declaration_specifiers_2(self, p): """ declaration_specifiers : type_specifier declaration_specifiers_opt """ p[0] = self._add_declaration_specifier(p[2], p[1], 'type') def p_declaration_specifiers_3(self, p): """ declaration_specifiers : storage_class_specifier declaration_specifiers_opt """ p[0] = self._add_declaration_specifier(p[2], p[1], 'storage') def p_storage_class_specifier(self, p): """ storage_class_specifier : AUTO | REGISTER | STATIC | EXTERN | TYPEDEF """ p[0] = p[1] def p_type_specifier_1(self, p): """ type_specifier : VOID | CHAR | SHORT | INT | LONG | FLOAT | DOUBLE | SIGNED | UNSIGNED | typedef_name | enum_specifier | struct_or_union_specifier """ p[0] = p[1] def p_type_qualifier(self, p): """ type_qualifier : CONST | VOLATILE """ p[0] = p[1] def p_init_declarator_list(self, p): """ init_declarator_list : init_declarator | init_declarator_list COMMA init_declarator """ p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]] # Returns a (declarator, intializer) pair # If there's no initializer, returns (declarator, None) # def p_init_declarator(self, p): """ init_declarator : declarator | declarator EQUALS initializer """ p[0] = (p[1], p[3] if len(p) > 2 else None) def p_specifier_qualifier_list_1(self, p): """ specifier_qualifier_list : type_qualifier specifier_qualifier_list_opt """ p[0] = self._add_declaration_specifier(p[2], p[1], 'qual') def p_specifier_qualifier_list_2(self, p): """ specifier_qualifier_list : type_specifier specifier_qualifier_list_opt """ p[0] = self._add_declaration_specifier(p[2], p[1], 'type') # TYPEID is allowed here (and in other struct/enum related tag names), because # struct/enum tags reside in their own namespace and can be named the same as types # def p_struct_or_union_specifier_1(self, p): """ struct_or_union_specifier : struct_or_union ID | struct_or_union TYPEID """ klass = self._select_struct_union_class(p[1]) p[0] = klass( name=p[2], decls=None, coord=self._coord(p.lineno(2))) def p_struct_or_union_specifier_2(self, p): """ struct_or_union_specifier : struct_or_union LBRACE struct_declaration_list RBRACE """ klass = self._select_struct_union_class(p[1]) p[0] = klass( name=None, decls=p[3], coord=self._coord(p.lineno(2))) def p_struct_or_union_specifier_3(self, p): """ struct_or_union_specifier : struct_or_union ID LBRACE struct_declaration_list RBRACE | struct_or_union TYPEID LBRACE struct_declaration_list RBRACE """ klass = self._select_struct_union_class(p[1]) p[0] = klass( name=p[2], decls=p[4], coord=self._coord(p.lineno(2))) def p_struct_or_union(self, p): """ struct_or_union : STRUCT | UNION """ p[0] = p[1] # Combine all declarations into a single list # def p_struct_declaration_list(self, p): """ struct_declaration_list : struct_declaration | struct_declaration_list struct_declaration """ p[0] = p[1] if len(p) == 2 else p[1] + p[2] def p_struct_declaration_1(self, p): """ struct_declaration : specifier_qualifier_list struct_declarator_list SEMI """ spec = p[1] decls = [] for struct_decl in p[2]: decl = c_ast.Decl( name=None, quals=spec['qual'], storage=spec['storage'], type=struct_decl['decl'], init=None, bitsize=struct_decl['bitsize'], coord=struct_decl['decl'].coord) typename = spec['type'] decls.append(self._fix_decl_name_type(decl, typename)) p[0] = decls def p_struct_declarator_list(self, p): """ struct_declarator_list : struct_declarator | struct_declarator_list COMMA struct_declarator """ p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]] # struct_declarator passes up a dict with the keys: decl (for # the underlying declarator) and bitsize (for the bitsize) # def p_struct_declarator_1(self, p): """ struct_declarator : declarator """ p[0] = {'decl': p[1], 'bitsize': None} def p_struct_declarator_2(self, p): """ struct_declarator : declarator COLON constant_expression | COLON constant_expression """ if len(p) > 3: p[0] = {'decl': p[1], 'bitsize': p[3]} else: p[0] = {'decl': None, 'bitsize': p[2]} def p_enum_specifier_1(self, p): """ enum_specifier : ENUM ID | ENUM TYPEID """ p[0] = c_ast.Enum(p[2], None, self._coord(p.lineno(1))) def p_enum_specifier_2(self, p): """ enum_specifier : ENUM LBRACE enumerator_list RBRACE """ p[0] = c_ast.Enum(None, p[3], self._coord(p.lineno(1))) def p_enum_specifier_3(self, p): """ enum_specifier : ENUM ID LBRACE enumerator_list RBRACE | ENUM TYPEID LBRACE enumerator_list RBRACE """ p[0] = c_ast.Enum(p[2], p[4], self._coord(p.lineno(1))) def p_enumerator_list(self, p): """ enumerator_list : enumerator | enumerator_list COMMA | enumerator_list COMMA enumerator """ if len(p) == 2: p[0] = c_ast.EnumeratorList([p[1]], p[1].coord) elif len(p) == 3: p[0] = p[1] else: p[1].enumerators.append(p[3]) p[0] = p[1] def p_enumerator(self, p): """ enumerator : ID | ID EQUALS constant_expression """ if len(p) == 2: p[0] = c_ast.Enumerator( p[1], None, self._coord(p.lineno(1))) else: p[0] = c_ast.Enumerator( p[1], p[3], self._coord(p.lineno(1))) def p_declarator_1(self, p): """ declarator : direct_declarator """ p[0] = p[1] def p_declarator_2(self, p): """ declarator : pointer direct_declarator """ p[0] = self._type_modify_decl(p[2], p[1]) def p_direct_declarator_1(self, p): """ direct_declarator : ID """ p[0] = c_ast.TypeDecl( declname=p[1], type=None, quals=None, coord=self._coord(p.lineno(1))) def p_direct_declarator_2(self, p): """ direct_declarator : LPAREN declarator RPAREN """ p[0] = p[2] def p_direct_declarator_3(self, p): """ direct_declarator : direct_declarator LBRACKET constant_expression_opt RBRACKET """ arr = c_ast.ArrayDecl( type=None, dim=p[3], coord=p[1].coord) p[0] = self._type_modify_decl(decl=p[1], modifier=arr) def p_direct_declarator_4(self, p): """ direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN | direct_declarator LPAREN identifier_list_opt RPAREN """ func = c_ast.FuncDecl( args=p[3], type=None, coord=p[1].coord) p[0] = self._type_modify_decl(decl=p[1], modifier=func) def p_pointer(self, p): """ pointer : TIMES type_qualifier_list_opt | TIMES type_qualifier_list_opt pointer """ coord = self._coord(p.lineno(1)) p[0] = c_ast.PtrDecl( quals=p[2] or [], type=p[3] if len(p) > 3 else None, coord=coord) def p_type_qualifier_list(self, p): """ type_qualifier_list : type_qualifier | type_qualifier_list type_qualifier """ p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]] def p_parameter_type_list(self, p): """ parameter_type_list : parameter_list | parameter_list COMMA ELLIPSIS """ if len(p) > 2: p[1].params.append(c_ast.EllipsisParam()) p[0] = p[1] def p_parameter_list(self, p): """ parameter_list : parameter_declaration | parameter_list COMMA parameter_declaration """ if len(p) == 2: # single parameter p[0] = c_ast.ParamList([p[1]], p[1].coord) else: p[1].params.append(p[3]) p[0] = p[1] def p_parameter_declaration_1(self, p): """ parameter_declaration : declaration_specifiers declarator """ spec = p[1] decl = p[2] decl = c_ast.Decl( name=None, quals=spec['qual'], storage=spec['storage'], type=decl, init=None, bitsize=None, coord=decl.coord) typename = spec['type'] or ['int'] p[0] = self._fix_decl_name_type(decl, typename) def p_parameter_declaration_2(self, p): """ parameter_declaration : declaration_specifiers abstract_declarator_opt """ spec = p[1] decl = c_ast.Typename( quals=spec['qual'], type=p[2] or c_ast.TypeDecl(None, None, None)) typename = spec['type'] or ['int'] p[0] = self._fix_decl_name_type(decl, typename) def p_identifier_list(self, p): """ identifier_list : identifier | identifier_list COMMA identifier """ if len(p) == 2: # single parameter p[0] = c_ast.ParamList([p[1]], p[1].coord) else: p[1].params.append(p[3]) p[0] = p[1] def p_initializer_1(self, p): """ initializer : assignment_expression """ p[0] = p[1] def p_initializer_2(self, p): """ initializer : LBRACE initializer_list RBRACE | LBRACE initializer_list COMMA RBRACE """ p[0] = p[2] def p_initializer_list(self, p): """ initializer_list : initializer | initializer_list COMMA initializer """ if len(p) == 2: # single initializer p[0] = c_ast.ExprList([p[1]], p[1].coord) else: p[1].exprs.append(p[3]) p[0] = p[1] def p_type_name(self, p): """ type_name : specifier_qualifier_list abstract_declarator_opt """ #~ print '==========' #~ print p[1] #~ print p[2] #~ print p[2].children() #~ print '==========' typename = c_ast.Typename( quals=p[1]['qual'], type=p[2] or c_ast.TypeDecl(None, None, None)) p[0] = self._fix_decl_name_type(typename, p[1]['type']) def p_abstract_declarator_1(self, p): """ abstract_declarator : pointer """ dummytype = c_ast.TypeDecl(None, None, None) p[0] = self._type_modify_decl( decl=dummytype, modifier=p[1]) def p_abstract_declarator_2(self, p): """ abstract_declarator : pointer direct_abstract_declarator """ p[0] = self._type_modify_decl(p[2], p[1]) def p_abstract_declarator_3(self, p): """ abstract_declarator : direct_abstract_declarator """ p[0] = p[1] # Creating and using direct_abstract_declarator_opt here # instead of listing both direct_abstract_declarator and the # lack of it in the beginning of _1 and _2 caused two # shift/reduce errors. # def p_direct_abstract_declarator_1(self, p): """ direct_abstract_declarator : LPAREN abstract_declarator RPAREN """ p[0] = p[2] def p_direct_abstract_declarator_2(self, p): """ direct_abstract_declarator : direct_abstract_declarator LBRACKET constant_expression_opt RBRACKET """ arr = c_ast.ArrayDecl( type=None, dim=p[3], coord=p[1].coord) p[0] = self._type_modify_decl(decl=p[1], modifier=arr) def p_direct_abstract_declarator_3(self, p): """ direct_abstract_declarator : LBRACKET constant_expression_opt RBRACKET """ p[0] = c_ast.ArrayDecl( type=c_ast.TypeDecl(None, None, None), dim=p[2], coord=self._coord(p.lineno(1))) def p_direct_abstract_declarator_4(self, p): """ direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN """ func = c_ast.FuncDecl( args=p[3], type=None, coord=p[1].coord) p[0] = self._type_modify_decl(decl=p[1], modifier=func) def p_direct_abstract_declarator_5(self, p): """ direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN """ p[0] = c_ast.FuncDecl( args=p[2], type=c_ast.TypeDecl(None, None, None), coord=p[1].coord) def p_compound_statement_1(self, p): """ compound_statement : LBRACE statement_list_opt RBRACE """ p[0] = c_ast.Compound( decls=None, stmts=p[2], coord=self._coord(p.lineno(1))) def p_compound_statement_2(self, p): """ compound_statement : LBRACE declaration_list RBRACE """ p[0] = c_ast.Compound( decls=p[2], stmts=None, coord=self._coord(p.lineno(1))) def p_compound_statement_3(self, p): """ compound_statement : LBRACE declaration_list statement_list RBRACE """ #~ print '((((((' #~ print p[2] #~ print p[3] #~ print '((((((' p[0] = c_ast.Compound( decls=p[2], stmts=p[3], coord=self._coord(p.lineno(1))) # Note: this doesn't create an AST node, but a list of AST # nodes that will be used as the statement list of a compound # def p_statement_list(self, p): """ statement_list : statement | statement_list statement """ if len(p) == 2: # single expr p[0] = [p[1]] if p[1] else [] else: p[0] = p[1] + ([p[2]] if p[2] else []) def p_labeled_statement_1(self, p): """ labeled_statement : ID COLON statement """ p[0] = c_ast.Label(p[1], p[3], self._coord(p.lineno(1))) def p_labeled_statement_2(self, p): """ labeled_statement : CASE constant_expression COLON statement """ p[0] = c_ast.Case(p[2], p[4], self._coord(p.lineno(1))) def p_labeled_statement_3(self, p): """ labeled_statement : DEFAULT COLON statement """ p[0] = c_ast.Default(p[3], self._coord(p.lineno(1))) def p_selection_statement_1(self, p): """ selection_statement : IF LPAREN expression RPAREN statement """ p[0] = c_ast.If(p[3], p[5], None, self._coord(p.lineno(1))) def p_selection_statement_2(self, p): """ selection_statement : IF LPAREN expression RPAREN statement ELSE statement """ p[0] = c_ast.If(p[3], p[5], p[7], self._coord(p.lineno(1))) def p_selection_statement_3(self, p): """ selection_statement : SWITCH LPAREN expression RPAREN statement """ p[0] = c_ast.Switch(p[3], p[5], self._coord(p.lineno(1))) def p_iteration_statement_1(self, p): """ iteration_statement : WHILE LPAREN expression RPAREN statement """ p[0] = c_ast.While(p[3], p[5], self._coord(p.lineno(1))) def p_iteration_statement_2(self, p): """ iteration_statement : DO statement WHILE LPAREN expression RPAREN """ p[0] = c_ast.DoWhile(p[5], p[2], self._coord(p.lineno(1))) def p_iteration_statement_3(self, p): """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement """ p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._coord(p.lineno(1))) def p_jump_statement_1(self, p): """ jump_statement : GOTO ID SEMI """ p[0] = c_ast.Goto(p[2], self._coord(p.lineno(1))) def p_jump_statement_2(self, p): """ jump_statement : BREAK SEMI """ p[0] = c_ast.Break(self._coord(p.lineno(1))) def p_jump_statement_3(self, p): """ jump_statement : CONTINUE SEMI """ p[0] = c_ast.Continue(self._coord(p.lineno(1))) def p_jump_statement_4(self, p): """ jump_statement : RETURN expression SEMI | RETURN SEMI """ p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._coord(p.lineno(1))) def p_expression_statement(self, p): """ expression_statement : expression_opt SEMI """ p[0] = p[1] def p_expression(self, p): """ expression : assignment_expression | expression COMMA assignment_expression """ if len(p) == 2: p[0] = p[1] else: if not isinstance(p[1], c_ast.ExprList): p[1] = c_ast.ExprList([p[1]], p[1].coord) p[1].exprs.append(p[3]) p[0] = p[1] def p_typedef_name(self, p): """ typedef_name : TYPEID """ p[0] = p[1] def p_assignment_expression(self, p): """ assignment_expression : conditional_expression | unary_expression assignment_operator assignment_expression """ if len(p) == 2: p[0] = p[1] else: p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord) # K&R2 defines these as many separate rules, to encode # precedence and associativity. Why work hard ? I'll just use # the built in precedence/associativity specification feature # of PLY. (see precedence declaration above) # def p_assignment_operator(self, p): """ assignment_operator : EQUALS | XOREQUAL | TIMESEQUAL | DIVEQUAL | MODEQUAL | PLUSEQUAL | MINUSEQUAL | LSHIFTEQUAL | RSHIFTEQUAL | ANDEQUAL | OREQUAL """ p[0] = p[1] def p_constant_expression(self, p): """ constant_expression : conditional_expression """ p[0] = p[1] def p_conditional_expression(self, p): """ conditional_expression : binary_expression | binary_expression CONDOP expression COLON conditional_expression """ if len(p) == 2: p[0] = p[1] else: p[0] = c_ast.TernaryOp(p[1], p[3], p[5], p[1].coord) def p_binary_expression(self, p): """ binary_expression : cast_expression | binary_expression TIMES binary_expression | binary_expression DIVIDE binary_expression | binary_expression MOD binary_expression | binary_expression PLUS binary_expression | binary_expression MINUS binary_expression | binary_expression RSHIFT binary_expression | binary_expression LSHIFT binary_expression | binary_expression LT binary_expression | binary_expression LE binary_expression | binary_expression GE binary_expression | binary_expression GT binary_expression | binary_expression EQ binary_expression | binary_expression NE binary_expression | binary_expression AND binary_expression | binary_expression OR binary_expression | binary_expression XOR binary_expression | binary_expression LAND binary_expression | binary_expression LOR binary_expression """ if len(p) == 2: p[0] = p[1] else: p[0] = c_ast.BinaryOp(p[2], p[1], p[3], p[1].coord) def p_cast_expression_1(self, p): """ cast_expression : unary_expression """ p[0] = p[1] def p_cast_expression_2(self, p): """ cast_expression : LPAREN type_name RPAREN cast_expression """ p[0] = c_ast.Cast(p[2], p[4], p[2].coord) def p_unary_expression_1(self, p): """ unary_expression : postfix_expression """ p[0] = p[1] def p_unary_expression_2(self, p): """ unary_expression : PLUSPLUS unary_expression | MINUSMINUS unary_expression | unary_operator cast_expression """ p[0] = c_ast.UnaryOp(p[1], p[2], p[2].coord) def p_unary_expression_3(self, p): """ unary_expression : SIZEOF unary_expression | SIZEOF LPAREN type_name RPAREN """ p[0] = c_ast.UnaryOp( p[1], p[2] if len(p) == 3 else p[3], self._coord(p.lineno(1))) def p_unary_operator(self, p): """ unary_operator : AND | TIMES | PLUS | MINUS | NOT | LNOT """ p[0] = p[1] def p_postfix_exptession_1(self, p): """ postfix_expression : primary_expression """ p[0] = p[1] def p_postfix_exptession_2(self, p): """ postfix_expression : postfix_expression LBRACKET expression RBRACKET """ p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord) def p_postfix_exptession_3(self, p): """ postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN | postfix_expression LPAREN RPAREN """ p[0] = c_ast.FuncCall(p[1], p[3] if len(p) == 5 else None) def p_postfix_expression_4(self, p): """ postfix_expression : postfix_expression PERIOD identifier | postfix_expression ARROW identifier """ p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord) def p_postfix_expression_5(self, p): """ postfix_expression : postfix_expression PLUSPLUS | postfix_expression MINUSMINUS """ p[0] = c_ast.UnaryOp('p' + p[2], p[1], p[1].coord) def p_primary_expression_1(self, p): """ primary_expression : identifier """ p[0] = p[1] def p_primary_expression_2(self, p): """ primary_expression : constant """ p[0] = p[1] def p_primary_expression_3(self, p): """ primary_expression : STRING_LITERAL | WSTRING_LITERAL """ p[0] = c_ast.Constant( 'string', p[1], self._coord(p.lineno(1))) def p_primary_expression_4(self, p): """ primary_expression : LPAREN expression RPAREN """ p[0] = p[2] def p_argument_expression_list(self, p): """ argument_expression_list : assignment_expression | argument_expression_list COMMA assignment_expression """ if len(p) == 2: # single expr p[0] = c_ast.ExprList([p[1]], p[1].coord) else: p[1].exprs.append(p[3]) p[0] = p[1] def p_identifier(self, p): """ identifier : ID """ p[0] = c_ast.ID(p[1], self._coord(p.lineno(1))) def p_constant_1(self, p): """ constant : INT_CONST_DEC | INT_CONST_OCT | INT_CONST_HEX """ p[0] = c_ast.Constant( 'int', p[1], self._coord(p.lineno(1))) def p_constant_2(self, p): """ constant : FLOAT_CONST """ p[0] = c_ast.Constant( 'float', p[1], self._coord(p.lineno(1))) def p_constant_3(self, p): """ constant : CHAR_CONST | WCHAR_CONST """ p[0] = c_ast.Constant( 'char', p[1], self._coord(p.lineno(1))) def p_empty(self, p): 'empty : ' p[0] = None def p_error(self, p): if p: self._parse_error( 'before: %s' % p.value, self._coord(p.lineno)) else: self._parse_error('At end of input', '')
print("Got an error: {} \n line: {} col: {} ".format(err_msg, line, col)) def look_up_func(name): return False def on_lbrace_func(): pass def on_rbrace_func(): pass lexer = CLexer(error_func, on_lbrace_func, on_rbrace_func, look_up_func) lexer.build() if (len(sys.argv) > 1): for i in range(1, len(sys.argv)): file = open(sys.argv[i]) inp = file.read() lexer.input(inp) while True: tok = lexer.token() if not tok: break print(tok) else: while True: inp = raw_input(">") lexer.input(inp)
class CParser(PLYParser): def __init__( self, lex_optimize=True, lextab='pytel.ast.lextab', yacc_optimize=True, yacctab='pytel.ast.yacctab', yacc_debug=False): """ Create a new CParser. Some arguments for controlling the debug/optimization level of the parser are provided. The defaults are tuned for release/performance mode. The simple rules for using them are: *) When tweaking CParser/CLexer, set these to False *) When releasing a stable parser, set to True lex_optimize: Set to False when you're modifying the lexer. Otherwise, changes in the lexer won't be used, if some lextab.py file exists. When releasing with a stable lexer, set to True to save the re-generation of the lexer table on each run. lextab: Points to the lex table that's used for optimized mode. Only if you're modifying the lexer and want some tests to avoid re-generating the table, make this point to a local lex table file (that's been earlier generated with lex_optimize=True) yacc_optimize: Set to False when you're modifying the parser. Otherwise, changes in the parser won't be used, if some parsetab.py file exists. When releasing with a stable parser, set to True to save the re-generation of the parser table on each run. yacctab: Points to the yacc table that's used for optimized mode. Only if you're modifying the parser, make this point to a local yacc table file yacc_debug: Generate a parser.out file that explains how yacc built the parsing table from the grammar. """ self.clex = CLexer( error_func=self._lex_error_func, ) self.clex.build( optimize=lex_optimize, lextab=lextab) self.tokens = self.clex.tokens rules_with_opt = [ 'expression', 'parameter_list', 'constant_expression', ] for rule in rules_with_opt: self._create_opt_rule(rule) self.cparser = ply.yacc.yacc( module=self, start='translation_unit', debug=yacc_debug, optimize=yacc_optimize, tabmodule=yacctab) # A table of identifiers defined as typedef types during # parsing. # self.typedef_table = set([]) def parse(self, text, filename='', debuglevel=0): """ Parses C code and returns an AST. text: A string containing the C source code filename: Name of the file being parsed (for meaningful error messages) debuglevel: Debug level to yacc """ self.clex.filename = filename self.clex.reset_lineno() self.typedef_table = set([]) return self.cparser.parse(text, lexer=self.clex, debug=debuglevel) ######################-- PRIVATE --###################### def _lex_error_func(self, msg, line, column): self._parse_error(msg, self._coord(line, column)) ## ## Precedence and associativity of operators ## precedence = ( ('left', 'NOT'), ('left', 'BOR'), ('left', 'BXOR'), ('left', 'BAND'), ('left', 'EQ', 'NE'), ('left', 'GT', 'GE', 'LT', 'LE'), ('left', 'RSHIFT', 'LSHIFT'), ('left', 'PLUS', 'MINUS'), ('left', 'TIMES', 'DIVIDE', 'MOD'), ('left', 'AND'), ('left', 'OR'), ) def p_translation_unit(self, p): """ translation_unit : external_decl_list | translation_unit external_decl_list """ # Note: external_declaration is already a list # if len(p) == 2: p[0] = c_ast.FileAST(p[1]) else: p[1].ext.extend(p[2]) p[0] = p[1] def p_external_decl_list(self, p): """ external_decl_list : external_decl | external_decl_list external_decl """ if len(p) == 2: p[0] = p[1] if isinstance(p[1], list) else [p[1]] else: p[0] = p[1] + (p[2] if isinstance(p[2], list) else [p[2]]) def p_external_decl_1(self, p): """ external_decl : external_declaration | declaration_specifiers """ p[0] = p[1] def p_external_decl_2(self, p): """ external_decl : function_definition """ p[0] = p[1] def p_external_declaration(self, p): """ external_declaration : pp_directive """ p[0] = p[1] def p_pp_directive(self, p): """ pp_directive : PPHASH """ p[0] = p[1] def p_function_definition(self, p): """ function_definition : function_type_specifier FUNCTION identifier LPAREN parameter_list_opt RPAREN compound_statement ENDFUNCTION """ decl = c_ast.FunctionDecl(p[1], p[3], p[5], p[7], self._coord(p.lineno(1))) p[0] = decl def p_statement(self, p): """ statement : labeled_statement | expression_statement | selection_statement | iteration_statement | jump_statement """ p[0] = p[1] def p_declaration_specifiers(self, p): """ declaration_specifiers : type_qualifier type_specifier init_declarator_list SEMI | type_specifier init_declarator_list SEMI """ if len(p) == 5: decl = c_ast.Decl(p[1], p[2], p[3], coord=self._coord(p.lineno(1))) else: decl = c_ast.Decl(None, p[1], p[2], coord=self._coord(p.lineno(1))) p[0] = decl def p_declaration_specifiers_list(self, p): """ declaration_specifiers_list : declaration_specifiers | declaration_specifiers_list declaration_specifiers """ p[0] = p[1] + [p[2]] if len(p) == 3 else [p[1]] def p_function_type_specifier(self, p): """ function_type_specifier : VOID | type_specifier """ p[0] = p[1] def p_type_specifier(self, p): """ type_specifier : INTEGER | FLOAT | STRING """ p[0] = p[1] def p_type_qualifier(self, p): """ type_qualifier : CONST """ p[0] = p[1] def p_init_declarator_list(self, p): """ init_declarator_list : init_declarator | init_declarator_list COMMA init_declarator """ p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]] def p_init_declarator(self, p): """ init_declarator : variable_declarator | variable_declarator EQUALS initializer """ p[0] = c_ast.VaribleDecl(p[1], p[3] if len(p) > 2 else None) def p_variable_declarator(self, p): """ variable_declarator : identifier LBRACKET constant_expression_opt RBRACKET | identifier """ if len(p) == 5: var = c_ast.Varible('array', p[1], p[3], p[1].coord) else: var = c_ast.Varible('var', p[1], None, p[1].coord) p[0] = var def p_parameter_list(self, p): """ parameter_list : parameter_declaration | parameter_list COMMA parameter_declaration """ if len(p) == 2: # single parameter p[0] = c_ast.ParamList([p[1]], p[1].coord) else: p[1].params.append(p[3]) p[0] = p[1] def p_parameter_declaration(self, p): # string abc """ parameter_declaration : type_specifier variable_declarator | type_specifier BAND variable_declarator """ if len(p) == 3: param = c_ast.Param(False, p[1], p[2], self._coord(p.lineno(1))) else: param = c_ast.Param(True, p[1], p[3], self._coord(p.lineno(1))) out = False if len(p) == 3 else True p[0] = param def p_initializer_1(self, p): """ initializer : assignment_expression """ p[0] = p[1] def p_initializer_2(self, p): """ initializer : LBRACE initializer_list RBRACE | LBRACE initializer_list COMMA RBRACE """ p[0] = p[2] def p_initializer_list(self, p): """ initializer_list : initializer | initializer_list COMMA initializer """ p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]] def p_compound_statement_1(self, p): """ compound_statement : statement_list """ p[0] = c_ast.Compound( decls=None, stmts=p[1], coord=self._coord(p.lineno(1))) def p_compound_statement_2(self, p): """ compound_statement : declaration_specifiers_list """ p[0] = c_ast.Compound( decls=p[1], stmts=None, coord=self._coord(p.lineno(1))) def p_compound_statement_3(self, p): """ compound_statement : declaration_specifiers_list statement_list """ p[0] = c_ast.Compound( decls=p[1], stmts=p[2], coord=self._coord(p.lineno(1))) def p_statement_list(self, p): """ statement_list : statement | statement_list statement """ if len(p) == 2: # single expr p[0] = [p[1]] if p[1] else [] else: p[0] = p[1] + ([p[2]] if p[2] else []) def p_labeled_statement_1(self, p): """ labeled_statement : ID COLON statement """ p[0] = c_ast.Label(p[1], p[3], self._coord(p.lineno(1))) def p_labeled_statement_2(self, p): """ labeled_statement : CASE expression COLON statement """ p[0] = c_ast.Case(p[2], p[4], self._coord(p.lineno(1))) def p_labeled_statement_3(self, p): """ labeled_statement : DEFAULT COLON statement """ p[0] = c_ast.Default(p[3], self._coord(p.lineno(1))) def p_selection_statement_1(self, p): """ selection_statement : IF LPAREN expression RPAREN compound_statement ENDIF """ p[0] = c_ast.If(p[3], p[5], None, self._coord(p.lineno(1))) def p_selection_statement_2(self, p): """ selection_statement : IF LPAREN expression RPAREN compound_statement ELSE compound_statement ENDIF """ p[0] = c_ast.If(p[3], p[5], p[7], self._coord(p.lineno(1))) def p_selection_statement_3(self, p): """ selection_statement : SWITCH LPAREN expression RPAREN compound_statement ENDSWITCH """ p[0] = c_ast.Switch(p[3], p[5], self._coord(p.lineno(1))) def p_iteration_statement_1(self, p): """ iteration_statement : WHILE LPAREN expression RPAREN compound_statement ENDWHILE """ p[0] = c_ast.While(p[3], p[5], self._coord(p.lineno(1))) def p_iteration_statement_2(self, p): """ iteration_statement : DO compound_statement WHILE LPAREN expression RPAREN """ p[0] = c_ast.DoWhile(p[5], p[2], self._coord(p.lineno(1))) def p_iteration_statement_3(self, p): """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN compound_statement ENDFOR """ p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._coord(p.lineno(1))) def p_jump_statement_1(self, p): """ jump_statement : GOTO ID SEMI """ p[0] = c_ast.Goto(p[2], self._coord(p.lineno(1))) def p_jump_statement_2(self, p): """ jump_statement : BREAK SEMI """ p[0] = c_ast.Break(self._coord(p.lineno(1))) def p_jump_statement_3(self, p): """ jump_statement : CONTINUE SEMI """ p[0] = c_ast.Continue(self._coord(p.lineno(1))) def p_jump_statement_4(self, p): """ jump_statement : RETURN expression SEMI | RETURN SEMI """ p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._coord(p.lineno(1))) def p_expression_statement(self, p): """ expression_statement : expression SEMI | SEMI """ if len(p) == 2: p[0] = c_ast.ExprList([], self._coord(p.lineno(1))) else: p[0] = p[1] def p_expression(self, p): """ expression : assignment_expression | expression COMMA assignment_expression """ if len(p) == 2: p[0] = p[1] else: if not isinstance(p[1], c_ast.ExprList): p[1] = c_ast.ExprList([p[1]], p[1].coord) p[1].exprs.append(p[3]) p[0] = p[1] def p_assignment_expression(self, p): """ assignment_expression : conditional_expression | variable_declarator assignment_operator conditional_expression """ if len(p) == 2: p[0] = p[1] else: p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord) def p_assignment_operator(self, p): """ assignment_operator : EQUALS """ p[0] = p[1] def p_constant_expression(self, p): """ constant_expression : conditional_expression """ p[0] = p[1] def p_conditional_expression(self, p): """ conditional_expression : binary_expression """ p[0] = p[1] def p_binary_expression(self, p): """ binary_expression : unary_expression | binary_expression TIMES binary_expression | binary_expression DIVIDE binary_expression | binary_expression MOD binary_expression | binary_expression PLUS binary_expression | binary_expression MINUS binary_expression | binary_expression RSHIFT binary_expression | binary_expression LSHIFT binary_expression | binary_expression LT binary_expression | binary_expression LE binary_expression | binary_expression GE binary_expression | binary_expression GT binary_expression | binary_expression EQ binary_expression | binary_expression NE binary_expression | binary_expression BAND binary_expression | binary_expression BOR binary_expression | binary_expression BXOR binary_expression | binary_expression AND binary_expression | binary_expression OR binary_expression """ if len(p) > 2: p[0] = c_ast.BinaryOp(p.slice[2], p[1], p[3], p[1].coord) else: p[0] = p[1] def p_unary_expression_1(self, p): """ unary_expression : function_expression """ p[0] = p[1] def p_unary_expression_2(self, p): """ unary_expression : primary_expression """ p[0] = p[1] def p_unary_expression_3(self, p): """ unary_expression : PLUSPLUS unary_expression | MINUSMINUS unary_expression | unary_operator unary_expression """ p[0] = c_ast.UnaryOp(p.slice[1], p[2], self._coord(p.lineno(1))) def p_unary_operator(self, p): """ unary_operator : PLUS | MINUS | NOT | BNOT """ p[0] = p.slice[1] def p_function_expression(self, p): """ function_expression : identifier LPAREN argument_expression_list RPAREN | identifier LPAREN RPAREN """ if len(p) == 5: fcall = c_ast.FuncCall(p[1], p[3], p[1].coord) else: fcall = c_ast.FuncCall(p[1], None, p[1].coord) p[0] = fcall def p_primary_expression_1(self, p): """ primary_expression : variable_declarator """ p[0] = p[1] def p_primary_expression_2(self, p): """ primary_expression : constant """ p[0] = p[1] def p_primary_expression_3(self, p): """ primary_expression : STRING_LITERAL """ p[0] = c_ast.Constant('string', p[1], self._coord(p.lineno(1))) def p_primary_expression_4(self, p): """ primary_expression : LPAREN expression RPAREN """ p[0] = p[2] def p_argument_expression_list(self, p): """ argument_expression_list : assignment_expression | argument_expression_list COMMA assignment_expression """ if len(p) == 2: # single expr p[0] = c_ast.ExprList([p[1]], p[1].coord) else: p[1].exprs.append(p[3]) p[0] = p[1] def p_identifier(self, p): """ identifier : ID """ p[0] = c_ast.ID(p[1], self._coord(p.lineno(1))) def p_constant_1(self, p): """ constant : INT_CONST_DEC | INT_CONST_OCT | INT_CONST_HEX """ p[0] = c_ast.Constant("integer", p[1], self._coord(p.lineno(1))) def p_constant_2(self, p): """ constant : FLOAT_CONST """ p[0] = c_ast.Constant("float", p[1], self._coord(p.lineno(1))) def p_constant_3(self, p): """ constant : CHAR_CONST """ p[0] = c_ast.Constant("char", p[1], self._coord(p.lineno(1))) def p_empty(self, p): 'empty : ' p[0] = None def p_error(self, p): if p: self._parse_error( 'before: %s' % p.value, self._coord(p.lineno)) else: self._parse_error('At end of input', '')