def inject(self, union_object, body_only=False):
    """Build the mutator declarations and definition for a union type.

    The result is cached on the instance: once ``self.decls`` is non-empty,
    later calls return ``(self.decls, self.defn)`` without regenerating.

    Args:
        union_object: Description of the union type. Its ``typename`` is
            checked here, and it is passed to the placeholder, random-value,
            member-mutator and sizeof helpers.
        body_only: When true, return only the body of the generated mutator
            function (with its last statement removed) instead of the
            ``(decls, defn)`` pair. Anonymous unions are accepted in this
            mode.

    Returns:
        ``(decls, defn)``; ``([], "")`` for an anonymous union when
        ``body_only`` is false; or the function body node when ``body_only``
        is true and a function definition was found in the template.
    """
    global nesting_context
    if self.decls:
        return self.decls, self.defn
    self.decls = []
    if not union_object.typename and not body_only:
        # XXX the fact that we're refusing to generate mutators for anonymous types
        # XXX should probably be made more explicit
        return [], ""
    self.replace_placeholder_type(union_object)
    for node in self.get_nodes(self.ast):
        # Only a function definition can serve as the mutator skeleton, and
        # only while no definition has been produced yet.
        if not isinstance(node, c_ast.FuncDef) or self.defn:
            continue
        rnd = self.build_random_value(union_object)
        for memb_mut in self.build_all_member_mutators(union_object):
            # Splice the member mutator statements in just before the
            # final statement of the function body.
            node.body.block_items[-1:-1] = memb_mut
        node.body.block_items.insert(0, rnd)
        if body_only:
            # remove the return statement
            node.body.block_items.pop()
            return node.body
        decl, defn = make_commented_mutator_defn(node)
        self.decls.append(decl)
        self.defn = defn
        break
    _, sizedecl, sizedef = define_sizeof_type(union_object)
    self.decls.append(CGenerator().visit(sizedecl))
    self.defn += CGenerator().visit(sizedef)
    # Start the next generation with a fresh nesting context.
    nesting_context = NestingContext()
    return self.decls, self.defn
def inject(self, struct_object):
    """Build the mutator declarations and definition for a struct type.

    The result is cached on the instance: once ``self.decls`` is non-empty,
    later calls return ``(self.decls, self.defn)`` without regenerating.

    Args:
        struct_object: Description of the struct type, passed to the
            placeholder, member-mutator and sizeof helpers.

    Returns:
        ``(decls, defn)`` where ``decls`` is a list of rendered declarations
        and ``defn`` is the rendered definition text.

    Raises:
        Exception: If the mutator definition cannot be generated. The
            original error is attached as ``__cause__``.
    """
    global nesting_context
    if self.decls:
        return self.decls, self.defn
    self.decls = []
    self.replace_placeholder_type(struct_object)
    for node in self.get_nodes(self.ast):
        # Only a function definition can serve as the mutator skeleton, and
        # only while no definition has been produced yet.
        if not isinstance(node, c_ast.FuncDef) or self.defn:
            continue
        for memb_mut in self.build_all_member_mutators(struct_object):
            # Splice the member mutator statements in just before the
            # final statement of the function body.
            node.body.block_items[-1:-1] = memb_mut
        try:
            decl, defn = make_commented_mutator_defn(node)
        except Exception as err:
            print("Warning: failed to generate a mutator definition.")
            print(node)
            print(struct_object)
            # Keep the historical exception type, but preserve the root
            # cause so the failure is debuggable.
            raise Exception(
                "failed to generate a mutator definition") from err
        self.decls.append(decl)
        self.defn = defn
        break
    _, sizedecl, sizedef = define_sizeof_type(struct_object)
    self.decls.append(CGenerator().visit(sizedecl))
    self.defn += CGenerator().visit(sizedef)
    # Start the next generation with a fresh nesting context.
    nesting_context = NestingContext()
    return self.decls, self.defn
def inject(self, struct_object):
    """Build the mutator declarations and definition for a struct type.

    The result is cached on the instance: once ``self.decls`` is non-empty,
    later calls return ``(self.decls, self.defn)`` without regenerating.

    Args:
        struct_object: Description of the struct type. Its ``typename`` is
            checked here, and it is passed to the placeholder,
            member-mutator and sizeof helpers.

    Returns:
        ``(decls, defn)``, or ``([], "")`` for an anonymous struct.
    """
    global nesting_context
    if self.decls:
        return self.decls, self.defn
    self.decls = []
    if not struct_object.typename:
        # XXX the fact that we're refusing to generate mutators for anonymous types
        # XXX should probably be made more explicit
        return [], ""
    self.replace_placeholder_type(struct_object)
    for node in self.get_nodes(self.ast):
        # Only a function definition can serve as the mutator skeleton, and
        # only while no definition has been produced yet.
        if not isinstance(node, c_ast.FuncDef) or self.defn:
            continue
        for memb_mut in self.build_all_member_mutators(struct_object):
            # Splice the member mutator statements in just before the
            # final statement of the function body.
            node.body.block_items[-1:-1] = memb_mut
        decl, defn = make_commented_mutator_defn(node)
        self.decls.append(decl)
        self.defn = defn
        break
    _, sizedecl, sizedef = define_sizeof_type(struct_object)
    self.decls.append(CGenerator().visit(sizedecl))
    self.defn += CGenerator().visit(sizedef)
    # Start the next generation with a fresh nesting context.
    nesting_context = NestingContext()
    return self.decls, self.defn
def inject(self, modifier_type):
    """Generate mutator and sizeof code for a modifier type.

    Substitutes the placeholder type, asks ``replace_funcs`` for the mutator
    declarations and definition, and appends the rendered sizeof
    declaration and definition for ``modifier_type``.

    Returns:
        ``(decls, defn)``: the declaration list and the definition text.
    """
    self.replace_placeholder_type(modifier_type)
    mutator_decls, mutator_defn = self.replace_funcs(modifier_type)
    _, size_decl_ast, size_defn_ast = define_sizeof_type(modifier_type)
    mutator_decls.append(CGenerator().visit(size_decl_ast))
    mutator_defn += CGenerator().visit(size_defn_ast)
    return mutator_decls, mutator_defn
def inject(self, union_object, body_only=False):
    """Build the mutator declarations and definition for a union type.

    The result is cached on the instance: once ``self.decls`` is non-empty,
    later calls return ``(self.decls, self.defn)`` without regenerating.

    Args:
        union_object: Description of the union type, passed to the
            placeholder, random-value, member-mutator and sizeof helpers.
        body_only: When true, return only the body of the generated mutator
            function (with its last statement removed) instead of the
            ``(decls, defn)`` pair.

    Returns:
        ``(decls, defn)``, or the function body node when ``body_only`` is
        true and a function definition was found in the template.
    """
    global nesting_context
    if self.decls:
        return self.decls, self.defn
    self.decls = []
    self.replace_placeholder_type(union_object)
    for node in self.get_nodes(self.ast):
        # Only a function definition can serve as the mutator skeleton, and
        # only while no definition has been produced yet.
        if not isinstance(node, c_ast.FuncDef) or self.defn:
            continue
        rnd = self.build_random_value(union_object)
        for memb_mut in self.build_all_member_mutators(union_object):
            # Splice the member mutator statements in just before the
            # final statement of the function body.
            node.body.block_items[-1:-1] = memb_mut
        node.body.block_items.insert(0, rnd)
        if body_only:
            # remove the return statement
            node.body.block_items.pop()
            return node.body
        decl, defn = make_commented_mutator_defn(node)
        self.decls.append(decl)
        self.defn = defn
        break
    _, sizedecl, sizedef = define_sizeof_type(union_object)
    self.decls.append(CGenerator().visit(sizedecl))
    self.defn += CGenerator().visit(sizedef)
    # Start the next generation with a fresh nesting context.
    nesting_context = NestingContext()
    return self.decls, self.defn
def inject(self, enum_object):
    """Generate mutator and sizeof code for an enum type.

    Returns:
        ``(decls, defn)``: the result of ``do_replacements`` extended with
        the rendered sizeof declaration and definition.
    """
    global nesting_context
    enum_decls, enum_defn = self.do_replacements(enum_object)
    _, size_decl_ast, size_defn_ast = define_sizeof_type(enum_object)
    enum_decls.append(CGenerator().visit(size_decl_ast))
    enum_defn += CGenerator().visit(size_defn_ast)
    # Start the next generation with a fresh nesting context.
    nesting_context = NestingContext()
    return enum_decls, enum_defn
def inject(self, modifier_type):
    """Generate mutator and sizeof code for a modifier wrapping another type.

    Returns:
        ``(None, None)`` when ``modifier_type`` has no underlying type;
        otherwise ``(decls, defn)`` built from the underlying type's mutator
        plus the modifier's rendered sizeof declaration and definition.
    """
    if not modifier_type.underlying_type:
        # Nothing to delegate to, so nothing can be generated.
        return None, None
    self.replace_placeholder_type(modifier_type)
    generated_decls, generated_defn = self.replace_funcs(
        modifier_type.underlying_type)
    _, size_decl_ast, size_defn_ast = define_sizeof_modifier_type(
        modifier_type)
    generated_decls.append(CGenerator().visit(size_decl_ast))
    generated_defn += CGenerator().visit(size_defn_ast)
    return generated_decls, generated_defn
def inject(self, pointer_type):
    """Generate mutator code for a pointer (or array) type.

    A sizeof declaration/definition is appended only when
    ``pointer_type.array_sizes`` is truthy.

    Returns:
        ``(None, None)`` when ``pointer_type`` has no underlying type;
        otherwise ``(decls, defn)``.
    """
    if not pointer_type.underlying_type:
        # Nothing to point at, so nothing can be generated.
        return None, None
    self.replace_placeholder_type(pointer_type)
    self.replace_underlying_sizeof(pointer_type.underlying_type)
    generated_decls, generated_defn = self.replace_funcs(
        pointer_type.underlying_type)
    if not pointer_type.array_sizes:
        return generated_decls, generated_defn
    _, size_decl_ast, size_defn_ast = define_sizeof_type(pointer_type)
    generated_decls.append(CGenerator().visit(size_decl_ast))
    generated_defn += CGenerator().visit(size_defn_ast)
    return generated_decls, generated_defn
class Collector(c_ast.NodeVisitor):
    """AST visitor that gathers C type declarations and function prototypes.

    A node is collected when it has no coordinates, or when the absolute
    path of its source file contains ``include_dir``. Rendered declarations
    accumulate, de-duplicated and in first-seen order, in ``typedecls`` and
    ``functions``.
    """

    def __init__(self):
        self.generator = CGenerator()
        self.typedecls = []
        self.functions = []

    @staticmethod
    def _is_collectable(node):
        """Return True if *node* has no coordinates or is under ``include_dir``."""
        # The None check has to come first: reading ``node.coord.file`` on a
        # coordinate-less node raises AttributeError.
        if node.coord is None:
            return True
        return os.path.abspath(node.coord.file).find(include_dir) != -1

    def process_typedecl(self, node):
        """Render *node* as a declaration and record it if not yet seen."""
        if not self._is_collectable(node):
            return
        typedecl = '{};'.format(self.generator.visit(node))
        # Replace whatever ARRAY_SIZEOF_PATTERN matches with '[...]'.
        typedecl = ARRAY_SIZEOF_PATTERN.sub('[...]', typedecl)
        if typedecl not in self.typedecls:
            self.typedecls.append(typedecl)

    def sanitize_enum(self, enum):
        """Replace every enumerator value of *enum* with ``...``; return *enum*."""
        for _, enumeratorlist in enum.children():
            for _, enumerator in enumeratorlist.children():
                enumerator.value = c_ast.Constant('dummy', '...')
        return enum

    def visit_Typedef(self, node):
        """Collect a typedef, sanitizing the enum it directly wraps, if any."""
        if not self._is_collectable(node):
            return
        if (isinstance(node.type, c_ast.TypeDecl)
                and isinstance(node.type.type, c_ast.Enum)):
            self.sanitize_enum(node.type.type)
        self.process_typedecl(node)

    def visit_Union(self, node):
        """Collect a union declaration."""
        self.process_typedecl(node)

    def visit_Struct(self, node):
        """Collect a struct declaration."""
        self.process_typedecl(node)

    def visit_Enum(self, node):
        """Collect an enum declaration with its enumerator values sanitized."""
        if not self._is_collectable(node):
            return
        node = self.sanitize_enum(node)
        self.process_typedecl(node)

    def visit_FuncDecl(self, node):
        """Collect a function prototype unless its name is blacklisted."""
        if not self._is_collectable(node):
            return
        # Walk through any number of pointer levels in the return type to
        # reach the declaration that carries the function name.
        named_decl = node.type
        while isinstance(named_decl, c_ast.PtrDecl):
            named_decl = named_decl.type
        function_name = named_decl.declname
        if function_name in FUNCTION_BLACKLIST:
            return
        decl = '{};'.format(self.generator.visit(node))
        # Replace whatever VARIADIC_ARG_PATTERN matches with '...'.
        decl = VARIADIC_ARG_PATTERN.sub('...', decl)
        if decl not in self.functions:
            self.functions.append(decl)
def inject(self, obj):
    """Generate the type definition, mutator and sizeof code for *obj*.

    Returns:
        ``([], "")`` for an anonymous type (falsy ``obj.typename``);
        otherwise ``(decls, defn)`` where ``decls`` holds the rendered type
        definition, the sizeof declaration and the mutator declarations, in
        that order.
    """
    if not obj.typename:
        # XXX the fact that we're refusing to generate mutators for anonymous types
        # XXX should probably be made more explicit
        return [], ""
    type_decls = [CGenerator().visit(obj.define(None))]
    self.replace_placeholder_type(obj)
    mutator_decls, mutator_defn = self.replace_funcs(obj)
    _, size_decl_ast, size_defn_ast = define_sizeof_do_nothing_type(obj)
    type_decls.append(CGenerator().visit(size_decl_ast))
    mutator_defn += CGenerator().visit(size_defn_ast)
    return type_decls + mutator_decls, mutator_defn
def visit_Decl(self, n, no_type=False):
    """Render a declaration, rewriting mocked extern functions.

    A function declaration whose name is in ``self.extern`` is recorded in
    ``self.mocked`` and has its ``extern`` storage specifier replaced by
    ``extern "Python+C"`` before rendering.

    Args:
        n: The declaration node to render.
        no_type: Forwarded unchanged to the base generator's ``visit_Decl``.

    Returns:
        The text produced by the base generator for ``n``.
    """
    if isinstance(n.type, FuncDecl) and n.name in self.extern:
        self.mocked.add(n.name)
        n.storage.remove('extern')
        n.storage.append('extern "Python+C"')
    # Pass no_type through: dropping it would silently discard the caller's
    # request.
    return Generator.visit_Decl(self, n, no_type)
def __init__(self, parser=None):
    """Set up the template environment and seed state from *parser*.

    Args:
        parser: Optional object exposing ``includes``, ``structs`` and
            ``struct_typedefs``. When given, its contents are copied into
            this instance's matching collections.
    """
    self.code_generator = CGenerator()
    self.jinja_env = Environment(
        loader=PackageLoader('nala', 'templates'),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    # Expose the C generator to templates as the ``render`` filter.
    self.jinja_env.filters['render'] = self.code_generator.visit
    self.header_template = self.jinja_env.get_template(
        f'{HEADER_FILE}.jinja2')
    self.source_template = self.jinja_env.get_template(
        f'{SOURCE_FILE}.jinja2')
    self.mocks = []
    self.includes = []
    self.structs = []
    self.struct_typedefs = []
    self.struct_names = set()
    self.struct_typedef_names = set()
    self.parser = parser

    if parser is None:
        return

    for parsed_include in parser.includes:
        self.includes.append((parsed_include.path, parsed_include.system))

    for parsed_struct in parser.structs:
        self.structs.append(parsed_struct)
        # The first element of each entry is used as its name.
        self.struct_names.add(parsed_struct[0])

    for parsed_typedef in parser.struct_typedefs:
        self.struct_typedefs.append(parsed_typedef)
        self.struct_typedef_names.add(parsed_typedef[0])
def __new__(cls):
    """Create an instance, loading and parsing the class template on first use.

    While ``cls.text`` is falsy, the template text is loaded and parsed, and
    the text, parser, generator and resulting AST are stored on the class.
    """
    template_ready = bool(cls.text)
    if not template_ready:
        cls.text = cls._load_template()
        cls.parser = CParser()
        cls.generator = CGenerator()
        cls.saved_ast = cls.parser.parse(cls.text)
    return super().__new__(cls)
def replace_funcs(self, dwarf_type):
    """Retarget the template's underlying mutator call and render the mutator.

    Every call in ``self.ast`` to ``self.underlying_mutator_name`` is
    redirected to the mutator generated for ``dwarf_type``. The first
    function definition in the AST that yields a truthy definition is then
    rendered.

    Args:
        dwarf_type: Type description providing ``get_typename``,
            ``get_reference`` and ``define``.

    Returns:
        ``(decls, defn)``: a list of rendered declarations, and the rendered
        definition, which is ``None`` if no function definition was found.
    """
    decls = []
    defn = None
    # replace the underlying call
    for node in self.get_nodes(self.ast):
        if not isinstance(node, c_ast.FuncCall):
            continue
        if node.name.name != self.underlying_mutator_name:
            continue
        # Named types are used by reference; anonymous ones are defined
        # inline.
        if dwarf_type.get_typename():
            ut = dwarf_type.get_reference()()
        else:
            ut = dwarf_type.define()
        underlying_mutator_name, underlying_decl_ast = (
            make_mutator_decl_from_arg_type(
                ut, point=True, change_name=True))
        comment = "/* " + underlying_mutator_name + "*/\n"
        underlying_mutator_call = make_call_from_mutator_decl(
            "tmp", underlying_decl_ast)
        node.name = underlying_mutator_call.name
        # make this a k&r style decl, 'cause cheating is sometimes winning after all
        underlying_decl_ast.type.args = c_ast.ParamList([])
        decls.append(comment + CGenerator().visit(underlying_decl_ast))
    # build the decl and defn
    for node in self.get_nodes(self.ast):
        if isinstance(node, c_ast.FuncDef) and not defn:
            decl, defn = make_commented_mutator_defn(node)
            decls.append(decl)
    return decls, defn
def define_sizeof_modifier_type(t):
    """Build the sizeof function for a modifier type.

    The generated function takes a pointer named ``storage`` and returns the
    result of the underlying type's sizeof call.

    Returns:
        ``(comment, kr_funcdecl, funcdef)``: a C comment naming the type, a
        parameterless (K&R style) function declaration, and the function
        definition.
    """
    # setup the toplevel call
    storage_type = (t.get_reference()("storage")
                    if t.get_typename() else t.define("storage"))
    desired_name = CGenerator().visit(storage_type)
    function_name = "fffc_get_sizeof_" + encode_hash(desired_name)

    # build the underlying function call
    delegate_call = get_sizeof_pointer_to_type(t.underlying_type,
                                               c_ast.ID("storage"))

    # build this just as above, except with the call in place of the sizeof
    storage_param = c_ast.Decl("storage", [], [], [],
                               c_ast.PtrDecl([], storage_type), None, None)
    return_decl = c_ast.TypeDecl(
        function_name, [],
        c_ast.IdentifierType(["long", "long", "unsigned"]))
    full_decl = c_ast.FuncDecl(c_ast.ParamList([storage_param]), return_decl)
    function_body = c_ast.Compound([c_ast.Return(delegate_call)])
    funcdef = c_ast.FuncDef(
        c_ast.Decl(function_name, [], [], [], full_decl, None, None),
        None,
        function_body,
    )
    comment = "/* " + desired_name + "*/\n"
    kr_funcdecl = c_ast.FuncDecl(c_ast.ParamList([]), return_decl)
    return comment, kr_funcdecl, funcdef
def visit_StructRef(self, node):
    """Render a struct member access, delegating magic variables.

    When the base identifier is a key of ``self.magic_vars``, the access is
    rendered by that variable's ``getattr``; otherwise the stock generator
    output is returned.
    """
    assert isinstance(node.name, c_ast.ID)
    assert isinstance(node.field, c_ast.ID)
    base_name = node.name.name
    if base_name not in self.magic_vars:
        return CGenerator.visit_StructRef(self, node)
    magic_var = self.magic_vars[base_name]
    return magic_var.getattr(self.cxnode, node.field.name)
def make_mutator_decl_from_arg_type(arg_type,
                                    generator=CGenerator(),
                                    seen={},
                                    point=True,
                                    change_name=False):
    """Build the declaration of the mutator function for *arg_type*.

    Args:
        arg_type: AST of the type the mutator operates on.
        generator: Generator used to render the declaration to text.
        seen: Memo table shared across calls (intentionally a mutable
            default), mapping ``arg_type`` to the previously built result.
        point: When true, the parameter is a pointer to ``arg_type``;
            otherwise ``arg_type`` is used directly.
        change_name: When true, ``change_declname(arg_type, "storage")`` is
            applied first.

    Returns:
        ``(desired_name, decl)``: the declaration rendered with the
        placeholder function name, and a ``Decl`` node carrying the hashed
        ``_Z_fffc_mutator_*`` name.
    """
    # memoize
    # NOTE(review): the cache key ignores ``point`` and ``change_name`` --
    # confirm callers never mix flags for the same ``arg_type`` object.
    if arg_type in seen:
        return seen[arg_type]

    # change the type declname
    if change_name:
        change_declname(arg_type, "storage")

    # first, wrap the type in a pointer to match the necessary mutator semantics
    param_type = c_ast.PtrDecl([], arg_type) if point else arg_type

    # next, wrap that in a decl with the right name
    params = c_ast.ParamList(
        [c_ast.Decl("storage", [], [], [], param_type, None, None)])

    # next, generate the desired decl
    return_decl = c_ast.TypeDecl("fffc_mutator_for_target_type", [],
                                 c_ast.IdentifierType(["int"]))
    func_decl = c_ast.FuncDecl(params, return_decl)

    # now build the mangled name
    desired_name = generator.visit(func_decl)
    mangled_name = "_Z_fffc_mutator_" + encode_hash(desired_name)
    func_decl.type.declname = mangled_name

    # build the output and save the result
    result = (desired_name,
              c_ast.Decl(mangled_name, [], [], [], func_decl, None, None))
    seen[arg_type] = result
    return result
def inject(self, pointer_type):
    """Generate mutator and sizeof code for a pointer type.

    For pointers to functions, the rendered definition is trimmed to its
    first three and last three lines.

    Returns:
        ``(None, None)`` when ``pointer_type`` has no underlying type;
        otherwise ``(decls, defn)``.
    """
    if not pointer_type.underlying_type:
        # Nothing to point at, so nothing can be generated.
        return None, None
    self.replace_placeholder_type(pointer_type)
    self.replace_underlying_sizeof(pointer_type.underlying_type)
    generated_decls, generated_defn = self.replace_funcs(
        pointer_type.underlying_type)

    # XXX This is a hack, because it's easier to just remove the offending
    # XXX inner mutation bits than to mess with the AST. The issue here is
    # XXX indexing into a function pointer, which is something the basic
    # XXX pointer mutator does and which is a no-go.
    if type(pointer_type.underlying_type) == dwarf_to_c.DwarfFunctionType:
        defn_lines = generated_defn.splitlines()
        kept_lines = defn_lines[:3] + defn_lines[-3:]
        generated_defn = "\n".join(kept_lines) + "\n"

    _, size_decl_ast, size_defn_ast = define_sizeof_type(pointer_type)
    generated_decls.append(CGenerator().visit(size_decl_ast))
    generated_defn += CGenerator().visit(size_defn_ast)
    return generated_decls, generated_defn
def inject(self):
    """Render sizeof helpers and mutators for every function in the template.

    The result is cached on the instance: once ``self.decls`` is non-empty,
    later calls return ``(self.decls, self.defns)`` without regenerating.
    All sizeof declarations/definitions come first, followed by all mutator
    declarations/definitions.

    Returns:
        ``(decls, defns)``: two lists of rendered text.
    """
    if self.decls:
        return self.decls, self.defns
    self.decls = []
    self.defns = []
    # First pass: sizeof helpers. This must run before the second pass,
    # which replaces ``node.decl`` on each function definition.
    for node in self.get_nodes(self.ast):
        if isinstance(node, c_ast.FuncDef):
            _, decl, defn = self.build_sizeof(
                node.decl.type.args.params[0].type)
            self.decls.append(CGenerator().visit(decl))
            self.defns.append(CGenerator().visit(defn))
    # Second pass: the mutators themselves.
    for node in self.get_nodes(self.ast):
        if isinstance(node, c_ast.FuncDef):
            decl, defn = make_commented_mutator_defn(node)
            self.decls.append(decl)
            self.defns.append(defn)
    return self.decls, self.defns
def make_commented_mutator_call_from_var(var_name,
                                         var_type,
                                         generator=CGenerator()):
    """Render a commented C statement that calls the mutator for a variable.

    Args:
        var_name: Name passed to ``make_call_from_mutator_decl``.
        var_type: Type AST used to look up or build the mutator declaration.
        generator: Generator used to render the call.

    Returns:
        A C comment naming the mutator, followed by the call and a
        terminating ``;``.
    """
    desired_name, mutator_decl = make_mutator_decl_from_arg_type(var_type)
    call_ast = make_call_from_mutator_decl(var_name, mutator_decl)
    header = "/* " + desired_name + "*/\n"
    return header + generator.visit(call_ast) + ";"
def build_source(statements):
    """Render *statements* to C source, skipping falsy and duplicate entries.

    Function definitions end with a newline, pragmas with a blank line, and
    everything else with ``;`` and a blank line. A statement whose rendered
    text was already emitted is dropped.

    Returns:
        The concatenated source text.
    """
    generator = CGenerator()
    emitted = set()
    chunks = []
    for statement in statements:
        if not statement:
            continue
        if isinstance(statement, c_ast.FuncDef):
            terminator = "\n"
        elif isinstance(statement, c_ast.Pragma):
            terminator = "\n\n"
        else:
            terminator = ";\n\n"
        rendered = generator.visit(statement) + terminator
        if rendered in emitted:
            continue
        emitted.add(rendered)
        chunks.append(rendered)
    return "".join(chunks)
def make_commented_mutator_defn(node, generator=CGenerator()):
    """Render a mutator function definition and its K&R-style declaration.

    ``node.decl`` is replaced with the mutator declaration derived from the
    type of the function's first parameter.

    Args:
        node: Function definition AST; modified in place.
        generator: Generator used to render the output.

    Returns:
        ``(decl, defn)``: the commented declaration text and the commented
        definition text.
    """
    first_param_type = node.decl.type.args.params[0].type
    desired_name, mutator_decl = make_mutator_decl_from_arg_type(
        first_param_type)
    node.decl = mutator_decl
    header = "/* " + desired_name + "*/\n"
    # Render the definition before the parameter list is stripped below.
    defn_text = header + generator.visit(node)
    # make this a k&r style decl, 'cause cheating is sometimes winning after all
    mutator_decl.type.args = c_ast.ParamList([])
    decl_text = header + generator.visit(mutator_decl)
    return decl_text, defn_text
def __init__(self, func, name, binary_path, executable_path,
             inferred_header_include, pie):
    """Store the target description and pre-render the C signatures.

    Loads the template data from the ``fffc`` package and renders four
    signatures for ``func``: the replacement hook, the parallel replacement,
    and the proxy and worker targets (the latter two built with a void type
    as the last ``build_ast`` argument).
    """
    self.generator = CGenerator()
    self.func = func
    self.name = name
    self.binary_path = binary_path
    self.exe_path = executable_path
    self.pie = pie
    self.template_path = self._get_template_path()
    self.template_data = pkgutil.get_data("fffc", str(self.template_path))
    self.inferred_header_include = inferred_header_include

    render = self.generator.visit
    self.hook_sig = render(func.define("FFFC_replacement"))
    self.parallel_sig = render(func.define("FFFC_parallel_replacement"))
    proxy_ast = func.build_ast("FFFC_proxy_target", func.typename,
                               func.arguments, dwarf_to_c.DwarfVoidType())
    self.proxy_sig = render(proxy_ast)
    worker_ast = func.build_ast("FFFC_worker_target", func.typename,
                                func.arguments, dwarf_to_c.DwarfVoidType())
    self.worker_sig = render(worker_ast)
def get_sizeof_pointer_to_type(t, reference_ast):
    """Build the call expression that yields the size of a pointed-to type.

    Args:
        t: Type description providing ``get_typename`` and
            ``get_reference``.
        reference_ast: Expression passed as the argument of the generated
            ``fffc_get_sizeof_*`` call.

    Returns:
        A ``FuncCall`` node. Anonymous types call
        ``fffc_estimate_allocation_size``; named types call the
        ``fffc_get_sizeof_<hash>`` function derived from the rendered type
        reference.
    """
    if not t.get_typename():
        # NOTE(review): this branch passes a hard-coded ``storage``
        # identifier rather than ``reference_ast`` -- confirm that is
        # intended.
        return c_ast.FuncCall(c_ast.ID("fffc_estimate_allocation_size"),
                              c_ast.ID("storage"))
    # From here on the type is known to be named, so it can always be used
    # by reference.
    argument_ast = t.get_reference()("storage")
    desired_name = CGenerator().visit(argument_ast)
    function_name = "fffc_get_sizeof_" + encode_hash(desired_name)
    return c_ast.FuncCall(c_ast.ID(function_name), reference_ast)
class MatchFunctionsTest(unittest.TestCase):
    """Checks that serialized function tokens match the regenerated code."""

    def setUp(self) -> None:
        self.parser = pycparser.CParser(lexer=ghcc.parse.CachedCLexer)
        self.generator = CGenerator()
        self.lexer = ghcc.parse.LexerWrapper()

    def test_serialize(self) -> None:
        """Tokens from ``serialize`` equal the lexed output of the generator."""
        for source, _ in EXAMPLE_CODE:
            preprocessed = ghcc.parse.preprocess(source)
            tree = self.parser.parse(preprocessed)
            token_coords = ghcc.parse.convert_to_tokens(
                preprocessed, self.parser.clex.cached_tokens)
            extractor = ghcc.parse.FunctionExtractor()
            found = extractor.find_functions(tree)
            for func_ast in found.values():
                _, tokens = match_functions.serialize(func_ast, token_coords)
                regenerated = self.generator.visit(func_ast)
                original_code = self.lexer.lex(regenerated)
                assert tokens == original_code
def __init__(self):
    """Set up the C generator, the template environment and empty state."""
    self.code_generator = CGenerator()
    self.jinja_env = Environment(
        loader=PackageLoader("narmock", "templates"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env = self.jinja_env
    # Expose the C generator to templates as the ``render`` filter.
    env.filters["render"] = self.code_generator.visit
    self.source_template = env.get_template(f"{self.SOURCE_FILE}.jinja2")
    self.header_template = env.get_template(f"{self.HEADER_FILE}.jinja2")
    self.mocks = []
    self.system_includes = set()
    self.local_includes = set()
def define_sizeof_type_from_ast(argument_ast):
    """Build the sizeof function for the type described by *argument_ast*.

    The generated function takes a pointer named ``storage`` and returns
    ``sizeof(*storage)``.

    Returns:
        ``(comment, kr_funcdecl, funcdef)``: a C comment naming the type, a
        parameterless (K&R style) function declaration, and the function
        definition.
    """
    desired_name = CGenerator().visit(argument_ast)
    function_name = "fffc_get_sizeof_" + encode_hash(desired_name)

    storage_param = c_ast.Decl("storage", [], [], [],
                               c_ast.PtrDecl([], argument_ast), None, None)
    return_decl = c_ast.TypeDecl(
        function_name, [],
        c_ast.IdentifierType(["long", "long", "unsigned"]))
    full_decl = c_ast.FuncDecl(c_ast.ParamList([storage_param]), return_decl)

    pointee = c_ast.UnaryOp("*", c_ast.ID("storage"))
    function_body = c_ast.Compound(
        [c_ast.Return(c_ast.UnaryOp("sizeof", pointee))])
    funcdef = c_ast.FuncDef(
        c_ast.Decl(function_name, [], [], [], full_decl, None, None),
        None,
        function_body,
    )
    comment = "/* " + desired_name + "*/\n"
    kr_funcdecl = c_ast.FuncDecl(c_ast.ParamList([]), return_decl)
    return comment, kr_funcdecl, funcdef
def serialize(
    func_ast: ASTNode, tokens: List[ghcc.parse.Token]
) -> Tuple[ghcc.parse.JSONNode, List[str]]:
    r"""Generate serialized AST and lexed tokens for a single function.

    :param func_ast: AST node of the function to serialize.
    :param tokens: Token list that the token positions in the AST index into.
    :return: A tuple ``(ast_dict, token_names)``: the AST converted to a
        dictionary and the names of the tokens making up the function. When
        the function boundaries are found, token positions in ``ast_dict``
        are shifted to be relative to the function's first token.
    """
    ast_dict = ghcc.parse.ast_to_dict(func_ast, tokens)
    # Instead of generating and lexing the code again, we find the function boundaries based on heuristics:
    # - Left boundary is given by smallest token position in tree.
    # - Right boundary is the matching right curly brace given the token position of the function body
    #   compound statement.
    inf = len(tokens)
    COORD = ghcc.parse.TOKEN_POS_ATTR

    def find_min_pos_fn(node: ghcc.parse.JSONNode, xs) -> int:
        pos = node[COORD]
        # Position 0 is a valid token index, so only ``None`` may be treated
        # as "no position"; a truthiness test would skip the first token.
        own_pos = inf if pos is None else pos
        return min(min(xs, default=inf), own_pos)

    left = ghcc.parse.visit_dict(find_min_pos_fn,
                                 ast_dict[ghcc.parse.CHILDREN_ATTR]["decl"])
    body_start = ghcc.parse.visit_dict(
        find_min_pos_fn, ast_dict[ghcc.parse.CHILDREN_ATTR]["body"])
    try:
        right = find_matching_rbrace(tokens, body_start)

        # Decrease all token positions by offset.
        def visit_fn(node: ghcc.parse.JSONNode, _) -> None:
            if node[COORD] is not None:
                node[COORD] -= left

        ghcc.parse.visit_dict(visit_fn, ast_dict)
        token_names = [tok.name for tok in tokens[left:(right + 1)]]
    except ValueError:
        # Fallback to the fail-safe method.
        token_names = ghcc.parse.LexerWrapper().lex(
            CGenerator().visit(func_ast))
    return ast_dict, token_names
def to_c(node: ca.Node) -> str:
    """Render *node* to C source text with a fresh ``CGenerator``."""
    generator = CGenerator()
    return generator.visit(node)
def gen_node(node):
    """Render *node* to C source text with a fresh ``CGenerator``."""
    generator = CGenerator()
    return generator.visit(node)
def __init__(self):
    """Create the C generator and the empty result lists."""
    self.generator = CGenerator()
    # Rendered declarations collected so far.
    self.typedecls, self.functions = [], []
def visit_Assignment(self, node):
    """Render an assignment, delegating magic array targets.

    When the left-hand side resolves to a magic variable, the statement is
    rendered by that variable's ``setitem``; otherwise the stock generator
    output is returned.
    """
    magic_var, channel, index = self.get_magic_array_ref(node.lvalue)
    if magic_var is not None:
        return magic_var.setitem(self.cxnode, channel, index, node.op,
                                 self.visit(node.rvalue))
    return CGenerator.visit_Assignment(self, node)
def visit_ArrayRef(self, node):
    """Render an array subscript, delegating magic array references.

    When the reference resolves to a magic variable, the expression is
    rendered by that variable's ``getitem``; otherwise the stock generator
    output is returned.
    """
    magic_var, channel, index = self.get_magic_array_ref(node)
    if magic_var is not None:
        return magic_var.getitem(self.cxnode, channel, index)
    return CGenerator.visit_ArrayRef(self, node)
from copy import deepcopy
import attr
import re
import py
import pycparser
from pycparser import c_ast
from pycparser.c_generator import CGenerator

# Path to ``public_api.h`` in the directory that contains this module.
PUBLIC_API_H = py.path.local(__file__).dirpath('public_api.h')


def toC(node):
    """Render *node* to C source using the shared generator ``toC.gen``."""
    return toC.gen.visit(node)
# A single generator instance, stored on the function and reused by every call.
toC.gen = CGenerator()


def find_typedecl(node):
    """Follow ``.type`` links from *node* until a ``c_ast.TypeDecl`` is reached."""
    while not isinstance(node, c_ast.TypeDecl):
        node = node.type
    return node


@attr.s
class Function:
    """A function record: its name, its CPython name and its AST node."""

    # Matches an optional leading underscore, ``HPy`` and an optional
    # trailing underscore at the start of a name.
    _BASE_NAME = re.compile(r'^_?HPy_?')

    name = attr.ib()
    cpython_name = attr.ib()
    # Left out of the generated repr.
    node = attr.ib(repr=False)

    def base_name(self):
        """Return ``name`` with the leading ``_BASE_NAME`` match removed."""
        return self._BASE_NAME.sub('', self.name)
def __init__(self, cxnode, magic_vars):
    """Initialise the base generator and remember the rendering context.

    Args:
        cxnode: Stored as ``self.cxnode``.
        magic_vars: Stored as ``self.magic_vars``.
    """
    CGenerator.__init__(self)
    self.cxnode, self.magic_vars = cxnode, magic_vars