class Collector(c_ast.NodeVisitor):
    """Collect C type declarations and function declarations from an AST.

    Rendered declarations are accumulated, without duplicates and in
    first-seen order, in two lists:

    * ``typedecls`` -- typedef/struct/union/enum declarations
    * ``functions`` -- function declarations

    Only nodes that either carry no source coordinate or whose source file
    path contains ``include_dir`` are collected.  ``include_dir``,
    ``ARRAY_SIZEOF_PATTERN``, ``VARIADIC_ARG_PATTERN`` and
    ``FUNCTION_BLACKLIST`` are not defined in this class; they are expected
    to be provided by the enclosing scope.

    None of the ``visit_*`` methods recurse into child nodes.
    """

    def __init__(self):
        self.generator = CGenerator()
        self.typedecls = []
        self.functions = []

    @staticmethod
    def _in_include_dir(node):
        """Return True if *node* should be collected.

        A node qualifies when it has no coordinate at all, or when the
        absolute path of its source file contains ``include_dir``.  The
        ``None`` test must come first: reading ``node.coord.file`` on a
        coordinate-less node raises ``AttributeError``.
        """
        if node.coord is None:
            return True
        return include_dir in os.path.abspath(node.coord.file)

    def process_typedecl(self, node):
        """Render *node* and record it in ``typedecls`` if it is new."""
        if not self._in_include_dir(node):
            return
        typedecl = '{};'.format(self.generator.visit(node))
        # Whatever ARRAY_SIZEOF_PATTERN matches is replaced by '[...]'.
        typedecl = ARRAY_SIZEOF_PATTERN.sub('[...]', typedecl)
        if typedecl not in self.typedecls:
            self.typedecls.append(typedecl)

    def sanitize_enum(self, enum):
        """Replace every enumerator value of *enum* with a ``...`` constant.

        The node is modified in place and also returned.
        """
        for _, enumeratorlist in enum.children():
            for _, enumerator in enumeratorlist.children():
                enumerator.value = c_ast.Constant('dummy', '...')
        return enum

    def visit_Typedef(self, node):
        """Record a typedef, sanitizing the enum it wraps (if any) first."""
        if not self._in_include_dir(node):
            return
        inner = node.type
        if (isinstance(inner, c_ast.TypeDecl)
                and isinstance(inner.type, c_ast.Enum)):
            self.sanitize_enum(inner.type)
        self.process_typedecl(node)

    def visit_Union(self, node):
        """Record a union declaration."""
        self.process_typedecl(node)

    def visit_Struct(self, node):
        """Record a struct declaration."""
        self.process_typedecl(node)

    def visit_Enum(self, node):
        """Record an enum declaration with its values replaced by ``...``."""
        if not self._in_include_dir(node):
            return
        node = self.sanitize_enum(node)
        self.process_typedecl(node)

    def visit_FuncDecl(self, node):
        """Record a function declaration unless its name is blacklisted."""
        if not self._in_include_dir(node):
            return
        # For a pointer return type the declared name sits one level deeper.
        if isinstance(node.type, c_ast.PtrDecl):
            function_name = node.type.type.declname
        else:
            function_name = node.type.declname
        if function_name in FUNCTION_BLACKLIST:
            return
        decl = '{};'.format(self.generator.visit(node))
        # Whatever VARIADIC_ARG_PATTERN matches is replaced by '...'.
        decl = VARIADIC_ARG_PATTERN.sub('...', decl)
        if decl not in self.functions:
            self.functions.append(decl)
def build_source(statements):
    """Render AST statements to C source text, dropping repeated output.

    Falsy entries in *statements* are skipped.  Each remaining node is
    rendered with a ``CGenerator`` and terminated according to its type:
    function definitions get a newline, pragmas a blank line, and anything
    else a semicolon followed by a blank line.  A rendered statement whose
    text has already been emitted is not emitted again.

    Returns the concatenated source as a single string.
    """
    generator = CGenerator()
    emitted = set()
    chunks = []

    for statement in statements:
        if not statement:
            continue

        if isinstance(statement, c_ast.FuncDef):
            terminator = "\n"
        elif isinstance(statement, c_ast.Pragma):
            terminator = "\n\n"
        else:
            terminator = ";\n\n"

        rendered = generator.visit(statement) + terminator
        if rendered in emitted:
            continue
        emitted.add(rendered)
        chunks.append(rendered)

    return "".join(chunks)
class MatchFunctionsTest(unittest.TestCase):
    """Tests for ``match_functions.serialize``."""

    def setUp(self) -> None:
        self.parser = pycparser.CParser(lexer=ghcc.parse.CachedCLexer)
        self.generator = CGenerator()
        self.lexer = ghcc.parse.LexerWrapper()

    def test_serialize(self) -> None:
        """Serialized tokens must equal the lexed, regenerated function code.

        For every sample in ``EXAMPLE_CODE`` the code is preprocessed and
        parsed, each function found in the AST is serialized, and the token
        list from ``serialize`` is compared with the tokens obtained by
        lexing the text the C generator produces for that function.
        """
        for code, _ in EXAMPLE_CODE:
            preprocessed_code = ghcc.parse.preprocess(code)
            ast = self.parser.parse(preprocessed_code)
            token_coords = ghcc.parse.convert_to_tokens(
                preprocessed_code, self.parser.clex.cached_tokens)
            functions = ghcc.parse.FunctionExtractor().find_functions(ast)
            for func_ast in functions.values():
                # Only the token list is checked; the AST dict is ignored.
                _, tokens = match_functions.serialize(func_ast, token_coords)
                original_code = self.lexer.lex(self.generator.visit(func_ast))
                # assertEqual (not a bare assert) still runs under
                # `python -O` and reports a diff on mismatch.
                self.assertEqual(tokens, original_code)
def print_header(message):
    """Print C typedefs for the type descriptions contained in *message*.

    *message* is JSON text that decodes to a mapping from a type name to an
    info dict.  Each info dict has at least a ``'kind'`` entry; the other
    keys read here (``'size'``, ``'fields'``, ``'tupleElements'``,
    ``'enumCases'``, ``'protocols'`` ...) depend on the kind.

    Output goes to stdout in two phases: first a fixed preamble plus one
    forward typedef (or a complete simple typedef) per type, then the full
    struct definitions in the order produced by ``top_sort(req_graph)``.

    Relies on names from the enclosing scope that are not defined here:
    ``pointer_size``, ``script``, ``remove_generic`` and ``top_sort``.
    """
    generator = CGenerator()
    parser = CParser()

    def del_spaces(name):
        """Rewrite '(extension in X):' and '(... in _...)' parts of *name*."""
        if name.startswith('(extension in '):
            idx = name.index('):')
            # 14 == len('(extension in '); keep the text between it and '):'.
            name = '_extension_in_' + name[14:idx] + "__" + name[idx + 2:]
        # file private types
        if ' in _' in name:
            idx = name.index(' in _')
            end = name.index(')', idx)
            start = name.rindex('(', None, idx)
            namespace = name[:start]
            if '>' in namespace:
                # Drop the last character of the prefix and mangle the rest.
                namespace = mangle_name(namespace[:-1]) + '.'
            name = namespace + name[start + 1:idx] + name[end + 1:]
        return name

    def mangle_name(human):
        """Return the C identifier used for the type named *human*.

        A few pointer-like names are returned unchanged and ``'()'`` maps to
        ``'void'``.  Every other name must be a key of ``types``.
        """
        if human in ('void*', 'voidp', 'Metadata*'):
            return human
        if human == '()':
            return 'void'
        info = types[human]
        if 'getGenericParams' in info and info['getGenericParams']:
            name = remove_generic(human)
        else:
            name = human
        if name.startswith('?Unknown type of'):
            name = name.replace('?Unknown type of ', 'XXX_unknown_type_of_')
        if name.startswith("Static #"):
            spl = name.split(' ', 4)
            # NOTE(review): hash() of a str differs between interpreter runs
            # unless PYTHONHASHSEED is fixed, so this name is presumably not
            # stable across runs. [1:] drops the first character of the
            # decimal text (a '-' sign or the leading digit).
            return "_static_no" + spl[1][1:] + "_in_" + spl[3] + "__func" + str(hash(spl[4]))[1:]
        name = del_spaces(name)
        outp = f'swift_{info["kind"]}__'
        if info['kind'] == "Tuple":
            elems = []
            for e in info['tupleElements']:
                name = mangle_name(e['type'])
                if e['label']:
                    name += "__as_" + e['label']
                elems.append(name)
            outp += "with__" + "__and__".join(elems)
        elif info['kind'] == "Existential":
            protos = []
            for p in info['protocols']:
                protos.append(
                    del_spaces(script.exports.demangle(p)).replace(".", "__"))
            if info['isClassBounded']:
                protos.append("Swift__AnyObject")
            if protos:
                outp += "conforming_to__" + "__and__".join(protos)
            else:
                outp += "Any"
            if info.get('getSuperclassConstraint'):
                outp += "__inheriting_from_" + mangle_name(
                    info['getSuperclassConstraint'])
        elif info['kind'] == 'Function':
            # Same run-dependent hash() caveat as for "Static #" names above.
            return "func_" + str(hash(name))[1:]
        else:
            outp += name.replace(".", "_")
        # NOTE(review): the original indentation was lost; this suffix is
        # assumed to apply after every branch above -- confirm.
        if 'getGenericParams' in info and info['getGenericParams']:
            gen_params = [
                mangle_name(param) for param in info['getGenericParams']
            ]
            outp += "__of__" + "__and__".join(gen_params)
        return outp

    def make_decl(name, offset, type_name):
        """Append a field declaration for the type currently being built.

        *offset* is an int or a ``'0x'``-prefixed hex string.  When it lies
        beyond ``prev_end`` a ``char _paddingN[...]`` member is inserted
        first.  An offset of ``-1`` adds the field without padding and
        without advancing ``prev_end``.  The dependency of ``parent_name``
        on *type_name* is recorded in ``req_graph``.
        """
        nonlocal decls, pad_count, parser, prev_end
        if isinstance(offset, str):
            assert offset[:2] == '0x'
            offset = int(offset, 16)
        if prev_end < offset:
            pad_str = f"char _padding{pad_count}[{offset - prev_end}];"
            decls.append(parser.parse(pad_str).ext[0])
            pad_count += 1
        type_decl = TypeDecl(name.replace(".", "__"), None,
                             IdentifierType([mangle_name(type_name)]))
        decls.append(Decl(None, None, None, None, type_decl, None, None))
        # req_graph maps a field's type name to the set of types that use it.
        req_graph.setdefault(type_name, set()).add(parent_name)
        if offset != -1:
            # Names ending in '*' are sized as pointers; other sizes come
            # from the hex 'size' entry of the type's info dict.
            size = pointer_size if type_name.endswith('*') else int(
                types[type_name]['size'], 16)
            prev_end = offset + size

    #print("#include <stdint.h>")
    print("#pragma pack(1)")
    print("typedef void *voidp;")
    print("typedef struct Metadata_s Metadata;")
    types = json.loads(message)
    req_graph = {}
    ptr_types = {'void*', 'voidp', 'Metadata*'}
    ctypes = {}
    for name, info in types.items():
        # Per-type state that make_decl reads and updates through nonlocal.
        pad_count = 0
        decls = []
        prev_end = 0
        ctype = None
        parent_name = name
        if info['kind'] == "Tuple":
            for i, elem in enumerate(info['tupleElements']):
                # Unlabelled elements are named by position: _0, _1, ...
                make_decl(elem['label'] or f'_{i}', elem['offset'],
                          elem['type'])
            ctype = Struct(mangle_name(name) + "_s", decls)
        elif info['kind'] == "ObjCClassWrapper":
            print(
                f'typedef struct {mangle_name(name)}_s *{mangle_name(name)};')
        elif info['kind'] in ("Struct", "Class"):
            if info['kind'] == 'Class':
                make_decl('_isa', '0x0', 'Metadata*')
                #make_decl('_refCounts', hex(pointer_size), 'size_t')
            for i, field in enumerate(info['fields']):
                make_decl(field['name'], field['offset'], field['type'])
            ctype = Struct(mangle_name(name) + "_s", decls)
            if info['kind'] == 'Class':
                # Classes are emitted as a pointer to their struct.
                ctype = PtrDecl(None, ctype)
        elif info['kind'] == "Existential":
            if info['isClassBounded'] or info.get(
                    'getSuperclassConstraint'):  # class existential container
                make_decl(f'heap_object', -1, 'void*')
            else:  # opaque existential container
                decls.append(
                    parser.parse("void *heapObjectOrInlineData0;").ext[0])
                for i in range(1, 3):
                    decls.append(
                        parser.parse(
                            "void *nothingOrInlineData{};".format(i)).ext[0])
                # NOTE(review): the original indentation was lost; this call
                # is assumed to belong to the opaque-container branch only
                # -- confirm.
                make_decl("dynamicType", -1, "Metadata*")
            for i in range(info['witnessTableCount']):
                make_decl(f'_witnessTable{i + 1}', -1, 'void*')
            ctype = Struct(mangle_name(name) + "_s", decls)
        elif info['kind'] in ("Enum", "Optional"):
            if info['enumCases'] and info['enumCases'][0]['name'] is None:
                # C-like enum
                # we don't have case names or values, so just generate a typedef to an int type
                print(
                    f"typedef uint{int(info['size'], 16) * 8}_t {mangle_name(name)};"
                )
            elif len(info['enumCases']) == 0:
                ctype = Struct(mangle_name(name) + "_s", decls)
            elif len(info['enumCases']) == 1 and info['enumCases'][0]['type']:
                make_decl(info['enumCases'][0]['name'], 0,
                          info['enumCases'][0]['type'])
                ctype = Struct(mangle_name(name) + "_s", decls)
            else:
                # Everything else becomes a struct wrapping a char array
                # whose length is the 'size' text as given.
                print(
                    f'typedef struct {mangle_name(name)}_s {{ char _data[{info["size"]}]; }} {mangle_name(name)};'
                )
        elif info['kind'] == 'Opaque':
            if 'getCType' in info:
                # NOTE(review): 'int64' is listed twice; the second entry is
                # redundant.
                ctype_names = {
                    'pointer': 'void*',
                    'int8': 'int8_t',
                    'int16': 'int16_t',
                    'int32': 'int32_t',
                    'int64': 'int64_t',
                    'int64': 'int64_t',
                }
                print(
                    f'typedef {ctype_names[info["getCType"]]} {mangle_name(name)};'
                )
            elif name == 'Builtin.NativeObject':
                print(f'typedef void *{mangle_name(name)};')
            else:
                print(f'typedef char {mangle_name(name)}[{info["size"]}];')
        elif info['kind'] == 'Function':
            print(f"typedef void *func_{str(hash(name))[1:]};"
                  )  # TODO: proper names
        else:
            print(f'typedef char {mangle_name(name)}[{info["size"]}];')
        if ctype:
            # Keep the full definition for the second phase and print only a
            # forward typedef now.
            type_decl = TypeDecl(mangle_name(name), None, ctype)
            ctypes[name] = type_decl
            type_decl_forward = Struct(mangle_name(name) + "_s", [])
            # NOTE(review): type_decl was just built with TypeDecl(...), so
            # this check looks like it can never be true; possibly `ctype`
            # was meant -- confirm before changing, it affects ptr_types and
            # the forward typedef.
            if isinstance(type_decl, PtrDecl):
                ptr_types.add(name)
                type_decl_forward = PtrDecl(None, type_decl_forward)
            print(
                generator.visit(
                    Typedef(mangle_name(name), None, ['typedef'],
                            type_decl_forward)) + ";")
    # Pointer-like types are dropped from the dependency graph before sorting.
    for name in ptr_types:
        req_graph.pop(name, None)
    for name in top_sort(req_graph):
        if name in ctypes:
            print(f"\n// {name}")
            print(
                generator.visit(
                    Typedef(mangle_name(name), None, ['typedef'],
                            ctypes[name])) + ";")
class RunnerTemplate:
    """Fill the ``fffc_runner.c`` template for one target function.

    The template is loaded as bytes from the ``fffc`` package.  Each
    ``replace_*`` method substitutes one (or two) ``___FFFC_...`` placeholders
    in ``template_data`` with UTF-8 encoded text derived from the target
    function description.  ``inject`` applies all of them in a fixed order
    and returns the finished source text.
    """

    template_name = "fffc_runner.c"

    def __init__(self, func, name, binary_path, executable_path,
                 inferred_header_include, pie):
        self.generator = CGenerator()
        self.func = func
        self.name = name
        self.binary_path = binary_path
        self.exe_path = executable_path
        self.pie = pie
        self.template_path = self._get_template_path()
        self.template_data = pkgutil.get_data("fffc", str(self.template_path))
        self.inferred_header_include = inferred_header_include

        # Pre-render the signatures that are later pasted into the template.
        render = self.generator.visit
        self.hook_sig = render(func.define("FFFC_replacement"))
        self.parallel_sig = render(func.define("FFFC_parallel_replacement"))
        self.proxy_sig = render(
            func.build_ast("FFFC_proxy_target", func.typename,
                           func.arguments, dwarf_to_c.DwarfVoidType()))
        self.worker_sig = render(
            func.build_ast("FFFC_worker_target", func.typename,
                           func.arguments, dwarf_to_c.DwarfVoidType()))

    def _fill(self, placeholder, text):
        """Replace every *placeholder* in the template with UTF-8 *text*."""
        self.template_data = self.template_data.replace(
            placeholder, bytes(text, "utf-8"))

    def _returns_value(self):
        """Return True unless the target's return type is exactly void."""
        return type(self.func.return_type) != dwarf_to_c.DwarfVoidType

    def replace_target_name(self):
        """Insert the target name."""
        self._fill(b"___FFFC_TARGET_NAME___", self.name)

    def replace_hook_sig(self):
        """Insert the signature of the replacement hook."""
        self._fill(b"___FFFC_HOOK_SIG___", self.hook_sig)

    def replace_parallel_sig(self):
        """Insert the signature of the parallel replacement."""
        self._fill(b"___FFFC_PARALLEL_SIG___", self.parallel_sig)

    def replace_proxy_sig(self):
        """Insert the signature of the proxy target."""
        self._fill(b"___FFFC_PROXY_SIG___", self.proxy_sig)

    def replace_worker_sig(self):
        """Insert the signature of the worker target."""
        self._fill(b"___FFFC_WORKER_SIG___", self.worker_sig)

    def replace_inferred_header(self):
        """Insert the include text for the inferred header."""
        self._fill(b"___FFFC_INFERRED_HEADER___",
                   self.inferred_header_include)

    def replace_call(self):
        """Insert the call to ``FFFC_target``.

        For a non-void target the call initialises a ``retval`` declaration;
        for a void target it is a bare call statement.
        """
        if self._returns_value():
            retval_type = self.func.return_type.get_reference()("retval")
            initializer = self.func.call("FFFC_target")
            node = c_ast.Decl("retval", [], [], [], retval_type, initializer,
                              None)
        else:
            node = self.func.call("FFFC_target")
        self._fill(b"___FFFC_CALL___", self.generator.visit(node) + ";")

    def replace_proxy_call(self):
        """Insert the call to ``FFFC_proxy_target``."""
        statement = self.generator.visit(self.func.call("FFFC_proxy_target"))
        self._fill(b"___FFFC_PROXY_CALL___", statement + ";")

    def replace_worker_call(self):
        """Insert the call to ``FFFC_worker_target``."""
        statement = self.generator.visit(self.func.call("FFFC_worker_target"))
        self._fill(b"___FFFC_WORKER_CALL___", statement + ";")

    def replace_target_decl(self):
        """Insert the declaration of the ``FFFC_target`` function pointer."""
        # Now you need to declare a pointer to the function whose name is FFFC_replacement
        pointer = c_ast.PtrDecl([], self.func.declare("FFFC_target"))
        declaration = c_ast.Decl("FFFC_target", [], [], [], pointer, None,
                                 None)
        self._fill(b"___FFFC_TARGET_DECL___",
                   self.generator.visit(declaration) + ";")

    def replace_offset(self):
        """Insert the function's ``low_pc`` and the ``pie`` value, both as hex."""
        data = self.template_data
        data = data.replace(b"___FFFC_OFFSET__",
                            bytes(hex(self.func.low_pc), "utf-8"))
        data = data.replace(b"___FFFC_RECALCULATE_OFFSET___",
                            bytes(hex(self.pie), "utf-8"))
        # Written back only after both substitutions have succeeded.
        self.template_data = data

    def replace_return(self):
        """Insert ``return retval`` for non-void targets, else ``return;``."""
        if self._returns_value():
            statement = self.generator.visit(
                c_ast.Return(expr=c_ast.ID("retval")))
        else:
            statement = "return;"
        self._fill(b"___FFFC_RETURN___", statement)

    def replace_argument_mutators(self):
        """Insert one tab-indented mutator call per non-variadic argument."""
        blocks = []
        for arg in self.func.arguments:
            if type(arg) == c_ast.EllipsisParam:
                continue
            call = make_commented_mutator_call_from_var(arg.name, arg.type)
            blocks.append("\n".join("\t" + line
                                    for line in call.splitlines()))
        self._fill(b"___FFFC_ARGUMENT_MUTATORS___", "\n".join(blocks))

    def replace_binary_path(self):
        """Insert the binary path, or an empty string for the main executable."""
        bin_path = str(self.binary_path)
        # This deals with the weirdness of dl_iterate_phdr, which notates
        # the main executable as an empty string
        target_path = "" if bin_path == str(self.exe_path) else bin_path
        self._fill(b"___FFFC_BINARY_PATH__", target_path)

    def _get_template_path(self):
        """Return the package-relative path of the template as a string."""
        return str(Path("templates") / self.template_name)

    def inject(self):
        """Apply every substitution and return ``(None, source_text)``."""
        steps = (
            self.replace_target_decl,
            self.replace_inferred_header,
            self.replace_target_name,
            self.replace_hook_sig,
            self.replace_parallel_sig,
            self.replace_proxy_sig,
            self.replace_worker_sig,
            self.replace_call,
            self.replace_proxy_call,
            self.replace_worker_call,
            self.replace_return,
            self.replace_argument_mutators,
            self.replace_offset,
            self.replace_binary_path,
        )
        for step in steps:
            step()
        return None, str(self.template_data, "utf-8")