def is_pointer(self, expr):
    """Tell whether `expr` may be a pointer.

    True as soon as one of the candidate types yielded by
    ``expr_to_types(self.c_handler, expr)`` is dereferenceable, False
    when none is (including when there is no candidate at all).
    """
    candidates = expr_to_types(self.c_handler, expr)
    for candidate in candidates:
        if objc_is_dereferenceable(candidate):
            # One pointer-like interpretation is enough
            return True
    return False
def generate_check(self, snapshot, number):
    '''Emit the code of the "check" function through self.printer.

    Nothing is returned: every generated fragment is handed to
    ``self.printer.add_block``. The emitted body is one
    ``return all((...))`` expression made of:
    - a comparison on ``self._get_result()`` when the prototype's return
      type is dereferenceable, or is not named "void";
    - one ``_ensure_mem`` / ``_ensure_mem_sparse`` item per sanitized
      output memory access.

    ``self.fixed`` and ``self.bases_to_C`` are read here; they are the
    attributes stored by ``generate_init``.

    @snapshot: object providing memory_out, c_handler, typed_C_ids and
               output_value
    @number: forwarded to funcDefTemplate.format("check", number)

    Raises ValueError when a pointer-typed output value is neither an
    identifier, a memory access nor an operation. Shape and unicity
    expectations are enforced with assertions.
    '''
    self.printer.add_block(funcDefTemplate.format("check", number))
    self.printer.add_lvl()

    memory_out = snapshot.memory_out
    c_handler = snapshot.c_handler
    typed_C_ids = snapshot.typed_C_ids
    output_value = snapshot.output_value

    # Sanitize memory accesses
    memory_out, filled_out = self.sanitize_memory_accesses(
        memory_out,
        c_handler,
        typed_C_ids,
    )

    fixed = self.fixed
    bases_to_C = self.bases_to_C

    self.printer.add_block("return all((\n")
    self.printer.add_lvl()

    # Returned value
    if objc_is_dereferenceable(self.prototype.func_type):
        if output_value.is_id() or output_value.is_mem():
            assert output_value in fixed
            base = output_value
        elif output_value.is_op():
            # X + offset
            assert all((output_value.op == "+",
                        len(output_value.args) == 2,
                        isinstance(output_value.args[1], ExprInt),
                        output_value.args[0] in fixed))
            base = output_value.args[0]
        else:
            raise ValueError("Output should be in X, X + offset, @[X] form")

        info_C = list(c_handler.expr_to_c(output_value))
        assert len(info_C) == 1
        Clike = info_C[0]

        suffix = ""
        if bases_to_C[base] != Clike:
            # Only consider necessary calls to field_addr
            suffix = ' + self.field_addr("%s", "%s", is_ptr=True)' % (
                bases_to_C[base], Clike)

        self.printer.add_block(
            "# Check output value\n# result == %s\n" % Clike)
        self.printer.add_block(
            'self._get_result() == self.%s%s,\n' % (fixed[base], suffix))
    elif self.prototype.func_type.name != "void":
        retvalue = int(output_value)
        self.printer.add_block(
            "# Check output value\nself._get_result() == %s,\n"
            % hex(retvalue))

    # Output memory
    for dst in memory_out:
        info_type = list(c_handler.expr_to_types(dst))
        info_C = list(c_handler.expr_to_c(dst))
        # TODO handle unknown type?
        assert len(info_type) == 1
        assert len(info_C) == 1
        dst_type = info_type[0]

        if objc_is_dereferenceable(dst_type):
            if dst not in fixed:
                # The pointer is read but never dereferenced
                # Consider it as an int
                value = memory_out[dst]
                if not value.is_int():
                    # Second chance, it may have been fixed
                    value = fixed[value]
                assert value.is_int()
            else:
                value = "self.%s" % fixed[dst]
        else:
            value = memory_out[dst]

        # We already have the pointer allocated
        addr = fixed[dst.ptr]

        if dst in filled_out:
            # Sparse access, there are offsets NOT to consider
            offsets = [int(offset) for offset in filled_out[dst]]
            skipped = ", ".join(map(hex, offsets))
            self.printer.add_block(
                '# %s == %s (without considering %s offset(s)\n'
                % (info_C[0], value, skipped))
            self.printer.add_block(
                'self._ensure_mem_sparse'
                '(self.%s, self.pack(%s, self.sizeof("%s")), [%s]),\n'
                % (addr, value, info_C[0], skipped))
        else:
            # Full access
            self.printer.add_block('# %s == %s\n' % (info_C[0], value))
            self.printer.add_block(
                'self._ensure_mem(self.%s, self.pack(%s, self.sizeof("%s"))),\n'
                % (addr, value, info_C[0]))

    self.printer.sub_lvl()
    self.printer.add_block("))")
    self.printer.sub_lvl()
def generate_init(self, snapshot, number):
    '''Emit the code of the "init" function through self.printer.

    Nothing is returned: every generated fragment is handed to
    ``self.printer``. The emitted code, in order:
    - one ``_alloc_mem`` call per base pointer;
    - the definition of every other pointer from its base;
    - one ``_add_arg`` call per argument (pointer arguments never used
      as a base are skipped with a warning);
    - one ``_write_mem`` call per sanitized input memory access;
    - the ``self.<ptr> = <ptr>`` assignments needed by the generated
      check.

    Side effect on the generator: ``self.fixed`` (expression -> pointer
    identifier, or constant for never-dereferenced pointers) and
    ``self.bases_to_C`` (base expression -> C-like form) are stored for
    ``generate_check``.

    @snapshot: object providing memory_in, memory_out, c_handler,
               typed_C_ids, arguments_symbols, output_value and
               init_values
    @number: forwarded to funcDefTemplate.format("init", number)

    Raises:
    - ValueError: a memory address, or a pointer-typed output value, is
      neither an identifier, a memory access nor an operation;
    - RuntimeError: no initial value named "arg<i>_<name>" exists for a
      non-pointer argument;
    - TypeError: such an initial value is neither an integer nor a
      compose.
    Shape and unicity expectations are enforced with assertions.
    '''
    self.printer.add_block(funcDefTemplate.format("init", number))
    self.printer.add_lvl()

    memory_in = snapshot.memory_in
    memory_out = snapshot.memory_out
    c_handler = snapshot.c_handler
    typed_C_ids = snapshot.typed_C_ids
    arguments_symbols = snapshot.arguments_symbols
    output_value = snapshot.output_value

    # Sanitize memory accesses
    memory_in, _ = self.sanitize_memory_accesses(
        memory_in, c_handler, typed_C_ids)
    memory_out, _ = self.sanitize_memory_accesses(
        memory_out, c_handler, typed_C_ids)

    # Allocate zones if needed

    ## First, resolve common bases
    bases_to_C = {}  # expr -> C-like
    to_resolve = set()
    for expr in memory_in.keys() + memory_out.keys():
        to_resolve.update(expr.ptr.get_r(mem_read=True))

    fixed = {}
    for i, expr in enumerate(to_resolve):
        fixed[expr] = ExprId("base%d_ptr" % i, size=expr.size)
        info_type = list(c_handler.expr_to_types(expr))
        info_C = list(c_handler.expr_to_c(expr))
        assert len(info_type) == 1
        assert len(info_C) == 1
        arg_type = info_type[0]
        # Must be a pointer to be present in expr.get_r
        assert objc_is_dereferenceable(arg_type)
        bases_to_C[expr] = info_C[0]

    ## Second, alloc potential needed spaces for I/O
    max_per_base_offset = {}  # base -> maximum used offset
    max_per_base = {}  # base -> maximum used field
    ptr_to_info = {}
    for mode, exprs in (("input", memory_in), ("output", memory_out)):
        count = 0
        for expr in exprs:
            assert isinstance(expr, ExprMem)
            addr_expr = expr.ptr

            # Expr.replace_expr is postfix, enumerate possibilities
            if addr_expr.is_id() or addr_expr.is_mem():
                assert addr_expr in fixed
                base = addr_expr
                offset = 0
            elif addr_expr.is_op():
                # X + offset
                assert all((addr_expr.op == "+",
                            len(addr_expr.args) == 2,
                            isinstance(addr_expr.args[1], ExprInt),
                            addr_expr.args[0] in fixed))
                base = addr_expr.args[0]
                offset = int(addr_expr.args[1])
            else:
                raise ValueError("Memory access should be in "
                                 "X, X + offset, @[X]")

            if addr_expr in fixed:
                # Already handled
                ptr = fixed[addr_expr]
            else:
                ptr = ExprId("%s%d_ptr" % (mode, count),
                             size=addr_expr.size)
                fixed[addr_expr] = ptr
                count += 1

            info_type = list(c_handler.expr_to_types(addr_expr))
            info_C = list(c_handler.expr_to_c(expr))
            # TODO handle unknown type?
            assert len(info_type) == 1
            assert len(info_C) == 1
            expr_type = info_type[0]
            # Must be a pointer to be deref
            assert objc_is_dereferenceable(expr_type)
            assert expr_type.objtype.size >= (expr.size / 8)

            info = {"Clike": info_C[0],
                    "addr": addr_expr,
                    "ptr": ptr,
                    "base": base,
                    "offset": offset,
            }
            ptr_to_info[ptr] = info

            # Find the last field in the struct for future alloc
            if max_per_base_offset.get(base, -1) < offset:
                max_per_base_offset[base] = offset
                max_per_base[base] = info["Clike"]

    # Reserve memory for each base: enough to hold up to the end of the
    # field with the highest used offset
    for expr, Clike in bases_to_C.iteritems():
        ptr = fixed[expr]
        ptr_size = "%s_size" % ptr
        last_field = max_per_base[expr]
        self.printer.add_block("# %s\n" % Clike)
        self.printer.add_block(
            '%s = self.field_addr("%s", "%s") '
            '+ self.sizeof("%s")\n'
            % (ptr_size, Clike, last_field, last_field))
        self.printer.add_block(
            '%s = self._alloc_mem(%s, read=True, '
            'write=True)\n' % (ptr, ptr_size))

    # NOTE(review): nesting rebuilt from whitespace-collapsed source; this
    # blank line is assumed to follow the loop, not each base -- confirm
    self.printer.add_empty_line()

    # Set each pointer
    for ptr, info in sorted(ptr_to_info.iteritems(), key=lambda x: x[0]):
        base = info["base"]
        suffix = ""
        if info["offset"] != 0:
            # Only consider necessary calls to field_addr
            # (assume the first field of a struct will always be at offset 0)
            suffix = ' + self.field_addr("%s", "%s")' % (
                bases_to_C[base], info["Clike"])
        elif ptr == fixed[base]:
            # Avoid unnecessary identity assignment
            continue
        self.printer.add_block("# %s\n" % info["Clike"])
        self.printer.add_block('%s = %s%s\n' % (ptr, fixed[base], suffix))

    # Set initial values

    ## Arguments
    self.printer.add_empty_line()
    for i, arg_name in enumerate(self.prototype.args_order):
        arg_type = self.prototype.args[arg_name]
        symbol = arguments_symbols[i]

        if objc_is_dereferenceable(arg_type):
            if symbol not in fixed:
                # The argument is not used as a pointer
                # TODO
                # NOTE(review): assumed to be a live call rather than
                # part of the TODO comment -- confirm
                self.logger.warn("argument %s not used?!", arg_name)
                continue
            else:
                value = fixed[symbol]
        else:
            # Set real value from regs or stack
            for expr, expr_value in snapshot.init_values.iteritems():
                if expr.name == "arg%d_%s" % (i, arg_name):
                    break
            else:
                raise RuntimeError("Unable to find the init values of "
                                   "argument %d" % i)

            if expr_value.is_int():
                value = int(expr_value)
            elif expr_value.is_compose():
                # Only a part of the argument has been read
                # -> fill the rest with 0s
                value = 0
                for index, val in expr_value.iter_args():
                    if val.is_int():
                        val = int(val)
                    else:
                        val = 0
                    value |= (val << index)
            else:
                raise TypeError("An argument should be in the form I, "
                                "or {I, XX}")

        self.printer.add_block(
            "self._add_arg(%d, %s) # arg%d_%s\n" % (i, value, i, arg_name))

    ## Inputs
    self.printer.add_empty_line()
    for dst in memory_in:
        info_type = list(c_handler.expr_to_types(dst))
        info_C = list(c_handler.expr_to_c(dst))
        # TODO handle unknown type?
        assert len(info_type) == 1
        assert len(info_C) == 1
        dst_type = info_type[0]

        if objc_is_dereferenceable(dst_type):
            if dst not in fixed:
                # The pointer is read but never dereferenced
                # Consider it as an int
                value = memory_in[dst]
                assert value.is_int()
                # Fix it to this value
                fixed[dst] = value
            else:
                # We must have considered it before
                value = fixed[dst]
        else:
            value = memory_in[dst]

        # We already have the pointer allocated
        addr = fixed[dst.ptr]
        self.printer.add_block('# %s = %s\n' % (info_C[0], value))
        self.printer.add_block(
            'self._write_mem(%s, self.pack(%s, self.sizeof("%s")))\n'
            % (addr, value, info_C[0]))

    ## Returned value
    base = None
    if objc_is_dereferenceable(self.prototype.func_type):
        if output_value.is_id() or output_value.is_mem():
            assert output_value in fixed
            base = output_value
        elif output_value.is_op():
            # X + offset
            assert all((output_value.op == "+",
                        len(output_value.args) == 2,
                        isinstance(output_value.args[1], ExprInt),
                        output_value.args[0] in fixed))
            base = output_value.args[0]
        else:
            raise ValueError("Output should be in X, X + offset, @[X] form")

    # Needed for check generation

    ## For generate_check needs
    self.fixed = fixed
    self.bases_to_C = bases_to_C

    ## For the generated check needs
    to_save = set()
    to_save.update(fixed[dst.ptr] for dst in memory_out)
    if base is not None:
        to_save.add(fixed[base])

    self.printer.add_empty_line()
    for var in to_save:
        self.printer.add_block('self.%s = %s\n' % (var, var))

    self.printer.sub_lvl()
def generate_check(self, snapshot, number):
    '''Emit the code of the "check" function through self.printer.

    Nothing is returned: every generated fragment is handed to
    ``self.printer.add_block``. The emitted body is one
    ``return all((...))`` expression made of:
    - a comparison on ``self._get_result()`` when the prototype's return
      type is dereferenceable, or is not named "void";
    - one ``_ensure_mem`` / ``_ensure_mem_sparse`` item per sanitized
      output memory access.

    ``self.fixed`` and ``self.bases_to_C`` are read here; they are the
    attributes stored by ``generate_init``.

    @snapshot: object providing memory_out, c_handler and output_value
    @number: forwarded to funcDefTemplate.format("check", number)

    Raises ValueError when a pointer-typed output value is neither an
    identifier, a memory access nor an operation. Shape and unicity
    expectations are enforced with assertions.
    '''
    self.printer.add_block(funcDefTemplate.format("check", number))
    self.printer.add_lvl()

    memory_out = snapshot.memory_out
    c_handler = snapshot.c_handler
    output_value = snapshot.output_value

    # Sanitize memory accesses
    memory_out, filled_out = self.sanitize_memory_accesses(memory_out,
                                                           c_handler)

    fixed = self.fixed
    bases_to_C = self.bases_to_C

    self.printer.add_block("return all((\n")
    self.printer.add_lvl()

    # Returned value
    if objc_is_dereferenceable(self.prototype.func_type):
        if output_value.is_id() or output_value.is_mem():
            assert output_value in fixed
            base = output_value
        elif output_value.is_op():
            # X + offset
            assert all((output_value.op == "+",
                        len(output_value.args) == 2,
                        isinstance(output_value.args[1], ExprInt),
                        output_value.args[0] in fixed))
            base = output_value.args[0]
        else:
            raise ValueError("Output should be in X, X + offset, @[X] form")

        info_C = c_handler.expr_to_c(output_value)
        assert len(info_C) == 1
        Clike = info_C[0]

        suffix = ""
        if bases_to_C[base] != Clike:
            # Only consider necessary calls to field_addr
            suffix = ' + self.field_addr("%s", "%s", is_ptr=True)' % (
                bases_to_C[base], Clike)

        self.printer.add_block(
            "# Check output value\n# result == %s\n" % Clike)
        self.printer.add_block(
            'self._get_result() == self.%s%s,\n' % (fixed[base], suffix))
    elif self.prototype.func_type.name != "void":
        retvalue = int(output_value)
        self.printer.add_block(
            "# Check output value\nself._get_result() == %s,\n"
            % hex(retvalue))

    # Output memory
    for dst in memory_out:
        info_type = expr_to_types(c_handler, dst)
        info_C = c_handler.expr_to_c(dst)
        # TODO handle unknown type?
        assert len(info_type) == 1
        assert len(info_C) == 1
        dst_type = info_type[0]

        if objc_is_dereferenceable(dst_type):
            # We must have considered it before
            assert dst in fixed
            value = "self.%s" % fixed[dst]
        else:
            value = memory_out[dst]

        # We already have the pointer allocated
        addr = fixed[dst.arg]

        if dst in filled_out:
            # Sparse access, there are offsets NOT to consider
            offsets = [int(offset) for offset in filled_out[dst]]
            skipped = ", ".join(map(hex, offsets))
            self.printer.add_block(
                '# %s == %s (without considering %s offset(s)\n'
                % (info_C[0], value, skipped))
            self.printer.add_block(
                'self._ensure_mem_sparse'
                '(self.%s, self.pack(%s, self.sizeof("%s")), [%s]),\n'
                % (addr, value, info_C[0], skipped))
        else:
            # Full access
            self.printer.add_block('# %s == %s\n' % (info_C[0], value))
            self.printer.add_block(
                'self._ensure_mem(self.%s, self.pack(%s, self.sizeof("%s"))),\n'
                % (addr, value, info_C[0]))

    self.printer.sub_lvl()
    self.printer.add_block("))")
    self.printer.sub_lvl()
def generate_init(self, snapshot, number):
    '''Emit the code of the "init" function through self.printer.

    Nothing is returned: every generated fragment is handed to
    ``self.printer``. The emitted code, in order:
    - one ``_alloc_mem`` call per base pointer;
    - the definition of every other pointer from its base;
    - one ``_add_arg`` call per argument (pointer arguments never used
      as a base are skipped with a warning);
    - one ``_write_mem`` call per sanitized input memory access;
    - the ``self.<ptr> = <ptr>`` assignments needed by the generated
      check.

    Side effect on the generator: ``self.fixed`` (expression -> pointer
    identifier) and ``self.bases_to_C`` (base expression -> C-like form)
    are stored for ``generate_check``.

    Non-pointer argument values are read from ``snapshot.input_reg``
    using a hard-coded list of six register names, so a non-pointer
    argument at index 6 or above raises IndexError.

    @snapshot: object providing memory_in, memory_out, c_handler,
               arguments_symbols, output_value and input_reg
    @number: forwarded to funcDefTemplate.format("init", number)

    Raises ValueError when a memory address, or a pointer-typed output
    value, is neither an identifier, a memory access nor an operation.
    Shape and unicity expectations are enforced with assertions.
    '''
    self.printer.add_block(funcDefTemplate.format("init", number))
    self.printer.add_lvl()

    memory_in = snapshot.memory_in
    memory_out = snapshot.memory_out
    c_handler = snapshot.c_handler
    arguments_symbols = snapshot.arguments_symbols
    output_value = snapshot.output_value

    # Sanitize memory accesses
    memory_in, _ = self.sanitize_memory_accesses(memory_in, c_handler)
    memory_out, _ = self.sanitize_memory_accesses(memory_out, c_handler)

    # Allocate zones if needed

    ## First, resolve common bases
    bases_to_C = {}  # expr -> C-like
    to_resolve = set()
    for expr in memory_in.keys() + memory_out.keys():
        to_resolve.update(expr.arg.get_r(mem_read=True))

    fixed = {}
    for i, expr in enumerate(to_resolve):
        fixed[expr] = ExprId("base%d_ptr" % i, size=expr.size)
        info_type = expr_to_types(c_handler, expr)
        info_C = c_handler.expr_to_c(expr)
        assert len(info_type) == 1
        assert len(info_C) == 1
        arg_type = info_type[0]
        # Must be a pointer to be present in expr.get_r
        assert objc_is_dereferenceable(arg_type)
        bases_to_C[expr] = info_C[0]

    ## Second, alloc potential needed spaces for I/O
    max_per_base_offset = {}  # base -> maximum used offset
    max_per_base = {}  # base -> maximum used field
    ptr_to_info = {}
    for mode, exprs in (("input", memory_in), ("output", memory_out)):
        count = 0
        for expr in exprs:
            assert isinstance(expr, ExprMem)
            addr_expr = expr.arg

            # Expr.replace_expr is postfix, enumerate possibilities
            if addr_expr.is_id() or addr_expr.is_mem():
                assert addr_expr in fixed
                base = addr_expr
                offset = 0
            elif addr_expr.is_op():
                # X + offset
                assert all((addr_expr.op == "+",
                            len(addr_expr.args) == 2,
                            isinstance(addr_expr.args[1], ExprInt),
                            addr_expr.args[0] in fixed))
                base = addr_expr.args[0]
                offset = int(addr_expr.args[1])
            else:
                raise ValueError("Memory access should be in "
                                 "X, X + offset, @[X]")

            if addr_expr in fixed:
                # Already handled
                ptr = fixed[addr_expr]
            else:
                ptr = ExprId("%s%d_ptr" % (mode, count),
                             size=addr_expr.size)
                fixed[addr_expr] = ptr
                count += 1

            info_type = expr_to_types(c_handler, addr_expr)
            info_C = c_handler.expr_to_c(expr)
            # TODO handle unknown type?
            assert len(info_type) == 1
            assert len(info_C) == 1
            expr_type = info_type[0]
            # Must be a pointer to be deref
            assert objc_is_dereferenceable(expr_type)
            assert expr_type.objtype.size >= (expr.size / 8)

            info = {"Clike": info_C[0],
                    "addr": addr_expr,
                    "ptr": ptr,
                    "base": base,
                    "offset": offset,
            }
            ptr_to_info[ptr] = info

            # Find the last field in the struct for future alloc
            if max_per_base_offset.get(base, -1) < offset:
                max_per_base_offset[base] = offset
                max_per_base[base] = info["Clike"]

    # Reserve memory for each base: enough to hold up to the end of the
    # field with the highest used offset
    for expr, Clike in bases_to_C.iteritems():
        ptr = fixed[expr]
        last_field = max_per_base[expr]
        self.printer.add_block("# %s\n" % Clike)
        self.printer.add_block(
            '%s = self._alloc_mem(self.field_addr("%s", "%s")'
            ' + self.sizeof("%s"), read=True, write=True)\n'
            % (ptr, Clike, last_field, last_field))

    # NOTE(review): nesting rebuilt from whitespace-collapsed source; this
    # blank line is assumed to follow the loop, not each base -- confirm
    self.printer.add_empty_line()

    # Set each pointer
    for ptr, info in sorted(ptr_to_info.iteritems(), key=lambda x: x[0]):
        base = info["base"]
        suffix = ""
        if info["offset"] != 0:
            # Only consider necessary calls to field_addr
            # (assume the first field of a struct will always be at offset 0)
            suffix = ' + self.field_addr("%s", "%s")' % (
                bases_to_C[base], info["Clike"])
        elif ptr == fixed[base]:
            # Avoid unnecessary identity assignment
            continue
        self.printer.add_block("# %s\n" % info["Clike"])
        self.printer.add_block('%s = %s%s\n' % (ptr, fixed[base], suffix))

    # Set initial values

    ## Arguments
    # TODO: use abicls
    abi_order = ["RDI", "RSI", "RDX", "RCX", "R8", "R9"]
    self.printer.add_empty_line()
    for i, arg_name in enumerate(self.prototype.args_order):
        arg_type = self.prototype.args[arg_name]
        symbol = arguments_symbols[i]

        if objc_is_dereferenceable(arg_type):
            if symbol not in fixed:
                # The argument is not used as a pointer
                # TODO
                # NOTE(review): assumed to be a live call rather than
                # part of the TODO comment -- confirm
                self.logger.warn("argument %s not used?!", arg_name)
                continue
            else:
                value = fixed[symbol]
        else:
            # Set real value from regs or stack
            value = hex(snapshot.input_reg[abi_order[i]])

        self.printer.add_block("self._add_arg(%d, %s)\n" % (i, value))

    ## Inputs
    self.printer.add_empty_line()
    for dst in memory_in:
        info_type = expr_to_types(c_handler, dst)
        info_C = c_handler.expr_to_c(dst)
        # TODO handle unknown type?
        assert len(info_type) == 1
        assert len(info_C) == 1
        dst_type = info_type[0]

        if objc_is_dereferenceable(dst_type):
            # We must have considered it before
            assert dst in fixed
            value = fixed[dst]
        else:
            value = memory_in[dst]

        # We already have the pointer allocated
        addr = fixed[dst.arg]
        self.printer.add_block('# %s = %s\n' % (info_C[0], value))
        self.printer.add_block(
            'self._write_mem(%s, self.pack(%s, self.sizeof("%s")))\n'
            % (addr, value, info_C[0]))

    ## Returned value
    base = None
    if objc_is_dereferenceable(self.prototype.func_type):
        if output_value.is_id() or output_value.is_mem():
            assert output_value in fixed
            base = output_value
        elif output_value.is_op():
            # X + offset
            assert all((output_value.op == "+",
                        len(output_value.args) == 2,
                        isinstance(output_value.args[1], ExprInt),
                        output_value.args[0] in fixed))
            base = output_value.args[0]
        else:
            raise ValueError("Output should be in X, X + offset, @[X] form")

    # Needed for check generation

    ## For generate_check needs
    self.fixed = fixed
    self.bases_to_C = bases_to_C

    ## For the generated check needs
    to_save = set()
    to_save.update(fixed[dst.arg] for dst in memory_out)
    if base is not None:
        to_save.add(fixed[base])

    self.printer.add_empty_line()
    for var in to_save:
        self.printer.add_block('self.%s = %s\n' % (var, var))

    self.printer.sub_lvl()
def is_pointer(self, expr):
    """Tell whether `expr` may be a pointer.

    True as soon as one of the candidate types yielded by
    ``expr_to_types(self.c_handler, expr)`` is dereferenceable, False
    when none is (including when there is no candidate at all).
    """
    candidates = expr_to_types(self.c_handler, expr)
    for candidate in candidates:
        if objc_is_dereferenceable(candidate):
            # One pointer-like interpretation is enough
            return True
    return False
def prepare_symbexec(self, jitter, return_addr):
    """Set up the symbolic execution engine that shadows `jitter`.

    The jitter is configured to run `self.callback` with one instruction
    per call. A disassembler and an ``EmulatedSymbExecWithModif`` engine
    are created, the engine is loaded with the CPU registers and with
    every memory segment of the VM (as 8-byte ``ExprMem`` chunks), and
    this state is copied into ``self.symbols_init``.

    Each prototype argument then gets an ``arg<i>_<name>`` identifier
    whose evaluated initial value is recorded in ``self.init_values``;
    for dereferenceable argument types the identifier also replaces the
    argument in the engine state. Finally ``self.c_handler`` and the
    empty ``self.memories_read`` / ``self.memories_write`` sets are
    created.

    @jitter: jitter providing jit, cpu and vm
    @return_addr: stored as-is in self.return_addr
    """
    # Activate callback on each instruction
    jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1)
    #jitter.jit.log_mn = True
    #jitter.jit.log_regs = True
    jitter.exec_cb = self.callback

    # Disassembler
    self.mdis = self.machine.dis_engine(
        bin_stream_vm(jitter.vm),
        lines_wd=1,
    )

    # Symbexec engine
    ## Prepare the symbexec engine
    self.symb_ir = self.machine.ir()
    self.symb = EmulatedSymbExecWithModif(
        jitter.cpu, jitter.vm, self.symb_ir, {}
    )
    self.symb.enable_emulated_simplifications()

    ## Update registers value
    self.symb.reset_regs()
    self.symb.update_engine_from_cpu()

    ## Load the memory as ExprMem
    self.symb.func_read = None
    self.symb.func_write = None
    for segment_addr, segment in jitter.vm.get_all_memory().iteritems():
        segment_size = segment["size"]
        # Split into 8 bytes chunk for get_mem_overlapping
        for chunk_offset in xrange(0, segment_size, 8):
            # The last chunk of a segment may be shorter than 8 bytes
            chunk_bytes = min(8, segment_size - chunk_offset)
            chunk_addr = m2_expr.ExprInt(segment_addr + chunk_offset,
                                         size=64)
            chunk = m2_expr.ExprMem(chunk_addr, size=8 * chunk_bytes)
            # This is the initialisation: going through
            # self.symb.apply_change is not necessary
            self.symb.symbols[chunk] = self.symb._func_read(chunk)

    ## Save the initial state
    self.symbols_init = self.symb.symbols.copy()

    ## Save the returning address
    self.return_addr = return_addr

    # Inject argument
    self.init_values = {}
    self.args_symbols = []
    types_by_symbol_name = {}
    for index, param_name in enumerate(self.prototype.args_order):
        abi_location = self.get_arg_n(index)
        symbol = m2_expr.ExprId("arg%d_%s" % (index, param_name),
                                size=abi_location.size)
        self.init_values[symbol] = self.symb.eval_expr(abi_location)

        param_type = self.prototype.args[param_name]
        if objc_is_dereferenceable(param_type):
            # Convert the argument to symbol to track access based on it
            self.symb.apply_change(abi_location, symbol)

        # NOTE(review): nesting rebuilt from whitespace-collapsed source;
        # these two lines are assumed to run for every argument, not
        # only for pointers -- confirm
        types_by_symbol_name[symbol.name] = param_type
        self.args_symbols.append(symbol)

    # Init Expr <-> C conversion
    # Strict access is deliberately not enforced (example: memcpy(struct))
    self.c_handler = CHandler(self.types, types_by_symbol_name,
                              enforce_strict_access=False)

    # Init output structures
    self.memories_read = set()
    self.memories_write = set()
def prepare_symbexec(self, jitter, return_addr):
    """Set up the symbolic execution engine that shadows `jitter`.

    The jitter is configured to run `self.callback` with one instruction
    per call. A disassembler and an ``EmulatedSymbExecWithModif`` engine
    are created, the engine is loaded with the CPU registers and with
    every memory segment of the VM (as 8-byte ``ExprMem`` chunks), and
    this state is copied into ``self.symbols_init``.

    Each prototype argument then gets an ``arg<i>_<name>`` identifier,
    the argument being looked up by index in a hard-coded list of six
    register names (an index of 6 or above raises IndexError). For
    dereferenceable argument types only, the current engine value of
    that register is recorded in ``self.init_values`` and the identifier
    replaces it in the engine state. Finally ``self.c_handler`` and the
    empty ``self.memories_read`` / ``self.memories_write`` sets are
    created.

    @jitter: jitter providing jit, cpu and vm
    @return_addr: stored as-is in self.return_addr
    """
    # Activate callback on each instruction
    jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1)
    #jitter.jit.log_mn = True
    #jitter.jit.log_regs = True
    jitter.exec_cb = self.callback

    # Disassembler
    self.mdis = self.machine.dis_engine(
        bin_stream_vm(jitter.vm),
        lines_wd=1,
    )

    # Symbexec engine
    ## Prepare the symbexec engine
    self.symb_ir = self.machine.ir()
    self.symb = EmulatedSymbExecWithModif(
        jitter.cpu, jitter.vm, self.symb_ir, {}
    )
    self.symb.enable_emulated_simplifications()

    ## Update registers value
    self.symb.reset_regs()
    self.symb.update_engine_from_cpu()

    ## Load the memory as ExprMem
    self.symb.func_read = None
    self.symb.func_write = None
    for segment_addr, segment in jitter.vm.get_all_memory().iteritems():
        segment_size = segment["size"]
        # Split into 8 bytes chunk for get_mem_overlapping
        for chunk_offset in xrange(0, segment_size, 8):
            # The last chunk of a segment may be shorter than 8 bytes
            chunk_bytes = min(8, segment_size - chunk_offset)
            chunk_addr = m2_expr.ExprInt(segment_addr + chunk_offset,
                                         size=64)
            chunk = m2_expr.ExprMem(chunk_addr, size=8 * chunk_bytes)
            # This is the initialisation: going through
            # self.symb.apply_change is not necessary
            self.symb.symbols[chunk] = self.symb._func_read(chunk)

    ## Save the initial state
    self.symbols_init = self.symb.symbols.copy()

    ## Save the returning address
    self.return_addr = return_addr

    # Inject argument
    # TODO: use abicls
    abi_order = ["RDI", "RSI", "RDX", "RCX", "R8", "R9"]
    self.init_values = {}
    self.args_symbols = []
    types_by_symbol_name = {}
    for index, param_name in enumerate(self.prototype.args_order):
        abi_register = getattr(self.ira.arch.regs, abi_order[index])
        symbol = m2_expr.ExprId("arg%d_%s" % (index, param_name),
                                size=abi_register.size)

        param_type = self.prototype.args[param_name]
        if objc_is_dereferenceable(param_type):
            # Convert the argument to symbol to track access based on it
            self.init_values[symbol] = self.symb.symbols[abi_register]
            self.symb.apply_change(abi_register, symbol)

        # NOTE(review): nesting rebuilt from whitespace-collapsed source;
        # these two lines are assumed to run for every argument, not
        # only for pointers -- confirm
        types_by_symbol_name[symbol.name] = param_type
        self.args_symbols.append(symbol)

    # Init Expr <-> C conversion
    # Strict access is deliberately not enforced (example: memcpy(struct))
    self.c_handler = CHandler(self.types, types_by_symbol_name,
                              enforce_strict_access=False)

    # Init output structures
    self.memories_read = set()
    self.memories_write = set()