def _process_sbb(self, cur_bloc, last_instruction):
    """
    Lower a trailing SBB reg, reg into an explicit conditional diamond.

    The implicit "reg = CF ? -1 : 0" effect is made explicit:
    cur_bloc falls through to a test block which JB's to a block setting
    reg to -1, or falls through to a block setting reg to 0; both paths
    re-join at the original successor of cur_bloc.

    :param cur_bloc: AsmBlock ending with the SBB instruction
    :param last_instruction: the SBB instruction itself
    """
    # taken path: reg = -1, then jump to the original successor
    taken_block = AsmBlock(self.loc_db.add_location())
    # dispatch block holding the conditional branch
    test_block = AsmBlock(self.loc_db.add_location())

    dst_reg = last_instruction.args[0]
    taken_block.lines.append(
        create_mov_instruction(self.mode, dst_reg, ExprInt(-1, dst_reg.size)))

    # cur_bloc has a single successor; read it before bto is rewritten below
    succ_loc = next(iter(cur_bloc.bto)).loc_key
    taken_block.lines.append(
        create_jump_instruction(self.mode, ExprLoc(succ_loc, self.mode)))

    branch_name = "JB"  # JC is not implemented in miasm, using alias

    # drop the SBB itself from the original block
    cur_bloc.lines.pop()

    # fallthrough path: reg = 0
    fall_block = AsmBlock(self.loc_db.add_location())
    fall_block.lines = [
        create_mov_instruction(self.mode, dst_reg, ExprInt(0, dst_reg.size))
    ]

    test_block.lines.append(
        create_cond_branch_instruction(
            self.mode, branch_name,
            ExprLoc(taken_block.loc_key, self.mode)))

    # keep the original block assemblable even if the SBB was its only line
    if not cur_bloc.lines:
        cur_bloc.lines = [create_nop(self.mode)]

    # wire up the diamond
    taken_block.bto = {AsmConstraintTo(succ_loc)}
    cur_bloc.bto = {AsmConstraintNext(test_block.loc_key)}
    test_block.bto = {
        AsmConstraintNext(fall_block.loc_key),
        AsmConstraintTo(taken_block.loc_key)
    }
    fall_block.bto = {AsmConstraintNext(succ_loc)}

    self.add_block(taken_block)
    self.add_block(test_block)
    self.add_block(fall_block)
def _process_cmov(self, cur_bloc, last_instruction):
    """
    Lower a trailing CMOVcc into an explicit conditional branch + MOV.

    The CMOV is removed; cur_bloc falls through to a test block that
    Jcc's to a block performing the MOV and jumping to the original
    successor, or falls straight through to that successor.

    :param cur_bloc: AsmBlock ending with the CMOVcc instruction
    :param last_instruction: the CMOVcc instruction itself
    """
    # block performing the conditional move when the condition holds
    mov_block = AsmBlock(self.loc_db.add_location())
    # dispatch block holding the conditional branch
    test_block = AsmBlock(self.loc_db.add_location())

    dst, src = last_instruction.args[0], last_instruction.args[1]
    mov_block.lines.append(create_mov_instruction(self.mode, dst, src))

    # cur_bloc has a single successor; read it before bto is rewritten below
    succ_loc = next(iter(cur_bloc.bto)).loc_key
    mov_block.lines.append(
        create_jump_instruction(self.mode, ExprLoc(succ_loc, self.mode)))

    # CMOVcc shares its condition code suffix with Jcc
    branch_name = "J" + last_instruction.name[len("CMOV"):]

    # drop the CMOV itself; keep the block assemblable if it becomes empty
    cur_bloc.lines.pop()
    if not cur_bloc.lines:
        cur_bloc.lines = [create_nop(self.mode)]

    test_block.lines.append(
        create_cond_branch_instruction(
            self.mode, branch_name,
            ExprLoc(mov_block.loc_key, self.mode)))

    # wire up the diamond
    mov_block.bto = {AsmConstraintTo(succ_loc)}
    test_block.bto = {
        AsmConstraintNext(succ_loc),
        AsmConstraintTo(mov_block.loc_key)
    }
    cur_bloc.bto = {AsmConstraintNext(test_block.loc_key)}

    self.add_block(mov_block)
    self.add_block(test_block)
def fix_multiple_next_constraints(asmcfg, mode):
    """
    Ensure every block has at most one "next" (fallthrough) predecessor.

    When several blocks fall through into the same block, all but the
    first are rerouted through a fresh trampoline block holding an
    explicit JMP to the shared destination.

    :param asmcfg: AsmCFG to fix up (modified in place)
    :param mode: architecture mode used to build the new JMP instructions
    :return: None
    """
    blocks_to_be_added = []
    # Fix: the fallthrough-edge set does not depend on loc_key, so build it
    # once instead of rebuilding the whole dict for every node (was O(n*E)).
    next_edges = [
        edge
        for edge, constraint in asmcfg.edges2constraint.items()
        if constraint == AsmConstraint.c_next
    ]
    for loc_key in asmcfg.nodes():
        pred_next = [ploc_key for (ploc_key, dloc_key) in next_edges
                     if dloc_key == loc_key]
        if len(pred_next) <= 1:
            continue
        # keep the first fallthrough predecessor, reroute the others via JMP
        for ploc_key in pred_next[1:]:
            inst = create_jump_instruction(mode, ExprLoc(loc_key, mode))
            new_block_loc_key = asmcfg.loc_db.add_location()
            new_block = AsmBlock(new_block_loc_key)
            new_block.addline(inst)
            new_block.bto = {AsmConstraintTo(loc_key)}
            asmcfg.loc_key_to_block(ploc_key).bto = {
                AsmConstraintNext(new_block_loc_key)
            }
            blocks_to_be_added.append(new_block)
    # add the trampolines only after the scan so the node set being
    # iterated is not mutated mid-walk
    for block in blocks_to_be_added:
        asmcfg.add_block(block)
def arm_guess_jump_table(dis_engine, cur_block, offsets_to_dis):
    """
    Heuristically recover an ARM jump table targeted by cur_block.

    Looks for a PC assignment of the form ``PC = @[base + index]``,
    matches the address against ``jra + jrb`` to extract the table base,
    then walks the table reading 32-bit entries until an entry lies too
    far from the base. Every recovered destination is queued for
    disassembly and attached to cur_block as a "to" constraint.

    :param dis_engine: disassembly engine (provides arch, loc_db, bin_stream)
    :param cur_block: AsmBlock suspected to use a jump table
    :param offsets_to_dis: set collecting the discovered target offsets
    :raises NotImplementedError: when the address expression does not
        match the supported ``base + index`` shape
    """
    arch = dis_engine.arch
    loc_db = dis_engine.loc_db
    ira = get_ira(arch, dis_engine.attrib)

    # placeholder ids used by the pattern matcher below
    jra = ExprId('jra')
    jrb = ExprId('jrb')

    ir_arch = ira(loc_db)
    ircfg = ira.new_ircfg()
    ir_arch.add_asmblock_to_ircfg(cur_block, ircfg)
    for irblock in viewvalues(ircfg.blocks):
        # find the last value assigned to PC in this IR block, if any
        pc_val = None
        for exprs in irblock:
            for e in exprs:
                if e.dst == ir_arch.pc:
                    pc_val = e.src
        if pc_val is None:
            continue
        # only memory-based PC assignments can be jump tables
        if not isinstance(pc_val, ExprMem):
            continue
        assert (pc_val.size == 32)
        print(pc_val)
        ad = pc_val.arg
        ad = expr_simp(ad)
        print(ad)
        # split the address into a base (constant) and an index part
        res = match_expr(ad, jra + jrb, set([jra, jrb]))
        if res is False:
            raise NotImplementedError('not fully functional')
        print(res)
        if not isinstance(res[jrb], ExprInt):
            raise NotImplementedError('not fully functional')
        base_ad = int(res[jrb])
        print(base_ad)
        addrs = set()
        i = -1
        max_table_entry = 10000
        max_diff_addr = 0x100000  # heuristic
        while i < max_table_entry:
            i += 1
            try:
                ad = upck32(dis_engine.bin_stream.getbytes(base_ad + 4 * i, 4))
            # Fix: was a bare "except:", which also swallowed SystemExit and
            # KeyboardInterrupt; a read past the stream end still stops the walk
            except Exception:
                break
            # entries pointing too far away mark the end of the table
            if abs(ad - base_ad) > max_diff_addr:
                break
            addrs.add(ad)
        print([hex(x) for x in addrs])
        for ad in addrs:
            offsets_to_dis.add(ad)
            l = loc_db.get_or_create_offset_location(ad)
            c = AsmConstraintTo(l)
            cur_block.addto(c)
def _insert_flat_block(self, source_flat_block, symb_exec, flat_block_to_loc_key):
    """ Copies source_flat_block and sets its successors according to flat_block_to_loc_key
    :param flat_block_to_loc_key: dictionary mapping flat_blocks to respective loc_keys
    :param symb_exec: instance of current symbolic execution engine
    :param source_flat_block: flat_block to be inserted
    :return: dictionary mapping old successor loc_keys to the new ones
    """
    # we're not using redirect_successors after copying to avoid executing the same loops multiple times
    source_block = self.asmcfg.loc_key_to_block(
        source_flat_block.block_loc_key)
    # maps old loc_key -> (new loc_key, flat_block) for targets created here
    tobe_processed = {}
    new_flat_blocks = set()
    new_block_loc_key = flat_block_to_loc_key[source_flat_block]
    if self.out_asmcfg.loc_key_to_block(new_block_loc_key) is not None:
        raise Exception("Target loc_key is already associated to a block")
    new_block = AsmBlock(new_block_loc_key)
    # copy instructions
    for ln in source_block.lines:
        # deep-copy args so later target rewriting doesn't touch the source block
        tmp_ln = instruction_x86(ln.name, ln.mode,
                                 [i.copy() for i in ln.args],
                                 ln.additional_info)
        tmp_ln.b = ln.b
        tmp_ln.l = ln.l
        tmp_ln.offset = ln.offset
        new_block.addline(tmp_ln)
    constraints = source_block.bto
    # try to simplify the destination if it's a primary flattening block
    if not self.flat_loops[source_block.loc_key].is_default:
        logger.debug("current block is a part of primary loc_keys")
        # concrete successor as seen by the symbolic execution engine
        simplified_target = symb_exec.eval_expr(self.ircfg.IRDst)
        if isinstance(simplified_target, ExprInt):
            simplified_target = self.asmcfg.loc_db.get_offset_location(
                int(simplified_target))
        elif isinstance(simplified_target, ExprLoc):
            simplified_target = simplified_target.loc_key
        else:
            # there's probably a(n) (series of) unknown instruction(s) causing an implicit conditional assignment
            # such as CMOV or SBB->AND->ADD, prepend comparison + cond jump if it happens to be common, or add it to
            # ExtendedAsmCFG.extended_discovery and split flow on the final instruction
            # it's also possible that it's not related to any cff loop at all
            addr = self.asmcfg.loc_db.get_location_offset(
                source_flat_block.block_loc_key)
            addr = hex(addr) if addr else addr
            logger.warning(
                "Couldn't simplify loc_key %s at %s, continuing" %
                (str(source_flat_block.block_loc_key), addr))
            logger.warning("the simplified target is %s of instance %s" %
                           (simplified_target, type(simplified_target)))
            simplified_target = None
        if simplified_target:
            constraints = {AsmConstraintTo(simplified_target)}
            mode = self.asmcfg.mode
            # remove redundant comparison
            dp = DependencyGraph(self.ircfg, True)
            block_loc_key = source_block.loc_key
            res = next(
                dp.get(block_loc_key, {self.ircfg.IRDst}, None,
                       {block_loc_key}))
            for depnode in res.relevant_nodes:
                ind = depnode.line_nb
                # translate IR line number into an index of the copied asm lines
                ind -= (len(self.ircfg.blocks[block_loc_key]) -
                        len(new_block.lines))
                if new_block.lines[ind].name == "CMP":
                    new_block.lines.pop(ind)
            # the resolved destination replaces the original terminator
            new_block.lines[-1] = create_jump_instruction(
                mode, ExprLoc(simplified_target, mode))
    # copy constraints
    new_bto = set()
    for constraint in constraints:
        if not self.asmcfg.loc_key_to_block(constraint.loc_key):
            logger.debug("Skipping bad constraint %s" % constraint.loc_key)
            continue
        flat_block = self.flat_loops.get_block(constraint.loc_key,
                                               symb_exec, source_flat_block)
        if flat_block not in flat_block_to_loc_key:
            # first time we meet this successor: allocate its output loc_key
            new_flat_blocks.add(flat_block)
            new_loc_key = self.out_asmcfg.loc_db.add_location()
            tobe_processed[constraint.loc_key] = (new_loc_key, flat_block)
            flat_block_to_loc_key[flat_block] = new_loc_key
        else:
            new_loc_key = flat_block_to_loc_key[flat_block]
        new_bto.add(AsmConstraint(new_loc_key, constraint.c_t))
    new_block.bto = new_bto
    new_block.alignment = source_block.alignment
    # change jmp targets
    if new_block.lines:
        for ind, arg in enumerate(list(new_block.lines[-1].args)):
            if isinstance(arg, ExprLoc):
                if not self.asmcfg.loc_key_to_block(arg.loc_key):
                    logger.debug("Skipping bad constraint %s" % arg.loc_key)
                    continue
                new_target, flat_block = tobe_processed.get(
                    arg.loc_key, (None, None))
                if not new_target:
                    # successor not created above; look it up in the global map
                    flat_block = self.flat_loops.get_block(
                        arg.loc_key, symb_exec, source_flat_block)
                    new_target = flat_block_to_loc_key.get(flat_block)
                    # None in case of irrelevant calls
                logger.debug("new target: %s" % new_target)
                if new_target:
                    new_block.lines[-1].args[ind] = ExprLoc(
                        new_target, arg.size)
    self.out_asmcfg.add_block(new_block)
    return new_flat_blocks
def _process_jmp_table(self, cur_bloc, mn, attrib, loc_db, pool_bin, offsets_to_dis):
    # Recover an indirect jump table: symbolically derive the IRDst equation,
    # enumerate satisfying control-variable values with z3, and rebuild the
    # dispatcher as an explicit CMP/Jcc chain appended after cur_bloc.
    # TODO add support for jump tables with "AND cntrl_var, range" boundary check; such jmp tables were present only
    # in library functions in Stantinko samples
    # add current block to the asmcfg to make it accessible in the ircfg edges, add_block is called anyway right
    # after this callback, it will notice that the block has been already added
    self.add_block(cur_bloc)
    dst_address = loc_db.get_location_offset(cur_bloc.loc_key)
    logger.info("Possible jump table addr: 0x%x" % dst_address)
    ira = get_ira(mn, attrib)
    ir_arch = ira(loc_db)
    ircfg = ir_arch.new_ircfg_from_asmcfg(self)
    # the previous blocks should have exactly 1 predecessor dictating range
    predecessors = self.predecessors(cur_bloc.loc_key)
    if len(predecessors) != 1:
        logger.info("Expected exactly one predecessor")
        return
    predecessor = ircfg.blocks[predecessors.pop()]
    irdst_block = ircfg.blocks[cur_bloc.loc_key]
    # the asm block may have been split into several IR blocks; walk
    # successors until the block whose IRDst is the memory dereference
    if len(irdst_block.assignblks) != len(cur_bloc.lines):
        processed = set()
        todo = {irdst_block.loc_key}
        while not irdst_block.dst.is_mem():
            loc_key = todo.pop()
            if loc_key in processed:
                continue
            processed.add(loc_key)
            irdst_block = ircfg.blocks[loc_key]
            todo.update(ircfg.successors(loc_key))
    # we shouldn't stumble upon crashing segm and call operators even thought implicit is required to process
    # initial IRDst(mentioned operators cause crashes of the engine behind implicit) since we operate only on the
    # 2 crucial basic blocks. The predecessor contains range of the jump table, we use it to determine constructs
    # of the jump table and track back base code segment address assignment to target the msvc compiler and x64
    # architecture, other compilers use directly RIP related addressing to get the address.
    # get real predecessor
    asm_block = self.loc_key_to_block(predecessor.loc_key)
    if len(predecessor.assignblks) != len(asm_block.lines):
        processed = set()
        todo = {predecessor.loc_key}
        # advance until the IR predecessor actually leads into cur_bloc
        while cur_bloc.loc_key not in ircfg.successors(predecessor.loc_key):
            loc_key = todo.pop()
            if loc_key in processed:
                continue
            processed.add(loc_key)
            predecessor = ircfg.blocks[loc_key]
            todo.update(ircfg.successors(loc_key))
    # get jump_table_control_variable from predecessor
    dg = DependencyGraph(ircfg, implicit=True, apply_simp=True,
                         follow_mem=True, follow_call=False)
    jtcdg = JTCVariableDependencyGraph(predecessor.loc_key, ircfg,
                                       implicit=True, apply_simp=True,
                                       follow_mem=False, follow_call=False)
    dependency_result_iter = iter(jtcdg.get(irdst_block.loc_key,
                                            {ircfg.IRDst},
                                            len(predecessor.assignblks),
                                            {predecessor.loc_key}))
    solution_predecessor = next(dependency_result_iter)
    # jump table control variable
    jtc_var = jtcdg.jtc_var
    if not jtc_var:
        logger.info("couldn't determine single jump table control variable")
        return
    # get symbolic execution engine to be used in both predecessor and jmp table block
    symb_exec_both = MySymbolicExecutionEngine(pool_bin, jtc_var, ir_arch)
    try:
        # symbolically evaluate lines influencing IRDst of the predecessor leading to jtc_var
        for line_nb in sorted({node.line_nb
                               for node in solution_predecessor.relevant_nodes
                               if node.loc_key == predecessor.loc_key}):
            assign_blk = predecessor.assignblks[line_nb]
            symb_exec_both.eval_updt_assignblk(assign_blk)
    except (KeyError, TypeError):
        logger.error(
            "Couldn't symbolically eval predecessor of 0x%x" %
            loc_db.get_location_offset(cur_bloc.loc_key))
        # stantinko contains illegal unreachable dereferences prior jmp tables, such as
        # xor eax, eax; movsx eax, byte ptr [eax]
        return
    # get symbolic execution engine supporting binary memory dereference
    symb_exec_minimal = MySymbolicExecutionEngine(pool_bin, ir_arch,
                                                  symb_exec_both.symbols.copy())
    predecessor_irdst_equation = symb_exec_both.symbols[ircfg.IRDst]
    # get equation whose solutions solve the indirect jump
    irdst_block = ircfg.blocks[cur_bloc.loc_key]
    if len(irdst_block.assignblks) != len(cur_bloc.lines):
        processed = set()
        todo = {irdst_block.loc_key}
        # evaluate intermediate IR blocks on the way to the dereference
        while not irdst_block.dst.is_mem():
            symb_exec_both.eval_updt_irblock(irdst_block)
            loc_key = todo.pop()
            if loc_key in processed:
                continue
            processed.add(loc_key)
            irdst_block = ircfg.blocks[loc_key]
            todo.update(ircfg.successors(loc_key))
    irdst_equation = symb_exec_both.eval_updt_irblock(irdst_block)
    sizes = set()
    # prevent mem processing via raw arrays by using var ID instead
    # we also want to set a maximum boundary so slices don't cause the sat solver generate a huge number of results
    visitor = ExprVisitorCallbackTopToBottom(
        lambda x: self._eliminate_jtc_var_slice_cb(x, sizes, jtc_var))
    irdst_equation = visitor.visit(irdst_equation)
    predecessor_irdst_equation = visitor.visit(predecessor_irdst_equation)
    size_boundary = jtc_var.size
    # smallest slice size > 1 bounds the solver's search space
    sizes = sorted(filter(lambda x: x > 1, sizes))
    if sizes:
        size_boundary = sizes[0]
    jtc_var_id = ExprId("jtc_var", jtc_var.size)
    irdst_equation = irdst_equation.replace_expr({jtc_var: jtc_var_id})
    predecessor_irdst_equation = predecessor_irdst_equation.replace_expr(
        {jtc_var: jtc_var_id})
    # track possible CS base address dependency, ignore control variable from predecessor
    eliminated_jtc_var_equation = irdst_equation.replace_expr(
        {jtc_var_id: ExprInt(0, jtc_var_id.size)})
    evaluated_ejtc_var_equation = symb_exec_both.eval_expr(
        eliminated_jtc_var_equation)
    if not evaluated_ejtc_var_equation.is_int():
        # we need to determine code base
        dependencies = dg._follow_apply_cb(evaluated_ejtc_var_equation)
        expr_deps = {fexpr.element for fexpr in dependencies if fexpr.follow}
        dg_base = DependencyGraph(ircfg, implicit=False, apply_simp=True,
                                  follow_mem=True, follow_call=False)
        dependency_result_iter = iter(dg_base.get(cur_bloc.loc_key, expr_deps,
                                                  len(cur_bloc.lines),
                                                  {self.heads()[0]}))
        solution = next(dependency_result_iter)
        code_base_dict = {expr: solution.emul(ir_arch)[expr]
                          for expr in expr_deps}
        irdst_equation = irdst_equation.replace_expr(code_base_dict)
        predecessor_irdst_equation = predecessor_irdst_equation.replace_expr(
            code_base_dict)
    # we need backward slice of the jump table destination dependencies to retain the other independent assignments
    # during cmp chain assembling
    dependency_result = dg.get(cur_bloc.loc_key, {ircfg.IRDst},
                               len(cur_bloc.lines), {cur_bloc.loc_key})
    dependent_line_nbs = {}
    for solution in dependency_result:
        dependent_line_nbs.setdefault(solution.loc_key, set()).update(
            {dn.line_nb for dn in solution.relevant_nodes})
    cur_bloc_new_lines = []
    # keep only lines NOT in the backward slice; the slice itself is replaced
    # by the synthesized CMP/Jcc chain
    for loc_key, lines in dependent_line_nbs.items():
        for line_nb, assignblk in enumerate(ircfg.blocks[loc_key].assignblks):
            if line_nb not in lines:
                symb_exec_minimal.eval_assignblk(assignblk)
                cur_bloc_new_lines.append(assignblk.instr)
    comparison_reg_id = None
    comparison_reg_value = None
    if jtc_var not in symb_exec_minimal.symbols.symbols_id:
        comparison_reg_id = jtc_var
        comparison_reg_value = jtc_var
    else:
        # find a register/memory symbol carrying jtc_var that evaluates to a
        # concrete value once jtc_var is fixed — that's what the CMPs test
        for symbol, comparison_reg_value in symb_exec_minimal.symbols.symbols_id.items():
            if jtc_var in comparison_reg_value and \
                    (symbol.is_mem() or
                     (symbol.is_id() and
                      symbol.name not in ["RIP", "EIP", "zf", "nf", "pf",
                                          "of", "cf", "af", "df",
                                          ircfg.IRDst.name])):
                replaced_jtcv = comparison_reg_value.replace_expr(
                    {jtc_var: ExprInt(0, jtc_var.size)})
                if isinstance(symb_exec_minimal.eval_expr(replaced_jtcv),
                              ExprInt):
                    comparison_reg_id = symbol
                    break
    if not comparison_reg_id or not comparison_reg_value:
        logger.debug(
            "Couldn't find any candidate for comparison register at 0x%x" %
            loc_db.get_location_offset(cur_bloc.loc_key))
        return
    from miasm.ir.translators import Translator
    import z3
    translator = Translator.to_language("z3")
    solver = z3.Solver()
    logger.debug("predecessor_irdst_equation: %s" %
                 str(predecessor_irdst_equation))
    logger.debug(("dst_address: 0x%x" % dst_address))
    logger.debug(("jump_table_control_variable: %s" % str(jtc_var)))
    # constrain the predecessor's IRDst to actually reach this table
    solver.add(translator.from_expr(predecessor_irdst_equation) == dst_address)
    translated_jtc_var = translator.from_expr(jtc_var_id)
    solver.add(translated_jtc_var >= 0)
    solver.add(translated_jtc_var < 2 ** (size_boundary - 1) - 1)
    if solver.check() != z3.sat:
        logger.debug("Couldn't find at least one jump table control variable")
        return
    dbg_destinations = set()
    next_loc_key = new_block_loc_key = loc_db.add_location()
    logger.debug("comparison_reg_id: %s" % str(comparison_reg_id))
    dst_ranges = {}
    counter = 0
    # enumerate model values of the control variable (capped at 500)
    while counter < 500:
        val = solver.model()[translated_jtc_var].as_long()
        final_irdst_equation = irdst_equation.replace_expr(
            {jtc_var_id: ExprInt(val, jtc_var_id.size)})
        final_dst = int(symb_exec_both.eval_expr(final_irdst_equation))
        # concrete value of the compared register for this control value
        cmp_reg_val = comparison_reg_value.replace_expr(
            {jtc_var: ExprInt(val, jtc_var.size)})
        cmp_reg_val = int(symb_exec_minimal.eval_expr(cmp_reg_val))
        dst_ranges[final_dst] = dst_ranges.get(final_dst, interval()).union(
            [(cmp_reg_val, cmp_reg_val)])
        dbg_destinations.add(final_dst)
        offsets_to_dis.add(final_dst)
        # exclude this solution and ask for the next one
        solver.add(translated_jtc_var !=
                   translator.from_expr(ExprInt(val, jtc_var_id.size)))
        if solver.check() != z3.sat:
            break
        counter += 1
    if counter == 500:
        raise RuntimeError("Interrupted; there might be a broken slice")
    # emit one CMP/Jcc (or a JB/JBE range pair) per destination interval
    for dst, interv in dst_ranges.items():
        cond_target_loc_key = loc_db.get_or_create_offset_location(dst)
        for lower, upper in interv:
            lower = ExprInt(lower, self.mode)
            upper = ExprInt(upper, self.mode)
            new_asm_block = AsmBlock(new_block_loc_key)
            new_block_loc_key = loc_db.add_location()
            if lower == upper:
                # single value: one equality test
                new_asm_block.lines = create_cmp_j_instructions(
                    self.mode, comparison_reg_id, lower,
                    ExprLoc(cond_target_loc_key, self.mode), "JZ")
                new_asm_block.add_cst(cond_target_loc_key, "c_to")
                new_asm_block.add_cst(new_block_loc_key, "c_next")
            else:
                upper_check_loc_key = loc_db.add_location()
                # lower boundary check
                new_asm_block.lines = create_cmp_j_instructions(
                    self.mode, comparison_reg_id, lower,
                    ExprLoc(new_block_loc_key, self.mode), "JB")
                new_asm_block.add_cst(new_block_loc_key, "c_to")
                new_asm_block.add_cst(upper_check_loc_key, "c_next")
                # upper boundary check
                upper_check_block = AsmBlock(upper_check_loc_key)
                upper_check_block.lines = create_cmp_j_instructions(
                    self.mode, comparison_reg_id, upper,
                    ExprLoc(cond_target_loc_key, self.mode), "JBE")
                upper_check_block.add_cst(cond_target_loc_key, "c_to")
                upper_check_block.add_cst(new_block_loc_key, "c_next")
                self.add_block(upper_check_block)
            self.add_block(new_asm_block)
    # trigger last jump unconditionally
    new_asm_block.bto = {AsmConstraintTo(cond_target_loc_key)}
    new_asm_block.lines = [create_jump_instruction(
        self.mode, ExprLoc(cond_target_loc_key, self.mode))]
    cur_bloc.lines = cur_bloc_new_lines
    cur_bloc.add_cst(next_loc_key, "c_next")
    if not cur_bloc.lines:
        cur_bloc.lines = [create_nop(self.mode)]
    self.jmp_table_loc_keys.add(cur_bloc.loc_key)
    logger.debug("destinations: %s" %
                 pformat([hex(i or 0) for i in dbg_destinations]))
    logger.debug("blocks: %d" % counter)
# Test-script fragment: checks AsmCFG block lookup, bad-block detection,
# and sanity checking (asmcfg/first_block/my_block/mdis come from earlier
# in the script, outside this view).
assert len(asmcfg) == 3
assert asmcfg.loc_key_to_block(first_block.loc_key) == first_block
assert asmcfg.loc_key_to_block(my_block.loc_key) == my_block

## Bad asmcfg
assert len(list(asmcfg.get_bad_blocks())) == 0
assert len(list(asmcfg.get_bad_blocks_predecessors())) == 0

### Add a bad block, not linked
testlabel_bad = mdis.loc_db.get_or_create_name_location("testlabel_bad")
my_bad_block = AsmBlockBad(testlabel_bad)
asmcfg.add_block(my_bad_block)
assert list(asmcfg.get_bad_blocks()) == [my_bad_block]
# still unlinked, so no predecessors yet
assert len(list(asmcfg.get_bad_blocks_predecessors())) == 0

### Link the bad block and update edges
### Indeed, a sub-element has been modified (bto from a block from asmcfg)
my_block.bto.add(AsmConstraintTo(my_bad_block.loc_key))
asmcfg.rebuild_edges()
assert list(asmcfg.get_bad_blocks_predecessors()) == [my_block.loc_key]

### Test strict option
# a self-loop makes my_block a non-strict predecessor only
my_block.bto.add(AsmConstraintTo(my_block.loc_key))
asmcfg.rebuild_edges()
assert list(asmcfg.get_bad_blocks_predecessors(strict=False)) == [my_block.loc_key]
assert len(list(asmcfg.get_bad_blocks_predecessors(strict=True))) == 0

## Sanity check
asmcfg.sanity_check()

### Next on itself
testlabel_nextitself = mdis.loc_db.get_or_create_name_location("testlabel_nextitself")
my_block_ni = AsmBlock(testlabel_nextitself)
my_block_ni.bto.add(AsmConstraintNext(my_block_ni.loc_key))
asmcfg.add_block(my_block_ni)