def __init__(self, binary_name: str, function_addr: int): """ 尝试去除ollvm中的flat混淆 :param binary_name: binary文件名 :param function_addr: 被混淆的目标函数地址 """ # 1.加载程序 self.p = angr.Project(binary_name, load_options={'auto_load_libs': False}) # 2.获取函数CFG self.whole_cfg = self.p.analyses.CFG() self.func = self.whole_cfg.functions[function_addr] self.func.normalize() self.cfg = am_graph.to_supergraph(self.func.transition_graph) # 3.初始化节点变量 # 入度为0即序言块 self.prologue_node = None # 出度为0即返回块 self.ret_node = None # 序言的后继即主分发器 self.main_dispatcher_node = None # 主分发器的前驱(除序言外)即预处理器 self.pre_dispatcher_node = None # 预处理器的前驱即真实块 self.relevant_nodes = [] # 其余为无用块 self.nop_nodes = [] # 除了无用块外的所有块都为有用块 self.useful_nodes = [] self.useful_nodes_except_ret = [] # 存放nodes的信息 self.nodes_info = {} # 存放恢复出来的控制流信息 self.flows = [] # 用于记录在恢复flow时指定block的初试state # 正常来说没有必要,用于O3混淆下 self.next_state = None # 配置选项 self.use_unicorn = False
    # NOTE(review): dict opening is outside this view; keys below map mnemonic
    # tags to single x86 opcode bytes used when patching.
    'po': '\x8B',   # NOTE(review): presumably x86 MOV r32, r/m32 (0x8B) — key name unclear, confirm
    's': '\x88',    # NOTE(review): presumably x86 MOV r/m8, r8 (0x88) — confirm
    'nop': '\x90',  # x86 NOP
    'jmp': '\xE9',  # x86 JMP rel32
    'j': '\x0F'     # first byte of the two-byte Jcc rel32 encodings (0x0F 0x8x)
}

# --- script entry: load the binary and locate the flattened function's key blocks ---
filename = sys.argv[1]
start = int(sys.argv[2], 16)

project = angr.Project(filename, load_options={'auto_load_libs': False})
cfg = project.analyses.CFGFast(
    normalize=True)  # do normalize to avoid overlapping blocks
target_function = cfg.functions.get(start)
# A super transition graph is a graph that looks like IDA Pro's CFG
supergraph = am_graph.to_supergraph(target_function.transition_graph)

# page-align (4 KiB) the mapped base for virtual-address -> file-offset math
base_addr = project.loader.main_object.mapped_base >> 12 << 12

# get prologue_node and retn_node:
# in-degree 0 => prologue block, out-degree 0 => return block
prologue_node = None
for node in supergraph.nodes():
    if supergraph.in_degree(node) == 0:
        prologue_node = node
    if supergraph.out_degree(node) == 0:
        # NOTE(review): retn_node stays unbound if no node has out-degree 0 — later use would raise NameError
        retn_node = node

# sanity check: the prologue must exist and sit at the requested function address
if prologue_node is None or prologue_node.addr != start:
    print("Something must be wrong...")
    sys.exit(-1)
def main():
    """
    Entry point of the deflat script: recover the real control flow of an
    OLLVM control-flow-flattened function and write a patched copy of the
    binary to ``<file>_recovered``.

    Steps:
      1. Build the function CFG and identify prologue / main dispatcher /
         pre-dispatcher / return / relevant blocks.
      2. Symbolically execute each relevant block (forcing both outcomes of
         conditional moves) to discover its real successor(s).
      3. NOP out the dispatcher machinery and patch branches so each relevant
         block jumps directly to its recovered successor(s).
    """
    parser = argparse.ArgumentParser(description="deflat control flow script")
    parser.add_argument("-f", "--file", help="binary to analyze")
    parser.add_argument(
        "--addr", help="address of target function in hex format")
    args = parser.parse_args()

    if args.file is None or args.addr is None:
        parser.print_help()
        sys.exit(0)

    filename = args.file
    start = int(args.addr, 16)

    project = angr.Project(filename, load_options={'auto_load_libs': False})
    # do normalize to avoid overlapping blocks, disable force_complete_scan to avoid possible "wrong" blocks
    cfg = project.analyses.CFGFast(normalize=True, force_complete_scan=False)
    target_function = cfg.functions.get(start)
    # A super transition graph is a graph that looks like IDA Pro's CFG
    supergraph = am_graph.to_supergraph(target_function.transition_graph)

    # page-align (4 KiB) the mapped base for virtual-address -> file-offset math
    base_addr = project.loader.main_object.mapped_base >> 12 << 12

    # get prologue_node and retn_node:
    # in-degree 0 => prologue; out-degree 0 with no pending branches => return block
    prologue_node = None
    for node in supergraph.nodes():
        if supergraph.in_degree(node) == 0:
            prologue_node = node
        if supergraph.out_degree(node) == 0 and len(node.out_branches) == 0:
            # NOTE(review): retn_node stays unbound if no such node exists — would raise NameError below
            retn_node = node

    if prologue_node is None or prologue_node.addr != start:
        print("Something must be wrong...")
        sys.exit(-1)

    # the prologue's (sole) successor is the main dispatcher; the dispatcher's
    # other predecessor (besides the prologue) is the pre-dispatcher
    main_dispatcher_node = list(supergraph.successors(prologue_node))[0]
    for node in supergraph.predecessors(main_dispatcher_node):
        if node.addr != prologue_node.addr:
            pre_dispatcher_node = node
            break

    relevant_nodes, nop_nodes = get_relevant_nop_nodes(
        supergraph, pre_dispatcher_node, prologue_node, retn_node)
    print('*******************relevant blocks************************')
    print('prologue: %#x' % start)
    print('main_dispatcher: %#x' % main_dispatcher_node.addr)
    print('pre_dispatcher: %#x' % pre_dispatcher_node.addr)
    print('retn: %#x' % retn_node.addr)
    relevant_block_addrs = [node.addr for node in relevant_nodes]
    print('relevant_blocks:', [hex(addr) for addr in relevant_block_addrs])

    print('*******************symbolic execution*********************')
    # execute every relevant block plus the prologue, but not the return block
    relevants = relevant_nodes
    relevants.append(prologue_node)
    relevants_without_retn = list(relevants)
    relevants.append(retn_node)
    relevant_block_addrs.extend([prologue_node.addr, retn_node.addr])

    flow = defaultdict(list)        # recovered edges: node -> successor addresses
    patch_instrs = {}               # node -> the conditional-move/set instruction to rewrite
    for relevant in relevants_without_retn:
        print('-------------------dse %#x---------------------' % relevant.addr)
        block = project.factory.block(relevant.addr, size=relevant.size)
        has_branches = False
        hook_addrs = set([])        # call sites to hook (skipped) during symbolic execution
        for ins in block.capstone.insns:
            if project.arch.name in ARCH_X86:
                # a cmovXX marks a two-way block; calls get hooked out
                if ins.insn.mnemonic.startswith('cmov'):
                    # only record the first one
                    if relevant not in patch_instrs:
                        patch_instrs[relevant] = ins
                        has_branches = True
                elif ins.insn.mnemonic.startswith('call'):
                    hook_addrs.add(ins.insn.address)
            elif project.arch.name in ARCH_ARM:
                # conditional mov (movXX, but not plain mov) marks a two-way block
                if ins.insn.mnemonic != 'mov' and ins.insn.mnemonic.startswith('mov'):
                    if relevant not in patch_instrs:
                        patch_instrs[relevant] = ins
                        has_branches = True
                elif ins.insn.mnemonic in {'bl', 'blx'}:
                    hook_addrs.add(ins.insn.address)
            elif project.arch.name in ARCH_ARM64:
                # csetXX marks a two-way block on aarch64
                if ins.insn.mnemonic.startswith('cset'):
                    if relevant not in patch_instrs:
                        patch_instrs[relevant] = ins
                        has_branches = True
                elif ins.insn.mnemonic in {'bl', 'blr'}:
                    hook_addrs.add(ins.insn.address)

        if has_branches:
            # force the branch predicate to 1 and then 0 to discover both successors
            tmp_addr = symbolic_execution(project, relevant_block_addrs,
                                          relevant.addr, hook_addrs, claripy.BVV(1, 1), True)
            if tmp_addr is not None:
                flow[relevant].append(tmp_addr)
            tmp_addr = symbolic_execution(project, relevant_block_addrs,
                                          relevant.addr, hook_addrs, claripy.BVV(0, 1), True)
            if tmp_addr is not None:
                flow[relevant].append(tmp_addr)
        else:
            # single-successor block: one unconstrained run is enough
            tmp_addr = symbolic_execution(project, relevant_block_addrs,
                                          relevant.addr, hook_addrs)
            if tmp_addr is not None:
                flow[relevant].append(tmp_addr)

    print('************************flow******************************')
    for k, v in flow.items():
        print('%#x: ' % k.addr, [hex(child) for child in v])
    print('%#x: ' % retn_node.addr, [])

    print('************************patch*****************************')
    with open(filename, 'rb') as origin:
        # Attention: can't transform to str by calling decode() directly. so use bytearray instead.
        origin_data = bytearray(origin.read())
        origin_data_len = len(origin_data)

    recovery_file = filename + '_recovered'
    recovery = open(recovery_file, 'wb')

    # patch irrelevant blocks: overwrite the dispatcher machinery with NOPs
    for nop_node in nop_nodes:
        fill_nop(origin_data, nop_node.addr-base_addr,
                 nop_node.size, project.arch)

    # remove unnecessary control flows
    for parent, childs in flow.items():
        if len(childs) == 1:
            # single successor: replace the block's last instruction with an
            # unconditional jump to that successor
            parent_block = project.factory.block(parent.addr, size=parent.size)
            last_instr = parent_block.capstone.insns[-1]
            file_offset = last_instr.address - base_addr
            # patch the last instruction to jmp
            if project.arch.name in ARCH_X86:
                fill_nop(origin_data, file_offset,
                         last_instr.size, project.arch)
                patch_value = ins_j_jmp_hex_x86(last_instr.address, childs[0], 'jmp')
            elif project.arch.name in ARCH_ARM:
                patch_value = ins_b_jmp_hex_arm(last_instr.address, childs[0], 'b')
                if project.arch.memory_endness == "Iend_BE":
                    patch_value = patch_value[::-1]
            elif project.arch.name in ARCH_ARM64:
                # FIXME: For aarch64/arm64, the last instruction of prologue seems useful in some cases, so patch the next instruction instead.
                if parent.addr == start:
                    file_offset += 4
                    patch_value = ins_b_jmp_hex_arm64(last_instr.address+4, childs[0], 'b')
                else:
                    patch_value = ins_b_jmp_hex_arm64(last_instr.address, childs[0], 'b')
                if project.arch.memory_endness == "Iend_BE":
                    patch_value = patch_value[::-1]
            patch_instruction(origin_data, file_offset, patch_value)
        else:
            # two successors: rewrite the conditional move/set into a real
            # conditional branch + unconditional branch pair
            instr = patch_instrs[parent]
            file_offset = instr.address - base_addr
            # patch instructions starting from `cmovx` to the end of block
            fill_nop(origin_data, file_offset, parent.addr +
                     parent.size - base_addr - file_offset, project.arch)
            if project.arch.name in ARCH_X86:
                # patch the cmovx instruction to jx instruction
                patch_value = ins_j_jmp_hex_x86(instr.address, childs[0], instr.mnemonic[len('cmov'):])
                patch_instruction(origin_data, file_offset, patch_value)

                file_offset += 6  # jcc rel32 is 6 bytes (0x0F 0x8x + rel32)
                # patch the next instruction to jmp instruction
                patch_value = ins_j_jmp_hex_x86(instr.address+6, childs[1], 'jmp')
                patch_instruction(origin_data, file_offset, patch_value)
            elif project.arch.name in ARCH_ARM:
                # patch the movx instruction to bx instruction
                bx_cond = 'b' + instr.mnemonic[len('mov'):]
                patch_value = ins_b_jmp_hex_arm(instr.address, childs[0], bx_cond)
                if project.arch.memory_endness == 'Iend_BE':
                    patch_value = patch_value[::-1]
                patch_instruction(origin_data, file_offset, patch_value)

                file_offset += 4  # fixed 4-byte ARM instruction width
                # patch the next instruction to b instruction
                patch_value = ins_b_jmp_hex_arm(instr.address+4, childs[1], 'b')
                if project.arch.memory_endness == 'Iend_BE':
                    patch_value = patch_value[::-1]
                patch_instruction(origin_data, file_offset, patch_value)
            elif project.arch.name in ARCH_ARM64:
                # patch the cset.xx instruction to bx instruction;
                # the condition is the last operand of cset (e.g. "cset w8, eq")
                bx_cond = instr.op_str.split(',')[-1].strip()
                patch_value = ins_b_jmp_hex_arm64(instr.address, childs[0], bx_cond)
                if project.arch.memory_endness == 'Iend_BE':
                    patch_value = patch_value[::-1]
                patch_instruction(origin_data, file_offset, patch_value)

                file_offset += 4  # fixed 4-byte AArch64 instruction width
                # patch the next instruction to b instruction
                patch_value = ins_b_jmp_hex_arm64(instr.address+4, childs[1], 'b')
                if project.arch.memory_endness == 'Iend_BE':
                    patch_value = patch_value[::-1]
                patch_instruction(origin_data, file_offset, patch_value)

    # patching must never grow/shrink the image, only overwrite bytes in place
    assert len(origin_data) == origin_data_len, "Error: size of data changed!!!"
    recovery.write(origin_data)
    recovery.close()
    print('Successful! The recovered file: %s' % recovery_file)
def main():
    """
    Entry point of the (older) debogus script: strip OLLVM bogus-control-flow
    by symbolically executing the target function, NOPing every block that was
    never reached, and redirecting branches past dead successors. Writes the
    patched binary to ``<file>_recovered``.

    Usage: ``python debougs.py filename function_address(hex)``
    """
    if len(sys.argv) != 3:
        print('Usage: python debougs.py filename function_address(hex)')
        exit(0)

    filename = sys.argv[1]
    start = int(sys.argv[2], 16)

    project = angr.Project(filename, load_options={'auto_load_libs': False})
    # normalize to avoid overlapping blocks
    cfg = project.analyses.CFGFast(normalize=True)
    target_function = cfg.functions.get(start)
    supergraph = am_graph.to_supergraph(target_function.transition_graph)

    # page-align (4 KiB) the mapped base for virtual-address -> file-offset math
    base_addr = project.loader.main_object.mapped_base >> 12 << 12

    state = project.factory.blank_state(
        addr=target_function.addr, remove_options={angr.sim_options.LAZY_SOLVES})
    # addresses of every block actually reached during symbolic execution
    flow = set()
    flow.add(target_function.addr)

    print('*******************symbolic execution*********************')
    # step until no active states remain, collecting every visited block address
    sm = project.factory.simulation_manager(state)
    sm.step()
    while len(sm.active) > 0:
        for active in sm.active:
            flow.add(active.addr)
        sm.step()

    print('executed blocks: ', list(map(hex, flow)))

    print('************************patch******************************')
    with open(filename, 'rb') as origin:
        origin_data = bytearray(origin.read())
        origin_data_len = len(origin_data)

    patch_nodes = set()  # successors already NOPed, so they are skipped later
    for node in supergraph.nodes():
        if node.addr in patch_nodes:
            continue
        if node.addr not in flow:
            # patch unnecessary node (never executed -> bogus): fill with NOPs
            file_offset = node.addr - base_addr
            # NOTE(review): this fill_nop takes no arch argument, unlike the
            # newer script below — confirm against this script's own helper
            fill_nop(origin_data, file_offset, node.size)
        else:
            suc_nodes = list(supergraph.successors(node))
            jmp_targets = []  # successors that were actually executed
            for suc_node in suc_nodes:
                if suc_node.addr in flow:
                    jmp_targets.append(suc_node.addr)
                else:
                    # patch unnecessary suc_node
                    file_offset = suc_node.addr - base_addr
                    fill_nop(origin_data, file_offset, suc_node.size)
                    patch_nodes.add(suc_node.addr)
            # patch jmp instruction: a conditional branch with exactly one live
            # target becomes nop + jmp rel32 (6 bytes at the end of the block)
            if len(suc_nodes) > 1 and len(jmp_targets) == 1:
                file_offset = node.addr + node.size - 6 - base_addr
                patch_byte(origin_data, file_offset, opcodes['nop'])
                patch_byte(origin_data, file_offset + 1, opcodes['jmp'])
                # rel32 displacement is relative to the end of the block
                fill_jmp_offset(origin_data, file_offset + 2,
                                jmp_targets[0] - (node.addr + node.size))

    # patching must never grow/shrink the image, only overwrite bytes in place
    assert len(
        origin_data) == origin_data_len, "Error: size of data changed!!!"

    recovery_file = filename + '_recovered'
    with open(recovery_file, 'wb') as recovery:
        recovery.write(origin_data)
    print('Successful! The recovered file: %s' % recovery_file)
def main():
    """
    Entry point of the debogus script (arch-aware version): strip OLLVM
    bogus-control-flow by symbolically executing the target function, NOPing
    every block that was never reached, and rewriting branches whose only live
    target survives. Supports x86, ARM and AArch64. Writes the patched binary
    to ``<file>_recovered``.
    """
    parser = argparse.ArgumentParser(description="debogus control flow script")
    parser.add_argument("-f", "--file", help="binary to analyze")
    parser.add_argument("--addr", help="address of target function in hex format")
    args = parser.parse_args()

    if args.file is None or args.addr is None:
        parser.print_help()
        sys.exit(0)

    filename = args.file
    start = int(args.addr, 16)

    project = angr.Project(filename, load_options={'auto_load_libs': False})
    # normalize to avoid overlapping blocks; disable force_complete_scan to
    # avoid possible "wrong" blocks
    cfg = project.analyses.CFGFast(normalize=True, force_complete_scan=False)
    target_function = cfg.functions.get(start)
    supergraph = am_graph.to_supergraph(target_function.transition_graph)

    # page-align (4 KiB) the mapped base for virtual-address -> file-offset math
    base_addr = project.loader.main_object.mapped_base >> 12 << 12

    state = project.factory.blank_state(
        addr=target_function.addr, remove_options={angr.sim_options.LAZY_SOLVES})
    # addresses of every block actually reached during symbolic execution
    flow = set()
    flow.add(target_function.addr)

    print('*******************symbolic execution*********************')
    # step until no active states remain, collecting every visited block address
    sm = project.factory.simulation_manager(state)
    sm.step()
    while len(sm.active) > 0:
        for active in sm.active:
            flow.add(active.addr)
        sm.step()

    print('executed blocks: ', list(map(hex, flow)))

    print('************************patch******************************')
    with open(filename, 'rb') as origin:
        origin_data = bytearray(origin.read())
        origin_data_len = len(origin_data)

    patch_nodes = set()  # successors already NOPed, so they are skipped later
    for node in supergraph.nodes():
        if node.addr in patch_nodes:
            continue
        if node.addr not in flow:
            # patch unnecessary node (never executed -> bogus): fill with NOPs
            file_offset = node.addr - base_addr
            fill_nop(origin_data, file_offset, node.size, project.arch)
        else:
            suc_nodes = list(supergraph.successors(node))
            jmp_targets = []  # successors that were actually executed
            for suc_node in suc_nodes:
                if suc_node.addr in flow:
                    jmp_targets.append(suc_node.addr)
                else:
                    # patch unnecessary suc_node
                    file_offset = suc_node.addr - base_addr
                    fill_nop(origin_data, file_offset,
                             suc_node.size, project.arch)
                    patch_nodes.add(suc_node.addr)
            # patch jmp instruction: a conditional branch with exactly one live
            # target becomes an unconditional jump to that target
            if len(suc_nodes) > 1 and len(jmp_targets) == 1:
                if project.arch.name in ARCH_X86:
                    # last 6 bytes of the block: nop + jmp rel32
                    file_offset = node.addr + node.size - 6 - base_addr
                    # nop + jmp
                    patch_value = OPCODES['x86']['nop'] + ins_j_jmp_hex_x86(
                        node.addr + node.size - 5, jmp_targets[0], 'jmp')
                    patch_instruction(origin_data, file_offset, patch_value)
                elif project.arch.name in ARCH_ARM:
                    # last 4-byte instruction of the block becomes b <target>
                    file_offset = node.addr + node.size - 4 - base_addr
                    patch_value = ins_b_jmp_hex_arm(node.addr + node.size - 4,
                                                    jmp_targets[0], 'b')
                    if project.arch.memory_endness == 'Iend_BE':
                        patch_value = patch_value[::-1]
                    patch_instruction(origin_data, file_offset, patch_value)
                elif project.arch.name in ARCH_ARM64:
                    # last 4-byte instruction of the block becomes b <target>
                    file_offset = node.addr + node.size - 4 - base_addr
                    patch_value = ins_b_jmp_hex_arm64(
                        node.addr + node.size - 4, jmp_targets[0], 'b')
                    if project.arch.memory_endness == 'Iend_BE':
                        patch_value = patch_value[::-1]
                    patch_instruction(origin_data, file_offset, patch_value)

    # patching must never grow/shrink the image, only overwrite bytes in place
    assert len(
        origin_data) == origin_data_len, "Error: size of data changed!!!"

    recovery_file = filename + '_recovered'
    with open(recovery_file, 'wb') as recovery:
        recovery.write(origin_data)
    print('Successful! The recovered file: %s' % recovery_file)