def _imm(self, imm, op_size, hexa, section=None, print_data=True,
         force_dont_print_data=False):
    if self.gctx.capstone_string != 0:
        hexa = True

    if hexa:
        imm = unsigned(imm)

    label_printed = self._label(imm, print_colon=False)

    if label_printed:
        ty = self._dis.mem.get_type(imm)
        # ty == -1: from the terminal (with -x) there are no xrefs if
        # the file was loaded without a database.
        if imm in self._dis.xrefs and ty != MEM_UNK and \
                ty != MEM_ASCII or ty == -1:
            return True
        if ty == MEM_ASCII:
            print_data = True
            force_dont_print_data = False

    if section is None:
        section = self._binary.get_section(imm)

    if section is not None and section.start == 0:
        section = None

    # For a raw file, if the raw base is 0, the immediate is considered
    # an address only if it's in the symbols list.
    raw_base_zero = self._binary.type == T_BIN_RAW and self.gctx.raw_base == 0

    if section is not None and not raw_base_zero:
        if not label_printed:
            self._address(imm, print_colon=False, notprefix=True)

        if not force_dont_print_data and print_data:
            s = self._binary.get_string(imm, self.gctx.max_data_size)
            if s is not None:
                self._add(" ")
                self._string('"' + s + '"')

        return True

    if label_printed:
        return True

    if op_size == 1:
        self._string("'%s'" % get_char(imm))
    elif hexa:
        self._add(hex(imm))
    else:
        self._add(str(imm))

        if imm > 0:
            if op_size == 4:
                packed = struct.pack("<L", imm)
            elif op_size == 8:
                packed = struct.pack("<Q", imm)
            else:
                return True
            if set(packed).issubset(BYTES_PRINTABLE_SET):
                self._string(" \"" + "".join(map(chr, packed)) + "\"")
                return False

        # Return True because capstone prints immediates in hexa and
        # the value will be repeated in a comment: sometimes it's
        # better to have the value in hexa.
        return True

    return False
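# A minimal standalone sketch (not part of this module) of the printable-
# immediate trick used in _imm above: pack the value little-endian and, if
# every byte is printable ASCII, show it as a string. The helper name and
# the printable set below are illustrative assumptions, not this module's API.

import string
import struct

# Assumed printable set: string.printable minus \t\n\r\v\f (space is kept).
_PRINTABLE = set(string.printable[:-5].encode())

def printable_immediate(imm, op_size):
    """Return the ASCII string hidden in imm, or None."""
    fmt = {4: "<L", 8: "<Q"}.get(op_size)
    if fmt is None or imm <= 0 or imm >= (1 << (op_size * 8)):
        return None
    packed = struct.pack(fmt, imm)
    if set(packed).issubset(_PRINTABLE):
        return packed.decode("ascii")
    return None

# Example: 0x41424344 packs little-endian to b"DCBA".
assert printable_immediate(0x41424344, 4) == "DCBA"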
def analyze_operands(self, i, func_obj):
    b = self.dis.binary

    for op in i.operands:
        if op.type == self.CS_OP_IMM:
            val = unsigned(op.value.imm)

        elif op.type == self.CS_OP_MEM and op.mem.disp != 0:

            if self.is_x86:
                if op.mem.segment != 0:
                    continue
                if op.mem.index == 0:
                    # rip/eip relative: the base register points to the
                    # next instruction.
                    if op.mem.base == self.X86_REG_EIP or \
                            op.mem.base == self.X86_REG_RIP:
                        val = i.address + i.size + unsigned(op.mem.disp)

                    # Check if it's a stack variable
                    elif op.mem.base == self.X86_REG_EBP or \
                            op.mem.base == self.X86_REG_RBP:
                        if func_obj is not None:
                            ty = self.dis.mem.find_type(op.size)
                            func_obj[FUNC_VARS][op.mem.disp] = [ty, None]
                        # Continue the loop !!
                        continue
                    else:
                        val = unsigned(op.mem.disp)
                else:
                    val = unsigned(op.mem.disp)

            # TODO: stack variables for arm/mips

            elif self.is_arm:
                if op.mem.index == 0 and op.mem.base == self.ARM_REG_PC:
                    val = i.address + i.size * 2 + op.mem.disp
                else:
                    val = op.mem.disp

            elif self.is_mips:
                if op.mem.base == self.MIPS_REG_GP:
                    if self.dis.mips_gp == -1:
                        continue
                    val = op.mem.disp + self.dis.mips_gp
                else:
                    val = op.mem.disp

        else:
            continue

        s = b.get_section(val)
        if s is None or s.start == 0:
            continue

        self.dis.add_xref(i.address, val)

        if not self.dis.mem.exists(val):
            sz = op.size if self.has_op_size else self.default_size
            deref = s.read_int(val, sz)

            # If (*val) is an address
            if deref is not None and b.is_address(deref):
                ty = MEM_OFFSET
                self.dis.add_xref(val, deref)

                if not self.dis.mem.exists(deref):
                    self.dis.mem.add(deref, 1, MEM_UNK)

                    # Do an analysis on this value.
                    if deref not in self.pending and \
                            deref not in self.pending_not_curr and \
                            self.first_inst_are_code(deref):
                        self.pending_not_curr.add(deref)
                        self.msg.put(
                            (deref, self.has_prolog(deref), False, True, None))
            else:
                # Check if this is an address to a string
                sz = b.is_string(val)
                if sz != 0:
                    ty = MEM_ASCII
                else:
                    sz = op.size if self.has_op_size else self.default_size
                    if op.type == self.CS_OP_MEM:
                        ty = self.dis.mem.find_type(sz)
                    else:
                        ty = MEM_UNK

            self.dis.mem.add(val, sz, ty)

            if ty == MEM_UNK:
                # Do an analysis on this value: if this is not code,
                # nothing will be done (jumps and calls are already
                # analyzed in analyze_flow).
                if val not in self.pending and \
                        not (self.is_jump(i) or self.is_call(i)) and \
                        val not in self.pending_not_curr and \
                        self.first_inst_are_code(val):
                    self.pending_not_curr.add(val)
                    self.msg.put(
                        (val, self.has_prolog(val), False, True, None))
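# A small sketch of the rip/eip-relative address computation done in
# analyze_operands above: on x86, [rip + disp] resolves relative to the
# *next* instruction, i.e. address + size. The function name and the
# concrete values below are made up for illustration.

def rip_relative_target(inst_address, inst_size, disp):
    # Effective address = address of the following instruction + displacement.
    return inst_address + inst_size + disp

# e.g. a 7-byte "lea rax, [rip + 0x2f39]" encoded at 0x4004c0:
assert rip_relative_target(0x4004c0, 7, 0x2f39) == 0x403400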
def __sub_analyze_flow(self, entry, inner_code, add_if_code):
    if self.dis.binary.get_section(entry) is None:
        return -1

    stack = [entry]
    has_ret = False

    # If entry is not "code", we have to roll back the added xrefs.
    has_bad_inst = False
    if add_if_code:
        added_xrefs = []

    while stack:
        ad = stack.pop()
        inst = self.disasm(ad)

        if inst is None:
            has_bad_inst = True
            if add_if_code:
                break
            continue

        if ad in inner_code:
            continue

        inner_code[ad] = inst

        if self.is_ret(inst):
            self.__add_prefetch(inner_code, inst)
            has_ret = True

        elif self.is_uncond_jump(inst):
            self.__add_prefetch(inner_code, inst)

            op = inst.operands[-1]

            if op.type == self.CS_OP_IMM:
                nxt = unsigned(op.value.imm)
                self.dis.add_xref(ad, nxt)
                if nxt in self.functions:
                    has_ret = not self.is_noreturn(nxt, entry)
                else:
                    stack.append(nxt)
                if add_if_code:
                    added_xrefs.append((ad, nxt))
            else:
                if inst.address in self.jmptables:
                    table = self.jmptables[inst.address].table
                    stack += table
                    self.dis.add_xref(ad, table)
                    if add_if_code:
                        added_xrefs.append((ad, table))
                else:
                    # TODO: this is a register or a memory access, so we
                    # can't tell whether the function really returns.
                    has_ret = True

        elif self.is_cond_jump(inst):
            prefetch = self.__add_prefetch(inner_code, inst)

            op = inst.operands[-1]

            if op.type == self.CS_OP_IMM:
                if prefetch is None:
                    direct_nxt = inst.address + inst.size
                else:
                    direct_nxt = prefetch.address + prefetch.size

                nxt_jmp = unsigned(op.value.imm)
                self.dis.add_xref(ad, nxt_jmp)
                stack.append(direct_nxt)

                if add_if_code:
                    added_xrefs.append((ad, nxt_jmp))

                if nxt_jmp in self.functions:
                    has_ret = not self.is_noreturn(nxt_jmp, entry)
                else:
                    stack.append(nxt_jmp)

        elif self.is_call(inst):
            op = inst.operands[-1]

            if op.type == self.CS_OP_IMM:
                imm = unsigned(op.value.imm)
                self.dis.add_xref(ad, imm)

                if add_if_code:
                    added_xrefs.append((ad, imm))

                if imm not in self.functions:
                    self.analyze_flow(imm, True, False, add_if_code)

                if imm in self.functions and self.is_noreturn(imm, entry):
                    self.__add_prefetch(inner_code, inst)
                    continue

            nxt = inst.address + inst.size
            stack.append(nxt)

        else:
            nxt = inst.address + inst.size
            stack.append(nxt)

    if add_if_code and has_bad_inst:
        for from_ad, to_ad in added_xrefs:
            self.dis.rm_xrefs(from_ad, to_ad)
        return -1

    # for ELF
    if entry in self.dis.binary.imports:
        flags = self.import_flags(entry)
    elif has_ret:
        flags = 0
    else:
        flags = FUNC_FLAG_NORETURN

    return flags
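# A minimal sketch of the worklist pattern __sub_analyze_flow relies on:
# addresses to visit are pushed on a plain list used as a stack, and each
# address is decoded at most once. The decode callback is a made-up stand-in
# for the capstone-backed disasm() used above.

def walk_flow(entry, decode):
    """decode(ad) -> (size, successors) or None on a bad instruction."""
    seen = {}
    stack = [entry]
    while stack:
        ad = stack.pop()
        if ad in seen:
            continue
        decoded = decode(ad)
        if decoded is None:     # bad byte sequence: stop exploring this path
            continue
        size, succs = decoded
        seen[ad] = size
        stack.extend(succs)     # fall-through and/or jump targets
    return seen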
def __sub_analyze_flow(self, entry, inner_code, add_if_code):
    if self.dis.binary.get_section(entry) is None:
        return -1

    stack = [entry]
    has_ret = False

    # If entry is not "code", we have to roll back the added xrefs.
    has_bad_inst = False
    if add_if_code:
        added_xrefs = []

    while stack:
        ad = stack.pop()
        inst = self.disasm(ad)

        if inst is None:
            has_bad_inst = True
            if add_if_code:
                break
            continue

        if ad in inner_code:
            continue

        inner_code[ad] = inst

        if self.is_ret(inst):
            self.__add_prefetch(inner_code, inst)
            has_ret = True

        elif self.is_uncond_jump(inst):
            self.__add_prefetch(inner_code, inst)

            op = inst.operands[-1]

            if op.type == self.CS_OP_IMM:
                nxt = unsigned(op.value.imm)
                self.api.add_xref(ad, nxt)
                if self.db.mem.is_func(nxt):
                    has_ret = not self.is_noreturn(nxt, entry)
                else:
                    stack.append(nxt)
                if add_if_code:
                    added_xrefs.append((ad, nxt))
            else:
                if inst.address in self.jmptables:
                    table = self.jmptables[inst.address].table
                    stack += table
                    self.api.add_xref(ad, table)
                    if add_if_code:
                        added_xrefs.append((ad, table))
                else:
                    # TODO: this is a register or a memory access, so we
                    # can't tell whether the function really returns.
                    has_ret = True

        elif self.is_cond_jump(inst):
            prefetch = self.__add_prefetch(inner_code, inst)

            op = inst.operands[-1]

            if op.type == self.CS_OP_IMM:
                if prefetch is None:
                    direct_nxt = inst.address + inst.size
                else:
                    direct_nxt = prefetch.address + prefetch.size

                nxt_jmp = unsigned(op.value.imm)
                self.api.add_xref(ad, nxt_jmp)
                stack.append(direct_nxt)

                if add_if_code:
                    added_xrefs.append((ad, nxt_jmp))

                if self.db.mem.is_func(nxt_jmp):
                    has_ret = not self.is_noreturn(nxt_jmp, entry)
                else:
                    stack.append(nxt_jmp)

        elif self.is_call(inst):
            op = inst.operands[-1]

            if op.type == self.CS_OP_IMM:
                imm = unsigned(op.value.imm)
                self.api.add_xref(ad, imm)

                if add_if_code:
                    added_xrefs.append((ad, imm))

                if not self.db.mem.is_func(imm):
                    self.analyze_flow(imm, True, False, add_if_code)

                if self.db.mem.is_func(imm) and self.is_noreturn(imm, entry):
                    self.__add_prefetch(inner_code, inst)
                    continue

            nxt = inst.address + inst.size
            stack.append(nxt)

        else:
            nxt = inst.address + inst.size
            stack.append(nxt)

    if add_if_code and has_bad_inst:
        for from_ad, to_ad in added_xrefs:
            self.api.rm_xrefs(from_ad, to_ad)
        return -1

    # for ELF
    if entry in self.dis.binary.imports:
        flags = self.import_flags(entry)
    elif has_ret:
        flags = 0
    else:
        flags = FUNC_FLAG_NORETURN

    return flags
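# A sketch of the rollback idiom used above when add_if_code is set: xrefs
# are recorded as they are added so that, if a bad instruction proves the
# region is not code, they can all be undone. The recorder class is a
# hypothetical wrapper; only add_xref/rm_xrefs mirror the api calls above.

class XrefRecorder:
    def __init__(self, api):
        self.api = api
        self.added = []

    def add(self, frm, to):
        # Forward to the real api and remember the pair for a later rollback.
        self.api.add_xref(frm, to)
        self.added.append((frm, to))

    def rollback(self):
        # Undo every xref recorded so far (e.g. the region wasn't code).
        for frm, to in self.added:
            self.api.rm_xrefs(frm, to)
        self.added.clear()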
def analyze_operands(self, i, func_obj):
    b = self.dis.binary

    for op in i.operands:
        if op.type == self.CS_OP_IMM:
            val = unsigned(op.value.imm)

        elif op.type == self.CS_OP_MEM and op.mem.disp != 0:

            if self.is_x86:
                if op.mem.segment != 0:
                    continue
                if op.mem.index == 0:
                    # rip/eip relative: the base register points to the
                    # next instruction.
                    if op.mem.base == self.X86_REG_EIP or \
                            op.mem.base == self.X86_REG_RIP:
                        val = i.address + i.size + unsigned(op.mem.disp)

                    # Check if it's a stack variable
                    elif op.mem.base == self.X86_REG_EBP or \
                            op.mem.base == self.X86_REG_RBP:
                        if func_obj is not None:
                            ty = self.db.mem.find_type(op.size)
                            func_obj[FUNC_VARS][op.mem.disp] = [ty, None]
                        # Continue the loop !!
                        continue
                    else:
                        val = unsigned(op.mem.disp)
                else:
                    val = unsigned(op.mem.disp)

            # TODO: stack variables for arm/mips

            elif self.is_arm:
                if op.mem.index == 0 and op.mem.base == self.ARM_REG_PC:
                    val = i.address + i.size * 2 + op.mem.disp
                else:
                    val = op.mem.disp

            elif self.is_mips:
                if op.mem.base == self.MIPS_REG_GP:
                    if self.dis.mips_gp == -1:
                        continue
                    val = op.mem.disp + self.dis.mips_gp
                else:
                    val = op.mem.disp

        else:
            continue

        s = b.get_section(val)
        if s is None or s.start == 0:
            continue

        self.api.add_xref(i.address, val)

        if not self.db.mem.exists(val):
            sz = op.size if self.has_op_size else self.default_size
            deref = s.read_int(val, sz)

            # If (*val) is an address
            if deref is not None and b.is_address(deref):
                ty = MEM_OFFSET
                self.api.add_xref(val, deref)

                if not self.db.mem.exists(deref):
                    self.db.mem.add(deref, 1, MEM_UNK)

                    # Do an analysis on this value.
                    if deref not in self.pending and \
                            deref not in self.pending_not_curr and \
                            self.first_inst_are_code(deref):
                        self.pending_not_curr.add(deref)
                        self.msg.put(
                            (deref, self.has_prolog(deref), False, True, None))
            else:
                # Check if this is an address to a string
                sz = b.is_string(val)
                if sz != 0:
                    ty = MEM_ASCII
                else:
                    sz = op.size if self.has_op_size else self.default_size
                    if op.type == self.CS_OP_MEM:
                        ty = self.db.mem.find_type(sz)
                    else:
                        ty = MEM_UNK

            self.db.mem.add(val, sz, ty)

            if ty == MEM_UNK:
                # Do an analysis on this value: if this is not code,
                # nothing will be done (jumps and calls are already
                # analyzed in analyze_flow).
                if val not in self.pending and \
                        not (self.is_jump(i) or self.is_call(i)) and \
                        val not in self.pending_not_curr and \
                        self.first_inst_are_code(val):
                    self.pending_not_curr.add(val)
                    self.msg.put(
                        (val, self.has_prolog(val), False, True, None))
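# A short sketch of the $gp-relative resolution in analyze_operands above:
# on MIPS, data is commonly addressed as disp($gp), so the absolute address
# is only known once the global pointer value has been recovered. The
# function name and the concrete values are illustrative.

def gp_relative_target(disp, mips_gp):
    # -1 means the global pointer hasn't been determined yet.
    if mips_gp == -1:
        return None
    return disp + mips_gp

assert gp_relative_target(-0x7fe4, 0x418880) == 0x41089c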
def get_graph(self, entry):
    from capstone import CS_OP_IMM, CS_ARCH_MIPS

    self.CS_ARCH_MIPS = CS_ARCH_MIPS

    ARCH_UTILS = self.load_arch_module().utils

    gph = Graph(self, entry)
    stack = [entry]
    start = time()
    prefetch = None
    addresses = set()

    # WARNING: this assumes that on every architecture the jump
    # address is the last operand (operands[-1]).

    # Here each instruction is a node. Blocks will be created in the
    # function __simplify.

    while stack:
        ad = stack.pop()
        inst = self.lazy_disasm(ad)

        if inst is None:
            # Remove all previous instructions which have a link
            # to this instruction.
            if ad in gph.link_in:
                for i in gph.link_in[ad]:
                    gph.link_out[i].remove(ad)
                for i in gph.link_in[ad]:
                    if not gph.link_out[i]:
                        del gph.link_out[i]
                del gph.link_in[ad]
            continue

        if gph.exists(inst):
            continue

        addresses.add(ad)

        if ARCH_UTILS.is_ret(inst):
            prefetch = self.__add_prefetch(addresses, inst)
            gph.new_node(inst, prefetch, None)

        elif ARCH_UTILS.is_uncond_jump(inst):
            prefetch = self.__add_prefetch(addresses, inst)
            gph.uncond_jumps_set.add(ad)

            op = inst.operands[-1]

            if op.type == CS_OP_IMM:
                nxt = unsigned(op.value.imm)
                if nxt in self.functions:
                    gph.new_node(inst, prefetch, None)
                else:
                    stack.append(nxt)
                    gph.new_node(inst, prefetch, [nxt])
            else:
                if inst.address in self.jmptables:
                    table = self.jmptables[inst.address].table
                    stack += table
                    gph.new_node(inst, prefetch, table)
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)

        elif ARCH_UTILS.is_cond_jump(inst):
            prefetch = self.__add_prefetch(addresses, inst)
            gph.cond_jumps_set.add(ad)

            op = inst.operands[-1]

            if op.type == CS_OP_IMM:
                if prefetch is None:
                    direct_nxt = inst.address + inst.size
                else:
                    direct_nxt = prefetch.address + prefetch.size

                nxt_jmp = unsigned(op.value.imm)
                stack.append(direct_nxt)

                if nxt_jmp in self.functions:
                    gph.new_node(inst, prefetch, [direct_nxt])
                else:
                    stack.append(nxt_jmp)
                    gph.new_node(inst, prefetch, [direct_nxt, nxt_jmp])
            else:
                # Can't interpret jmp ADDR|reg
                gph.new_node(inst, prefetch, None)

        else:
            if ad != entry and ARCH_UTILS.is_call(inst):
                op = inst.operands[0]
                if op.type == CS_OP_IMM:
                    imm = unsigned(op.value.imm)
                    if imm in self.functions and self.is_noreturn(imm):
                        prefetch = self.__add_prefetch(addresses, inst)
                        gph.new_node(inst, prefetch, None)
                        continue

            nxt = inst.address + inst.size
            stack.append(nxt)
            gph.new_node(inst, None, [nxt])

    if len(gph.nodes) == 0:
        return None, 0

    if self.binary.type == T_BIN_PE:
        nb_new_syms = self.binary.pe_reverse_stripped_list(self, addresses)
    else:
        nb_new_syms = 0

    elapsed = time() - start
    debug__("Graph built in %fs (%d instructions)" % (elapsed, len(gph.nodes)))

    return gph, nb_new_syms
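# A compact sketch of the link_in/link_out bookkeeping used above when an
# undecodable address is dropped from the graph: every predecessor loses its
# edge, and predecessors left without successors are pruned from link_out.
# Plain dicts of lists stand in for the Graph attributes; the function name
# is made up.

def drop_bad_node(ad, link_in, link_out):
    if ad not in link_in:
        return
    for pred in link_in[ad]:
        link_out[pred].remove(ad)
    for pred in link_in[ad]:
        if not link_out.get(pred):
            link_out.pop(pred, None)
    del link_in[ad]

# Example: 0x30 turned out not to decode.
li = {0x30: [0x10, 0x20]}
lo = {0x10: [0x30], 0x20: [0x30, 0x40]}
drop_bad_node(0x30, li, lo)
assert li == {} and lo == {0x20: [0x40]}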
def dump_asm(self, ctx, lines=NB_LINES_TO_DISASM, until=-1):
    from capstone import CS_OP_IMM

    ARCH = self.load_arch_module()
    ARCH_OUTPUT = ARCH.output
    ARCH_UTILS = ARCH.utils

    ad = ctx.entry
    s = self.binary.get_section(ad)

    if s is None:
        # until != -1 only from the visual mode: it prevents going
        # back before the first section.
        if until != -1:
            return None
        # Get the next section: sections are not necessarily
        # consecutive!
        s = self.binary.get_next_section(ad)
        if s is None:
            return None
        ad = s.start

    o = ARCH_OUTPUT.Output(ctx)
    o._new_line()
    o.section_prefix = True
    o.curr_section = s
    o.mode_dump = True
    l = 0
    api = ctx.gctx.api

    while 1:
        if ad == s.start:
            if not o.is_last_2_line_empty():
                o._new_line()
            o._dash()
            o._section(s.name)
            o._add(" 0x%x -> 0x%x" % (s.start, s.end))
            o._new_line()
            o._new_line()

        while ((l < lines and until == -1) or (ad < until and until != -1)) \
                and ad <= s.end:
            ty = self.mem.get_type(ad)

            # A PE import should not be displayed as a subroutine
            if not (self.binary.type == T_BIN_PE and ad in self.binary.imports) \
                    and self.mem.is_code(ad):
                is_func = ad in self.functions

                if is_func:
                    if not o.is_last_2_line_empty():
                        o._new_line()
                    o._dash()
                    o._user_comment("; SUBROUTINE")
                    o._new_line()
                    o._dash()

                i = self.lazy_disasm(ad, s.start)

                if not is_func and ad in self.xrefs and \
                        not o.is_last_2_line_empty():
                    o._new_line()

                o._asm_inst(i)

                if ad in self.end_functions:
                    for fad in self.end_functions[ad]:
                        sy = api.get_symbol(fad)
                        o._user_comment("; end function %s" % sy)
                        o._new_line()
                    o._new_line()

                elif ARCH_UTILS.is_uncond_jump(i) or ARCH_UTILS.is_ret(i):
                    o._new_line()

                elif ARCH_UTILS.is_call(i):
                    op = i.operands[0]
                    if op.type == CS_OP_IMM:
                        imm = unsigned(op.value.imm)
                        if imm in self.functions and self.is_noreturn(imm):
                            o._new_line()

                ad += i.size

            elif ty == MEM_OFFSET:
                o._label_and_address(ad)
                o.set_line(ad)
                sz = self.mem.get_size(ad)
                off = s.read_int(ad, sz)
                if off is None:
                    continue
                if ctx.gctx.print_bytes:
                    o._bytes(s.read(ad, sz))
                o._data_prefix(sz)
                o._add(" ")
                o._imm(off, sz, True, print_data=False,
                       force_dont_print_data=True)
                o._new_line()
                ad += sz

            elif ty == MEM_ASCII:
                o._label_and_address(ad)
                o.set_line(ad)
                sz = self.mem.get_size(ad)
                buf = self.binary.get_string(ad, sz)
                if buf is not None:
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._string('"' + buf + '"')
                    o._add(", 0")
                o._new_line()
                ad += sz

            else:
                o._label_and_address(ad)
                o.set_line(ad)
                sz = self.mem.get_size_from_type(ty)
                if ctx.gctx.print_bytes:
                    o._bytes(s.read(ad, sz))
                o._word(s.read_int(ad, sz), sz)
                o._new_line()
                ad += sz

            l += 1

        s = self.binary.get_section(ad)
        if s is None:
            # Get the next section: sections are not necessarily
            # consecutive!
            s = self.binary.get_next_section(ad)
            if s is None:
                break
            o._new_line()
            ad = s.start

        if until != -1 and ad >= until:
            break

        if (l >= lines and until == -1) or (ad >= until and until != -1):
            break

        o.curr_section = s

    if until == ad:
        if self.mem.is_code(ad) and ad in self.xrefs or ad == s.start:
            if not o.is_last_2_line_empty():
                o._new_line()

    # remove the last empty line
    o.lines.pop(-1)
    o.token_lines.pop(-1)

    o.join_lines()

    return o
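# A minimal sketch of the section walk dump_asm performs: when an address
# falls past the current section, jump to the start of the next one rather
# than assuming sections are consecutive. Section is a made-up record; in
# the real code get_next_section returns the first section after the address.

from collections import namedtuple

Section = namedtuple("Section", "start end")

def next_position(ad, sections):
    """Return ad if it lies inside a section, else the next section start."""
    for s in sorted(sections, key=lambda s: s.start):
        if s.start <= ad <= s.end:
            return ad
        if ad < s.start:
            return s.start      # gap between sections: skip to the next start
    return None                 # ran past the last section

secs = [Section(0x1000, 0x1fff), Section(0x4000, 0x4fff)]
assert next_position(0x2000, secs) == 0x4000
assert next_position(0x1800, secs) == 0x1800
assert next_position(0x5000, secs) is None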