def find_data_reference_from_insn(insn, max_depth=10):
    """follow a chain of single data references starting at an instruction

    starting from `insn.ea`, repeatedly step to the sole data reference of the
    current address. the walk ends when the current address has zero or several
    data references, refers to itself, refers to an unmapped address, or when
    more than `max_depth` hops would be taken.

    args:
        insn (IDA insn_t)
        max_depth (int): maximum number of references to follow

    returns:
        the last address reached; this is the address of the instruction
        itself if no reference could be followed.
    """
    current = insn.ea
    hops = 0

    while True:
        targets = list(idautils.DataRefsFrom(current))

        # nested pointers are assumed to carry exactly one data reference,
        # so anything else (none, or several) ends the walk
        if len(targets) != 1:
            return current

        target = targets[0]

        # circular reference: the address points at itself
        if current == target:
            return current

        # never step onto an address that is not mapped
        if not idaapi.is_mapped(target):
            return current

        hops += 1
        if hops > max_depth:
            # depth budget exhausted
            return current

        current = target
def extract_insn_offset_features(f, bb, insn):
    """parse instruction structure offset features

    for every phrase/displacement operand that is not a stack variable and
    whose offset is not a mapped address, yield the offset twice: once plain
    and once tagged with the architecture derived from `f.ctx`.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    yields:
        (Offset, address of the instruction)

    example:
        .text:0040112F cmp [esi+4], ebx
    """
    ida_helpers = capa.features.extractors.ida.helpers
    wanted_types = (idaapi.o_phrase, idaapi.o_displ)

    for operand in ida_helpers.get_insn_ops(insn, target_ops=wanted_types):
        if ida_helpers.is_op_stack_var(insn.ea, operand.n):
            continue

        raw_offset = ida_helpers.get_op_phrase_info(operand).get("offset", 0)

        if idaapi.is_mapped(raw_offset):
            # the "offset" is really a mapped address; ignore things like:
            #   mov esi, dword_1005B148[esi]
            continue

        # IDA is believed to encode all offsets as two's complement in a u32.
        # a 64-bit displacement isn't a thing, see:
        # https://stackoverflow.com/questions/31853189/x86-64-assembly-why-displacement-not-64-bits
        offset = capa.features.extractors.helpers.twos_complement(raw_offset, 32)

        yield Offset(offset), insn.ea
        yield Offset(offset, arch=get_arch(f.ctx)), insn.ea
def extract_insn_number_features(f, bb, insn):
    """parse instruction number features

    return instructions and instructions that modify the stack pointer yield
    nothing. otherwise each immediate operand whose masked value is not a
    mapped address is yielded twice: once plain and once tagged with the
    architecture derived from `f.ctx`.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    yields:
        (Number, address of the instruction)

    example:
        push 3136B0h ; dwControlCode
    """
    if idaapi.is_ret_insn(insn):
        # skip things like:
        #   .text:0042250E retn 8
        return

    ida_helpers = capa.features.extractors.ida.helpers

    if ida_helpers.is_sp_modified(insn):
        # skip things like:
        #   .text:00401145 add esp, 0Ch
        return

    for operand in ida_helpers.get_insn_ops(insn, target_ops=(idaapi.o_imm,)):
        value = ida_helpers.mask_op_val(operand)

        if idaapi.is_mapped(value):
            # the immediate is a mapped address rather than a plain number
            continue

        yield Number(value), insn.ea
        yield Number(value, arch=get_arch(f.ctx)), insn.ea
def _getbytes(self, start, l=1):
    """read `l` consecutive bytes beginning at `start`, relative to the base address

    each address is computed as `start + self.base_address` plus the index of
    the byte being read.

    args:
        start (int): offset of the first byte, relative to `self.base_address`
        l (int): number of bytes to read (default 1)

    returns:
        str: one character per byte read, built with chr() on each value
        returned by Byte(); empty when `l` is 0.

    raises:
        IOError: if any of the addresses is not mapped.
    """
    # collect the pieces and join once at the end; appending to a str inside
    # the loop may copy the accumulated result on every iteration
    chars = []
    for ad in xrange(l):
        offset = ad + start + self.base_address
        if not is_mapped(offset):
            raise IOError("not enough bytes")
        chars.append(chr(Byte(offset)))
    return "".join(chars)
def _getbytes(self, start, l=1):
    """read `l` consecutive bytes beginning at `start`, relative to the base address

    each address is computed as `start + self.base_address` plus the index of
    the byte being read.

    args:
        start (int): offset of the first byte, relative to `self.base_address`
        l (int): number of bytes to read (default 1)

    returns:
        the pieces produced by int_to_byte(get_wide_byte(address)) for each
        address, joined with b''.

    raises:
        IOError: if any of the addresses is not mapped.
    """

    def read_each():
        # walk the addresses in order, checking each one right before reading it
        for index in range(l):
            address = index + start + self.base_address
            if not is_mapped(address):
                raise IOError("not enough bytes")
            yield int_to_byte(get_wide_byte(address))

    return b"".join(read_each())
def extract_insn_offset_features(f, bb, insn):
    """parse instruction structure offset features

    walks the instruction's operands until the first void operand. for every
    phrase/displacement operand that is not a stack variable and whose offset
    is not a mapped address, yields the offset both plain and tagged with the
    operand index. for `lea` with a SIB-less displacement as operand 1, the
    same value is additionally yielded as a number.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    yields:
        (feature, address of the instruction)

    example:
        .text:0040112F cmp [esi+4], ebx
    """
    ida_helpers = capa.features.extractors.ida.helpers

    for op_index, operand in enumerate(insn.ops):
        if operand.type == idaapi.o_void:
            # a void operand ends the scan; later operands are not inspected
            break

        if operand.type not in (idaapi.o_phrase, idaapi.o_displ):
            continue

        if ida_helpers.is_op_stack_var(insn.ea, operand.n):
            continue

        raw_offset = ida_helpers.get_op_phrase_info(operand).get("offset", 0)

        if idaapi.is_mapped(raw_offset):
            # the "offset" is really a mapped address; ignore things like:
            #   mov esi, dword_1005B148[esi]
            continue

        # IDA is believed to encode all offsets as two's complement in a u32.
        # a 64-bit displacement isn't a thing, see:
        # https://stackoverflow.com/questions/31853189/x86-64-assembly-why-displacement-not-64-bits
        offset = capa.features.extractors.helpers.twos_complement(raw_offset, 32)

        yield Offset(offset), insn.ea
        yield OperandOffset(op_index, offset), insn.ea

        # the remainder only applies to the second operand of lea
        if insn.itype != idaapi.NN_lea or op_index != 1:
            continue

        # o_displ is used for both:
        #   [eax+1]
        #   [eax+ebx+2]
        if operand.type != idaapi.o_displ:
            continue

        # but the SIB is only present for [eax+ebx+2], which we don't want
        if ida_helpers.has_sib(operand):
            continue

        # for pattern like:
        #
        #     lea eax, [ebx + 1]
        #
        # assume 1 is also an offset (imagine ebx is a zero register).
        yield Number(offset), insn.ea
        yield OperandNumber(op_index, offset), insn.ea
def extract_insn_offset_features(f, bb, insn):
    """parse instruction structure offset features

    for every phrase/displacement operand that is not a stack variable, yield
    its offset unless the offset is zero or is a mapped address.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    yields:
        (Offset, address of the instruction)

    example:
        .text:0040112F cmp [esi+4], ebx
    """
    ida_helpers = capa.features.extractors.ida.helpers
    wanted_types = (idaapi.o_phrase, idaapi.o_displ)

    for operand in ida_helpers.get_insn_ops(insn, target_ops=wanted_types):
        if ida_helpers.is_op_stack_var(insn.ea, operand.n):
            continue

        offset = ida_helpers.get_op_phrase_info(operand).get("offset", 0)

        # skip zero offsets, and offsets that are really mapped addresses, like:
        #   mov esi, dword_1005B148[esi]
        if offset == 0 or idaapi.is_mapped(offset):
            continue

        yield Offset(offset), insn.ea