Ejemplo n.º 1
0
def find_data_reference_from_insn(insn, max_depth=10):
    """Follow a chain of single data references starting at an instruction.

    Starting from ``insn.ea``, repeatedly look up the data references leaving
    the current address and hop to the target for as long as there is exactly
    one target, that target is mapped, and it is not the current address.

    Args:
        insn: instruction object exposing an ``ea`` attribute (start address).
        max_depth: maximum number of hops to take along the chain.

    Returns:
        The last address reached. This is ``insn.ea`` itself when the
        instruction has no usable data reference.
    """
    current = insn.ea
    hops = 0

    while True:
        targets = list(idautils.DataRefsFrom(current))

        # nested pointers are assumed to carry exactly one data reference,
        # so zero or several references end the walk
        if len(targets) != 1:
            return current

        (target,) = targets

        # a reference back to the same address would loop forever
        if current == target:
            return current

        # never step onto an address that is not mapped
        if not idaapi.is_mapped(target):
            return current

        hops += 1
        if hops > max_depth:
            # depth budget exhausted; stay on the last accepted address
            return current

        current = target
Ejemplo n.º 2
0
def extract_insn_offset_features(f, bb, insn):
    """Yield structure-offset features for an instruction's memory operands.

    Args:
        f (IDA func_t): function holding the instruction; ``f.ctx`` is passed to ``get_arch``
        bb (IDA BasicBlock): basic block holding the instruction (not read here)
        insn (IDA insn_t): instruction to inspect

    Yields:
        ``(Offset, insn.ea)`` twice per qualifying operand: first without an
        arch, then with ``arch=get_arch(f.ctx)``.

    Example:
        .text:0040112F cmp [esi+4], ebx
    """
    ida_helpers = capa.features.extractors.ida.helpers
    wanted_types = (idaapi.o_phrase, idaapi.o_displ)

    for operand in ida_helpers.get_insn_ops(insn, target_ops=wanted_types):
        # stack variable accesses are not reported as offsets
        if ida_helpers.is_op_stack_var(insn.ea, operand.n):
            continue

        displacement = ida_helpers.get_op_phrase_info(operand).get("offset", 0)

        # a displacement that is itself a mapped address is skipped, e.g.:
        #   mov esi, dword_1005B148[esi]
        if idaapi.is_mapped(displacement):
            continue

        # IDA is believed to encode offsets as two's complement in a u32;
        # x86-64 has no 64-bit displacement, see:
        # https://stackoverflow.com/questions/31853189/x86-64-assembly-why-displacement-not-64-bits
        displacement = capa.features.extractors.helpers.twos_complement(displacement, 32)

        yield Offset(displacement), insn.ea
        yield Offset(displacement, arch=get_arch(f.ctx)), insn.ea
Ejemplo n.º 3
0
def extract_insn_number_features(f, bb, insn):
    """Yield number features for the immediate operands of an instruction.

    Args:
        f (IDA func_t): function holding the instruction; ``f.ctx`` is passed to ``get_arch``
        bb (IDA BasicBlock): basic block holding the instruction (not read here)
        insn (IDA insn_t): instruction to inspect

    Yields:
        ``(Number, insn.ea)`` twice per immediate that is not a mapped
        address: first without an arch, then with ``arch=get_arch(f.ctx)``.

    Example:
        push    3136B0h         ; dwControlCode
    """
    ida_helpers = capa.features.extractors.ida.helpers

    # nothing is reported for return instructions or for instructions that
    # adjust the stack pointer, such as:
    #   .text:0042250E retn 8
    #   .text:00401145 add esp, 0Ch
    if idaapi.is_ret_insn(insn) or ida_helpers.is_sp_modified(insn):
        return

    for operand in ida_helpers.get_insn_ops(insn, target_ops=(idaapi.o_imm, )):
        value = ida_helpers.mask_op_val(operand)
        if idaapi.is_mapped(value):
            # the immediate is a mapped address, not a plain number
            continue
        yield Number(value), insn.ea
        yield Number(value, arch=get_arch(f.ctx)), insn.ea
Ejemplo n.º 4
0
 def _getbytes(self, start, l=1):
     """Read ``l`` consecutive bytes beginning at ``start`` + ``self.base_address``.

     Returns the bytes as a single ``str`` (one character per byte).
     Raises ``IOError`` as soon as an address in the range is not mapped.
     """
     pieces = []
     for index in xrange(l):
         address = index + start + self.base_address
         if is_mapped(address):
             pieces.append(chr(Byte(address)))
         else:
             raise IOError("not enough bytes")
     return "".join(pieces)
Ejemplo n.º 5
0
 def _getbytes(self, start, l=1):
     """Read ``l`` consecutive bytes beginning at ``start`` + ``self.base_address``.

     Returns the concatenation of ``int_to_byte(get_wide_byte(addr))`` for
     every address in the range. Raises ``IOError`` as soon as one of those
     addresses is not mapped.
     """
     def read_each():
         # produce one converted byte per address, failing on the first gap
         for index in range(l):
             address = index + start + self.base_address
             if not is_mapped(address):
                 raise IOError("not enough bytes")
             yield int_to_byte(get_wide_byte(address))

     return b''.join(read_each())
Ejemplo n.º 6
0
Archivo: insn.py Proyecto: clayne/capa
def extract_insn_offset_features(f, bb, insn):
    """Yield offset (and, for simple ``lea``, number) features of an instruction.

    Args:
        f (IDA func_t): function holding the instruction (not read here)
        bb (IDA BasicBlock): basic block holding the instruction (not read here)
        insn (IDA insn_t): instruction to inspect

    Yields:
        For every qualifying memory operand: ``(Offset, insn.ea)`` and
        ``(OperandOffset, insn.ea)``. When the instruction is a ``lea`` whose
        operand 1 is a displacement without a SIB byte, also
        ``(Number, insn.ea)`` and ``(OperandNumber, insn.ea)``.

    Example:
        .text:0040112F cmp [esi+4], ebx
    """
    ida_helpers = capa.features.extractors.ida.helpers
    memory_operand_types = (idaapi.o_phrase, idaapi.o_displ)

    for index, operand in enumerate(insn.ops):
        if operand.type == idaapi.o_void:
            # the first void operand ends the scan
            break
        if operand.type not in memory_operand_types:
            continue
        if ida_helpers.is_op_stack_var(insn.ea, operand.n):
            continue

        displacement = ida_helpers.get_op_phrase_info(operand).get("offset", 0)

        # a displacement that is itself a mapped address is skipped, e.g.:
        #   mov esi, dword_1005B148[esi]
        if idaapi.is_mapped(displacement):
            continue

        # IDA is believed to encode offsets as two's complement in a u32;
        # x86-64 has no 64-bit displacement, see:
        # https://stackoverflow.com/questions/31853189/x86-64-assembly-why-displacement-not-64-bits
        displacement = capa.features.extractors.helpers.twos_complement(displacement, 32)

        yield Offset(displacement), insn.ea
        yield OperandOffset(index, displacement), insn.ea

        # the rest applies only to the second operand of a lea
        if insn.itype != idaapi.NN_lea or index != 1:
            continue

        # o_displ covers both [eax+1] and [eax+ebx+2] ...
        if operand.type != idaapi.o_displ:
            continue

        # ... but only [eax+ebx+2] carries a SIB byte, and that form is not wanted
        if ida_helpers.has_sib(operand):
            continue

        # for a pattern like:
        #
        #     lea eax, [ebx + 1]
        #
        # also report 1 as a number (imagine ebx is a zero register).
        yield Number(displacement), insn.ea
        yield OperandNumber(index, displacement), insn.ea
Ejemplo n.º 7
0
def extract_insn_offset_features(f, bb, insn):
    """Yield non-zero structure-offset features for an instruction.

    Args:
        f (IDA func_t): function holding the instruction (not read here)
        bb (IDA BasicBlock): basic block holding the instruction (not read here)
        insn (IDA insn_t): instruction to inspect

    Yields:
        ``(Offset, insn.ea)`` for each phrase/displacement operand that is not
        a stack variable and whose offset is neither zero nor a mapped address.

    Example:
        .text:0040112F cmp [esi+4], ebx
    """
    ida_helpers = capa.features.extractors.ida.helpers
    wanted_types = (idaapi.o_phrase, idaapi.o_displ)

    for operand in ida_helpers.get_insn_ops(insn, target_ops=wanted_types):
        if ida_helpers.is_op_stack_var(insn.ea, operand.n):
            continue

        displacement = ida_helpers.get_op_phrase_info(operand).get("offset", 0)

        # zero offsets are dropped, and so are displacements that are
        # themselves mapped addresses, e.g.:
        #   mov esi, dword_1005B148[esi]
        if displacement == 0 or idaapi.is_mapped(displacement):
            continue

        yield Offset(displacement), insn.ea