Example #1
0
def extract_insn_bytes_features(f, bb, insn):
    """Yield byte-sequence features for the data referenced by an instruction.

    Each entry of ``insn.dr`` is converted to ``str`` and used as a key into
    two tables reachable from ``f``: ``f.unit.obj.bin.strings`` and
    ``f.unit.obj.bin.data``.

    args:
        f: function object; only ``f.unit.obj.bin.strings`` and
            ``f.unit.obj.bin.data`` are read here
        bb: basic block (unused)
        insn: instruction; ``insn.dr`` (data references) and ``insn.ea`` are read

    yields:
        (Bytes, insn.ea) tuples

    example:
        push    offset iid_004118d4_IShellLinkA ; riid
    """
    for data_ref in insn.dr:
        key = str(data_ref)
        binary = f.unit.obj.bin

        # The string table is consulted first (the original note says
        # ghidra/ida put referenced strings there).
        if key in binary.strings:
            text = binary.strings[key]
            yield Bytes(text.encode("utf-16le")), insn.ea
            if text:
                # A non-empty string hit ends extraction for the whole
                # instruction; remaining references are not examined.
                # NOTE(review): confirm this should not be `continue`.
                return

        # Otherwise fall back to the referenced-data table, whose values are
        # base64-encoded.
        if key in binary.data:
            raw = base64.b64decode(binary.data[key])
            yield Bytes(raw), insn.ea
Example #2
0
def extract_insn_bytes_features(f, bb, insn):
    """
    Yield byte-sequence features for data addressed by an instruction's operands.

    Call instructions contribute nothing. For every other operand of a
    supported kind, a candidate address is taken from the operand, expanded
    through `derefs`, and the bytes read at each resulting address are yielded
    as `(Bytes, insn.va)` unless they cannot be read or are all zeros.

    example:
        #     push    offset iid_004118d4_IShellLinkA ; riid
    """
    for operand in insn.opers:
        if insn.mnem == "call":
            continue

        i386 = envi.archs.i386.disasm
        if isinstance(operand, i386.i386ImmOper):
            # plain immediate operand
            target = operand.getOperValue(operand)
        elif isinstance(operand, i386.i386RegMemOper):
            # displacement of a register+memory operand, e.g.
            #   movzx   ecx, ds:byte_423258[eax]
            target = operand.disp
        elif isinstance(operand, i386.i386SibOper):
            # immediate part of a scaled-index operand, e.g.
            # 0x401000 in `mov eax, 0x401000[2 * ebx]`
            target = operand.imm
        elif isinstance(operand, envi.archs.amd64.disasm.Amd64RipRelOper):
            # RIP-relative operand; see: Lab21-01.exe_:0x1400010D3
            target = operand.getOperAddr(insn)
        else:
            # operand kind carries no address we know how to follow
            continue

        for address in derefs(f.vw, target):
            try:
                buf = read_bytes(f.vw, address)
            except envi.SegmentationViolation:
                # address is not readable in the workspace
                continue

            if not capa.features.extractors.helpers.all_zeros(buf):
                yield Bytes(buf), insn.va
Example #3
0
def extract_insn_bytes_features(f, bb, insn):
    """
    Yield byte-sequence features for the data references of an instruction.

    Every reference returned by `insn.getDataRefs()` is expanded through
    `derefs`; the bytes read at each resulting address are yielded as
    `(Bytes, insn.offset)` unless the read returned None or only zeros.

    example:
        #     push    offset iid_004118d4_IShellLinkA ; riid
    """
    for ref in insn.getDataRefs():
        for address in derefs(f.smda_report, ref):
            buf = read_bytes(f.smda_report, address)
            # keep only reads that succeeded and contain a non-zero byte
            if buf is not None and not capa.features.extractors.helpers.all_zeros(buf):
                yield Bytes(buf), insn.offset
Example #4
0
def extract_insn_bytes_features(f, bb, insn):
    """Yield byte-sequence features for data referenced by an instruction.

    Call instructions are ignored. For every data reference originating at
    ``insn.ea``, up to MAX_BYTES_FEATURE_SIZE bytes are read at the referenced
    address and yielded as ``(Bytes, insn.ea)``, unless nothing was read or
    the bytes are all zeros.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    example:
        push    offset iid_004118d4_IShellLinkA ; riid
    """
    if idaapi.is_call_insn(insn):
        # ignore call instructions
        return

    for target in idautils.DataRefsFrom(insn.ea):
        buf = capa.features.extractors.ida.helpers.read_bytes_at(target, MAX_BYTES_FEATURE_SIZE)
        if not buf:
            # nothing could be read at the referenced address
            continue
        if capa.features.extractors.helpers.all_zeros(buf):
            continue
        yield Bytes(buf), insn.ea
Example #5
0
File: insn.py Project: wisdark/capa
def extract_insn_bytes_features(f, bb, insn):
    """Yield a byte-sequence feature for the data referenced by an instruction.

    Call instructions are ignored. The single reference returned by
    ``find_data_reference_from_insn`` is followed only when it differs from
    the instruction's own address; up to MAX_BYTES_FEATURE_SIZE bytes are
    read there and yielded as ``(Bytes, insn.ea)``, unless nothing was read
    or the bytes are all zeros.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    example:
        push    offset iid_004118d4_IShellLinkA ; riid
    """
    if idaapi.is_call_insn(insn):
        return

    target = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
    if target == insn.ea:
        # presumably the helper returns the instruction's own address when
        # it finds no data reference -- TODO confirm against the helper
        return

    buf = capa.features.extractors.ida.helpers.read_bytes_at(target, MAX_BYTES_FEATURE_SIZE)
    if not buf:
        return
    if capa.features.extractors.helpers.all_zeros(buf):
        return
    yield Bytes(buf), insn.ea
Example #6
0
def extract_insn_bytes_features(f, bb, insn):
    """
    Yield byte-sequence features for data addressed by an instruction's operands.

    Call instructions contribute nothing. For every other operand of a
    supported kind, a candidate address is taken from the operand; if it lies
    in a known segment, at most MAX_BYTES_FEATURE_SIZE bytes (clipped to the
    segment end) are read and yielded as `(Bytes, insn.va)`, unless the read
    faults or the bytes are all zeros.

    example:
        #     push    offset iid_004118d4_IShellLinkA ; riid
    """
    for operand in insn.opers:
        if insn.mnem == "call":
            # ignore call instructions
            continue

        i386 = envi.archs.i386.disasm
        if isinstance(operand, i386.i386ImmOper):
            # plain immediate operand
            address = operand.getOperValue(operand)
        elif isinstance(operand, i386.i386RegMemOper):
            # displacement of a register+memory operand, e.g.
            #   movzx   ecx, ds:byte_423258[eax]
            address = operand.disp
        elif isinstance(operand, envi.archs.amd64.disasm.Amd64RipRelOper):
            # RIP-relative operand; see: Lab21-01.exe_:0x1400010D3
            address = operand.getOperAddr(insn)
        else:
            continue

        segment = f.vw.getSegment(address)
        if not segment:
            # address does not fall inside any segment of the workspace
            continue

        # segment[0] + segment[1] is used as the segment's end address;
        # never read beyond it.
        segment_end = segment[0] + segment[1]
        length = min(MAX_BYTES_FEATURE_SIZE, segment_end - address)

        try:
            buf = f.vw.readMemory(address, length)
        except envi.SegmentationViolation:
            continue

        if not capa.features.extractors.helpers.all_zeros(buf):
            yield Bytes(buf), insn.va