Example #1
0
def extract_insn_offset_features(f, bb, insn):
    """yield structure offset features for the memory operands of an instruction

    every qualifying operand produces two features: a plain Offset and an
    Offset tagged with the arch returned by get_arch(f.ctx).

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    yields:
        (Offset, insn.ea) tuples

    example:
        .text:0040112F cmp [esi+4], ebx
    """
    ida_helpers = capa.features.extractors.ida.helpers
    memory_op_types = (idaapi.o_phrase, idaapi.o_displ)

    for operand in ida_helpers.get_insn_ops(insn, target_ops=memory_op_types):
        # stack variables are not structure offsets
        if ida_helpers.is_op_stack_var(insn.ea, operand.n):
            continue

        raw_offset = ida_helpers.get_op_phrase_info(operand).get("offset", 0)

        # a displacement that is itself a mapped address is skipped, e.g.:
        #   mov esi, dword_1005B148[esi]
        if idaapi.is_mapped(raw_offset):
            continue

        # I believe that IDA encodes all offsets as two's complement in a u32.
        # a 64-bit displacement isn't a thing, see:
        # https://stackoverflow.com/questions/31853189/x86-64-assembly-why-displacement-not-64-bits
        signed_offset = capa.features.extractors.helpers.twos_complement(
            raw_offset, 32)

        yield Offset(signed_offset), insn.ea
        yield Offset(signed_offset, arch=get_arch(f.ctx)), insn.ea
Example #2
0
def test_rule_yaml_descriptions():
    """a rule mixing inline (`= text`) and `description:` annotations must match.

    the supplied features provide Offset(0x50) and Offset(0x30), i.e. the pair
    listed under the "64-bits" branch of the rule, and two locations for
    Number(2) to satisfy the count of 2.
    """
    rule_yaml = textwrap.dedent("""
        rule:
            meta:
                name: test rule
            features:
                - and:
                    - number: 1 = This is the number 1
                    - string: This program cannot be run in DOS mode.
                      description: MS-DOS stub message
                    - string: '/SELECT.*FROM.*WHERE/i'
                      description: SQL WHERE Clause
                    - count(number(2 = AF_INET/SOCK_DGRAM)): 2
                    - or:
                        - and:
                            - offset: 0x50 = IMAGE_NT_HEADERS.OptionalHeader.SizeOfImage
                            - offset: 0x34 = IMAGE_NT_HEADERS.OptionalHeader.ImageBase
                          description: 32-bits
                        - and:
                            - offset: 0x50 = IMAGE_NT_HEADERS64.OptionalHeader.SizeOfImage
                            - offset: 0x30 = IMAGE_NT_HEADERS64.OptionalHeader.ImageBase
                          description: 64-bits
                      description: PE headers offsets
        """)
    parsed = capa.rules.Rule.from_yaml(rule_yaml)

    # feature -> set of locations at which the feature was seen
    features = {
        Number(1): {1},
        Number(2): {2, 3},
        String("This program cannot be run in DOS mode."): {4},
        String("SELECT password FROM hidden_table WHERE user == admin"): {5},
        Offset(0x50): {6},
        Offset(0x30): {7},
    }
    outcome = parsed.evaluate(features)
    assert (outcome == True)
Example #3
0
def extract_insn_offset_features(f, bb, insn):
    """parse structure offset features from the given instruction.

    only memory operands (operand text containing "ptr") are considered.
    operands that mention a stack or frame pointer register (esp, ebp, rsp,
    rbp) are skipped, since those address stack slots rather than structure
    fields.

    for each remaining operand two features are yielded: a plain Offset and an
    Offset tagged with the arch returned by get_arch(f.smda_report). when no
    number can be found in the operand text the offset is 0.

    yields:
        (Offset, insn.offset) tuples
    """
    # examples:
    #
    #     mov eax, [esi + 4]
    #     mov eax, [esi + ecx + 16384]
    for operand in (o.strip() for o in insn.operands.split(",")):
        if "ptr" not in operand:
            continue

        # rsp is listed alongside esp so that 64-bit stack accesses such as
        #
        #     mov rax, qword ptr [rsp + 8]
        #
        # are ignored just like their 32-bit counterparts.
        if any(reg in operand for reg in ("esp", "ebp", "rsp", "rbp")):
            continue

        number_hex = re.search(PATTERN_HEXNUM, operand)
        number_int = re.search(PATTERN_SINGLENUM, operand)

        # the hexadecimal pattern takes precedence over the decimal one
        if number_hex:
            match, base = number_hex, 16
        elif number_int:
            match, base = number_int, 10
        else:
            match = None

        number = 0
        if match is not None:
            # the digits live in the "num" group; the sign, if any, is the
            # leading character of the whole match
            number = int(match.group("num"), base)
            if match.group().startswith("-"):
                number = -number

        yield Offset(number), insn.offset
        yield Offset(number, arch=get_arch(f.smda_report)), insn.offset
Example #4
0
def extract_insn_offset_features(f, bb, insn):
    """parse structure offset features from the given instruction.

    for every i386RegMemOper operand whose `reg` is not ESP, EBP or RBP, yield
    the operand displacement as a plain Offset and as an Offset tagged with
    the arch returned by get_arch(f.vw).

    yields:
        (Offset, insn.va) tuples
    """
    # example:
    #
    #     .text:0040112F    cmp     [esi+4], ebx
    i386_disasm = envi.archs.i386.disasm

    for operand in insn.opers:
        # the same operand class is used for both x32 and x64
        if not isinstance(operand, i386_disasm.i386RegMemOper):
            continue

        # skip stack/frame pointer relative accesses.
        # TODO: do x64 support for real.
        if (operand.reg == i386_disasm.REG_ESP
                or operand.reg == i386_disasm.REG_EBP
                or operand.reg == envi.archs.amd64.disasm.REG_RBP):
            continue

        # viv already decodes offsets as signed
        displacement = operand.disp

        yield Offset(displacement), insn.va
        yield Offset(displacement, arch=get_arch(f.vw)), insn.va
Example #5
0
def extract_insn_offset_features(f, bb, insn):
    """parse instruction structure offset features

    each operand string in insn.oprs is lower-cased; operands mentioning any
    register listed under the 'ptr' category of f.unit.syntax are skipped.
    for the rest, the first matching number pattern (hex, alternate hex, then
    decimal) provides the offset, defaulting to 0 when nothing matches.

    args:
        f: function object exposing `unit.syntax`
        bb: basic block (unused)
        insn: instruction exposing `oprs` and `ea`

    yields:
        (Offset, insn.ea) tuples: one plain, one tagged with get_arch(f)

    example:
        .text:0040112F cmp [esi+4], ebx
    """
    syntax = f.unit.syntax

    for raw_operand in insn.oprs:
        text = raw_operand.lower()

        pointer_registers = syntax.registers_cat['ptr'].keys()
        if any(reg in text for reg in pointer_registers):
            continue

        hex_match = re.search(PATTERN_HEXNUM, text)
        hex_match_alt = re.search(PATTERN_HEXNUM_2, text)
        dec_match = re.search(PATTERN_SINGLENUM, text)

        # pick the first pattern that matched, together with its numeric base
        if hex_match:
            chosen, base = hex_match, 16
        elif hex_match_alt:
            chosen, base = hex_match_alt, 16
        elif dec_match:
            chosen, base = dec_match, 10
        else:
            chosen = None

        number = 0
        if chosen is not None:
            number = int(chosen.group("num"), base)
            # a leading "-" on the whole match marks a negative offset
            if chosen.group().startswith("-"):
                number = -number

        yield Offset(number), insn.ea
        yield Offset(number, arch=get_arch(f)), insn.ea
Example #6
0
def test_offset_arch():
    """an `offset/x32` rule matches only an Offset tagged with ARCH_X32."""
    rule_yaml = textwrap.dedent("""
            rule:
                meta:
                    name: test rule
                features:
                    - offset/x32: 2
            """)
    parsed = capa.rules.Rule.from_yaml(rule_yaml)

    # same arch: match
    x32_features = {Offset(2, arch=ARCH_X32): {1}}
    assert parsed.evaluate(x32_features) == True

    # no arch at all: no match
    untagged_features = {Offset(2): {1}}
    assert parsed.evaluate(untagged_features) == False

    # different arch: no match
    x64_features = {Offset(2, arch=ARCH_X64): {1}}
    assert parsed.evaluate(x64_features) == False
Example #7
0
File: insn.py Project: clayne/capa
def extract_insn_number_features(f, bb, insn):
    """parse number features from the given instruction.

    every operand that parses as a hexadecimal integer is masked to
    f.smda_report.bitness bits and yielded as Number and OperandNumber.
    for `add` instructions, values strictly between 0 and MAX_STRUCTURE_SIZE
    are additionally yielded as Offset and OperandOffset.

    yields:
        (feature, insn.offset) tuples
    """
    # example:
    #
    #     push    3136B0h         ; dwControlCode
    operands = [part.strip() for part in insn.operands.split(",")]

    # stack cleanup is not interesting, skip things like:
    #
    #    .text:00401140                 call    sub_407E2B
    #    .text:00401145                 add     esp, 0Ch
    if insn.mnemonic == "add" and operands[0] in ("esp", "rsp"):
        return

    for index, text in enumerate(operands):
        try:
            # Python evaluates bitwise operations as if the value were in
            # two's complement with infinitely many sign bits, so the mask
            # folds negative values into the unsigned range.
            value = int(text, 16) & ((1 << f.smda_report.bitness) - 1)
        except ValueError:
            # not a number (e.g. a register name)
            continue

        yield Number(value), insn.offset
        yield OperandNumber(index, value), insn.offset

        if insn.mnemonic != "add" or not (0 < value < MAX_STRUCTURE_SIZE):
            continue

        # for pattern like:
        #
        #     add eax, 0x10
        #
        # assume 0x10 is also an offset (imagine eax is a pointer).
        yield Offset(value), insn.offset
        yield OperandOffset(index, value), insn.offset
Example #8
0
File: insn.py Project: clayne/capa
def extract_op_offset_features(f, bb, insn, i, oper):
    """parse structure offset features from the given operand.

    args:
        f: function object exposing the vivisect workspace as `vw`
        bb: basic block (unused)
        insn: instruction exposing `mnem` and `va`
        i: index of the operand within the instruction
        oper: the operand to inspect

    yields:
        (feature, insn.va) tuples; nothing for operand types other than
        i386RegMemOper and i386SibOper
    """
    # example:
    #
    #     .text:0040112F    cmp     [esi+4], ebx
    i386_disasm = envi.archs.i386.disasm

    # this is for both x32 and x64
    # like [esi + 4]
    #       reg   ^
    #             disp
    if isinstance(oper, i386_disasm.i386RegMemOper):
        # stack/frame pointer relative operands are not structure offsets.
        # TODO: do x64 support for real.
        if (oper.reg == envi.archs.i386.regs.REG_ESP
                or oper.reg == envi.archs.i386.regs.REG_EBP
                or oper.reg == envi.archs.amd64.regs.REG_RBP):
            return

        # viv already decodes offsets as signed
        displacement = oper.disp

        yield Offset(displacement), insn.va
        yield OperandOffset(i, displacement), insn.va

        if insn.mnem != "lea" or i != 1:
            return

        if f.vw.probeMemory(displacement, 1, envi.memory.MM_READ):
            # the displacement is a readable address, not a constant
            return

        # for pattern like:
        #
        #     lea eax, [ebx + 1]
        #
        # also report 1 as a number (imagine ebx is a zero register).
        yield Number(displacement), insn.va
        yield OperandNumber(i, displacement), insn.va
        return

    # like: [esi + ecx + 16384]
    #        reg   ^     ^
    #              index ^
    #                    disp
    if isinstance(oper, i386_disasm.i386SibOper):
        # viv already decodes offsets as signed
        displacement = oper.disp

        yield Offset(displacement), insn.va
        yield OperandOffset(i, displacement), insn.va
Example #9
0
def test_count_offset_symbol():
    """count(offset(...)) terms with descriptions honour the requested counts."""
    rule_yaml = textwrap.dedent("""
        rule:
            meta:
                name: test rule
            features:
                - or:
                    - count(offset(2 = symbol name)): 1
                    - count(offset(0x100 = symbol name)): 2 or more
                    - count(offset(0x11 = (FLAG_A | FLAG_B))): 2 or more
        """)
    parsed = capa.rules.Rule.from_yaml(rule_yaml)

    # offset 2 must be seen at exactly one location
    no_locations = {Offset(2): {}}
    assert parsed.evaluate(no_locations) == False
    one_location = {Offset(2): {1}}
    assert parsed.evaluate(one_location) == True
    two_locations = {Offset(2): {1, 2}}
    assert parsed.evaluate(two_locations) == False

    # offset 0x100 must be seen at two or more locations
    too_few = {Offset(0x100, "symbol name"): {1}}
    assert parsed.evaluate(too_few) == False
    enough = {Offset(0x100, "symbol name"): {1, 2, 3}}
    assert parsed.evaluate(enough) == True
Example #10
0
def test_offset_arch_symbol():
    """an `offset/x32` rule with a description matches the described Offset."""
    rule_yaml = textwrap.dedent("""
            rule:
                meta:
                    name: test rule
                features:
                    - offset/x32: 2 = some constant
            """)
    parsed = capa.rules.Rule.from_yaml(rule_yaml)

    described = Offset(2, arch=ARCH_X32, description="some constant")
    assert parsed.evaluate({described: {1}}) == True
Example #11
0
def test_offset_symbol():
    """offsets written as `value = description` parse into the expected children.

    covers a bare value, extra whitespace around `=`, a description that itself
    contains `=`, and a hexadecimal value.
    """
    rule_yaml = textwrap.dedent("""
        rule:
            meta:
                name: test rule
            features:
                - and:
                    - offset: 1
                    - offset: 2 = symbol name
                    - offset: 3  =  symbol name
                    - offset: 4  =  symbol name = another name
                    - offset: 0x100 = symbol name
        """)
    parsed = capa.rules.Rule.from_yaml(rule_yaml)
    parsed_children = list(parsed.statement.get_children())

    plain = Offset(1)
    assert (plain in parsed_children) == True

    described = Offset(2, "symbol name")
    assert (described in parsed_children) == True

    padded = Offset(3, "symbol name")
    assert (padded in parsed_children) == True

    nested_equals = Offset(4, "symbol name = another name")
    assert (nested_equals in parsed_children) == True

    hexadecimal = Offset(0x100, "symbol name")
    assert (hexadecimal in parsed_children) == True
Example #12
0
File: insn.py Project: H1d3r/capa
def extract_insn_offset_features(f, bb, insn):
    """parse structure offset features from the given instruction.

    handles two operand shapes:

      - i386RegMemOper, like [esi + 4], unless its `reg` is ESP, EBP or RBP
      - i386SibOper, like [esi + ecx + 16384]

    for each, the displacement is yielded as a plain Offset and as an Offset
    tagged with the bitness returned by get_bitness(f.vw).

    yields:
        (Offset, insn.va) tuples
    """
    # example:
    #
    #     .text:0040112F    cmp     [esi+4], ebx
    i386_disasm = envi.archs.i386.disasm

    for operand in insn.opers:
        # this is for both x32 and x64
        if isinstance(operand, i386_disasm.i386RegMemOper):
            # like [esi + 4]
            #       reg   ^
            #             disp
            #
            # stack/frame pointer relative operands are skipped.
            # TODO: do x64 support for real.
            if (operand.reg == envi.archs.i386.regs.REG_ESP
                    or operand.reg == envi.archs.i386.regs.REG_EBP
                    or operand.reg == envi.archs.amd64.regs.REG_RBP):
                continue
        elif not isinstance(operand, i386_disasm.i386SibOper):
            # neither [reg + disp] nor
            # like: [esi + ecx + 16384]
            #        reg   ^     ^
            #              index ^
            #                    disp
            continue

        # viv already decodes offsets as signed
        displacement = operand.disp

        yield Offset(displacement), insn.va
        yield Offset(displacement, bitness=get_bitness(f.vw)), insn.va
Example #13
0
File: insn.py Project: clayne/capa
def extract_insn_offset_features(f, bb, insn):
    """parse instruction structure offset features

    walks insn.ops until the first o_void operand and, for every o_phrase or
    o_displ operand that is neither a stack variable nor a mapped address,
    yields Offset and OperandOffset. for the second operand of a `lea` with a
    SIB-less displacement, Number and OperandNumber are yielded as well.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    yields:
        (feature, insn.ea) tuples

    example:
        .text:0040112F cmp [esi+4], ebx
    """
    ida_helpers = capa.features.extractors.ida.helpers
    memory_op_types = (idaapi.o_phrase, idaapi.o_displ)

    for index, operand in enumerate(insn.ops):
        if operand.type == idaapi.o_void:
            # no further operands
            break
        if operand.type not in memory_op_types:
            continue
        if ida_helpers.is_op_stack_var(insn.ea, operand.n):
            continue

        raw_offset = ida_helpers.get_op_phrase_info(operand).get("offset", 0)

        # Ignore:
        #   mov esi, dword_1005B148[esi]
        if idaapi.is_mapped(raw_offset):
            continue

        # I believe that IDA encodes all offsets as two's complement in a u32.
        # a 64-bit displacement isn't a thing, see:
        # https://stackoverflow.com/questions/31853189/x86-64-assembly-why-displacement-not-64-bits
        signed_offset = capa.features.extractors.helpers.twos_complement(
            raw_offset, 32)

        yield Offset(signed_offset), insn.ea
        yield OperandOffset(index, signed_offset), insn.ea

        # o_displ is used for both:
        #   [eax+1]
        #   [eax+ebx+2]
        # but the SIB is only present for [eax+ebx+2], which we don't want
        is_simple_lea_source = (insn.itype == idaapi.NN_lea
                                and index == 1
                                and operand.type == idaapi.o_displ
                                and not ida_helpers.has_sib(operand))
        if is_simple_lea_source:
            # for pattern like:
            #
            #     lea eax, [ebx + 1]
            #
            # also report 1 as a number (imagine ebx is a zero register).
            yield Number(signed_offset), insn.ea
            yield OperandNumber(index, signed_offset), insn.ea
Example #14
0
File: insn.py Project: clayne/capa
def extract_insn_number_features(f, bb, insn):
    """parse instruction number features

    return instructions and instructions that modify the stack pointer yield
    nothing. otherwise every o_imm / o_mem operand that is not an offset is
    yielded as Number and OperandNumber; immediates of `add` that lie strictly
    between 0 and MAX_STRUCTURE_SIZE are also yielded as Offset and
    OperandOffset.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    yields:
        (feature, insn.ea) tuples

    example:
        push    3136B0h         ; dwControlCode
    """
    # skip things like:
    #   .text:0042250E retn 8
    #   .text:00401145 add esp, 0Ch
    if (idaapi.is_ret_insn(insn)
            or capa.features.extractors.ida.helpers.is_sp_modified(insn)):
        return

    ida_helpers = capa.features.extractors.ida.helpers

    for index, operand in enumerate(insn.ops):
        if operand.type == idaapi.o_void:
            # no further operands
            break
        if operand.type not in (idaapi.o_imm, idaapi.o_mem):
            continue

        # skip things like:
        #   .text:00401100 shr eax, offset loc_C
        if ida_helpers.is_op_offset(insn, operand):
            continue

        # immediates are masked by the helper; memory operands use their address
        if operand.type == idaapi.o_imm:
            value = ida_helpers.mask_op_val(operand)
        else:
            value = operand.addr

        yield Number(value), insn.ea
        yield OperandNumber(index, value), insn.ea

        looks_like_pointer_add = (insn.itype == idaapi.NN_add
                                  and 0 < value < MAX_STRUCTURE_SIZE
                                  and operand.type == idaapi.o_imm)
        if looks_like_pointer_add:
            # for pattern like:
            #
            #     add eax, 0x10
            #
            # assume 0x10 is also an offset (imagine eax is a pointer).
            yield Offset(value), insn.ea
            yield OperandOffset(index, value), insn.ea
Example #15
0
File: insn.py Project: clayne/capa
def extract_op_number_features(f, bb, insn, i, oper):
    """parse number features from the given operand.

    args:
        f: function object exposing the vivisect workspace as `vw`
        bb: basic block (unused)
        insn: instruction exposing `mnem`, `opers` and `va`
        i: index of the operand within the instruction
        oper: the operand to inspect

    yields:
        (feature, insn.va) tuples; nothing unless the operand is an
        i386ImmOper or i386ImmMemOper whose value is not a readable address
    """
    # example:
    #
    #     push    3136B0h         ; dwControlCode
    immediate_cls = envi.archs.i386.disasm.i386ImmOper
    immediate_mem_cls = envi.archs.i386.disasm.i386ImmMemOper

    # this is for both x32 and x64
    if not isinstance(oper, (immediate_cls, immediate_mem_cls)):
        return

    is_immediate = isinstance(oper, immediate_cls)
    if is_immediate:
        value = oper.getOperValue(oper)
    else:
        value = oper.getOperAddr(oper)

    if f.vw.probeMemory(value, 1, envi.memory.MM_READ):
        # this is a valid address
        # assume its not also a constant.
        return

    if insn.mnem == "add":
        destination = insn.opers[0]
        if destination.isReg() and destination.reg == envi.archs.i386.regs.REG_ESP:
            # skip things like:
            #
            #    .text:00401140                 call    sub_407E2B
            #    .text:00401145                 add     esp, 0Ch
            return

    yield Number(value), insn.va
    yield OperandNumber(i, value), insn.va

    if insn.mnem == "add" and 0 < value < MAX_STRUCTURE_SIZE and is_immediate:
        # for pattern like:
        #
        #     add eax, 0x10
        #
        # assume 0x10 is also an offset (imagine eax is a pointer).
        yield Offset(value), insn.va
        yield OperandOffset(i, value), insn.va
Example #16
0
File: insn.py Project: clayne/capa
def extract_insn_offset_features(f, bb, insn):
    """parse structure offset features from the given instruction.

    operands that mention a stack or frame pointer register (esp, ebp, rsp,
    rbp) are skipped, since those address stack slots rather than structure
    fields.

    memory operands (operand text containing "ptr") yield Offset and
    OperandOffset. the second operand of a `lea` that has no "ptr" and
    consists of a single +/- term without scaling yields Number and
    OperandNumber instead. when no number can be found in the operand text the
    value is 0.

    yields:
        (feature, insn.offset) tuples
    """
    # examples:
    #
    #     mov eax, [esi + 4]
    #     mov eax, [esi + ecx + 16384]
    operands = [o.strip() for o in insn.operands.split(",")]
    for i, operand in enumerate(operands):
        # rsp is listed alongside esp so that 64-bit stack accesses such as
        #
        #     mov rax, qword ptr [rsp + 8]
        #
        # are ignored just like their 32-bit counterparts.
        if any(reg in operand for reg in ("esp", "ebp", "rsp", "rbp")):
            continue

        number_hex = re.search(PATTERN_HEXNUM, operand)
        number_int = re.search(PATTERN_SINGLENUM, operand)

        # the hexadecimal pattern takes precedence over the decimal one
        if number_hex:
            match, base = number_hex, 16
        elif number_int:
            match, base = number_int, 10
        else:
            match = None

        number = 0
        if match is not None:
            # the digits live in the "num" group; the sign, if any, is the
            # leading character of the whole match
            number = int(match.group("num"), base)
            if match.group().startswith("-"):
                number = -number

        if "ptr" not in operand:
            is_simple_lea_source = (
                insn.mnemonic == "lea" and i == 1
                and (operand.count("+") + operand.count("-")) == 1
                and operand.count("*") == 0)
            if is_simple_lea_source:
                # for pattern like:
                #
                #     lea eax, [ebx + 1]
                #
                # report 1 as a number (imagine ebx is a zero register).
                yield Number(number), insn.offset
                yield OperandNumber(i, number), insn.offset

            continue

        yield Offset(number), insn.offset
        yield OperandOffset(i, number), insn.offset
Example #17
0
def extract_insn_offset_features(f, bb, insn):
    """ yield structure offset features for the memory operands of an instruction

        operands that are stack variables, have a zero offset, or whose offset
        is a mapped address are skipped.

        args:
            f (IDA func_t)
            bb (IDA BasicBlock)
            insn (IDA insn_t)

        yields:
            (Offset, insn.ea) tuples

        example:
            .text:0040112F cmp [esi+4], ebx
    """
    ida_helpers = capa.features.extractors.ida.helpers
    memory_op_types = (idaapi.o_phrase, idaapi.o_displ)

    for operand in ida_helpers.get_insn_ops(insn, target_ops=memory_op_types):
        if ida_helpers.is_op_stack_var(insn.ea, operand.n):
            continue

        offset = ida_helpers.get_op_phrase_info(operand).get("offset", 0)

        # a zero offset carries no information; a mapped offset is an address:
        #   mov esi, dword_1005B148[esi]
        if offset == 0 or idaapi.is_mapped(offset):
            continue

        yield Offset(offset), insn.ea