Exemple #1
0
def extract_insn_api_features(f, bb, insn):
    """parse API features from the given instruction."""

    # example:
    #
    #    call dword [0x00473038]

    if insn.mnem not in ("call", "jmp"):
        return

    if insn.mnem == "jmp":
        if f.vw.getFunctionMeta(f.va, "Thunk"):
            return

    # traditional call via IAT
    if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
        oper = insn.opers[0]
        target = oper.getOperAddr(insn)

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(
                    dll, symbol):
                yield API(name), insn.va

    # call via thunk on x86,
    # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
    #
    # this is also how calls to internal functions may be decoded on x64.
    # see Lab21-01.exe_:0x140001178
    #
    # follow chained thunks, e.g. in 82bf6347acf15e5d883715dc289d8a2b at 0x14005E0FF in
    # 0x140059342 (viv) / 0x14005E0C0 (IDA)
    # 14005E0FF call    j_ElfClearEventLogFileW (14005AAF8)
    #   14005AAF8 jmp     ElfClearEventLogFileW (14005E196)
    #     14005E196 jmp     cs:__imp_ElfClearEventLogFileW

    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
        imports = get_imports(f.vw)
        target = capa.features.extractors.viv.helpers.get_coderef_from(
            f.vw, insn.va)
        if not target:
            return

        for _ in range(THUNK_CHAIN_DEPTH_DELTA):
            if target in imports:
                dll, symbol = imports[target]
                for name in capa.features.extractors.helpers.generate_symbols(
                        dll, symbol):
                    yield API(name), insn.va

            target = capa.features.extractors.viv.helpers.get_coderef_from(
                f.vw, target)
            if not target:
                return

    # call via import on x64
    # see Lab21-01.exe_:0x14000118C
    elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
        op = insn.opers[0]
        target = op.getOperAddr(insn)

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(
                    dll, symbol):
                yield API(name), insn.va

    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
        try:
            (_, target) = resolve_indirect_call(f.vw, insn.va, insn=insn)
        except NotFoundError:
            # not able to resolve the indirect call, sorry
            return

        if target is None:
            # not able to resolve the indirect call, sorry
            return

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(
                    dll, symbol):
                yield API(name), insn.va
Exemple #2
0
def extract_insn_api_features(f, bb, insn):
    """parse API features from the given instruction."""

    # example:
    #
    #    call dword [0x00473038]

    if insn.mnem != "call":
        return

    # traditional call via IAT
    if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
        oper = insn.opers[0]
        target = oper.getOperAddr(insn)

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(
                    dll, symbol):
                yield API(name), insn.va

    # call via thunk on x86,
    # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
    #
    # this is also how calls to internal functions may be decoded on x64.
    # see Lab21-01.exe_:0x140001178
    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
        target = insn.opers[0].getOperValue(insn)

        try:
            thunk = f.vw.getFunctionMeta(target, "Thunk")
        except vivisect.exc.InvalidFunction:
            return
        else:
            if thunk:
                dll, _, symbol = thunk.rpartition(".")
                if symbol.startswith("ord"):
                    symbol = "#" + symbol[len("ord"):]
                for name in capa.features.extractors.helpers.generate_symbols(
                        dll, symbol):
                    yield API(name), insn.va

    # call via import on x64
    # see Lab21-01.exe_:0x14000118C
    elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
        op = insn.opers[0]
        target = op.getOperAddr(insn)

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(
                    dll, symbol):
                yield API(name), insn.va

    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
        try:
            (_, target) = resolve_indirect_call(f.vw, insn.va, insn=insn)
        except NotFoundError:
            # not able to resolve the indirect call, sorry
            return

        if target is None:
            # not able to resolve the indirect call, sorry
            return

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(
                    dll, symbol):
                yield API(name), insn.va
Exemple #3
0
def extract_insn_api_features(f, bb, insn):
    """parse API features from the given instruction."""

    # example:
    #
    #    call dword [0x00473038]
    if insn.mnem not in ("call", "jmp"):
        return

    if insn.mnem == "jmp":
        if f.vw.getFunctionMeta(f.va, "Thunk"):
            return

    # traditional call via IAT
    if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
        oper = insn.opers[0]
        target = oper.getOperAddr(insn)

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(
                    dll, symbol):
                yield API(name), insn.va

    # call via thunk on x86,
    # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
    #
    # this is also how calls to internal functions may be decoded on x32 and x64.
    # see Lab21-01.exe_:0x140001178
    #
    # follow chained thunks, e.g. in 82bf6347acf15e5d883715dc289d8a2b at 0x14005E0FF in
    # 0x140059342 (viv) / 0x14005E0C0 (IDA)
    # 14005E0FF call    j_ElfClearEventLogFileW (14005AAF8)
    #   14005AAF8 jmp     ElfClearEventLogFileW (14005E196)
    #     14005E196 jmp     cs:__imp_ElfClearEventLogFileW

    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
        imports = get_imports(f.vw)
        target = capa.features.extractors.viv.helpers.get_coderef_from(
            f.vw, insn.va)
        if not target:
            return

        if viv_utils.flirt.is_library_function(f.vw, target):
            name = viv_utils.get_function_name(f.vw, target)
            yield API(name), insn.va
            if name.startswith("_"):
                # some linkers may prefix linked routines with a `_` to avoid name collisions.
                # extract features for both the mangled and un-mangled representations.
                # e.g. `_fwrite` -> `fwrite`
                # see: https://stackoverflow.com/a/2628384/87207
                yield API(name[1:]), insn.va
            return

        for _ in range(THUNK_CHAIN_DEPTH_DELTA):
            if target in imports:
                dll, symbol = imports[target]
                for name in capa.features.extractors.helpers.generate_symbols(
                        dll, symbol):
                    yield API(name), insn.va

            # if jump leads to an ENDBRANCH instruction, skip it
            if f.vw.getByteDef(target)[1].startswith(b"\xf3\x0f\x1e"):
                target += 4

            target = capa.features.extractors.viv.helpers.get_coderef_from(
                f.vw, target)
            if not target:
                return

    # call via import on x64
    # see Lab21-01.exe_:0x14000118C
    elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
        op = insn.opers[0]
        target = op.getOperAddr(insn)

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(
                    dll, symbol):
                yield API(name), insn.va

    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
        try:
            (_, target) = resolve_indirect_call(f.vw, insn.va, insn=insn)
        except NotFoundError:
            # not able to resolve the indirect call, sorry
            return

        if target is None:
            # not able to resolve the indirect call, sorry
            return

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(
                    dll, symbol):
                yield API(name), insn.va