Ejemplo n.º 1
0
def generate_api_features(apiname, va):
    """
    for a given function name and address, generate API features.
    we over-generate features to make matching easier.
    for `kernel32.CreateFileA` these are, in order:
      - kernel32.CreateFileA
      - kernel32.CreateFile
      - CreateFileA
      - CreateFile

    args:
      apiname (str): API name, optionally prefixed with a module name and `.`
      va: address that is paired with every generated feature

    yields:
      (API, va) tuples
    """
    # (kernel32.CreateFileA, 0x401000)
    yield API(apiname), va

    if is_aw_function(apiname):
        # drop the trailing character of the name
        # (kernel32.CreateFile, 0x401000)
        yield API(apiname[:-1]), va

    if "." in apiname:
        # split on the *last* dot only: a two-way unpack of `split(".")`
        # raises ValueError as soon as the name contains more than one dot.
        _, _, impname = apiname.rpartition(".")
        # strip modname to support importname-only matching
        # (CreateFileA, 0x401000)
        yield API(impname), va

        if is_aw_function(impname):
            # (CreateFile, 0x401000)
            yield API(impname[:-1]), va
Ejemplo n.º 2
0
Archivo: insn.py Proyecto: skysbsb/capa
def extract_insn_api_features(f, bb, insn):
    """
    parse API features from the given instruction.

    when the instruction itself has an API reference, that API is reported.
    otherwise, when the instruction has outgoing references, the single
    outgoing reference is followed through at most THUNK_CHAIN_DEPTH_DELTA
    functions, looking for a function that reports itself as an API thunk.

    args:
      f: function containing the instruction; `apirefs`, `outrefs` and
         `smda_report` are read from it
      bb: basic block; unused
      insn: instruction to inspect

    yields:
      (API, insn.offset) tuples
    """
    if insn.offset in f.apirefs:
        api_entry = f.apirefs[insn.offset]
        # reformat: entry is `<dll>!<api>`; keep the DLL name up to its first `.`, lowercased
        dll_name, api_name = api_entry.split("!")
        dll_name = dll_name.split(".")[0]
        dll_name = dll_name.lower()
        for name in capa.features.extractors.helpers.generate_symbols(dll_name, api_name):
            yield API(name), insn.offset
    elif insn.offset in f.outrefs:
        current_function = f
        current_instruction = insn
        for _ in range(THUNK_CHAIN_DEPTH_DELTA):
            # every `return` below ends the walk as soon as it cannot advance.
            # without this, the unchanged state would be re-evaluated on each
            # remaining iteration and the same features yielded repeatedly.
            if current_instruction.offset not in current_function.outrefs:
                return

            targets = current_function.outrefs[current_instruction.offset]
            if len(targets) != 1:
                # only unambiguous references are followed
                return

            target = targets[0]
            referenced_function = current_function.smda_report.getFunction(target)
            if not referenced_function:
                return

            # TODO SMDA: implement this function for both jmp and call, checking if function has 1 instruction which refs an API
            if referenced_function.isApiThunk():
                api_entry = referenced_function.apirefs[target] if target in referenced_function.apirefs else None
                if api_entry:
                    # reformat, same as for direct API references above
                    dll_name, api_name = api_entry.split("!")
                    dll_name = dll_name.split(".")[0]
                    dll_name = dll_name.lower()
                    for name in capa.features.extractors.helpers.generate_symbols(dll_name, api_name):
                        yield API(name), insn.offset
                return

            if not (referenced_function.num_instructions == 1 and referenced_function.num_outrefs == 1):
                # not a single-instruction stub, nothing further to follow
                return

            # single-instruction stub: continue the walk from its only instruction
            current_function = referenced_function
            current_instruction = list(referenced_function.getInstructions())[0]
Ejemplo n.º 3
0
def extract_insn_api_features(f, bb, insn):
    """extract API features referenced by a call or jmp instruction

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    example:
        call dword [0x00473038]
    """
    mnemonic = insn.get_canon_mnem()
    if mnemonic not in ("call", "jmp"):
        return

    # APIs reported by check_for_api_call, expanded into all symbol variants
    for qualified_name in check_for_api_call(f.ctx, insn):
        module, _, routine = qualified_name.rpartition(".")
        for variant in capa.features.extractors.helpers.generate_symbols(module, routine):
            yield API(variant), insn.ea

    # additionally report callees flagged as library functions (IDA/FLIRT)
    code_refs = tuple(idautils.CodeRefsFrom(insn.ea, False))
    if code_refs:
        callee_ea = code_refs[0]
        callee = idaapi.get_func(callee_ea)
        # the reference must land exactly on the start of a function
        if callee and callee.start_ea == callee_ea:
            if callee.flags & idaapi.FUNC_LIB:
                yield API(idaapi.get_name(callee.start_ea)), insn.ea
Ejemplo n.º 4
0
def extract_insn_api_features(f, bb, insn):
    """parse instruction API features

    yields an API feature for every symbol variant of each imported function
    that the instruction references, either directly or through a chain of
    functions consisting of one block with one instruction (thunks).

    args:
        f: function being analyzed; only `f.unit` is read
        bb: basic block; unused
        insn: instruction being analyzed

    example:
        call dword [0x00473038]
    """

    data_unit: DataUnit = f.unit

    # a mnemonic that is known to the syntax table but not flagged `jmp` is skipped;
    # mnemonics missing from the table fall through and are inspected.
    operations = data_unit.syntax.operations
    if insn.mne in operations and not operations[insn.mne].jmp:
        return

    def import_features(reference):
        # features for `reference` when its string form names an imported function
        imports = data_unit.obj.bin.import_functions
        key = str(reference)
        if key in imports:
            module, func, _ = imports[key]
            module = module.replace('.dll', '')
            for symbol in helpers.generate_symbols(module, func):
                yield API(symbol), insn.ea

    # code references first, then data references
    for reference in insn.cr + insn.dr:
        yield from import_features(reference)

        # THUNK!!
        hop = reference
        for _ in range(THUNK_CHAIN_DEPTH_DELTA):
            if hop not in data_unit.map_f:
                break
            blocks = data_unit.map_f[hop].blocks
            if len(blocks) != 1:
                break
            instructions = blocks[0].ins
            if len(instructions) != 1:
                break
            stub = instructions[0]
            if len(stub.cr) == 1:
                # code reference of thunked function to the symbol
                hop = stub.cr[0]
            elif len(stub.dr) == 1:
                # data reference of thunked function to the symbol
                hop = stub.dr[0]
            else:
                break
            yield from import_features(hop)
Ejemplo n.º 5
0
def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody,
                              insn: Instruction) -> Iterator[Tuple[API, int]]:
    """yield API features for call-like instructions whose operand is a known import"""
    call_like = (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli)
    if insn.opcode not in call_like:
        return

    # imports are looked up by the operand's value; unknown operands map to ""
    imported: str = get_imports(f.ctx).get(insn.operand.value, "")
    if not imported:
        return

    if "::" not in imported:
        # like kernel32.CreateFileA
        module, _, routine = imported.rpartition(".")
        variants = capa.features.extractors.helpers.generate_symbols(module, routine)
        for variant in variants:
            yield API(variant), insn.offset
        return

    # like System.IO.File::OpenRead
    yield API(imported), insn.offset
Ejemplo n.º 6
0
Archivo: insn.py Proyecto: clayne/capa
def extract_insn_api_features(f, bb, insn):
    """extract API features referenced by a call or jmp instruction

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    example:
        call dword [0x00473038]
    """
    mnemonic = insn.get_canon_mnem()
    if mnemonic not in ("call", "jmp"):
        return

    # APIs reported by check_for_api_call, expanded into all symbol variants
    for qualified_name in check_for_api_call(f.ctx, insn):
        module, _, routine = qualified_name.rpartition(".")
        variants = capa.features.extractors.helpers.generate_symbols(module, routine)
        for variant in variants:
            yield API(variant), insn.ea

    # additionally report callees flagged as library functions (IDA/FLIRT)
    code_refs = tuple(idautils.CodeRefsFrom(insn.ea, False))
    if code_refs:
        callee_ea = code_refs[0]
        callee = idaapi.get_func(callee_ea)
        # the reference must land exactly on the start of a function
        if callee and callee.start_ea == callee_ea:
            if callee.flags & idaapi.FUNC_LIB:
                library_name = idaapi.get_name(callee.start_ea)
                yield API(library_name), insn.ea
                if library_name.startswith("_"):
                    # some linkers may prefix linked routines with a `_` to avoid name collisions.
                    # report the name without the prefix as well, e.g. `_fwrite` -> `fwrite`
                    # see: https://stackoverflow.com/a/2628384/87207
                    yield API(library_name[1:]), insn.ea
Ejemplo n.º 7
0
Archivo: insn.py Proyecto: wisdark/capa
def extract_insn_api_features(f, bb, insn):
    """yield an API feature for every symbol variant of each API call found at the instruction

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    example:
        call dword [0x00473038]
    """
    for qualified_name in check_for_api_call(f.ctx, insn):
        # everything before the last `.` is the module, the rest is the symbol
        module, _, routine = qualified_name.rpartition(".")
        variants = capa.features.extractors.helpers.generate_symbols(module, routine)
        yield from ((API(variant), insn.ea) for variant in variants)
Ejemplo n.º 8
0
def extract_insn_api_features(f, bb, insn):
    """yield API features for a call instruction, based on the kind of its first operand."""

    # example:
    #
    #    call dword [0x00473038]

    if insn.mnem != "call":
        return

    operand = insn.opers[0]

    if isinstance(operand, envi.archs.i386.disasm.i386ImmMemOper):
        # traditional call via IAT
        import_va = operand.getOperAddr(insn)

    elif isinstance(operand, envi.archs.i386.disasm.i386PcRelOper):
        # call via thunk on x86,
        # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
        #
        # this is also how calls to internal functions may be decoded on x64.
        # see Lab21-01.exe_:0x140001178
        callee_va = operand.getOperValue(insn)

        try:
            thunk_name = f.vw.getFunctionMeta(callee_va, "Thunk")
        except vivisect.exc.InvalidFunction:
            return

        if thunk_name:
            module, _, routine = thunk_name.rpartition(".")
            if routine.startswith("ord"):
                # rewrite `ord<N>` as `#<N>`
                routine = "#" + routine[len("ord"):]
            variants = capa.features.extractors.helpers.generate_symbols(module, routine)
            for variant in variants:
                yield API(variant), insn.va
        return

    elif isinstance(operand, envi.archs.amd64.disasm.Amd64RipRelOper):
        # call via import on x64
        # see Lab21-01.exe_:0x14000118C
        import_va = operand.getOperAddr(insn)

    elif isinstance(operand, envi.archs.i386.disasm.i386RegOper):
        try:
            (_, import_va) = resolve_indirect_call(f.vw, insn.va, insn=insn)
        except NotFoundError:
            # not able to resolve the indirect call, sorry
            return

        if import_va is None:
            # not able to resolve the indirect call, sorry
            return

    else:
        # any other operand kind produces no features
        return

    # shared tail of the IAT, RIP-relative and register cases:
    # the resolved address must be a known import.
    import_table = get_imports(f.vw)
    if import_va in import_table:
        module, routine = import_table[import_va]
        variants = capa.features.extractors.helpers.generate_symbols(module, routine)
        for variant in variants:
            yield API(variant), insn.va
Ejemplo n.º 9
0
def extract_insn_api_features(f, bb, insn):
    """yield API features for a call or jmp instruction, based on the kind of its first operand."""

    # example:
    #
    #    call dword [0x00473038]

    if insn.mnem not in ("call", "jmp"):
        return

    # a jmp inside a function that is itself marked as a thunk is not reported
    if insn.mnem == "jmp" and f.vw.getFunctionMeta(f.va, "Thunk"):
        return

    operand = insn.opers[0]

    if isinstance(operand, envi.archs.i386.disasm.i386ImmMemOper):
        # traditional call via IAT
        import_va = operand.getOperAddr(insn)

    elif isinstance(operand, envi.archs.i386.disasm.i386PcRelOper):
        # call via thunk on x86,
        # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
        #
        # this is also how calls to internal functions may be decoded on x64.
        # see Lab21-01.exe_:0x140001178
        #
        # follow chained thunks, e.g. in 82bf6347acf15e5d883715dc289d8a2b at 0x14005E0FF in
        # 0x140059342 (viv) / 0x14005E0C0 (IDA)
        # 14005E0FF call    j_ElfClearEventLogFileW (14005AAF8)
        #   14005AAF8 jmp     ElfClearEventLogFileW (14005E196)
        #     14005E196 jmp     cs:__imp_ElfClearEventLogFileW
        import_table = get_imports(f.vw)
        hop = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, insn.va)
        if not hop:
            return

        for _ in range(THUNK_CHAIN_DEPTH_DELTA):
            if hop in import_table:
                module, routine = import_table[hop]
                variants = capa.features.extractors.helpers.generate_symbols(module, routine)
                for variant in variants:
                    yield API(variant), insn.va

            # move on to whatever the current location references
            hop = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, hop)
            if not hop:
                return
        return

    elif isinstance(operand, envi.archs.amd64.disasm.Amd64RipRelOper):
        # call via import on x64
        # see Lab21-01.exe_:0x14000118C
        import_va = operand.getOperAddr(insn)

    elif isinstance(operand, envi.archs.i386.disasm.i386RegOper):
        try:
            (_, import_va) = resolve_indirect_call(f.vw, insn.va, insn=insn)
        except NotFoundError:
            # not able to resolve the indirect call, sorry
            return

        if import_va is None:
            # not able to resolve the indirect call, sorry
            return

    else:
        # any other operand kind produces no features
        return

    # shared tail of the IAT, RIP-relative and register cases:
    # the resolved address must be a known import.
    import_table = get_imports(f.vw)
    if import_va in import_table:
        module, routine = import_table[import_va]
        variants = capa.features.extractors.helpers.generate_symbols(module, routine)
        for variant in variants:
            yield API(variant), insn.va
Ejemplo n.º 10
0
Archivo: insn.py Proyecto: clayne/capa
def extract_insn_api_features(f, bb, insn):
    """
    parse API features from the given instruction.

    only `call` and `jmp` instructions are inspected, and a `jmp` inside a
    function that is itself marked as a thunk is skipped. the first operand
    decides how the target is resolved:
      - immediate memory operand: address looked up in the import table
      - pc-relative operand: library function name, or import reached by
        following up to THUNK_CHAIN_DEPTH_DELTA code references
      - rip-relative operand: address looked up in the import table
      - register operand: target of the resolved indirect call looked up
        in the import table

    args:
      f: function containing the instruction; `f.vw` and `f.va` are read
      bb: basic block; unused
      insn: instruction to inspect

    yields:
      (API, insn.va) tuples
    """

    # example:
    #
    #    call dword [0x00473038]
    if insn.mnem not in ("call", "jmp"):
        return

    if insn.mnem == "jmp":
        if f.vw.getFunctionMeta(f.va, "Thunk"):
            return

    # traditional call via IAT
    if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
        oper = insn.opers[0]
        target = oper.getOperAddr(insn)

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(
                    dll, symbol):
                yield API(name), insn.va

    # call via thunk on x86,
    # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
    #
    # this is also how calls to internal functions may be decoded on x32 and x64.
    # see Lab21-01.exe_:0x140001178
    #
    # follow chained thunks, e.g. in 82bf6347acf15e5d883715dc289d8a2b at 0x14005E0FF in
    # 0x140059342 (viv) / 0x14005E0C0 (IDA)
    # 14005E0FF call    j_ElfClearEventLogFileW (14005AAF8)
    #   14005AAF8 jmp     ElfClearEventLogFileW (14005E196)
    #     14005E196 jmp     cs:__imp_ElfClearEventLogFileW

    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
        imports = get_imports(f.vw)
        target = capa.features.extractors.viv.helpers.get_coderef_from(
            f.vw, insn.va)
        if not target:
            return

        if viv_utils.flirt.is_library_function(f.vw, target):
            name = viv_utils.get_function_name(f.vw, target)
            yield API(name), insn.va
            if name.startswith("_"):
                # some linkers may prefix linked routines with a `_` to avoid name collisions.
                # extract features for both the mangled and un-mangled representations.
                # e.g. `_fwrite` -> `fwrite`
                # see: https://stackoverflow.com/a/2628384/87207
                yield API(name[1:]), insn.va
            return

        for _ in range(THUNK_CHAIN_DEPTH_DELTA):
            if target in imports:
                dll, symbol = imports[target]
                for name in capa.features.extractors.helpers.generate_symbols(
                        dll, symbol):
                    yield API(name), insn.va

            # if jump leads to an ENDBRANCH instruction, skip it.
            # getByteDef returns (offset, bytes) where `bytes` is the whole memory
            # map containing `target` and `offset` is the position of `target`
            # within it, so the prefix must be tested at `offset`; testing the
            # start of `bytes` would inspect the first bytes of the map instead.
            # NOTE(review): relies on vivisect's getByteDef contract - confirm.
            offset, map_bytes = f.vw.getByteDef(target)
            if map_bytes.startswith(b"\xf3\x0f\x1e", offset):
                target += 4

            target = capa.features.extractors.viv.helpers.get_coderef_from(
                f.vw, target)
            if not target:
                return

    # call via import on x64
    # see Lab21-01.exe_:0x14000118C
    elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
        op = insn.opers[0]
        target = op.getOperAddr(insn)

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(
                    dll, symbol):
                yield API(name), insn.va

    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
        try:
            (_, target) = resolve_indirect_call(f.vw, insn.va, insn=insn)
        except NotFoundError:
            # not able to resolve the indirect call, sorry
            return

        if target is None:
            # not able to resolve the indirect call, sorry
            return

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(
                    dll, symbol):
                yield API(name), insn.va