def generate_api_features(apiname, va):
    """
    Generate API features for a function name located at the given address.

    Features are deliberately over-generated to make matching easier.
    For `kernel32.CreateFileA` the following names are produced:

      - kernel32.CreateFileA
      - kernel32.CreateFile
      - CreateFileA
      - CreateFile

    args:
      apiname (str): API name, optionally prefixed with `<module>.`
      va: address associated with the name; yielded back unchanged with every feature

    yields:
      (API, va) pairs, one per generated name
    """
    # (kernel32.CreateFileA, 0x401000)
    yield API(apiname), va

    if is_aw_function(apiname):
        # (kernel32.CreateFile, 0x401000)
        yield API(apiname[:-1]), va

    if "." in apiname:
        # split on the *last* dot only: a two-way unpack of `split(".")` raises
        # ValueError as soon as the module part itself contains a dot.
        _, _, impname = apiname.rpartition(".")

        # strip modname to support importname-only matching
        # (CreateFileA, 0x401000)
        yield API(impname), va

        if is_aw_function(impname):
            # (CreateFile, 0x401000)
            yield API(impname[:-1]), va
def extract_insn_api_features(f, bb, insn):
    """
    Parse API features from the given instruction.

    Two situations are handled:

      - the instruction offset is itself a key of `f.apirefs`
      - the instruction has exactly one outgoing reference, and following it
        (through at most THUNK_CHAIN_DEPTH_DELTA single-instruction functions)
        reaches a function for which `isApiThunk()` is true

    args:
      f: function containing the instruction; `apirefs`, `outrefs` and `smda_report` are read
      bb: basic block containing the instruction (not used here)
      insn: instruction to inspect; only `offset` is read

    yields:
      (API, insn.offset) pairs
    """

    def api_features(api_entry):
        # entries are split as `<dll>!<api>`; the dll part is cut at its first `.` and lower-cased
        dll_name, api_name = api_entry.split("!")
        dll_name = dll_name.split(".")[0].lower()
        for name in capa.features.extractors.helpers.generate_symbols(dll_name, api_name):
            yield API(name), insn.offset

    if insn.offset in f.apirefs:
        yield from api_features(f.apirefs[insn.offset])
        return

    if insn.offset not in f.outrefs:
        return

    current_function = f
    current_instruction = insn
    for _ in range(THUNK_CHAIN_DEPTH_DELTA):
        if not current_function:
            return

        targets = current_function.outrefs[current_instruction.offset]
        if len(targets) != 1:
            return

        target = targets[0]
        referenced_function = current_function.smda_report.getFunction(target)
        if not referenced_function:
            return

        # TODO SMDA: implement this function for both jmp and call, checking if function has 1 instruction which refs an API
        if referenced_function.isApiThunk():
            api_entry = referenced_function.apirefs[target] if target in referenced_function.apirefs else None
            if api_entry:
                yield from api_features(api_entry)
            # stop once the thunk has been handled: continuing the loop with
            # unchanged state would only re-emit the very same features.
            return

        if referenced_function.num_instructions == 1 and referenced_function.num_outrefs == 1:
            # single-instruction forwarding function: hop to it and try again
            current_function = referenced_function
            current_instruction = list(referenced_function.getInstructions())[0]
            continue

        # neither an API thunk nor a forwarder: nothing further can be resolved
        return
def extract_insn_api_features(f, bb, insn):
    """
    Parse API features from a `call` or `jmp` instruction.

    args:
      f (IDA func_t)
      bb (IDA BasicBlock)
      insn (IDA insn_t)

    example:
      call dword [0x00473038]
    """
    if insn.get_canon_mnem() not in ("call", "jmp"):
        return

    for api_name in check_for_api_call(f.ctx, insn):
        module, _, function = api_name.rpartition(".")
        for symbol_name in capa.features.extractors.helpers.generate_symbols(module, function):
            yield API(symbol_name), insn.ea

    # extract IDA/FLIRT recognized API functions
    code_refs = tuple(idautils.CodeRefsFrom(insn.ea, False))
    if not code_refs:
        return

    callee_ea = code_refs[0]
    callee = idaapi.get_func(callee_ea)
    if not callee or callee.start_ea != callee_ea:
        # the reference does not land on the start of a function
        return

    if callee.flags & idaapi.FUNC_LIB:
        yield API(idaapi.get_name(callee.start_ea)), insn.ea
def extract_insn_api_features(f, bb, insn):
    """parse instruction API features

    yields an API feature for every entry of `unit.obj.bin.import_functions` that the
    instruction references, either directly or through a chain of functions that each
    consist of a single block holding a single instruction.

    args:
      f: function object; only `f.unit` (annotated as DataUnit) is read here
      bb: not used here
      insn: instruction; `mne`, `cr`, `dr` and `ea` are read here

    NOTE(review): the previous docstring described the arguments as IDA func_t / BasicBlock /
    insn_t, which does not match the attributes accessed below - confirm the actual types.
    NOTE(review): the block layout was reconstructed from a whitespace-collapsed source;
    confirm the nesting of the `for` loop and of the thunk-following `while` loop.

    example:
      call dword [0x00473038]
    """
    unit: DataUnit = f.unit
    # a mnemonic that is known to the syntax table but whose `jmp` attribute is falsy is skipped;
    # mnemonics missing from the table fall through and are still inspected.
    # presumably `jmp` marks control-transfer operations - TODO confirm
    if insn.mne in unit.syntax.operations:
        if not unit.syntax.operations[insn.mne].jmp:
            return
    # walk every code reference and data reference of the instruction
    for c in insn.cr + insn.dr:
        # direct reference to an import; the table is keyed by str(reference)
        if str(c) in unit.obj.bin.import_functions:
            module, func, _ = unit.obj.bin.import_functions[str(c)]
            # drop every '.dll' occurrence from the module name before generating symbols
            if '.dll' in module:
                module = module.replace('.dll', '')
            for symbol in helpers.generate_symbols(module, func):
                yield API(symbol), insn.ea
        # THUNK!!
        # follow the reference through at most THUNK_CHAIN_DEPTH_DELTA successful hops
        depth = 0
        _next = c
        while depth < THUNK_CHAIN_DEPTH_DELTA:
            # stop when the current reference is not a key of `unit.map_f`
            if _next not in unit.map_f:
                break
            c_f = unit.map_f[_next]
            # only functions made of exactly one block with exactly one instruction are followed
            if len(c_f.blocks) != 1:
                break
            if len(c_f.blocks[0].ins) != 1:
                break
            if len(c_f.blocks[0].ins[0].cr) == 1:
                # code reference of thunked function to the symbol
                _next = c_f.blocks[0].ins[0].cr[0]
            elif len(c_f.blocks[0].ins[0].dr) == 1:
                # data reference of thunked function to the symbol
                _next = c_f.blocks[0].ins[0].dr[0]
            else:
                break
            # the hop may have landed on an import; features are still attributed to `insn.ea`
            if str(_next) in unit.obj.bin.import_functions:
                module, func, _ = unit.obj.bin.import_functions[str(_next)]
                if '.dll' in module:
                    module = module.replace('.dll', '')
                for symbol in helpers.generate_symbols(module, func):
                    yield API(symbol), insn.ea
            depth += 1
def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]:
    """Yield API features for a call-like instruction whose operand value maps to a known import name."""
    call_like_opcodes = (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli)
    if insn.opcode not in call_like_opcodes:
        return

    # the import table is keyed by operand value; unknown operands map to ""
    imported: str = get_imports(f.ctx).get(insn.operand.value, "")
    if not imported:
        return

    if "::" in imported:
        # like System.IO.File::OpenRead - emitted verbatim
        yield API(imported), insn.offset
        return

    # like kernel32.CreateFileA - split at the last dot and emit every symbol variant
    dll, _, symbol = imported.rpartition(".")
    for variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
        yield API(variant), insn.offset
def extract_insn_api_features(f, bb, insn):
    """
    Parse API features from a `call` or `jmp` instruction.

    args:
      f (IDA func_t)
      bb (IDA BasicBlock)
      insn (IDA insn_t)

    example:
      call dword [0x00473038]
    """
    if insn.get_canon_mnem() not in ("call", "jmp"):
        return

    for api_name in check_for_api_call(f.ctx, insn):
        module, _, function = api_name.rpartition(".")
        for symbol_name in capa.features.extractors.helpers.generate_symbols(module, function):
            yield API(symbol_name), insn.ea

    # extract IDA/FLIRT recognized API functions
    code_refs = tuple(idautils.CodeRefsFrom(insn.ea, False))
    if not code_refs:
        return

    callee_ea = code_refs[0]
    callee = idaapi.get_func(callee_ea)
    if not callee or callee.start_ea != callee_ea:
        # the reference does not land on the start of a function
        return

    if callee.flags & idaapi.FUNC_LIB:
        library_name = idaapi.get_name(callee.start_ea)
        yield API(library_name), insn.ea

        if library_name.startswith("_"):
            # some linkers may prefix linked routines with a `_` to avoid name collisions.
            # extract features for both the mangled and un-mangled representations.
            # e.g. `_fwrite` -> `fwrite`
            # see: https://stackoverflow.com/a/2628384/87207
            yield API(library_name[1:]), insn.ea
def extract_insn_api_features(f, bb, insn):
    """
    Parse API features from the given instruction.

    Every name reported by `check_for_api_call` is split at its last dot into
    a module part and a symbol part, and one feature is emitted per generated symbol.

    args:
      f (IDA func_t)
      bb (IDA BasicBlock)
      insn (IDA insn_t)

    example:
      call dword [0x00473038]
    """
    generate_symbols = capa.features.extractors.helpers.generate_symbols
    for api_name in check_for_api_call(f.ctx, insn):
        module, _, function = api_name.rpartition(".")
        for symbol_name in generate_symbols(module, function):
            yield API(symbol_name), insn.ea
def extract_insn_api_features(f, bb, insn):
    """
    Parse API features from the given instruction.

    Only `call` instructions are considered. Depending on the kind of the first
    operand, the call target is resolved through the import table, through the
    "Thunk" function metadata, or through `resolve_indirect_call`.

    args:
      f: function containing the instruction; only `f.vw` (the workspace) is read
      bb: basic block containing the instruction (not used here)
      insn: instruction to inspect

    yields:
      (API, insn.va) pairs
    """
    # example:
    #
    #    call dword [0x00473038]
    if insn.mnem != "call":
        return

    def import_features(address):
        # emit every symbol variant of the import located at `address`, if there is one
        imports = get_imports(f.vw)
        if address in imports:
            dll, symbol = imports[address]
            for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
                yield API(name), insn.va

    oper = insn.opers[0]

    # traditional call via IAT
    if isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper):
        yield from import_features(oper.getOperAddr(insn))

    # call via thunk on x86,
    # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
    #
    # this is also how calls to internal functions may be decoded on x64.
    # see Lab21-01.exe_:0x140001178
    elif isinstance(oper, envi.archs.i386.disasm.i386PcRelOper):
        target = oper.getOperValue(insn)

        try:
            thunk = f.vw.getFunctionMeta(target, "Thunk")
        except vivisect.exc.InvalidFunction:
            return

        if not thunk:
            return

        dll, _, symbol = thunk.rpartition(".")

        # rewrite `ord<N>` as `#<N>`. require a purely numeric suffix so that
        # ordinary names that merely start with "ord" are left untouched.
        ordinal = symbol[len("ord"):]
        if symbol.startswith("ord") and ordinal.isdigit():
            symbol = "#" + ordinal

        for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
            yield API(name), insn.va

    # call via import on x64
    # see Lab21-01.exe_:0x14000118C
    elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper):
        yield from import_features(oper.getOperAddr(insn))

    elif isinstance(oper, envi.archs.i386.disasm.i386RegOper):
        try:
            (_, target) = resolve_indirect_call(f.vw, insn.va, insn=insn)
        except NotFoundError:
            # not able to resolve the indirect call, sorry
            return

        if target is None:
            # not able to resolve the indirect call, sorry
            return

        yield from import_features(target)
def extract_insn_api_features(f, bb, insn):
    """
    Parse API features from a `call` or `jmp` instruction.

    `jmp` instructions are ignored when the containing function carries "Thunk"
    metadata. The call target is resolved according to the kind of the first operand.
    """
    # example:
    #
    #    call dword [0x00473038]
    if insn.mnem not in ("call", "jmp"):
        return

    if insn.mnem == "jmp" and f.vw.getFunctionMeta(f.va, "Thunk"):
        return

    def import_features(import_table, address):
        # emit every symbol variant of the import found at `address`, if any
        if address in import_table:
            dll, symbol = import_table[address]
            for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
                yield API(name), insn.va

    first_operand = insn.opers[0]

    # traditional call via IAT
    if isinstance(first_operand, envi.archs.i386.disasm.i386ImmMemOper):
        address = first_operand.getOperAddr(insn)
        yield from import_features(get_imports(f.vw), address)

    # call via thunk on x86,
    # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
    #
    # this is also how calls to internal functions may be decoded on x64.
    # see Lab21-01.exe_:0x140001178
    #
    # follow chained thunks, e.g. in 82bf6347acf15e5d883715dc289d8a2b at 0x14005E0FF in
    # 0x140059342 (viv) / 0x14005E0C0 (IDA)
    # 14005E0FF call    j_ElfClearEventLogFileW (14005AAF8)
    #   14005AAF8 jmp     ElfClearEventLogFileW (14005E196)
    #     14005E196 jmp     cs:__imp_ElfClearEventLogFileW
    elif isinstance(first_operand, envi.archs.i386.disasm.i386PcRelOper):
        import_table = get_imports(f.vw)
        address = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, insn.va)

        for _ in range(THUNK_CHAIN_DEPTH_DELTA):
            if not address:
                return
            yield from import_features(import_table, address)
            address = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, address)

    # call via import on x64
    # see Lab21-01.exe_:0x14000118C
    elif isinstance(first_operand, envi.archs.amd64.disasm.Amd64RipRelOper):
        address = first_operand.getOperAddr(insn)
        yield from import_features(get_imports(f.vw), address)

    elif isinstance(first_operand, envi.archs.i386.disasm.i386RegOper):
        try:
            (_, address) = resolve_indirect_call(f.vw, insn.va, insn=insn)
        except NotFoundError:
            # not able to resolve the indirect call, sorry
            return

        if address is None:
            # not able to resolve the indirect call, sorry
            return

        yield from import_features(get_imports(f.vw), address)
def extract_insn_api_features(f, bb, insn):
    """
    Parse API features from a `call` or `jmp` instruction.

    `jmp` instructions are ignored when the containing function carries "Thunk"
    metadata. Depending on the kind of the first operand, the target is resolved
    through the import table, through FLIRT-recognized library functions,
    through a chain of thunks, or through `resolve_indirect_call`.

    args:
      f: function containing the instruction; `f.vw` (the workspace) and `f.va` are read
      bb: basic block containing the instruction (not used here)
      insn: instruction to inspect

    yields:
      (API, insn.va) pairs
    """
    # example:
    #
    #    call dword [0x00473038]
    if insn.mnem not in ("call", "jmp"):
        return

    if insn.mnem == "jmp":
        if f.vw.getFunctionMeta(f.va, "Thunk"):
            return

    # traditional call via IAT
    if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
        oper = insn.opers[0]
        target = oper.getOperAddr(insn)

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
                yield API(name), insn.va

    # call via thunk on x86,
    # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
    #
    # this is also how calls to internal functions may be decoded on x32 and x64.
    # see Lab21-01.exe_:0x140001178
    #
    # follow chained thunks, e.g. in 82bf6347acf15e5d883715dc289d8a2b at 0x14005E0FF in
    # 0x140059342 (viv) / 0x14005E0C0 (IDA)
    # 14005E0FF call    j_ElfClearEventLogFileW (14005AAF8)
    #   14005AAF8 jmp     ElfClearEventLogFileW (14005E196)
    #     14005E196 jmp     cs:__imp_ElfClearEventLogFileW
    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
        imports = get_imports(f.vw)

        target = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, insn.va)
        if not target:
            return

        if viv_utils.flirt.is_library_function(f.vw, target):
            name = viv_utils.get_function_name(f.vw, target)
            yield API(name), insn.va
            if name.startswith("_"):
                # some linkers may prefix linked routines with a `_` to avoid name collisions.
                # extract features for both the mangled and un-mangled representations.
                # e.g. `_fwrite` -> `fwrite`
                # see: https://stackoverflow.com/a/2628384/87207
                yield API(name[1:]), insn.va
            return

        for _ in range(THUNK_CHAIN_DEPTH_DELTA):
            if target in imports:
                dll, symbol = imports[target]
                for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
                    yield API(name), insn.va

            # if jump leads to an ENDBRANCH instruction, skip it.
            # getByteDef hands back the buffer of the whole containing memory map
            # together with the offset of `target` inside it, so the prefix test
            # has to start at that offset rather than at the beginning of the map.
            offset, map_bytes = f.vw.getByteDef(target)
            if map_bytes.startswith(b"\xf3\x0f\x1e", offset):
                target += 4

            target = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, target)
            if not target:
                return

    # call via import on x64
    # see Lab21-01.exe_:0x14000118C
    elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
        op = insn.opers[0]
        target = op.getOperAddr(insn)

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
                yield API(name), insn.va

    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
        try:
            (_, target) = resolve_indirect_call(f.vw, insn.va, insn=insn)
        except NotFoundError:
            # not able to resolve the indirect call, sorry
            return

        if target is None:
            # not able to resolve the indirect call, sorry
            return

        imports = get_imports(f.vw)
        if target in imports:
            dll, symbol = imports[target]
            for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
                yield API(name), insn.va