def extract_insn_peb_access_characteristic_features(f, bb, insn):
    """
    Yield a "peb access" characteristic for PEB reads made through a segment prefix.

    Recognizes fs:[0x30] (x86) and gs:[0x60] (x64) on `push` and `mov` instructions.

    args:
      f: enclosing function (not read here)
      bb: enclosing basic block (not read here)
      insn: instruction exposing `mnem`, `getPrefixName()`, `opers` and `va`

    yields:
      (Characteristic("peb access"), insn.va) once per matching operand
    """
    # TODO handle where fs/gs are loaded into a register or onto the stack and used later

    def _reads_offset(oper, disasm, offset):
        # register-relative operand with the wanted displacement,
        # or absolute memory operand at the wanted address
        via_register = isinstance(oper, disasm.i386RegMemOper) and oper.disp == offset
        return via_register or (isinstance(oper, disasm.i386ImmMemOper) and oper.imm == offset)

    if insn.mnem not in ["push", "mov"]:
        return

    if "fs" in insn.getPrefixName():
        # examples
        #
        #     IDA: mov     eax, large fs:30h
        #     viv: fs: mov eax,dword [0x00000030]  ; i386ImmMemOper
        #     IDA: push    large dword ptr fs:30h
        #     viv: fs: push dword [0x00000030]
        #          fs: push dword [eax + 0x30]  ; i386RegMemOper, with eax = 0
        for operand in insn.opers:
            if _reads_offset(operand, envi.archs.i386.disasm, 0x30):
                yield Characteristic("peb access"), insn.va
        return

    if "gs" in insn.getPrefixName():
        for operand in insn.opers:
            if _reads_offset(operand, envi.archs.amd64.disasm, 0x60):
                yield Characteristic("peb access"), insn.va
def extract_function_calls_from(f, bb, insn):
    """
    Yield the target of a `call` instruction, plus a marker when the call is recursive.

    args:
      f: function being analyzed; only `f.va` is read
      bb: enclosing basic block (not read here)
      insn: instruction exposing `mnem` and `opers`

    yields:
      (Characteristic("calls from"), target) when the first operand is one of
      the handled operand kinds, then (Characteristic("recursive call"), target)
      when that target is the function's own address
    """
    if insn.mnem != "call":
        return

    callee = insn.opers[0]

    if isinstance(callee, envi.archs.i386.disasm.i386ImmMemOper):
        # traditional call via IAT, x32
        target = callee.getOperAddr(insn)
    elif isinstance(callee, envi.archs.i386.disasm.i386PcRelOper):
        # call via thunk on x86,
        # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
        #
        # call to internal function on x64
        # see Lab21-01.exe_:0x140001178
        target = callee.getOperValue(insn)
    elif isinstance(callee, envi.archs.amd64.disasm.Amd64RipRelOper):
        # call via IAT, x64
        target = callee.getOperAddr(insn)
    else:
        # any other operand kind: no target to report
        return

    yield Characteristic("calls from"), target

    # a resolved target equal to the function address means the function calls itself
    if target and target == f.va:
        yield Characteristic("recursive call"), target
def extract_function_indirect_call_characteristic_features(f, bb, insn):
    """extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
    does not include calls like => call ds:dword_ABD4974

    most relevant at the function or basic block scope; however, it's most
    efficient to extract at the instruction scope

    args:
        f: enclosing function (not read here)
        bb: enclosing basic block (not read here)
        insn: instruction exposing `mne` (mnemonic string), `oprs` (operand
            strings), `oprs_tp` (per-operand type info handed to
            is_mem_ref/is_reg) and `ea`

    yields:
        (Characteristic("indirect call"), insn.ea), at most once per instruction
    """
    if "CALL" not in insn.mne:
        return
    if len(insn.oprs) == 0:
        return

    opr = insn.oprs[0].lower()

    # operand is a plain hex literal -- presumably a direct call target
    if opr.startswith("0x"):
        return
    # rip-relative qword memory operand -- presumably a call through a fixed slot (e.g. IAT on x64)
    if "qword ptr" in opr and "rip" in opr:
        return
    # dword memory operand at an absolute hex address
    if opr.startswith("dword ptr [0x"):
        return
    # NOTE(review): "ptr_" looks like a named pointer symbol -- confirm against the disassembler output
    if "ptr_" in opr:
        return

    # emit once even when the operand satisfies both predicates
    opr_type = insn.oprs_tp[0]
    if is_mem_ref(opr, opr_type) or is_reg(opr, opr_type):
        yield Characteristic("indirect call"), insn.ea
def extract_insn_segment_access_features(f, bb, insn):
    """
    Report fs/gs segment access found in an instruction's operand text.

    args:
      f: enclosing function (not read here)
      bb: enclosing basic block (not read here)
      insn: instruction exposing `operands` (comma-separated string) and `offset`

    yields:
      (Characteristic("fs access"), insn.offset) or
      (Characteristic("gs access"), insn.offset), one per matching operand
    """
    for piece in insn.operands.split(","):
        text = piece.strip()
        if "fs:" in text:
            label = "fs access"
        elif "gs:" in text:
            label = "gs access"
        else:
            # operand does not name either segment
            continue
        yield Characteristic(label), insn.offset
def extract_insn_segment_access_features(f, bb, insn):
    """
    Report fs/gs segment access found in an instruction's operands.

    args:
      f: enclosing function (not read here)
      bb: enclosing basic block (not read here)
      insn: instruction exposing `oprs` (iterable of operand strings) and `ea`

    yields:
      (Characteristic("fs access"), insn.ea) or
      (Characteristic("gs access"), insn.ea), one per matching operand
    """
    for raw_operand in insn.oprs:
        # compare case-insensitively
        text = raw_operand.lower()
        if "fs:" in text:
            label = "fs access"
        elif "gs:" in text:
            label = "gs access"
        else:
            continue
        yield Characteristic(label), insn.ea
def extract_insn_segment_access_features(f, bb, insn):
    """
    parse the instruction for access to fs or gs

    args:
      f: enclosing function (not read here)
      bb: enclosing basic block (not read here)
      insn: instruction exposing `getPrefixName()` and `va`

    yields:
      (Characteristic("fs access"), insn.va) and/or
      (Characteristic("gs access"), insn.va)
    """
    # tolerate a missing prefix name
    prefix = insn.getPrefixName() or ""

    # Substring match, like the peb access extractor for this backend: the
    # prefix name may describe more than one prefix (e.g. a rep/lock prefix
    # next to the segment override), which an equality test would miss.
    if "fs" in prefix:
        yield Characteristic("fs access"), insn.va

    if "gs" in prefix:
        yield Characteristic("gs access"), insn.va
def extract_function_calls_from(f, bb, insn):
    """
    Yield call targets recorded for a `call` instruction.

    args:
      f: function exposing `outrefs` and `apirefs` (both tested with `in` and
         keyed by instruction offset) and `offset`
      bb: enclosing basic block (not read here)
      insn: instruction exposing `mnemonic` and `offset`

    yields:
      (Characteristic("calls from"), target) for each outgoing reference,
      (Characteristic("recursive call"), target) when a target is the function itself,
      (Characteristic("calls from"), insn.offset) when the instruction is in `apirefs`
    """
    if insn.mnemonic != "call":
        return

    call_site = insn.offset

    if call_site in f.outrefs:
        for callee in f.outrefs[call_site]:
            yield Characteristic("calls from"), callee

            # a reference back to the function's own offset marks recursion
            if callee == f.offset:
                yield Characteristic("recursive call"), callee

    if call_site in f.apirefs:
        yield Characteristic("calls from"), call_site
def extract_insn_peb_access_characteristic_features(f, bb, insn):
    """
    parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64

    args:
      f: enclosing function (not read here)
      bb: enclosing basic block (not read here)
      insn: instruction exposing `mnemonic`, `operands` (comma-separated
            string) and `offset`

    yields:
      (Characteristic("peb access"), insn.offset) once per matching operand
    """
    import re

    if insn.mnemonic not in ["push", "mov"]:
        return

    for operand in insn.operands.split(","):
        # Match the offset as a whole number: a plain substring test would
        # also accept e.g. fs:[0x300] or gs:[0x608], which are not the PEB slot.
        if "fs:" in operand and re.search(r"\b0x30\b", operand):
            yield Characteristic("peb access"), insn.offset
        elif "gs:" in operand and re.search(r"\b0x60\b", operand):
            yield Characteristic("peb access"), insn.offset
def extract_function_calls(f):
    """
    Yield the call relationships of a function.

    args:
      f: function exposing `addr_start`, `calls` (iterable of callee addresses)
         and `unit`; the unit exposes `map_f` (address -> function) and
         `map_f_xcall` (function address -> iterable of calling functions)

    yields:
      (Characteristic("calls from"), address) for every callee other than the
      function itself, and (Characteristic("calls to"), address) for every
      caller that is present in `unit.map_f`
    """
    import logging

    unit: DataUnit = f.unit

    for callee in f.calls:
        # calls to the function's own start address are not reported here
        if f.addr_start == callee:
            continue
        if callee in unit.map_f:
            yield Characteristic("calls from"), unit.map_f[callee].addr_start
        else:
            yield Characteristic("calls from"), callee

    for caller in unit.map_f_xcall[f.addr_start]:
        if caller.addr_start in unit.map_f:
            yield Characteristic("calls to"), caller.addr_start
        else:
            # report the inconsistency through logging instead of printing to stdout
            logging.getLogger(__name__).warning(
                "caller %s of function %s is not a known function, skipping",
                caller.addr_start,
                f.addr_start,
            )
def extract_function_switch(f):
    """
    Yield a "switch" characteristic when the function is among those reported
    by get_functions_with_switch for its workspace.

    method can be optimized

    args:
      f: function exposing `va` and `vw`

    yields:
      (Characteristic("switch"), f.va)
    """
    switch_functions = get_functions_with_switch(f.vw)
    if f.va not in switch_functions:
        return

    yield Characteristic("switch"), f.va
def extract_insn_cross_section_cflow(f, bb, insn):
    """
    Report a CALL or JMP whose destination lies in a different section.

    args:
      f: function exposing `vw`
      bb: enclosing basic block (not read here)
      insn: instruction exposing `getBranches()`, `mnem`, `opers` and `va`

    yields:
      (Characteristic("cross section flow"), insn.va) for each non-fallthrough
      branch whose section differs from the instruction's own section
    """
    for branch_va, branch_flags in insn.getBranches():
        # plain fallthrough is not a transfer of interest
        if branch_flags & envi.BR_FALL:
            continue

        try:
            if insn.mnem == "call":
                first = insn.opers[0]
                # 32-bit (absolute memory operand) and 64-bit (rip-relative
                # operand) calls through an import slot are skipped
                if isinstance(first, envi.archs.i386.disasm.i386ImmMemOper) or isinstance(
                    first, envi.archs.amd64.disasm.Amd64RipRelOper
                ):
                    if first.getOperAddr(insn) in get_imports(f.vw):
                        continue

            if get_section(f.vw, insn.va) != get_section(f.vw, branch_va):
                yield Characteristic("cross section flow"), insn.va

        except KeyError:
            # a KeyError raised by the lookups above skips this branch
            continue
def extract_recursive_call(f):
    """yield a "recursive call" characteristic for a self-calling function

    args:
        f (IDA func_t)

    yields:
        (Characteristic("recursive call"), f.start_ea) when the IDA helper
        is_function_recursive reports the function as recursive
    """
    recursive = capa.features.extractors.ida.helpers.is_function_recursive(f)
    if not recursive:
        return

    yield Characteristic("recursive call"), f.start_ea
def extract_function_calls_to(f):
    """yield the code references that target a function's start address

    args:
        f (IDA func_t)

    yields:
        (Characteristic("calls to"), ea) for every address returned by
        idautils.CodeRefsTo(f.start_ea, True)
    """
    referrers = idautils.CodeRefsTo(f.start_ea, True)
    for referrer_ea in referrers:
        yield Characteristic("calls to"), referrer_ea
def extract_insn_cross_section_cflow(f, bb, insn):
    """
    Report a CALL or JMP whose destination lies in a different section.

    args:
      f: function exposing `apirefs` and `outrefs` (keyed by instruction offset)
      bb: enclosing basic block (not read here)
      insn: instruction exposing `mnemonic`, `offset`, `operands` (string) and
            `smda_function.smda_report`

    yields:
      (Characteristic("cross section flow"), insn.offset) for each target whose
      section differs from the instruction's section
    """
    if insn.mnemonic not in ["call", "jmp"]:
        return

    # instructions recorded as API references are not considered
    if insn.offset in f.apirefs:
        return

    report = insn.smda_function.smda_report

    if insn.offset in f.outrefs:
        targets = f.outrefs[insn.offset]
    elif insn.operands.startswith("0x"):
        # no recorded reference: take the target from the hex operand
        targets = [int(insn.operands, 16)]
    else:
        return

    for target in targets:
        if report.getSection(insn.offset) != report.getSection(target):
            yield Characteristic("cross section flow"), insn.offset
def extract_function_switch(f):
    """yield a "switch" characteristic for a function

    arg:
        f (IDA func_t)

    yields:
        (Characteristic("switch"), f.start_ea) when the IDA helper
        is_function_switch_statement reports a switch in the function
    """
    has_switch = capa.features.extractors.ida.helpers.is_function_switch_statement(f)
    if not has_switch:
        return

    yield Characteristic("switch"), f.start_ea
def extract_function_indirect_call_characteristic_features(f, bb, insn):
    """
    Yield an "indirect call" characteristic (e.g., call eax or call dword ptr [edx+4]).

    does not include calls like => call ds:dword_ABD4974

    args:
      f: enclosing function (not read here)
      bb: enclosing basic block (not read here)
      insn: instruction exposing `mnem`, `opers` and `va`

    yields:
      (Characteristic("indirect call"), insn.va)
    """
    if insn.mnem != "call":
        return

    # Checks below work for x86 and x64
    disasm = envi.archs.i386.disasm
    indirect_operand_kinds = (
        disasm.i386RegOper,  # call edx
        disasm.i386RegMemOper,  # call dword ptr [eax+50h]
        disasm.i386SibOper,  # call qword ptr [rsp+78h]
    )

    if isinstance(insn.opers[0], indirect_operand_kinds):
        yield Characteristic("indirect call"), insn.va
def extract_bb_stackstring(f, bb):
    """yield a "stack string" characteristic for a basic block

    args:
        f: enclosing function, passed through to bb_contains_stackstring
        bb: basic block exposing `addr_start`

    yields:
        (Characteristic("stack string"), bb.addr_start) when
        bb_contains_stackstring(f, bb) is truthy
    """
    if not bb_contains_stackstring(f, bb):
        return

    yield Characteristic("stack string"), bb.addr_start
def extract_bb_tight_loop(f, bb):
    """yield a "tight loop" characteristic for a basic block

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)

    yields:
        (Characteristic("tight loop"), bb.start_ea) when the IDA helper
        is_basic_block_tight_loop reports the block as a tight loop
    """
    tight = capa.features.extractors.ida.helpers.is_basic_block_tight_loop(bb)
    if not tight:
        return

    yield Characteristic("tight loop"), bb.start_ea
def extract_insn_segment_access_features(f, bb, insn):
    """report fs or gs access found in an instruction's disassembly text

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    yields:
        (Characteristic("fs access"), insn.ea) and/or
        (Characteristic("gs access"), insn.ea)

    TODO:
        IDA should be able to do this...
    """
    # try to optimize for only memory references: skip the disassembly text
    # lookup unless some operand is of type o_mem
    if not any(op.type == idaapi.o_mem for op in insn.ops):
        return

    text = idc.GetDisasm(insn.ea)

    # TODO: replace the string checks below with proper IDA
    for needle, label in ((" fs:", "fs access"), (" gs:", "gs access")):
        if needle in text:
            yield Characteristic(label), insn.ea
def extract_function_loop(f):
    """
    Yield a "loop" characteristic when the function's block graph has a loop.

    args:
      f: function exposing `blocks` and `addr_start`; every block exposes
         `addr_start` and `calls` (each entry becomes an edge destination --
         presumably successor addresses, confirm against the block type)

    yields:
      (Characteristic("loop"), f.addr_start) when loops.has_loop accepts the edges
    """
    # construct control flow graph as (source, destination) pairs
    cfg_edges = [(block.addr_start, destination) for block in f.blocks for destination in block.calls]

    if not loops.has_loop(cfg_edges):
        return

    yield Characteristic("loop"), f.addr_start
def extract_insn_peb_access_characteristic_features(f, bb, insn):
    """parse instruction peb access

    fs:[0x30] on x86, gs:[0x60] on x64

    args:
        f: enclosing function (not read here)
        bb: enclosing basic block (not read here)
        insn: instruction exposing `mne` (upper-case mnemonic), `oprs`
            (iterable of operand strings) and `ea`

    yields:
        (Characteristic("peb access"), insn.ea) once per matching operand
    """
    import re

    if insn.mne not in ["PUSH", "MOV"]:
        return

    # Match the offset as a whole number in either notation (0x30 / 30h):
    # a plain substring test would also accept 0x300 or 130h.
    fs_peb_offset = re.compile(r"\b0x30\b|(?<![0-9a-f])0*30h\b")
    gs_peb_offset = re.compile(r"\b0x60\b|(?<![0-9a-f])0*60h\b")

    for operand in insn.oprs:
        operand = operand.lower()
        if "fs:" in operand and fs_peb_offset.search(operand):
            yield Characteristic("peb access"), insn.ea
        elif "gs:" in operand and gs_peb_offset.search(operand):
            yield Characteristic("peb access"), insn.ea
def extract_file_embedded_pe():
    """yield the locations of PE files embedded in the loaded segments

    IDA must load resource sections for this to be complete
        - '-R' from console
        - Check 'Load resource sections' when opening binary in IDA manually

    yields:
        (Characteristic("embedded pe"), ea) for every hit reported by
        check_segment_for_pe over the non-header segments
    """
    helpers = capa.features.extractors.ida.helpers

    for segment in helpers.get_segments(skip_header_segments=True):
        for hit in check_segment_for_pe(segment):
            # each hit is a pair; only its first element is reported
            ea, _ = hit
            yield Characteristic("embedded pe"), ea
def extract_function_loop(f):
    """
    Yield a "loop" characteristic when the function's block references form a loop.

    args:
      f: function exposing `blockrefs` (mapping of source block to an iterable
         of destination blocks) and `offset`

    yields:
      (Characteristic("loop"), f.offset)
    """
    cfg_edges = [
        (source, destination)
        for source, destinations in f.blockrefs.items()
        for destination in destinations
    ]

    # without edges the loop check is not consulted at all
    if not cfg_edges:
        return

    if loops.has_loop(cfg_edges):
        yield Characteristic("loop"), f.offset
def extract_insn_cross_section_cflow(f, bb, insn):
    """
    Report a control transfer whose destination lies in a different segment.

    args:
      f: function exposing `unit`; the unit exposes `syntax.operations`,
         `obj.bin.import_functions` and `find_seg(address)`
      bb: enclosing basic block (not read here)
      insn: instruction exposing `mne`, `cr` (code references), `oprs`
            (operand strings) and `ea`

    yields:
      (Characteristic("cross section flow"), insn.ea) for each target whose
      segment differs from the instruction's segment
    """
    unit: DataUnit = f.unit
    syntax: Assembly = unit.syntax
    operation = insn.mne

    # only operations the syntax table marks with jmp=True are inspected
    if operation not in syntax.operations or syntax.operations[operation].jmp is not True:
        return

    if len(insn.cr) > 0:
        for target in insn.cr:
            # targets listed among the imported functions are skipped
            if str(target) in unit.obj.bin.import_functions:
                continue
            if unit.find_seg(insn.ea) != unit.find_seg(target):
                yield Characteristic("cross section flow"), insn.ea
        return

    # no code references: fall back to a hex literal in the first operand
    if len(insn.oprs) > 0 and insn.oprs[0].startswith("0x"):
        target = int(insn.oprs[0], 16)
        if unit.find_seg(insn.ea) != unit.find_seg(target):
            yield Characteristic("cross section flow"), insn.ea
def extract_function_calls_from(f, bb, insn):
    """yield the code references leaving a call instruction

    most relevant at the function scope, however, its most efficient to extract at the instruction scope

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    yields:
        (Characteristic("calls from"), ref) for every address returned by
        idautils.CodeRefsFrom(insn.ea, False)
    """
    if not idaapi.is_call_insn(insn):
        return

    for callee_ea in idautils.CodeRefsFrom(insn.ea, False):
        yield Characteristic("calls from"), callee_ea
def extract_function_indirect_call_characteristic_features(f, bb, insn):
    """yield an "indirect call" characteristic (e.g., call eax or call dword ptr [edx+4])

    does not include calls like => call ds:dword_ABD4974

    most relevant at the function or basic block scope; however, its most efficient to extract at the instruction scope

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    yields:
        (Characteristic("indirect call"), insn.ea) when the first operand of a
        call instruction has type o_reg, o_phrase or o_displ
    """
    if not idaapi.is_call_insn(insn):
        return

    indirect_operand_types = (idc.o_reg, idc.o_phrase, idc.o_displ)
    if idc.get_operand_type(insn.ea, 0) in indirect_operand_types:
        yield Characteristic("indirect call"), insn.ea
def extract_function_loop(f):
    """
    parse if a function has a loop

    args:
      f: function exposing `basic_blocks` and `va`; every basic block exposes
         `va` and `instructions`, whose last element provides `getBranches()`
         and `mnem`

    yields:
      (Characteristic("loop"), f.va) when the collected edges contain a loop
    """
    edges = []

    for bb in f.basic_blocks:
        if len(bb.instructions) == 0:
            continue

        last_insn = bb.instructions[-1]
        for bva, bflags in last_insn.getBranches():
            if bflags & vivisect.envi.BR_COND or bflags & vivisect.envi.BR_FALL or bflags & vivisect.envi.BR_TABLE:
                edges.append((bb.va, bva))
            elif last_insn.mnem == "jmp" and bva is not None:
                # An unconditional jmp is reported with none of the flags
                # above, so a loop closed by a plain `jmp` would otherwise be
                # missed. Unresolved targets (None) are skipped.
                edges.append((bb.va, bva))

    if edges and loops.has_loop(edges):
        yield Characteristic("loop"), f.va
def extract_function_loop(f):
    """yield a "loop" characteristic when the function's flow chart has a loop

    args:
        f (IDA func_t)

    yields:
        (Characteristic("loop"), f.start_ea) when loops.has_loop accepts the
        block-to-successor edges of the function
    """
    # construct control flow graph as (block start, successor start) pairs
    cfg_edges = [
        (block.start_ea, successor.start_ea)
        for block in idaapi.FlowChart(f)
        for successor in block.succs()
    ]

    if not loops.has_loop(cfg_edges):
        return

    yield Characteristic("loop"), f.start_ea
def extract_insn_nzxor_characteristic_features(f, bb, insn):
    """
    parse non-zeroing XOR instruction from the given instruction.
    ignore expected non-zeroing XORs, e.g. security cookies.

    args:
      f: enclosing function, passed through to is_security_cookie
      bb: enclosing basic block, passed through to is_security_cookie
      insn: instruction exposing `mnem`, `opers` and `va`

    yields:
      (Characteristic("nzxor"), insn.va)
    """
    # cover the SSE xor variants too, matching the other nzxor extractor in this file
    if insn.mnem not in ("xor", "xorpd", "xorps", "pxor"):
        return

    # xor of an operand with itself only zeroes it
    if insn.opers[0] == insn.opers[1]:
        return

    if is_security_cookie(f, bb, insn):
        return

    yield Characteristic("nzxor"), insn.va
def extract_insn_nzxor_characteristic_features(f, bb, insn):
    """
    Yield an "nzxor" characteristic for a non-zeroing XOR instruction.

    Expected non-zeroing XORs (those flagged by contains_stack_cookie_keywords)
    are ignored.

    args:
      f: enclosing function, passed through to contains_stack_cookie_keywords
      bb: enclosing basic block, passed through to contains_stack_cookie_keywords
      insn: instruction exposing `mne` (upper-case mnemonic), `oprs` and `ea`

    yields:
      (Characteristic("nzxor"), insn.ea)
    """
    xor_mnemonics = ("XOR", "XORPD", "XORPS", "PXOR")
    if insn.mne not in xor_mnemonics:
        return

    # identical source and destination: the instruction only zeroes the operand
    destination, source = insn.oprs[0], insn.oprs[1]
    if destination == source:
        return

    if contains_stack_cookie_keywords(insn, bb, f):
        return

    yield Characteristic("nzxor"), insn.ea