def extract_file_strings(buf, **kwargs):
    """
    extract ASCII and UTF-16 LE strings from file

    args:
      buf: raw contents to scan
      **kwargs: accepted for signature compatibility; not used here

    yields:
      (String, offset) pairs: all ASCII hits first, followed by all
      UTF-16 LE hits
    """
    strings = capa.features.extractors.strings

    # the scanner is looked up by name right before it runs, so the
    # second lookup only happens once the first scan is exhausted.
    for scanner_name in ("extract_ascii_strings", "extract_unicode_strings"):
        scan = getattr(strings, scanner_name)
        for found in scan(buf):
            yield String(found.s), found.offset
def extract_file_strings(pe, file_path):
    """
    extract ASCII and UTF-16 LE strings from file

    args:
      pe: not used by this function
      file_path: path of the file to read (opened in binary mode)

    yields:
      (String, offset) pairs: all ASCII hits first, followed by all
      UTF-16 LE hits
    """
    # read everything up front so the handle is closed before we yield
    with open(file_path, "rb") as handle:
        contents = handle.read()

    strings = capa.features.extractors.strings

    for hit in strings.extract_ascii_strings(contents):
        yield String(hit.s), hit.offset

    for hit in strings.extract_unicode_strings(contents):
        yield String(hit.s), hit.offset
def test_rule_yaml_count_string():
    """`count(string(foo)): 2` evaluates true only for exactly two locations."""
    yaml_text = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - count(string(foo)): 2
        """
    )
    compiled = capa.rules.Rule.from_yaml(yaml_text)

    # (locations recorded for String("foo"), expected evaluation result)
    expectations = (
        ({}, False),
        ({1}, False),
        ({1, 2}, True),
        ({1, 2, 3}, False),
    )
    for locations, expected in expectations:
        assert compiled.evaluate({String("foo"): locations}) == expected
def test_explicit_string_values_int():
    """Quoted numeric-looking values ("123", "0x123") must stay string features."""
    rule = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - or:
                    - string: "123"
                    - string: "0x123"
        """
    )
    r = capa.rules.Rule.from_yaml(rule)
    children = list(r.statement.get_children())

    # `in` already yields a bool; comparing it to True is redundant
    assert String("123") in children
    assert String("0x123") in children
def extract_insn_string_features(f, bb, insn):
    """
    parse string features from the given instruction.

    args:
      f: function object; only `f.vw` is read here
      bb: basic block (not used)
      insn: instruction whose `opers` are inspected

    yields:
      (String, insn.va) for each string that can be read at an address
      produced by `derefs` for one of the operands
    """
    # example:
    #
    #     push    offset aAcr     ; "ACR  > "

    for oper in insn.opers:
        if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
            candidate = oper.getOperValue(oper)
        elif isinstance(
            oper,
            (
                # like 0x10056CB4 in `lea eax, dword [0x10056CB4]`
                envi.archs.i386.disasm.i386ImmMemOper,
                # like 0x401000 in `mov eax, 0x401000[2 * ebx]`
                envi.archs.i386.disasm.i386SibOper,
            ),
        ):
            candidate = oper.imm
        elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper):
            candidate = oper.getOperAddr(insn)
        else:
            # any other operand kind is skipped
            continue

        for addr in derefs(f.vw, candidate):
            try:
                text = read_string(f.vw, addr)
            except ValueError:
                # read_string signalled there is no string at this address
                continue

            yield String(text.rstrip("\x00")), insn.va
def test_string_values_special_characters():
    """Escape sequences in quoted YAML strings must be decoded into the feature value."""
    rule = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - or:
                    - string: "hello\\r\\nworld"
                    - string: "bye\\nbye"
                      description: "test description"
        """
    )
    r = capa.rules.Rule.from_yaml(rule)
    children = list(r.statement.get_children())

    # `in` already yields a bool; comparing it to True is redundant
    assert String("hello\r\nworld") in children
    assert String("bye\nbye") in children
def extract_file_strings():
    """
    extract ASCII and UTF-16 LE strings

    IDA must load resource sections for this to be complete
        - '-R' from console
        - Check 'Load resource sections' when opening binary in IDA manually

    yields:
      (String, address) pairs, where address is the segment start plus the
      offset of the string within that segment's buffer
    """
    helpers = capa.features.extractors.ida.helpers
    strings = capa.features.extractors.strings

    for segment in helpers.get_segments():
        data = helpers.get_segment_buffer(segment)

        for hit in strings.extract_ascii_strings(data):
            yield String(hit.s), (segment.start_ea + hit.offset)

        for hit in strings.extract_unicode_strings(data):
            yield String(hit.s), (segment.start_ea + hit.offset)
def extract_insn_string_features(f, bb, insn):
    """
    parse string features from the given instruction.

    args:
      f: function object; only `f.smda_report` is read here
      bb: basic block (not used)
      insn: instruction providing `getDataRefs()` and `offset`

    yields:
      (String, insn.offset) for each non-empty string read at an address
      produced by `derefs` for one of the instruction's data references
    """
    # example:
    #
    #     push    offset aAcr     ; "ACR  > "
    for data_ref in insn.getDataRefs():
        for addr in derefs(f.smda_report, data_ref):
            text = read_string(f.smda_report, addr)
            if not text:
                # nothing readable at this address
                continue
            yield String(text.rstrip("\x00")), insn.offset
def extract_insn_string_features(
    f: CilMethodBody, bb: CilMethodBody, insn: Instruction
) -> Iterator[Tuple[String, int]]:
    """
    parse instruction string features

    args:
      f: method body; `f.ctx["pe"]` is passed to the user-string reader
      bb: not used
      insn: instruction to inspect

    yields:
      (String, insn.offset) when the instruction is an `ldstr` whose operand
      is a StringToken and the user string can be read; otherwise nothing
    """
    if insn.is_ldstr() and isinstance(insn.operand, StringToken):
        text: Optional[str] = read_dotnet_user_string(f.ctx["pe"], insn.operand)
        if text is not None:
            yield String(text), insn.offset
def extract_insn_string_features(f, bb, insn):
    """
    parse instruction string features

    args:
      f (IDA func_t)
      bb (IDA BasicBlock)
      insn (IDA insn_t)

    yields:
      (String, insn.ea) when the instruction's data reference differs from
      its own address and a string is found at that reference

    example:
      push offset aAcr ; "ACR > "
    """
    helpers = capa.features.extractors.ida.helpers

    target = helpers.find_data_reference_from_insn(insn)
    if target == insn.ea:
        # reference resolves to the instruction itself: nothing to read
        return

    text = helpers.find_string_at(target)
    if text:
        yield String(text), insn.ea