def test_rule_yaml_descriptions():
    """a rule whose features and statements carry descriptions still parses and evaluates to True."""
    # NOTE(review): the YAML indentation below was reconstructed from a
    # whitespace-collapsed listing. confirm it against the project's copy,
    # in particular which node each bare `description:` key is attached to
    # (here: the enclosing `and` / `or` statement).
    rule_yaml = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - and:
                    - number: 1 = This is the number 1
                    - string: This program cannot be run in DOS mode.
                      description: MS-DOS stub message
                    - string: '/SELECT.*FROM.*WHERE/i'
                      description: SQL WHERE Clause
                    - count(number(2 = AF_INET/SOCK_DGRAM)): 2
                    - or:
                        - and:
                            - offset: 0x50 = IMAGE_NT_HEADERS.OptionalHeader.SizeOfImage
                            - offset: 0x34 = IMAGE_NT_HEADERS.OptionalHeader.ImageBase
                          description: 32-bits
                        - and:
                            - offset: 0x50 = IMAGE_NT_HEADERS64.OptionalHeader.SizeOfImage
                            - offset: 0x30 = IMAGE_NT_HEADERS64.OptionalHeader.ImageBase
                          description: 64-bits
                      description: PE headers offsets
        """
    )
    parsed = capa.rules.Rule.from_yaml(rule_yaml)

    # each feature maps to a set of integers; Number(2) gets two entries to
    # line up with the `count(...): 2` term, and of the two ImageBase offsets
    # only 0x30 (the IMAGE_NT_HEADERS64 one) is supplied.
    observed = {
        Number(1): {1},
        Number(2): {2, 3},
        String("This program cannot be run in DOS mode."): {4},
        String("SELECT password FROM hidden_table WHERE user == admin"): {5},
        Offset(0x50): {6},
        Offset(0x30): {7},
    }
    assert parsed.evaluate(observed) == True
def extract_file_strings(data: DataUnit):
    """
    extract ASCII and UTF-16 LE strings from file

    scans `data.fbytes` with the ASCII extractor first and the unicode
    extractor second, yielding a (String, offset) pair for every hit.
    """
    yield from (
        (String(hit.s), hit.offset)
        for hit in strings.extract_ascii_strings(data.fbytes)
    )
    yield from (
        (String(hit.s), hit.offset)
        for hit in strings.extract_unicode_strings(data.fbytes)
    )
def extract_file_strings(vw, file_path):
    """
    extract ASCII and UTF-16 LE strings from file

    reads all of `file_path` in binary mode, then yields a (String, offset)
    pair for each ASCII hit followed by each unicode hit.
    `vw` is accepted but not used by this function.
    """
    with open(file_path, "rb") as handle:
        contents = handle.read()

    string_extractors = capa.features.extractors.strings

    for hit in string_extractors.extract_ascii_strings(contents):
        yield String(hit.s), hit.offset

    for hit in string_extractors.extract_unicode_strings(contents):
        yield String(hit.s), hit.offset
def test_rule_yaml_count_string():
    """`count(string(foo)): 2` evaluates to True only when exactly two entries are supplied for the string."""
    # NOTE(review): the YAML indentation below was reconstructed from a
    # whitespace-collapsed listing; confirm it against the project's copy.
    rule = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - count(string(foo)): 2
        """
    )
    r = capa.rules.Rule.from_yaml(rule)

    # the zero-entry case uses set(): a bare `{}` is an empty dict, whereas
    # every other case here passes a set.
    assert r.evaluate({String("foo"): set()}) == False
    assert r.evaluate({String("foo"): {1}}) == False
    assert r.evaluate({String("foo"): {1, 2}}) == True
    assert r.evaluate({String("foo"): {1, 2, 3}}) == False
def test_explicit_string_values_int():
    """quoted, number-looking YAML values ("123", "0x123") come back as String features with that exact text."""
    # NOTE(review): the YAML indentation below was reconstructed from a
    # whitespace-collapsed listing; confirm it against the project's copy.
    rule_yaml = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - or:
                    - string: "123"
                    - string: "0x123"
        """
    )
    parsed = capa.rules.Rule.from_yaml(rule_yaml)
    children = list(parsed.statement.get_children())

    for expected in ("123", "0x123"):
        assert String(expected) in children
def test_string_values_special_characters():
    """escape sequences written in double-quoted YAML strings end up as real CR/LF characters in the String features."""
    # NOTE(review): the YAML indentation below was reconstructed from a
    # whitespace-collapsed listing; confirm it against the project's copy.
    # the doubled backslashes keep `\r` / `\n` literal in the Python source so
    # that the YAML text itself contains the escape sequences.
    rule_yaml = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - or:
                    - string: "hello\\r\\nworld"
                    - string: "bye\\nbye"
                      description: "test description"
        """
    )
    parsed = capa.rules.Rule.from_yaml(rule_yaml)
    children = list(parsed.statement.get_children())

    for expected in ("hello\r\nworld", "bye\nbye"):
        assert String(expected) in children
def extract_insn_string_features(f, bb, insn):
    """parse string features from the given instruction.

    for each operand of a recognised kind, takes its value/address, expands it
    through `derefs`, and yields (String, insn.va) for every resulting address
    at which `read_string` succeeds. `bb` is not used.
    """
    # example:
    #
    #     push    offset aAcr     ; "ACR  > "
    for operand in insn.opers:
        if isinstance(operand, envi.archs.i386.disasm.i386ImmOper):
            candidate = operand.getOperValue(operand)
        elif isinstance(
            operand,
            (
                # like 0x10056CB4 in `lea eax, dword [0x10056CB4]`
                envi.archs.i386.disasm.i386ImmMemOper,
                # like 0x401000 in `mov eax, 0x401000[2 * ebx]`
                envi.archs.i386.disasm.i386SibOper,
            ),
        ):
            candidate = operand.imm
        elif isinstance(operand, envi.archs.amd64.disasm.Amd64RipRelOper):
            candidate = operand.getOperAddr(insn)
        else:
            # any other operand kind is ignored
            continue

        for address in derefs(f.vw, candidate):
            try:
                text = read_string(f.vw, address)
            except ValueError:
                # read_string signalled failure for this address; try the next one
                continue

            yield String(text.rstrip("\x00")), insn.va
def extract_file_strings():
    """extract ASCII and UTF-16 LE strings

    walks every segment returned by the IDA helpers and yields
    (String, address) pairs, where address is the segment's start_ea plus the
    string's offset inside the segment buffer.

    IDA must load resource sections for this to be complete
        - '-R' from console
        - Check 'Load resource sections' when opening binary in IDA manually
    """
    ida_helpers = capa.features.extractors.ida.helpers

    for segment in ida_helpers.get_segments():
        buffer = ida_helpers.get_segment_buffer(segment)

        # ASCII hits are produced before unicode hits within each segment
        for scan in (
            capa.features.extractors.strings.extract_ascii_strings,
            capa.features.extractors.strings.extract_unicode_strings,
        ):
            for hit in scan(buffer):
                yield String(hit.s), (segment.start_ea + hit.offset)
def extract_insn_string_features(f, bb, insn):
    """parse string features from the given instruction.

    every data reference of `insn` is expanded through `derefs`; each address
    for which `read_string` returns something truthy yields
    (String, insn.offset). `bb` is not used.
    """
    # example:
    #
    #     push    offset aAcr     ; "ACR  > "
    for data_ref in insn.getDataRefs():
        for address in derefs(f.smda_report, data_ref):
            text = read_string(f.smda_report, address)
            if not text:
                # nothing readable at this address
                continue
            yield String(text.rstrip("\x00")), insn.offset
def test_rule_yaml_descriptions():
    """a rule whose features carry inline and `description:` annotations still parses and evaluates to True."""
    # NOTE(review): the YAML indentation below was reconstructed from a
    # whitespace-collapsed listing; confirm it against the project's copy.
    rule_yaml = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - and:
                    - number: 1 = This is the number 1
                    - string: This program cannot be run in DOS mode.
                      description: MS-DOS stub message
                    - string: '/SELECT.*FROM.*WHERE/i'
                      description: SQL WHERE Clause
                    - count(number(2 = AF_INET/SOCK_DGRAM)): 2
        """
    )
    parsed = capa.rules.Rule.from_yaml(rule_yaml)

    # each feature maps to a set of integers; Number(2) gets two entries to
    # line up with the `count(...): 2` term.
    observed = {
        Number(1): {1},
        Number(2): {2, 3},
        String("This program cannot be run in DOS mode."): {4},
        String("SELECT password FROM hidden_table WHERE user == admin"): {5},
    }
    assert parsed.evaluate(observed) == True
def extract_insn_string_features(f, bb, insn):
    """
    parse instruction string features

    yields (String, insn.ea) for every data reference from the instruction at
    which the IDA helper finds a string. `f` and `bb` are not used.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    example:
        push offset aAcr     ; "ACR  > "
    """
    for target in idautils.DataRefsFrom(insn.ea):
        text = capa.features.extractors.ida.helpers.find_string_at(target)
        if not text:
            # no string at this reference target
            continue
        yield String(text), insn.ea
def extract_insn_string_features(f, bb, insn):
    """parse instruction string features

    yields at most one (String, insn.ea) pair: the string found at the single
    data reference the IDA helper reports for `insn`. `f` and `bb` are not used.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    example:
        push offset aAcr     ; "ACR  > "
    """
    target = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
    if target == insn.ea:
        # presumably the helper hands back the instruction's own address when
        # it finds no data reference -- verify against the helper
        return

    text = capa.features.extractors.ida.helpers.find_string_at(target)
    if text:
        yield String(text), insn.ea
def extract_insn_string_features(f, bb, insn):
    """parse instruction string features

    each entry of `insn.dr` is converted with str() and looked up in
    `f.unit.obj.bin.strings`; every key that is present yields
    (String, insn.ea). `bb` is not used.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    NOTE(review): the arg types above are carried over from the previous
    docstring; the attributes actually read here (`insn.dr`,
    `f.unit.obj.bin.strings`) should be checked against them.

    example:
        push offset aAcr     ; "ACR  > "
    """
    for raw_ref in insn.dr:
        key = str(raw_ref)
        if key not in f.unit.obj.bin.strings:
            continue
        yield String(f.unit.obj.bin.strings[key]), insn.ea
def extract_insn_string_features(f, bb, insn):
    """parse string features from the given instruction.

    for each immediate or RIP-relative operand, tries to read a string at the
    operand's value/address and yields (String, insn.va) on success.
    `bb` is not used.
    """
    # example:
    #
    #     push    offset aAcr     ; "ACR  > "
    for operand in insn.opers:
        if isinstance(operand, envi.archs.i386.disasm.i386ImmOper):
            address = operand.getOperValue(operand)
        elif isinstance(operand, envi.archs.amd64.disasm.Amd64RipRelOper):
            address = operand.getOperAddr(insn)
        else:
            # any other operand kind is ignored
            continue

        try:
            text = read_string(f.vw, address)
        except ValueError:
            # read_string signalled failure for this operand; move on
            continue

        yield String(text.rstrip("\x00")), insn.va