class ServiceWithDefaultConfig(Service):
    name = "dummy2"
    version = "1.1"
    default_config = [
        ServiceConfigOption("some option", ServiceConfigOption.STRING),
        ServiceConfigOption("private option", ServiceConfigOption.BOOL, private=True),
    ]
class EntropycalcService(Service):
    """
    Calculate entropy over data.
    """

    name = "entropycalc"
    version = '0.0.1'
    type_ = Service.TYPE_CUSTOM
    supported_types = ['Sample']
    default_config = [
        ServiceConfigOption('start_offset',
                            ServiceConfigOption.INT,
                            description="Start offset",
                            required=False,
                            private=False,
                            default=DEFAULT_START),
        ServiceConfigOption('end_offset',
                            ServiceConfigOption.INT,
                            description="End offset",
                            required=True,
                            private=False,
                            default=DEFAULT_END),
    ]

    @staticmethod
    def valid_for(context):
        # Only run if there's data
        return context.has_data()

    def _calculate_entropy(self, data):
        entropy = 0.0
        if len(data) == 0:
            return entropy

        occurences = array.array('L', [0] * 256)
        for x in data:
            occurences[ord(x)] += 1

        for x in occurences:
            if x:
                p_x = float(x) / len(data)
                entropy -= p_x * math.log(p_x, 2)

        return entropy

    def _scan(self, context):
        start_offset = self.config.get("start_offset", DEFAULT_START)
        end_offset = self.config.get("end_offset", DEFAULT_END)
        output = self._calculate_entropy(context.data[start_offset:end_offset])
        self._add_result('Entropy calculation', "%.1f" % output, {'Value': output})
class MetaChecker(Service):
    """
    Compare metadata of this sample to others.
    """

    name = "meta_checker"
    version = '1.0.2'
    type_ = Service.TYPE_CUSTOM
    purpose = "comparison"
    supported_types = ['Sample']
    required_fields = ['md5']
    default_config = [
        ServiceConfigOption('max_result',
                            ServiceConfigOption.INT,
                            description="Max result threshold for showing metadata",
                            required=True,
                            private=False,
                            default=DEFAULT_MAX),
    ]

    def _get_meta_count(self, meta_type, meta_val):
        query_field = "analysis.results.{0}".format(meta_type)
        query = {query_field: meta_val}
        total_count = self._fetch_meta(query, {'md5': 1}).count()
        return total_count

    def _scan(self, context):
        max_result = self.config.get("max_result", DEFAULT_MAX)
        my_md5 = context.md5
        my_results = self._fetch_meta({'md5': my_md5}, {'analysis': 1})
        if "analysis" not in my_results[0]:
            logger.error("Could not get analysis results for %s" % my_md5)
            self._error("Could not get analysis results for %s" % my_md5)
            return

        completed_results = []
        for result_set in my_results[0]["analysis"]:
            # skip our own results so we don't get nasty feedback
            if result_set["service_name"] == self.name:
                continue
            for result in result_set["results"]:
                if "md5" in result:
                    res_type = "md5"
                else:
                    res_type = "result"
                res_hash = "{0}-{1}".format(result_set["service_name"],
                                            result[res_type])
                if result[res_type] and res_hash not in completed_results:
                    total_count = self._get_meta_count(res_type, result[res_type])
                    count_result = {
                        'service': result_set["service_name"],
                        'type': res_type,
                        res_type: result[res_type],
                        'count': total_count,
                    }
                    self._add_result("meta_count_{0}".format(res_type),
                                     result["result"], count_result)
                    completed_results.append(res_hash)
class CarverService(Service):
    name = "carver"
    version = '0.0.1'
    type_ = Service.TYPE_CUSTOM
    supported_types = ['Sample']
    default_config = [
        ServiceConfigOption('start_offset',
                            ServiceConfigOption.INT,
                            description="Start offset",
                            required=False,
                            private=False,
                            default=DEFAULT_START),
        ServiceConfigOption('end_offset',
                            ServiceConfigOption.INT,
                            description="End offset",
                            required=False,
                            private=False,
                            default=DEFAULT_END),
    ]

    @staticmethod
    def valid_for(context):
        return context.has_data()

    def _scan(self, context):
        start_offset = self.config.get("start_offset", DEFAULT_START)
        end_offset = self.config.get("end_offset", DEFAULT_END)
        # Start must be 0 or higher. If end is greater than zero it must
        # also be greater than start_offset.
        if start_offset < 0 or (end_offset > 0 and start_offset > end_offset):
            self._error("Invalid offsets.")
            return

        data = context.data[start_offset:end_offset]
        if not data:
            self._error("No data.")
        else:
            self._add_file(data,
                           filename=hashlib.md5(data).hexdigest(),
                           log_msg="Carved file with MD5: {0}",
                           relationship="Contains")
        return
class PyewService(Service):
    """
    Run a binary through the Pyew disassembler.
    """

    name = "Pyew"
    version = '0.0.1'
    type_ = Service.TYPE_CUSTOM
    template = None
    supported_types = ['Sample']
    default_config = [
        ServiceConfigOption('pyew',
                            ServiceConfigOption.STRING,
                            description="Full path to pyew py file.",
                            default=None,
                            private=True,
                            required=True),
        ServiceConfigOption('port',
                            ServiceConfigOption.STRING,
                            description="Port the pyew websocket is listening on.",
                            default=9876,
                            private=True,
                            required=True),
        ServiceConfigOption('secure',
                            ServiceConfigOption.BOOL,
                            description="Use secure websockets"),
    ]

    def __init__(self, *args, **kwargs):
        pass

    def _scan(self, context):
        pass

    def stop(self):
        pass
class OPSWATService(Service):
    """
    Pushes a sample to your local OPSWAT appliance and scans it with the
    configured engines.

    Specify the URL for the REST API, including any API options, e.g.:
    http://example.org:8008/metascan_rest/scanner?method=scan&archive_pwd=infected
    """

    name = "OPSWAT"
    version = "1.0.0"
    type_ = Service.TYPE_AV
    supported_types = ['Sample']
    default_config = [
        ServiceConfigOption('OPSWAT_url',
                            ServiceConfigOption.STRING,
                            description="URL for the OPSWAT REST API.",
                            default='http://example.org:8008/metascan_rest/scanner?method=scan&archive_pwd=infected',
                            required=True,
                            private=True),
    ]

    def _scan(self, context):
        data = get_file(context.md5)
        zipdata = create_zip([("samples", data)])
        url = self.config.get('OPSWAT_url', '')
        req = urllib2.Request(url)
        req.add_header("Content-Type", "application/zip")
        req.add_data(bytearray(zipdata))
        out = urllib2.urlopen(req)
        text_out = out.read()

        # Parse XML output
        handler = XMLTagHandler()
        parser = xml.parsers.expat.ParserCreate()
        parser.StartElementHandler = handler.StartElement
        parser.EndElementHandler = handler.EndElement
        parser.CharacterDataHandler = handler.CharData
        parser.Parse(text_out)

        for threat in handler.threatList:
            self._add_result('av_result', threat["threat_name"],
                             {"engine": threat["engine_name"],
                              "date": datetime.now().isoformat()})
class DelayService(Service):
    name = "delay"
    version = '1.0.0'
    type_ = Service.TYPE_CUSTOM
    default_config = [
        ServiceConfigOption('sleep_time',
                            ServiceConfigOption.INT,
                            description="Number of seconds to"
                                        " sleep between notifications.",
                            default=5),
    ]

    def _scan(self, data, sample_dict):
        for i in xrange(5):
            self._info(i)
            logger.info(i)
            self._info("sleeping")
            logger.info("sleeping")
            self._notify()
            time.sleep(self.config['sleep_time'])
class DelayService(Service):
    name = "delay"
    version = '1.0.0'
    type_ = Service.TYPE_CUSTOM
    default_config = [
        ServiceConfigOption('sleep_time',
                            ServiceConfigOption.INT,
                            description="Number of seconds to"
                                        " sleep between notifications.",
                            default=5),
    ]

    @staticmethod
    def valid_for(obj):
        return

    def run(self, obj, config):
        for i in xrange(5):
            self._info(i)
            logger.info(i)
            self._info("sleeping")
            logger.info("sleeping")
            self._notify()
            time.sleep(self.config['sleep_time'])
class UpxService(Service):
    """
    Attempt to unpack a binary using UPX.
    """

    name = "upx"
    version = '1.0.2'
    type_ = Service.TYPE_UNPACKER
    supported_types = ['Sample']
    default_config = [
        ServiceConfigOption('upx_path',
                            ServiceConfigOption.STRING,
                            description="Location of the upx binary.",
                            default='/usr/bin/upx',
                            required=True,
                            private=True),
    ]

    @classmethod
    def _validate(cls, config):
        upx_path = config.get("upx_path", "")
        if not upx_path:
            raise ServiceConfigError("Must specify UPX path.")
        if not os.path.isfile(upx_path):
            raise ServiceConfigError("UPX path does not exist.")
        if not os.access(upx_path, os.X_OK):
            raise ServiceConfigError("UPX path is not executable.")
        if 'upx' not in upx_path.lower():
            raise ServiceConfigError("Executable does not appear to be UPX.")

    def _scan(self, context):
        upx_path = self.config.get("upx_path", "")

        # The _write_to_file() context manager will delete this file at the
        # end of the "with" block.
        with self._write_to_file() as tmp_file:
            (working_dir, filename) = os.path.split(tmp_file)
            args = [upx_path, "-q", "-d", filename]

            # UPX does not generate a lot of output, so we should not have to
            # worry about this hanging because the buffer is full
            proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT, cwd=working_dir)
            # Note that we are redirecting STDERR to STDOUT, so we can ignore
            # the second element of the tuple returned by communicate().
            output = proc.communicate()[0]
            self._debug(output)

            if proc.returncode:
                # UPX return code of 1 indicates an error.
                # UPX return code of 2 indicates a warning (usually, the
                # file was not packed by UPX).
                msg = "UPX could not unpack the file."
                self._warning(msg)
                return

            with open(tmp_file, "rb") as newfile:
                data = newfile.read()

            # TODO: check to make sure file was modified (new MD5), indicating
            # it was actually unpacked
            self._add_file(data,
                           log_msg="UPX unpacked file with MD5: {0}",
                           relationship="Packed_From")
class ChopShopService(Service):
    """
    Run a PCAP through ChopShop.
    """

    name = "ChopShop"
    version = '0.0.5'
    type_ = Service.TYPE_CUSTOM
    template = None
    supported_types = ['PCAP']
    default_config = [
        ServiceConfigOption('basedir',
                            ServiceConfigOption.STRING,
                            description="A base directory where all the modules and libraries exist.",
                            default=None,
                            required=True,
                            private=True),
        ServiceConfigOption('modules',
                            ServiceConfigOption.MULTI_SELECT,
                            description="Supported modules.",
                            choices=DEFAULT_MODULES,
                            default=DEFAULT_MODULES),
    ]

    def __init__(self, *args, **kwargs):
        super(ChopShopService, self).__init__(*args, **kwargs)
        logger.debug("Initializing ChopShop service.")
        self.base_dir = self.config['basedir']
        self.modules = ""
        if 'HTTP' in self.config['modules']:
            self.modules += ";http | http_extractor -m"
        if 'DNS' in self.config['modules']:
            self.modules += ";dns | dns_extractor"
        self.template = "chopshop_analysis.html"

    def _scan(self, context):
        logger.debug("Setting up shop...")
        shop_path = "%s/shop" % self.base_dir
        if not os.path.exists(self.base_dir):
            self._error("ChopShop path does not exist")
        elif not os.path.exists(shop_path):
            self._error("ChopShop shop path does not exist")
        else:
            sys.path.append(shop_path)
            import ChopLib as CL

            # I wanted to do this check in validate, but if it fails and
            # then you fix the path to point to the appropriate chopshop
            # it requires a webserver restart to take effect. So just do
            # the check at each scan.
            if StrictVersion(str(CL.VERSION)) < StrictVersion('4.0'):
                self._error("Need ChopShop 4.0 or newer")

            from ChopLib import ChopLib
            from ChopUi import ChopUi

            logger.debug("Scanning...")

            choplib = ChopLib()
            chopui = ChopUi()

            choplib.base_dir = self.base_dir

            # XXX: Convert from unicode to str...
            choplib.modules = str(self.modules)

            chopui.jsonout = jsonhandler
            choplib.jsonout = True

            # ChopShop (because of pynids) needs to read a file off disk.
            # The services framework forces you to use 'with' here. It's not
            # possible to just get a path to a file on disk.
            with self._write_to_file() as pcap_file:
                choplib.filename = pcap_file
                chopui.bind(choplib)
                chopui.start()
                chopui.jsonclass.set_service(self)
                choplib.start()

                while chopui.is_alive():
                    time.sleep(.1)

                chopui.join()
                choplib.finish()
                choplib.join()
class TotalHashService(Service):
    """
    (PE Clustering) as implemented by Team Cymru's PEhash
    http://totalhash.com/pehash-source-code/. Optionally look up the
    resulting hash on totalhash.
    """

    name = "totalhash"
    version = '0.1.0'
    type_ = Service.TYPE_CUSTOM
    supported_types = ['Sample']
    default_config = [
        ServiceConfigOption('th_api_key',
                            ServiceConfigOption.STRING,
                            description="Required. Obtain from Totalhash.",
                            required=True,
                            private=True),
        ServiceConfigOption('th_user',
                            ServiceConfigOption.STRING,
                            description="Required. Obtain from Totalhash.",
                            required=True,
                            private=True),
        ServiceConfigOption('th_query_url',
                            ServiceConfigOption.STRING,
                            default='https://api.totalhash.com/',
                            required=True,
                            private=True),
    ]

    def _scan(self, context):
        # If we have an API key, go ahead and look it up.
        key = str(self.config.get('th_api_key', ''))
        user = self.config.get('th_user', '')
        url = self.config.get('th_query_url', '')

        # XXX: Context doesn't provide sha1. When we move away from contexts
        # this can just use str(obj.sha1)
        h = hashlib.sha1(context.data).hexdigest()

        if not key:
            self._add_result('Analysis Link', url + "/analysis/" + h)
            self._info("No API key, not checking Totalhash.")
            return

        signature = hmac.new(key, msg=h, digestmod=hashlib.sha256).hexdigest()
        params = "/analysis/" + h + "&id=" + user + "&sign=" + signature
        req = urllib2.Request(url + params)

        if settings.HTTP_PROXY:
            proxy = urllib2.ProxyHandler({'https': settings.HTTP_PROXY})
            opener = urllib2.build_opener(proxy)
            urllib2.install_opener(opener)

        try:
            response = urllib2.urlopen(req)
            data = response.read()
        except Exception as e:
            logger.info("Totalhash: network connection error (%s)" % e)
            self._info("Network connection error checking totalhash (%s)" % e)
            return

        from lxml import etree
        try:
            root = etree.fromstring(data)
        except Exception as e:
            logger.error("Totalhash: parse error (%s)" % e)
            self._error("Error parsing results: %s" % e)
            return

        self._add_result('Analysis Metadata', root.attrib['time'])

        it = root.getiterator('av')
        for av in it:
            stats = {
                'scanner': av.attrib['scanner'],
                'timestamp': av.attrib['timestamp']
            }
            self._add_result('AV', av.attrib['signature'], stats)

        it = root.getiterator('process')
        for proc in it:
            filename = proc.attrib['filename']
            # Some entries appear with an empty filename and nothing else.
            if filename == '':
                continue
            pid = proc.attrib['pid']

            dlls = []
            for dll in proc.findall('dll_handling_section/load_dll'):
                dlls.append(dll.attrib['filename'])

            files = []
            for file_ in proc.findall('filesystem_section/create_file'):
                info = {
                    'Filename': file_.attrib['srcfile'],
                    'Action': 'create'
                }
                files.append(info)
            for file_ in proc.findall('filesystem_section/delete_file'):
                info = {
                    'Filename': file_.attrib['srcfile'],
                    'Action': 'delete'
                }
                files.append(info)

            procs = []
            for cp in proc.findall('process_section/create_process'):
                info = {
                    'Cmdline': cp.attrib['cmdline'],
                    'Target PID': cp.attrib['targetpid'],
                    'Action': 'create'
                }
                procs.append(info)
            for op in proc.findall('process_section/open_process'):
                info = {
                    'Target PID': op.attrib['targetpid'],
                    'API': op.attrib['apifunction'],
                    'Action': 'open'
                }
                procs.append(info)

            hosts = []
            for host in proc.findall('winsock_section/getaddrinfo'):
                hosts.append(host.attrib['requested_host'])

            mutexes = []
            for mutex in proc.findall('mutex_section/create_mutex'):
                mutexes.append(mutex.attrib['name'])

            hooks = []
            for hook in proc.findall('windows_hook_section/set_windows_hook'):
                hooks.append(hook.attrib['hookid'])

            regs = []
            for reg in proc.findall('registry_section/set_value'):
                info = {
                    'Key': reg.attrib['key'],
                    'Value': reg.attrib['value'],
                    'Action': 'set'
                }
                regs.append(info)

            svcs = []
            for svc in proc.findall('service_section/create_service'):
                info = {
                    'Display Name': svc.attrib['displayname'],
                    'Image Path': svc.attrib['imagepath'],
                    'Action': 'create'
                }
                svcs.append(info)
            for svc in proc.findall('service_section/start_service'):
                info = {
                    'Display Name': svc.attrib['displayname'],
                    'Action': 'start'
                }
                svcs.append(info)

            sysinfo = []
            for si in proc.findall('system_info_section/check_for_debugger'):
                sysinfo.append(si.attrib['apifunction'])

            stats = {
                'PID': pid,
                'Loaded DLLs': ', '.join(dlls),
                'Files': files,
                'Processes': procs,
                'Requested hosts': ', '.join(hosts),
                'Created mutexes': ', '.join(mutexes),
                'Registry keys': regs,
                'Created services': svcs,
                'Hooks': ', '.join(hooks),
                'System checks': ', '.join(sysinfo)
            }
            self._add_result('Processes', filename, stats)

        it = root.getiterator('running_process')
        for proc in it:
            stats = {'PID': proc.attrib['pid'], 'PPID': proc.attrib['ppid']}
            self._add_result('Running processes', proc.attrib['filename'], stats)

        it = root.getiterator('flows')
        for flow in it:
            info = {
                'Source IP': flow.attrib['src_ip'],
                'Source Port': flow.attrib['src_port'],
                'Dest Port': flow.attrib['dst_port'],
                'Bytes': flow.attrib['bytes']
            }
            if flow.attrib['protocol'] == '6':
                proto = 'TCP'
            elif flow.attrib['protocol'] == '17':
                proto = 'UDP'
            else:
                proto = flow.attrib['protocol']
            info['Protocol'] = proto
            self._add_result('Flows', flow.attrib['dst_ip'], info)

        it = root.getiterator('dns')
        for dns in it:
            info = {
                'Type': dns.attrib['type'],
                'IP': dns.attrib.get('ip', 'Not resolved.')
            }
            self._add_result('DNS', dns.attrib['rr'], info)

        it = root.getiterator('http')
        for http in it:
            info = {
                'User Agent': http.attrib['user_agent'],
                'Type': http.attrib['type']
            }
            self._add_result('HTTP', http.text, info)
class PassiveTotalService(Service):
    """
    Check the PassiveTotal database to see if it contains this domain or IP.

    This service relies on a user's allowed searches within the PassiveTotal
    system, which are earned through accurate domain/IP classifications.

    Requires an API key available from passivetotal.org
    """

    name = "passivetotal_lookup"
    version = '1.0.0'
    type_ = Service.TYPE_CUSTOM
    supported_types = ['Domain', 'IP']
    required_fields = []
    default_config = [
        ServiceConfigOption('pt_api_key',
                            ServiceConfigOption.STRING,
                            description="Required. Obtain from PassiveTotal.",
                            required=True,
                            private=True),
        ServiceConfigOption('pt_query_url',
                            ServiceConfigOption.STRING,
                            default='https://www.passivetotal.org/api/query/',
                            required=True,
                            private=True),
    ]

    def _scan(self, context):
        headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
        apiKey = self.config.get('pt_api_key', '')
        queryUrl = self.config.get('pt_query_url', '')

        if not apiKey:
            self._error("PassiveTotal API key is invalid or blank")

        if context.crits_type == 'Domain':
            params = json.dumps({'value': context.domain_dict['domain'],
                                 'apiKey': apiKey})
        elif context.crits_type == 'IP':
            params = json.dumps({'value': context.ip_dict['ip'],
                                 'apiKey': apiKey})
        else:
            logger.error("PassiveTotal: Invalid type.")
            self._error("Invalid type.")
            return

        try:
            response = requests.post(queryUrl, data=params, headers=headers)
        except Exception as e:
            logger.error("PassiveTotal: network connection error (%s)" % e)
            self._error("Network connection error checking PassiveTotal (%s)" % e)
            return

        loaded = json.loads(response.content)

        # handling a valid response
        if loaded['resultCount'] == 0:
            return

        if len(loaded['errors']) > 0:
            logger.error("PassiveTotal: query error (%s)" % str(loaded['errors']))
            self._error("PassiveTotal: query error (%s)" % str(loaded['errors']))

        results = loaded['results']
        if context.crits_type == 'Domain':
            for resolve in results['resolutions']:
                stats = {
                    'value': results['focusPoint'],
                    'first_seen': resolve['firstSeen'],
                    'last_seen': resolve['lastSeen'],
                    'source': ','.join(resolve['source']),
                    'as_name': resolve['as_name'],
                    'asn': resolve['asn'],
                    'country': resolve['country'],
                    'network': resolve['network']
                }
                self._add_result('Passive DNS Data', resolve['value'], stats)
        elif context.crits_type == 'IP':
            stats = {
                'as_name': results['as_name'],
                'asn': results['asn'],
                'country': results['country'],
                'firstSeen': results['firstSeen'],
                'lastSeen': results['lastSeen'],
                'network': results['network']
            }
            self._add_result('Metadata', results['focusPoint'], stats)
            for resolve in results['resolutions']:
                stats = {
                    'firstSeen': resolve['firstSeen'],
                    'lastSeen': resolve['lastSeen'],
                    'source': ','.join(resolve['source']),
                    'whois': resolve.get('whois', {})
                }
                self._add_result('Passive DNS Data', resolve['value'], stats)
class PEInfoService(Service):
    """
    Extract metadata about Windows PE/COFF files.

    Leverages a combination of the pefile python module and some custom code
    to parse the structures of a PE/COFF binary and extract metadata about
    its sections, imports, exports, debug information and timestamps.
    """

    name = "peinfo"
    version = '1.1.3'
    type_ = Service.TYPE_CUSTOM
    supported_types = ['Sample']
    default_config = [
        ServiceConfigOption('extract_all_resource',
                            ServiceConfigOption.BOOL,
                            description="Create new samples for all resource files"),
    ]

    @staticmethod
    def valid_for(context):
        # Only run on PE files
        return context.is_pe()

    def _get_pehash(self, exe):
        # image characteristics
        img_chars = bitstring.BitArray(hex(exe.FILE_HEADER.Characteristics))
        # pad to 16 bits
        img_chars = bitstring.BitArray(bytes=img_chars.tobytes())
        img_chars_xor = img_chars[0:7] ^ img_chars[8:15]

        # start to build pehash
        pehash_bin = bitstring.BitArray(img_chars_xor)

        # subsystem
        sub_chars = bitstring.BitArray(hex(exe.FILE_HEADER.Machine))
        # pad to 16 bits
        sub_chars = bitstring.BitArray(bytes=sub_chars.tobytes())
        sub_chars_xor = sub_chars[0:7] ^ sub_chars[8:15]
        pehash_bin.append(sub_chars_xor)

        # Stack Commit Size
        stk_size = bitstring.BitArray(hex(exe.OPTIONAL_HEADER.SizeOfStackCommit))
        stk_size_bits = string.zfill(stk_size.bin, 32)
        # now xor the bits
        stk_size = bitstring.BitArray(bin=stk_size_bits)
        stk_size_xor = stk_size[8:15] ^ stk_size[16:23] ^ stk_size[24:31]
        # pad to 8 bits
        stk_size_xor = bitstring.BitArray(bytes=stk_size_xor.tobytes())
        pehash_bin.append(stk_size_xor)

        # Heap Commit Size
        hp_size = bitstring.BitArray(hex(exe.OPTIONAL_HEADER.SizeOfHeapCommit))
        hp_size_bits = string.zfill(hp_size.bin, 32)
        # now xor the bits
        hp_size = bitstring.BitArray(bin=hp_size_bits)
        hp_size_xor = hp_size[8:15] ^ hp_size[16:23] ^ hp_size[24:31]
        # pad to 8 bits
        hp_size_xor = bitstring.BitArray(bytes=hp_size_xor.tobytes())
        pehash_bin.append(hp_size_xor)

        # Section chars
        for section in exe.sections:
            # virtual address
            sect_va = bitstring.BitArray(hex(section.VirtualAddress))
            sect_va = bitstring.BitArray(bytes=sect_va.tobytes())
            pehash_bin.append(sect_va)

            # raw size
            sect_rs = bitstring.BitArray(hex(section.SizeOfRawData))
            sect_rs = bitstring.BitArray(bytes=sect_rs.tobytes())
            sect_rs_bits = string.zfill(sect_rs.bin, 32)
            sect_rs = bitstring.BitArray(bin=sect_rs_bits)
            sect_rs = bitstring.BitArray(bytes=sect_rs.tobytes())
            sect_rs_bits = sect_rs[8:31]
            pehash_bin.append(sect_rs_bits)

            # section chars
            sect_chars = bitstring.BitArray(hex(section.Characteristics))
            sect_chars = bitstring.BitArray(bytes=sect_chars.tobytes())
            sect_chars_xor = sect_chars[16:23] ^ sect_chars[24:31]
            pehash_bin.append(sect_chars_xor)

            # entropy calculation
            address = section.VirtualAddress
            size = section.SizeOfRawData
            raw = exe.write()[address + size:]
            if size == 0:
                kolmog = bitstring.BitArray(float=1, length=32)
                pehash_bin.append(kolmog[0:7])
                continue
            bz2_raw = bz2.compress(raw)
            bz2_size = len(bz2_raw)
            #k = round(bz2_size / size, 5)
            k = bz2_size / size
            kolmog = bitstring.BitArray(float=k, length=32)
            pehash_bin.append(kolmog[0:7])

        m = hashlib.sha1()
        m.update(pehash_bin.tobytes())
        output = m.hexdigest()
        self._add_result('PEhash value', "%s" % output, {'Value': output})

    def _scan(self, context):
        try:
            pe = pefile.PE(data=context.data)
        except pefile.PEFormatError as e:
            self._error("A PEFormatError occurred: %s" % e)
            return

        self._get_sections(pe)
        self._get_pehash(pe)

        if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
            self._dump_resource_data("ROOT", pe.DIRECTORY_ENTRY_RESOURCE, pe)
        else:
            self._debug("No resources")

        if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
            self._get_imports(pe)
        else:
            self._debug("No imports")

        if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
            self._get_exports(pe)
        else:
            self._debug("No exports")

        if hasattr(pe, 'VS_VERSIONINFO'):
            self._get_version_info(pe)
        else:
            self._debug("No Version information")

        if hasattr(pe, 'DIRECTORY_ENTRY_DEBUG'):
            self._get_debug_info(pe)
        else:
            self._debug("No debug info")

        if hasattr(pe, 'DIRECTORY_ENTRY_TLS'):
            self._get_tls_info(pe)
        else:
            self._debug("No TLS info")

        if callable(getattr(pe, 'get_imphash', None)):
            self._get_imphash(pe)
        else:
            self._debug("pefile does not support get_imphash, upgrade to 1.2.10-139")

        self._get_timestamp(pe)
        self._get_rich_header(pe)

    # http://www.ntcore.com/files/richsign.htm
    def _get_rich_header(self, pe):
        rich_hdr = pe.parse_rich_header()
        if not rich_hdr:
            return
        data = {"raw": str(rich_hdr['values'])}
        self._add_result('rich_header', hex(rich_hdr['checksum']), data)

        # Generate a signature of the block. Need to apply checksum
        # appropriately. The hash here is sha256 because others are using
        # that here.
        #
        # Most of this code was taken from pefile but modified to work
        # on the start and checksum blocks.
        try:
            rich_data = pe.get_data(0x80, 0x80)
            if len(rich_data) != 0x80:
                return None
            data = list(struct.unpack("<32I", rich_data))
        except pefile.PEFormatError as e:
            return None

        checksum = data[1]
        headervalues = []

        for i in xrange(len(data) / 2):
            if data[2 * i] == 0x68636952:  # Rich
                if data[2 * i + 1] != checksum:
                    self._parse_error('Rich Header corrupted')
                break
            headervalues += [data[2 * i] ^ checksum, data[2 * i + 1] ^ checksum]

        sha_256 = hashlib.sha256()
        for hv in headervalues:
            sha_256.update(struct.pack('<I', hv))
        self._add_result('rich_header', sha_256.hexdigest(), None)

    def _get_imphash(self, pe):
        imphash = pe.get_imphash()
        self._add_result('imphash', imphash, {'import_hash': imphash})

    def _dump_resource_data(self, name, dir, pe):
        for i in dir.entries:
            try:
                if hasattr(i, 'data'):
                    x = i.data
                    rva = x.struct.OffsetToData
                    rname = "%s_%s_%s" % (name, i.name, x.struct.name)
                    size = x.struct.Size
                    data = pe.get_memory_mapped_image()[rva:rva + size]
                    if not data:
                        data = ""
                    if len(data) > 0:
                        if (self.config['extract_all_resource'] or
                                data[:2] == 'MZ' or data[:4] == "%PDF"):
                            self._debug("Adding new file from resource len %d - %s"
                                        % (len(data), rname))
                            self._add_file(data,
                                           filename=rname,
                                           relationship="Extracted_From")
                    results = {
                        "resource_type": x.struct.name.decode('UTF-8', errors='replace'),
                        "resource_id": i.id,
                        "language": x.lang,
                        "sub_language": x.sublang,
                        "address": x.struct.OffsetToData,
                        "size": len(data),
                        "md5": hashlib.md5(data).hexdigest(),
                    }
                    self._debug("Adding result for resource %s" % i.name)
                    self._add_result('pe_resource', x.struct.name, results)
                if hasattr(i, "directory"):
                    self._debug("Parsing next directory entry %s" % i.name)
                    self._dump_resource_data(name + "_%s" % i.name, i.directory, pe)
            except Exception as e:
                self._parse_error("Resource directory entry", e)

    def _get_sections(self, pe):
        for section in pe.sections:
            try:
                section_name = section.Name.decode('UTF-8', errors='replace')
                if section_name == "":
                    section_name = "NULL"
                data = {
                    "virt_address": section.VirtualAddress,
                    "virt_size": section.Misc_VirtualSize,
                    "size": section.SizeOfRawData,
                    "md5": section.get_hash_md5(),
                    "entropy": section.get_entropy(),
                }
                self._add_result('pe_section', section_name, data)
            except Exception as e:
                self._parse_error("section info", e)
                continue

    def _get_imports(self, pe):
        try:
            for entry in pe.DIRECTORY_ENTRY_IMPORT:
                for imp in entry.imports:
                    if imp.name:
                        name = imp.name
                    else:
                        name = "%s#%s" % (entry.dll, imp.ordinal)
                    data = {
                        "dll": "%s" % entry.dll,
                        "ordinal": "%s" % imp.ordinal,
                    }
                    self._add_result('pe_import', name, data)
        except Exception as e:
            self._parse_error("imports", e)

    def _get_exports(self, pe):
        try:
            for entry in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                data = {"rva_offset": pe.OPTIONAL_HEADER.ImageBase + entry.address}
                self._add_result('pe_export', entry.name, data)
        except Exception as e:
            self._parse_error("exports", e)

    def _get_timestamp(self, pe):
        try:
            timestamp = pe.FILE_HEADER.TimeDateStamp
            time_string = strftime('%Y-%m-%d %H:%M:%S', localtime(timestamp))
            data = {"raw": timestamp}
            self._add_result('pe_timestamp', time_string, data)
        except Exception as e:
            self._parse_error("timestamp", e)

    def _get_debug_info(self, pe):
        # woe is pefile when it comes to debug entries
        # we're mostly interested in codeview structures, namely NB10 and RSDS
        try:
            for dbg in pe.DIRECTORY_ENTRY_DEBUG:
                dbg_path = ""
                if hasattr(dbg.struct, "Type"):
                    result = {
                        'MajorVersion': dbg.struct.MajorVersion,
                        'MinorVersion': dbg.struct.MinorVersion,
                        'PointerToRawData': dbg.struct.PointerToRawData,
                        'SizeOfData': dbg.struct.SizeOfData,
                        'TimeDateStamp': dbg.struct.TimeDateStamp,
                        'TimeDateString': strftime('%Y-%m-%d %H:%M:%S',
                                                   localtime(dbg.struct.TimeDateStamp)),
                        'Type': dbg.struct.Type,
                        'subtype': 'pe_debug',
                    }
                    # type 0x2 is codeview, though not any specific version
                    # for other types we don't parse them yet
                    # but sounds like a great project for an enterprising CRITs coder...
                    if dbg.struct.Type == 0x2:
                        debug_offset = dbg.struct.PointerToRawData
                        debug_size = dbg.struct.SizeOfData
                        # ok, this probably isn't right, fix me
                        if debug_size < 0x200 and debug_size > 0:
                            # there might be a better way than __data__ in pefile to get the raw data
                            # i think that get_data uses RVAs, while this requires a physical address
                            debug_data = pe.__data__[debug_offset:debug_offset + debug_size]
                            # now we need to check the codeview version,
                            # http://www.debuginfo.com/articles/debuginfomatch.html
                            # as far as I can tell the gold is in RSDS and NB10
                            if debug_data[:4] == "RSDS":
                                result.update({
                                    'DebugSig': debug_data[0x00:0x04],
                                    'DebugGUID': binascii.hexlify(debug_data[0x04:0x14]),
                                    'DebugAge': struct.unpack('I', debug_data[0x14:0x18])[0],
                                })
                                if dbg.struct.SizeOfData > 0x18:
                                    dbg_path = debug_data[0x18:dbg.struct.SizeOfData - 1].decode(
                                        'UTF-8', errors='replace')
                                    result.update({
                                        'DebugPath': "%s" % dbg_path,
                                        'result': "%s" % dbg_path,
                                    })
                            if debug_data[:4] == "NB10":
                                result.update({
                                    'DebugSig': debug_data[0x00:0x04],
                                    'DebugTime': struct.unpack('I', debug_data[0x08:0x0c])[0],
                                    'DebugAge': struct.unpack('I', debug_data[0x0c:0x10])[0],
                                })
                                if dbg.struct.SizeOfData > 0x10:
                                    dbg_path = debug_data[0x10:dbg.struct.SizeOfData - 1].decode(
                                        'UTF-8', errors='replace')
                                    result.update({
                                        'DebugPath': "%s" % dbg_path,
                                        'result': "%s" % dbg_path,
                                    })
                    self._add_result('pe_debug', dbg_path, result)
        except Exception as e:
            self._parse_error("could not extract debug info", e)

    def _get_version_info(self, pe):
        if hasattr(pe, 'FileInfo'):
            try:
                for entry in pe.FileInfo:
                    if hasattr(entry, 'StringTable'):
                        for st_entry in entry.StringTable:
                            for str_entry in st_entry.entries.items():
                                try:
                                    value = str_entry[1].encode('ascii')
                                    result = {
                                        'key': str_entry[0],
                                        'value': value,
                                    }
                                except:
                                    value = str_entry[1].encode('ascii', errors='ignore')
                                    raw = binascii.hexlify(str_entry[1].encode('utf-8'))
                                    result = {
                                        'key': str_entry[0],
                                        'value': value,
                                        'raw': raw,
                                    }
                                result_name = str_entry[0] + ': ' + value[:255]
                                self._add_result('version_info', result_name, result)
                    elif hasattr(entry, 'Var'):
                        for var_entry in entry.Var:
                            if hasattr(var_entry, 'entry'):
                                for key in var_entry.entry.keys():
                                    try:
                                        value = var_entry.entry[key].encode('ascii')
                                        result = {
                                            'key': key,
                                            'value': value,
                                        }
                                    except:
                                        value = var_entry.entry[key].encode('ascii',
                                                                            errors='ignore')
                                        raw = binascii.hexlify(var_entry.entry[key])
                                        result = {
                                            'key': key,
                                            'value': value,
                                            'raw': raw,
                                        }
                                    result_name = key + ': ' + value
                                    self._add_result('version_var', result_name, result)
            except Exception as e:
                self._parse_error("version info", e)

    def _get_tls_info(self, pe):
        self._info("TLS callback table listed at 0x%08x"
                   % pe.DIRECTORY_ENTRY_TLS.struct.AddressOfCallBacks)
        callback_array_rva = (pe.DIRECTORY_ENTRY_TLS.struct.AddressOfCallBacks -
                              pe.OPTIONAL_HEADER.ImageBase)

        # read the array of TLS callbacks until we hit a NULL ptr (end of array)
        idx = 0
        callback_functions = []
        while pe.get_dword_from_data(pe.get_data(callback_array_rva + 4 * idx, 4), 0):
            callback_functions.append(
                pe.get_dword_from_data(pe.get_data(callback_array_rva + 4 * idx, 4), 0))
            idx += 1

        # if we start with a NULL ptr, then there are no callback functions
        if idx == 0:
            self._info("No TLS callback functions supported")
        else:
            for idx, va in enumerate(callback_functions):
                va_string = "0x%08x" % va
                self._info("TLS callback function at %s" % va_string)
                data = {'Callback Function': idx}
                self._add_result('tls_callback', va_string, data)

    def _parse_error(self, item, e):
        self._error("Error parsing %s (%s): %s" % (item, e.__class__.__name__, e))
class SSDeepService(Service):
    """
    Compare sample to others with ssdeep.
    """

    name = "ssdeep_compare"
    version = '1.0.2'
    type_ = Service.TYPE_CUSTOM
    purpose = "comparison"
    supported_types = ['Sample']
    # TODO: Figure out how to do this.
    #required_fields = ['ssdeep', 'mimetype']
    default_config = [
        ServiceConfigOption('threshold',
                            ServiceConfigOption.INT,
                            description="Min threshold for match",
                            required=True,
                            private=False,
                            default=50),
    ]

    def _scan(self, context):
        threshold = self.config.get("threshold", 50)
        target_ssdeep = context.sample_dict.get('ssdeep', None)
        target_md5 = context.md5
        target_mimetype = context.sample_dict.get('mimetype', None)
        if not target_ssdeep:
            logger.error("Could not get the target ssdeep value for sample")
            self._error("Could not get the target ssdeep value for sample")
            return

        # setup the sample space to compare against
        # first use the mimetype as a comparator if available
        query_filter = {}
        if target_mimetype:
            query_filter['mimetype'] = target_mimetype

        # then use only samples with a multiple of chunksize
        chunk_size = int(target_ssdeep.split(":")[0])
        query_filter["$or"] = []
        query_filter["$or"].append({"ssdeep": {"$regex": "^%d:" % (chunk_size * 2)}})
        query_filter["$or"].append({"ssdeep": {"$regex": "^%d:" % chunk_size}})
        query_filter["$or"].append({"ssdeep": {"$regex": "^%d:" % (chunk_size / 2)}})

        result_filter = {'md5': 1, 'ssdeep': 1}
        candidate_space = self._fetch_meta(query_filter, result_filter)
        match_list = []
        for candidate in candidate_space:
            if "ssdeep" in candidate:
                score = pydeep.compare(target_ssdeep, candidate["ssdeep"])
                if score >= threshold and candidate["md5"] != target_md5:
                    match_list.append({'md5': candidate["md5"], 'score': score})

        # finally sort the results
        match_list.sort(key=lambda sample: sample["score"], reverse=True)
        for match in match_list:
            self._add_result("ssdeep_match", match["md5"],
                             {'md5': match["md5"], 'score': match["score"]})
class OpenDNSService(Service):
    """
    Request more information about an artifact from OpenDNS.
    """

    name = "opendns_investigate"
    version = '1.0.0'
    type_ = Service.TYPE_CUSTOM
    template = "opendns_service_template.html"
    supported_types = ['Domain', 'IP']
    required_fields = []
    default_config = [
        ServiceConfigOption('Investigate_API_Token',
                            ServiceConfigOption.STRING,
                            description="Required. Obtain from OpenDNS.",
                            required=True,
                            private=True),
        ServiceConfigOption('Investigate_URI',
                            ServiceConfigOption.STRING,
                            default='https://investigate.api.opendns.com',
                            required=True,
                            private=True),
    ]

    def _replace(self, string):
        return string.replace("_", " ")

    def _scan(self, context):
        token = self.config.get('Investigate_API_Token', '')
        uri = self.config.get('Investigate_URI', '')
        headers = {'Authorization': 'Bearer ' + token}
        reqs = {}
        resps = {}
        scores = {u'-1': 'Bad', u'0': 'Unknown', u'1': 'Good'}

        if not token:
            self._error("A valid API token is required to use this service.")

        if context.crits_type == 'Domain':
            thing = context.domain_dict['domain']
            reqs["categorization"] = "/domains/categorization/" + context.domain_dict['domain'] + "?showLabels"
            reqs["score"] = "/domains/score/" + context.domain_dict['domain']
            reqs["recommendations"] = "/recommendations/name/" + context.domain_dict['domain'] + ".json"
            reqs["links"] = "/links/name/" + context.domain_dict['domain'] + ".json"
            reqs["security"] = "/security/name/" + context.domain_dict['domain'] + ".json"
            reqs["latest_tags"] = "/domains/" + context.domain_dict['domain'] + "/latest_tags"
            reqs["dnsdb"] = "/dnsdb/name/a/" + context.domain_dict['domain'] + ".json"
        elif context.crits_type == 'IP':
            thing = context.ip_dict['ip']
            reqs["dnsdb"] = "/dnsdb/ip/a/" + context.ip_dict['ip'] + ".json"
            reqs["latest_domains"] = "/ips/" + context.ip_dict['ip'] + "/latest_domains"
        else:
            logger.error("Unsupported type.")
            self._error("Unsupported type.")
            return

        try:
            for r in reqs.keys():
                resp = requests.get(uri + reqs[r], headers=headers)
                if resp.status_code == 204:
                    logger.error("No content status returned from request: %s" % (r))
                    self._error("No content status returned from request: %s" % (r))
                    resps[r] = "No content status returned from request: %s" % (r)
                elif resp.status_code != 200:
                    logger.error("Request: %s, error, %s" % (r, resp.reason))
                    self._error("Request: %s, error, %s" % (r, resp.reason))
                    resps[r] = "Request: %s, error, %s" % (r, resp.reason)
                else:
                    resps[r] = json.loads(self._replace(resp.content))
        except Exception as e:
            logger.error("Network connection or HTTP request error (%s)" % e)
            self._error("Network connection or HTTP request error (%s)" % e)
            return

        for r in resps.keys():
            if r == 'categorization':
                self._add_result(r, thing, resps[r][thing])
            elif r == 'score':
                self._add_result(r, thing, {'Score': scores[resps[r][thing]]})
            elif r == 'dnsdb':
                self._add_result(r, thing, resps[r]['features'])
            elif r == 'security':
                self._add_result(r, thing, resps[r])
            elif r == 'latest_tags':
                for tag in resps[r]:
                    self._add_result(r, thing, tag)
            elif r == 'recommendations':
                self._add_result(r, thing, resps[r])
            elif r == 'links':
                self._add_result(r, thing, resps[r])
            elif r == 'latest_domains':
                for domain in resps[r]:
                    self._add_result(r, domain['name'], domain)
            else:
                self._add_result(r, thing, {str(type(resps[r])): str(resps[r])})
                logger.error("Unsure how to handle %s" % (str(resps[r])))
                self._error("Unsure how to handle %s" % (str(resps[r])))
class TAXIIClient(Service):
    """
    Send TAXII message to TAXII server.
    """

    name = "taxii_service"
    version = "1.0.1"
    type_ = Service.TYPE_CUSTOM
    supported_types = ['Event']
    required_fields = ['_id']
    rerunnable = True
    template = "taxii_service_results.html"
    default_config = [
        ServiceConfigOption('hostname',
                            ServiceConfigOption.STRING,
                            description="TAXII Server hostname.",
                            default=None,
                            required=True,
                            private=True),
        ServiceConfigOption('keyfile',
                            ServiceConfigOption.STRING,
                            description="Location of your keyfile on the server.",
                            default=None,
                            required=True,
                            private=True),
        ServiceConfigOption('certfile',
                            ServiceConfigOption.STRING,
                            description="Location of your certfile on the server.",
                            default=None,
                            required=True,
                            private=True),
        ServiceConfigOption('data_feed',
                            ServiceConfigOption.STRING,
                            description="Your TAXII Data Feed Name.",
                            default=None,
                            required=True,
                            private=True),
        ServiceConfigOption('certfiles',
                            ServiceConfigOption.LIST,
                            description=("Comma-delimited list of CRITs Source"
                                         " name, TAXII feed name, and"
                                         " corresponding certificate"
                                         " file on disk for that source."),
                            default=None,
                            required=True,
                            private=True),
    ]

    @classmethod
    def _validate(cls, config):
        hostname = config.get("hostname", "").strip()
        keyfile = config.get("keyfile", "").strip()
        certfile = config.get("certfile", "").strip()
        data_feed = config.get("data_feed", "").strip()
        certfiles = config.get("certfiles", "")
        if not hostname:
            raise ServiceConfigError("You must specify a TAXII Server.")
        if not keyfile:
            raise ServiceConfigError("You must specify a keyfile location.")
        if not os.path.isfile(keyfile):
            raise ServiceConfigError("keyfile does not exist.")
        if not certfile:
            raise ServiceConfigError("You must specify a certfile location.")
        if not os.path.isfile(certfile):
            raise ServiceConfigError("certfile does not exist.")
        if not data_feed:
            raise ServiceConfigError("You must specify a TAXII Data Feed.")
        if not certfiles:
            raise ServiceConfigError("You must specify at least one certfile.")
        for crtfile in certfiles:
            try:
                (source, feed, filepath) = crtfile.split(',')
            except ValueError:
                raise ServiceConfigError(("You must specify a source, feed name"
                                          ", and certificate path for each source."))
            source = source.strip()
            feed = feed.strip()
            filepath = filepath.strip()
            if not does_source_exist(source):
                raise ServiceConfigError("Invalid source: %s" % source)
            if not os.path.isfile(filepath):
                raise ServiceConfigError("certfile does not exist: %s" % filepath)

    def __init__(self, *args, **kwargs):
        super(TAXIIClient, self).__init__(*args, **kwargs)
        logger.debug("Initializing TAXII Client.")
        self.hostname = self.config['hostname'].strip()
        self.keyfile = self.config['keyfile'].strip()
        self.certfile = self.config['certfile'].strip()
        self.certfiles = self.config['certfiles']

    def _scan(self, context):
        # TODO: not sure if this should come after we make the TAXII message
        # so the check is closer to actual submission time?
        if not resolve_taxii_server(self.hostname):
            self._error("Cannot contact TAXII Server: %s" % self.hostname)
            return
        else:
            self._info("TAXII Server Online: %s" % self.hostname)
            self._notify()

        client = tc.HttpClient()
        client.setUseHttps(True)
        client.setAuthType(tc.HttpClient.AUTH_CERT)
        client.setAuthCredentials({'key_file': self.keyfile,
                                   'cert_file': self.certfile})

        if settings.HTTP_PROXY:
            proxy = settings.HTTP_PROXY
            if not proxy.startswith('http://'):
                proxy = 'http://' + proxy
            client.setProxy(proxy, proxy_type=tc.HttpClient.PROXY_HTTPS)

        event_list = Event.objects(id=context._id)
        if len(event_list) < 1:
            self._info("Could not locate event in the database")
            self._notify()
        else:
            event_data = event_list[0]
            (stix_doc,
             final_sources,
             final_objects) = event_data.to_stix(context.username)

            if len(final_sources) < 1:
                self._error("No sources to send to! Ensure all related content is marked as releasable!")
                return

            final_objects.append(event_data)

            # collect the list of data feeds to send this message to
            destination_feeds = []
            for crtfile in self.certfiles:
                (source, feed, filepath) = crtfile.split(',')
                if source.strip() in final_sources:
                    destination_feeds.append((source.strip(),
                                              feed.strip(),
                                              filepath.strip()))

            self._info("Generating STIX document(s).")
            self._notify()

            inbox_messages = []
            # generate inbox messages
            # for now we will send one message per feed to isolate failures to one
            # feed submission and not prevent other messages from being sent.
            for feed in destination_feeds:
                # Create encrypted block
                encrypted_block = encrypt_block(
                    tm.ContentBlock(content_binding=t.CB_STIX_XML_10,
                                    content=stix_doc.to_xml()).to_xml(),
                    feed[2])
                # Wrap encrypted block in content block
                content_block = tm.ContentBlock(content_binding="SMIME",
                                                content=encrypted_block)
                # Create inbox message
                inbox_message = tm.InboxMessage(message_id=tm.generate_message_id(),
                                                content_blocks=[content_block],
                                                extended_headers={'TargetFeed': feed[1]})
                inbox_messages.append((feed[0], inbox_message))

            self._info("Sending TAXII message(s)")
            self._notify()

            # send messages
            for (src, inbox_msg) in inbox_messages:
                response = client.callTaxiiService2(self.hostname,
                                                    "/inbox/",
                                                    t.VID_TAXII_XML_10,
                                                    inbox_msg.to_xml())
                taxii_message = t.get_message_from_http_response(response,
                                                                 inbox_msg.message_id)
                if taxii_message.status_type == tm.ST_SUCCESS:
                    # update releasability for objects
                    date = datetime.datetime.now()
                    instance = Releasability.ReleaseInstance(analyst=context.username,
                                                             date=date)
                    for idx in enumerate(final_objects):
                        final_objects[idx[0]].add_releasability_instance(name=src,
                                                                         instance=instance)
                    self._add_result(self.name, "Success", {'recipient': src})
                else:
                    self._add_result(self.name, "Failure", {'recipient': src})

            # save releasability to database
            self._info("Updated releasability status for all related content.")
            self._notify()
            for obj in final_objects:
                obj.save()
        return
class MetaCapService(Service):
    """
    Run a PCAP through ChopShop's MetaCap module.
    """

    name = "MetaCap"
    version = '0.0.2'
    type_ = Service.TYPE_CUSTOM
    template = "metacap_service_template.html"
    supported_types = ['PCAP']
    default_config = [
        ServiceConfigOption('basedir',
                            ServiceConfigOption.STRING,
                            description="A base directory where all the ChopShop modules and libraries exist.",
                            default=None,
                            private=True,
                            required=True),
        ServiceConfigOption('tcpdump',
                            ServiceConfigOption.STRING,
                            description="Full path to tcpdump binary.",
                            default="/usr/sbin/tcpdump",
                            private=True,
                            required=True),
        ServiceConfigOption('tshark',
                            ServiceConfigOption.STRING,
                            description="Full path to tshark binary.",
                            default="/usr/bin/tshark",
                            private=True,
                            required=True),
    ]

    def __init__(self, *args, **kwargs):
        super(MetaCapService, self).__init__(*args, **kwargs)
        logger.debug("Initializing MetaCap service.")
        self.base_dir = self.config['basedir']
        self.modules = "metacap -b"

    def _scan(self, context):
        logger.debug("Setting up shop...")
        shop_path = "%s/shop" % self.base_dir
        if not os.path.exists(self.base_dir):
            raise ServiceConfigError("ChopShop path does not exist")
        elif not os.path.exists(shop_path):
            raise ServiceConfigError("ChopShop shop path does not exist")
        else:
            sys.path.append(shop_path)
            from ChopLib import ChopLib
            from ChopUi import ChopUi

            logger.debug("Scanning...")

            choplib = ChopLib()
            chopui = ChopUi()

            choplib.base_dir = self.base_dir

            # XXX: Convert from unicode to str...
            choplib.modules = str(self.modules)

            chopui.jsonout = jsonhandler
            choplib.jsonout = True

            # ChopShop (because of pynids) needs to read a file off disk.
            # The services framework forces you to use 'with' here. It's not
            # possible to just get a path to a file on disk.
            with self._write_to_file() as pcap_file:
                choplib.filename = pcap_file
                chopui.bind(choplib)
                chopui.start()
                chopui.jsonclass.set_service(self)
                choplib.start()

                while chopui.is_alive():
                    time.sleep(.1)

                chopui.join()
                choplib.finish()
                choplib.join()
class OfficeMetaService(Service):
    """
    Parses meta data from Office documents using a custom parser.
    """

    name = "office_meta"
    version = '1.0.2'
    type_ = Service.TYPE_CUSTOM
    supported_types = ['Sample']
    default_config = [
        ServiceConfigOption('overwrite',
                            ServiceConfigOption.BOOL,
                            description="Whether the previous results should be overwritten."),
        ServiceConfigOption('save_streams',
                            ServiceConfigOption.BOOL,
                            description="Whether streams should be added as new samples."),
    ]

    @staticmethod
    def valid_for(context):
        office_magic = "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
        if context.data:
            return office_magic in context.data
        return False

    def _scan(self, context):
        oparser = OfficeParser(context.data)
        oparser.parse_office_doc()
        if not oparser.office_header.get('maj_ver'):
            self._error("Could not parse file as an office document")
            return

        self._add_result('office_header', '%d.%d' %
                         (oparser.office_header.get('maj_ver'),
                          oparser.office_header.get('min_ver')))

        for curr_dir in oparser.directory:
            result = {
                'md5': curr_dir.get('md5', ''),
                'size': curr_dir.get('stream_size', 0),
                'mod_time': oparser.timestamp_string(curr_dir['modify_time'])[1],
                'create_time': oparser.timestamp_string(curr_dir['create_time'])[1],
            }
            self._add_result('directory', curr_dir['norm_name'], result)
            if self.config.get('save_streams', 0) == 1 and 'data' in curr_dir:
                self._add_file(curr_dir['data'],
                               curr_dir['norm_name'],
                               relationship="Extracted_From")

        for prop_list in oparser.properties:
            for prop in prop_list['property_list']:
                prop_summary = oparser.summary_mapping.get(
                    binascii.unhexlify(prop['clsid']), None)
                prop_name = prop_summary.get('name', 'Unknown')
                for item in prop['properties']['properties']:
                    result = {
                        'name': item.get('name', 'Unknown'),
                        'value': item.get('date', item['value']),
                        'result': item.get('result', ''),
                    }
                    self._add_result('doc_meta', prop_name, result)

    def _parse_error(self, item, e):
        self._error("Error parsing %s (%s): %s" % (item, e.__class__.__name__, e))
class YaraService(Service):
    """
    Scan a file using Yara signatures.
    """

    name = "yara"
    version = '1.1.2'
    type_ = Service.TYPE_CUSTOM
    supported_types = ['Sample']
    default_config = [
        ServiceConfigOption('sigdir',
                            ServiceConfigOption.STRING,
                            description="A base directory where all the signature files exist. "
                                        "It is prepended to each sigfile to determine the "
                                        "complete path to the signature file.",
                            private=True),
        ServiceConfigOption('sigfiles',
                            ServiceConfigOption.LIST,
                            description="A list of signature files. If `sigdir` is defined, "
                                        "each sigfile should be relative to this directory; "
                                        "otherwise it should be an absolute path. Do not put "
                                        "quotes around file names.",
                            required=True),
    ]

    @staticmethod
    def validate(config):
        # Try to compile the rules files.
        YaraService._compile_rules(config['sigdir'], config['sigfiles'])

    def __init__(self, *args, **kwargs):
        super(YaraService, self).__init__(*args, **kwargs)
        logger.debug("Initializing Yara scanner.")
        self.sigsets = self._compile_rules(self.config['sigdir'],
                                           self.config['sigfiles'])

    @staticmethod
    def _compile_rules(sigdir, sigfiles):
        if not sigfiles:
            raise ServiceConfigError("No signature files specified.")
        logger.debug("Sigdir: %s" % sigdir)
        sigsets = []
        for sigfile in sigfiles:
            sigfile = sigfile.strip()
            logger.debug("Sigfile: %s" % sigfile)
            if sigdir:
                abspath = os.path.abspath(os.path.join(sigdir, sigfile))
            else:
                abspath = sigfile
            logger.debug("Full path to signature file: %s" % abspath)
            filename = os.path.basename(abspath)
            version = sigfile.split('.')[0]
            try:
                with open(abspath, "rt") as f:
                    data = f.read()
            except:
                logger.exception("File cannot be opened: %s" % abspath)
                raise
            sig_md5 = md5(data).hexdigest()
            try:
                rules = yara.compile(source=data)
            except yara.SyntaxError:
                message = "Not a valid yara rules file: %s" % abspath
                logger.exception(message)
                raise ServiceConfigError(message)
            sigsets.append({'name': filename,
                            'md5': sig_md5,
                            'rules': rules,
                            'version': version})
        logger.debug(str(sigsets))
        return sigsets

    def _scan(self, context):
        logger.debug("Scanning...")
        if not context.data:
            self._info("No data to scan, skipping")
            return

        for sigset in self.sigsets:
            logger.debug("Signature set name: %s" % sigset['name'])
            self._info("Scanning with %s (%s)" % (sigset['name'], sigset['md5']))
            matches = sigset['rules'].match(data=context.data)
            for match in matches:
                strings = {}
                for s in match.strings:
                    s_name = s[1]
                    s_offset = s[0]
                    try:
                        s_data = s[2].decode('ascii')
                    except UnicodeError:
                        s_data = "Hex: " + binascii.hexlify(s[2])
                    s_key = "{0}-{1}".format(s_name, s_data)
                    if s_key in strings:
                        strings[s_key]['offset'].append(s_offset)
                    else:
                        strings[s_key] = {
                            'offset': [s_offset],
                            'name': s_name,
                            'data': s_data,
                        }
                string_list = []
                for key in strings:
                    string_list.append(strings[key])
                self._add_result(self.name, match.rule, {'strings': string_list})