def GetIDForHosts(fileFullPathList, DB):
    # Returns: (filePath, instanceID, hostname, hostID, ingest_type)
    hostsTest = {}
    hostsProcess = []
    progress_total = 0
    progress_current = 0

    # Determine plugin_type and hostname
    for file_name_fullpath in fileFullPathList:
        hostName = None
        ingest_type = None
        loop_counter = 0
        while True:
            if loop_counter > len(ingest_plugins_types_stack):
                # We ignore empty files from hosts with no appcompat data
                # todo: Omit suppression on verbose mode
                tmp_file_size = file_size(file_name_fullpath)
                if tmp_file_size > 500:
                    logger.warning("No ingest plugin could process: %s (skipping file) [size: %d]"
                                   % (ntpath.basename(file_name_fullpath), tmp_file_size))
                break
            ingest_type = ingest_plugins_types_stack[0]
            if ingest_plugins[ingest_type].matchFileNameFilter(file_name_fullpath):
                # Check magic:
                try:
                    magic_check = ingest_plugins[ingest_type].checkMagic(file_name_fullpath)
                    if isinstance(magic_check, tuple):
                        logger.error("Report bug")
                        magic_check_res = False
                    else:
                        magic_check_res = magic_check
                    if magic_check_res:
                        # Magic OK, go with this plugin
                        hostName = ingest_plugins[ingest_type].getHostName(file_name_fullpath)
                        break
                except Exception as e:
                    logger.exception("Error processing: %s (%s)" % (file_name_fullpath, str(e)))
            # Emulate a stack with a list to minimize internal looping (place last used plugin at the top)
            ingest_plugins_types_stack.remove(ingest_type)
            ingest_plugins_types_stack.insert(len(ingest_plugins_types_stack), ingest_type)
            loop_counter += 1
        if hostName is not None:
            if hostName not in hostsTest:
                hostsTest[hostName] = []
            hostsTest[hostName].append((file_name_fullpath, ingest_plugins[ingest_type]))

    progress_total = len(hostsTest.keys())
    # Iterate over hosts. If a host exists in the DB grab its rowID, else create it and grab the rowID.
    conn = DB.appGetConn()
    with closing(conn.cursor()) as c:
        for hostName in hostsTest.keys():
            assert(hostName)
            logger.debug("Processing host: %s" % hostName)
            # Check if the host exists
            c.execute("SELECT count(*) FROM Hosts WHERE HostName = '%s'" % hostName)
            data = c.fetchone()[0]
            if data != 0:
                # Host already has at least one instance in the DB
                c.execute("SELECT HostID, Instances FROM Hosts WHERE HostName = '%s'" % hostName)
                data = c.fetchone()
                tmpHostID = data[0]
                tmpInstances = eval(data[1])
                for (file_fullpath, ingest_plugin) in hostsTest[hostName]:
                    logger.debug("Grabbing instanceID from file: %s" % file_fullpath)
                    try:
                        instance_ID = CalculateInstanceID(file_fullpath, ingest_plugin)
                    except Exception:
                        logger.error("Error parsing: %s (skipping)" % file_fullpath)
                        traceback.print_exc(file=sys.stdout)
                    else:
                        if str(instance_ID) not in tmpInstances:
                            tmpInstances.append(str(instance_ID))
                            hostsProcess.append((file_fullpath, instance_ID, hostName, tmpHostID, ingest_plugin))
                        else:
                            logger.debug("Duplicate host and instance found: %s" % hostName)
                            continue
                # Save updated Instances list
                c.execute("UPDATE Hosts SET Instances = %s, InstancesCounter = %d WHERE HostName = '%s'"
                          % ('"' + str(repr(tmpInstances)) + '"', len(tmpInstances), hostName))
            else:
                # Host does not exist. Add instances and grab the host ID.
                tmpInstances = []
                newInstances = []
                for (file_fullpath, ingest_plugin) in hostsTest[hostName]:
                    try:
                        instance_ID = CalculateInstanceID(file_fullpath, ingest_plugin)
                    except Exception:
                        logger.error("Error parsing: %s (skipping)" % file_fullpath)
                        traceback.print_exc(file=sys.stdout)
                    else:
                        if str(instance_ID) not in tmpInstances:
                            tmpInstances.append(str(instance_ID))
                            newInstances.append((file_fullpath, instance_ID, ingest_plugin))
                c.execute("INSERT INTO Hosts VALUES (NULL,%s,%s,%d,%d,%d)"
                          % ('"' + hostName + '"', '"' + str(repr(tmpInstances)) + '"', len(tmpInstances), 0, 0))
                tmpHostID = c.lastrowid
                for (file_fullpath, instance_ID, ingest_plugin) in newInstances:
                    # todo: Do we want/need each row to track from what instance it came?
                    hostsProcess.append((file_fullpath, instance_ID, hostName, tmpHostID, ingest_plugin))

            # Update progress
            progress_current += 1
            if settings.logger_getDebugMode():
                status_extra_data = " [RAM: %d%%]" % psutil_phymem_usage()
            else:
                status_extra_data = ""
            # logger.debug("Pre-process new hosts/instances%s" % status_extra_data)
            logger.info(update_progress(min(1, float(progress_current) / float(progress_total)),
                                        "Calculate ID's for new hosts/instances%s" % status_extra_data, True))

        conn.commit()

    # Return hosts to be processed
    return hostsProcess
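
# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original flow: the Hosts bookkeeping in
# GetIDForHosts above builds its SQL by string interpolation and round-trips
# the Instances list through repr()/eval(). Assuming the same six-column Hosts
# layout implied by the INSERT above (rowid, HostName, Instances,
# InstancesCounter, plus two integer columns), the equivalent lookup/merge/
# update with parameterized queries and JSON serialization would look roughly
# like this. The helper name and the 'conn' argument (an open sqlite3
# connection) are placeholders, not part of the original module.
# ---------------------------------------------------------------------------
import json
from contextlib import closing

def upsert_host_instances_sketch(conn, host_name, new_instance_ids):
    with closing(conn.cursor()) as c:
        # Fetch the current instance list for the host, if it is already known
        c.execute("SELECT HostID, Instances FROM Hosts WHERE HostName = ?", (host_name,))
        row = c.fetchone()
        if row is not None:
            host_id, instances = row[0], json.loads(row[1])
        else:
            host_id, instances = None, []
        # Merge in the newly calculated instance IDs, skipping duplicates
        for instance_id in new_instance_ids:
            if str(instance_id) not in instances:
                instances.append(str(instance_id))
        if host_id is None:
            # New host: insert it and grab the generated row ID
            c.execute("INSERT INTO Hosts VALUES (NULL, ?, ?, ?, 0, 0)",
                      (host_name, json.dumps(instances), len(instances)))
            host_id = c.lastrowid
        else:
            # Known host: persist the merged instance list
            c.execute("UPDATE Hosts SET Instances = ?, InstancesCounter = ? WHERE HostID = ?",
                      (json.dumps(instances), len(instances), host_id))
    conn.commit()
    return host_id
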
def appSearchMP(dbfilenameFullPath, searchType, search_space, options):
    (outputFile, maxCores) = (options.outputFile, options.maxCores)
    known_bad_data = None
    # Start timer
    t0 = time.time()

    DB = appDB.DBClass(dbfilenameFullPath, True, settings.__version__)
    conn = DB.appConnectDB()

    # If possible use the available indexes
    if (hasattr(options, 'field_name') and searchType == 'LITERAL'
            and options.searchLiteral[0][0] not in ['=', '>', '<']
            and DB.appIndexExistsDB(options.field_name)):
        num_hits = namedtuple('hits', 'value')
        num_hits_suppressed = namedtuple('hits', 'value')
        (num_hits.value, num_hits_suppressed.value, results) = runIndexedSearch(dbfilenameFullPath, search_space, options)
    else:
        # Get total number of entries to search
        entriesCount = DB.CountEntries()
        logger.debug("Total entries in search space: %d" % entriesCount)

        # Pre-load known_bad if required
        if searchType == 'KNOWNBAD':
            known_bad_data = LoadRegexBulkSearch(options.knownbad_file)

        # Establish communication queues
        tasks = multiprocessing.JoinableQueue()
        resultsProducers = multiprocessing.Queue()
        resultsConsumers = multiprocessing.Queue()
        hitHistogram_queue = multiprocessing.Queue()

        # Start producers/consumers
        num_consumers = 1
        num_producers = max(1, maxCores - 1)

        # Prep lock for progress update Producers
        progProducers = multiprocessing.Value('i', 0)
        # Prep lock for progress update Consumers
        progConsumers = multiprocessing.Value('i', 0)
        # Prep Consumers return values
        num_hits = multiprocessing.Value('i', 0)
        num_hits_suppressed = multiprocessing.Value('i', 0)

        logger.debug('Using %d cores for searching / %d cores for dumping results' % (num_producers, num_consumers))

        # Queue tasks for Producers
        # Limit rowsPerJob to constrain memory use and ensure reasonable progress updates
        rowsPerJob = min((entriesCount / 8), 5000)
        logger.debug("RowsPerJob: %d" % rowsPerJob)
        num_tasks = 0
        for startingRowID in range(0, entriesCount - rowsPerJob, rowsPerJob):
            tasks.put(Task(startingRowID, rowsPerJob - 1))
            logger.debug("Creating search job %d: [%d - %d]"
                         % (num_tasks, startingRowID, startingRowID + rowsPerJob - 1))
            num_tasks += 1
        logger.debug("Creating search job %d: [%d - %d]"
                     % (num_tasks, num_tasks * rowsPerJob,
                        (num_tasks * rowsPerJob) + (entriesCount - (num_tasks * rowsPerJob - 1))))
        # Special consideration for the last one:
        tasks.put(Task(num_tasks * rowsPerJob, entriesCount - (num_tasks * rowsPerJob - 1)))
        logger.debug("Number of tasks: %d" % num_tasks)

        # Add a poison pill for each producer
        for i in xrange(num_producers):
            tasks.put(None)

        # Start producer threads
        producers = [Producer(tasks, resultsProducers, dbfilenameFullPath, progProducers, num_consumers,
                              searchType, search_space, options, num_hits, known_bad_data)
                     for i in xrange(num_producers)]
        for producer in producers:
            producer.daemon = True  # Remove for debugging
            producer.start()

        # Start consumer threads
        consumers = [Consumer(resultsProducers, resultsConsumers, progConsumers, num_producers, outputFile,
                              dbfilenameFullPath, searchType, search_space, options, num_hits,
                              num_hits_suppressed, hitHistogram_queue, known_bad_data)
                     for i in xrange(num_consumers)]
        for consumer in consumers:
            consumer.daemon = True  # Remove for debugging
            consumer.start()

        # Producer progress loop
        while (num_tasks > progProducers.value and progProducers.value >= 0):
            logger.debug("Producer num_tasks: %d - v.value: %d" % (num_tasks, progProducers.value))
            update_progress(min(1, float(progProducers.value) / float(num_tasks)),
                            "Searching [%d]" % (num_hits.value - num_hits_suppressed.value))
            time.sleep(0.5)
        update_progress(1, "Searching [%d]" % (num_hits.value - num_hits_suppressed.value))

        # Wait for consumers dumping results to finish too
        while (num_hits.value > progConsumers.value and progConsumers.value >= 0):
            logger.debug("Consuming hit: %d / %d" % (progConsumers.value, num_hits.value))
            update_progress(min(1, float(progConsumers.value) / float(num_hits.value)),
                            "Dumping results to disk [%d]" % progConsumers.value)
            time.sleep(0.5)

        # Make sure we dumped as many hits as we found
        assert (num_hits.value == progConsumers.value)
        update_progress(1, "Dumping results to disk [%d]" % progConsumers.value)

        # Track Consumer deaths
        logger.debug("Waiting for consumer reverse-poison pills")
        while num_consumers > 0:
            tmp = resultsConsumers.get()
            # Check for reverse-poison pill
            if tmp is None:
                num_consumers -= 1
                logger.debug("Consumer finished!")
        logger.debug("All consumers accounted for")

        # Wait for consumer threads to finish
        logger.debug("Waiting for consumer threads to finish")
        for consumer in consumers:
            consumer.join()
        logger.debug("Consumer threads finished")

        # Print hit histogram:
        results = []
        results.append(('cyan', ("Hit histogram:", "", "")))
        while not hitHistogram_queue.empty():
            (name, regex, regex_hits) = hitHistogram_queue.get()
            results.append(('white', (name, regex, regex_hits)))
        if len(results) > 1:
            outputcolum(results)

    # Stop timer
    t1 = time.time()

    logger.info("Search hits: %d" % num_hits.value)
    logger.info("Suppressed duplicate hits: %d" % num_hits_suppressed.value)
    logger.info("Search time: %s" % (str(timedelta(seconds=(t1 - t0)))))

    if num_hits.value:
        logger.info("Head:")
        # Dump the head of the output file:
        from itertools import islice
        with open(options.outputFile) as myfile:
            head = list(islice(myfile, 5))
            num_lines = len(head) + sum(1 for _ in myfile)
        for line in head:
            logger.info(line.strip('\n\r'))
        logger.info("(%d lines suppressed)" % max(0, (num_lines - 5)))

    return (num_hits.value, num_hits_suppressed.value, results)
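
# ---------------------------------------------------------------------------
# Minimal standalone sketch of the coordination pattern appSearchMP relies on:
# worker processes drain a shared JoinableQueue until they hit a poison pill
# (None), and each one then posts a reverse-poison pill on the results queue
# so the parent knows how many workers have finished. The doubling "work" and
# all names here are placeholders for illustration, not the Producer/Consumer
# classes of this module.
# ---------------------------------------------------------------------------
import multiprocessing

def _worker_sketch(tasks, results):
    while True:
        task = tasks.get()
        if task is None:
            # Poison pill: stop consuming tasks
            tasks.task_done()
            break
        results.put(task * 2)  # stand-in for the real search work
        tasks.task_done()
    # Reverse-poison pill: signal the parent that this worker is done
    results.put(None)

def _run_worker_pool_sketch(num_workers=2, num_tasks=8):
    tasks = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()
    workers = [multiprocessing.Process(target=_worker_sketch, args=(tasks, results))
               for _ in range(num_workers)]
    for w in workers:
        w.daemon = True
        w.start()
    for i in range(num_tasks):
        tasks.put(i)
    # One poison pill per worker so every process shuts down cleanly
    for _ in workers:
        tasks.put(None)
    # Drain results until every worker has sent its reverse-poison pill
    hits, live_workers = [], num_workers
    while live_workers > 0:
        item = results.get()
        if item is None:
            live_workers -= 1
        else:
            hits.append(item)
    for w in workers:
        w.join()
    return sorted(hits)
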
def parseManifestAuditFileName(jsondata, zip_archive_filename):
    # Parse manifest.json data and return files which will need to be processed
    file_list = []
    m = re.match(r'^.*(?:\\|\/)(.*)[-_].{22}\..{3}$', zip_archive_filename)
    if m:
        hostname = m.group(1)
        data = json.load(jsondata)
        if 'audits' in data:
            for audit in data['audits']:
                if 'sysinfo' in audit['generator']:
                    continue
                if 'install' not in audit['generator']:
                    if 'registry-api' in audit['generator'] or 'w32registryapi' in audit['generator']:
                        for result in audit['results']:
                            if 'application/xml' in result['type']:
                                file_list.append((os.path.join(zip_archive_filename, result['payload']),
                                                  os.path.join(zip_archive_filename,
                                                               hostname + "_" + result['payload'] + ".xml")))
                            else:
                                continue
                    elif ('plugin-execute' in audit['generator']
                          or 'w32scripting-persistence' in audit['generator']
                          or 'file-acquisition' in audit['generator']):
                        for result in audit['results']:
                            if 'application/vnd.mandiant.issues+xml' not in result['type']:
                                file_list.append((os.path.join(zip_archive_filename, result['payload']),
                                                  os.path.join(zip_archive_filename,
                                                               hostname + "_" + result['payload'] + ".xml")))
                            else:
                                continue
                    # elif 'plugin' not in audit['generator'] and len(audit['results']) == 1:
                    #     file_list.append((os.path.join(zip_archive_filename, audit['results'][0]['payload']),
                    #                       os.path.join(zip_archive_filename, hostname + "_" + audit['results'][0]['payload'] + ".xml")))
                    # elif 'plugin' in audit['generator'] and len(audit['results']) <= 1:
                    #     pass
                    # elif 'plugin' in audit['generator'] and len(audit['results']) == 2:
                    #     file_list.append((os.path.join(zip_archive_filename, audit['results'][1]['payload']),
                    #                       os.path.join(zip_archive_filename, hostname + "_" + audit['results'][0]['payload'] + ".xml")))
                    # else:
                    #     logger.error("Unknown result type/format on HX audit manifest.json: %s (please report!)" % zip_archive_filename)
        else:
            logger.warning("HX script execution failed for host: %s, ignoring" % hostname)
    else:
        logger.error("Unable to extract hostname on parseManifestAuditFileName: %s" % zip_archive_filename)

    if len(file_list) == 0:
        logger.warning("No file that could be processed found on manifest.json (likely to be a failed script run) for: %s [%d bytes]"
                       % (zip_archive_filename, file_size(zip_archive_filename)))
    return file_list
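
# ---------------------------------------------------------------------------
# Hedged illustration of the hostname extraction performed above: the regex
# expects an HX triage archive name of the form
# <path>/<hostname>[-_]<22-character id>.<3-character extension> and captures
# the hostname. Only the pattern comes from parseManifestAuditFileName; the
# helper name and the sample file name are made up for this example.
# ---------------------------------------------------------------------------
import re

def extract_hostname_sketch(zip_archive_filename):
    m = re.match(r'^.*(?:\\|\/)(.*)[-_].{22}\..{3}$', zip_archive_filename)
    return m.group(1) if m else None

# Hypothetical usage:
#   extract_hostname_sketch('/cases/acme/WKSTN-042_abcdefghijklmnopqrstuv.zip')
# returns 'WKSTN-042'.
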