def _GetParserMediator(
    self, event_queue, parse_error_queue, knowledge_base_values=None,
    file_entry=None, parser_chain=None):
  """Retrieves a parser mediator object.

  Args:
    event_queue: the event queue (instance of Queue).
    parse_error_queue: the parse error queue (instance of Queue).
    knowledge_base_values: optional dict containing the knowledge base
                           values. The default is None.
    file_entry: optional dfVFS file entry object (instance of
                dfvfs.FileEntry) being parsed. The default is None.
    parser_chain: optional string containing the parsing chain up to this
                  point. The default is None.

  Returns:
    A parser mediator object (instance of ParserMediator).
  """
  event_queue_producer = queue.ItemQueueProducer(event_queue)
  parse_error_queue_producer = queue.ItemQueueProducer(parse_error_queue)

  knowledge_base_object = knowledge_base.KnowledgeBase()
  if knowledge_base_values:
    for identifier, value in knowledge_base_values.iteritems():
      knowledge_base_object.SetValue(identifier, value)

  new_mediator = mediator.ParserMediator(
      event_queue_producer, parse_error_queue_producer,
      knowledge_base_object)
  if file_entry:
    new_mediator.SetFileEntry(file_entry)

  if parser_chain:
    new_mediator.parser_chain = parser_chain

  return new_mediator
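# Hedged usage sketch, not from the code base: shows how the helper above is
# typically called from a test method, assuming the single_process module is
# imported as in the surrounding snippets; the knowledge base value is a
# made-up example.
def testGetParserMediatorSketch(self):
  """Sketches a typical _GetParserMediator call."""
  event_queue = single_process.SingleProcessQueue()
  parse_error_queue = single_process.SingleProcessQueue()
  parser_mediator = self._GetParserMediator(
      event_queue, parse_error_queue,
      knowledge_base_values={u'hostname': u'myhost'})
  self.assertIsNotNone(parser_mediator)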
def __init__(self, collection_queue, storage_queue, parse_error_queue):
  """Initialize the engine object.

  Args:
    collection_queue: the collection queue object (instance of Queue).
    storage_queue: the storage queue object (instance of Queue).
    parse_error_queue: the parse error queue object (instance of Queue).
  """
  self._collection_queue = collection_queue
  self._enable_debug_output = False
  self._enable_profiling = False
  self._event_queue_producer = queue.ItemQueueProducer(storage_queue)
  self._filter_object = None
  self._mount_path = None
  self._parse_error_queue = parse_error_queue
  self._parse_error_queue_producer = queue.ItemQueueProducer(
      parse_error_queue)
  self._process_archive_files = False
  self._profiling_sample_rate = 1000
  self._profiling_type = u'all'
  self._source = None
  self._source_path_spec = None
  self._source_file_entry = None
  self._text_prepend = None

  self.knowledge_base = knowledge_base.KnowledgeBase()
  self.storage_queue = storage_queue
def setUp(self):
  """Sets up the objects used throughout the test."""
  self._temp_directory = tempfile.mkdtemp()
  self._storage_filename = os.path.join(self._temp_directory, 'plaso.db')
  self._tag_input_filename = os.path.join(self._temp_directory, 'input1.tag')

  tag_input_file = open(self._tag_input_filename, 'wb')
  tag_input_file.write('\n'.join([
      'Test Tag',
      '  filename contains \'/tmp/whoaaaa\'',
      '  parser is \'TestEvent\' and stuff is \'dude\'']))
  tag_input_file.close()

  pfilter.TimeRangeCache.ResetTimeConstraints()

  # TODO: add upper queue limit.
  test_queue = multi_process.MultiProcessingQueue()
  test_queue_producer = queue.ItemQueueProducer(test_queue)
  test_queue_producer.ProduceItems([
      TestEvent(0),
      TestEvent(1000),
      TestEvent(2000000, '/tmp/whoaaaaa'),
      TestEvent(2500000, '/tmp/whoaaaaa'),
      TestEvent(5000000, '/tmp/whoaaaaa', 'dude')])
  test_queue_producer.SignalEndOfInput()

  storage_writer = storage.StorageFileWriter(
      test_queue, self._storage_filename)
  storage_writer.WriteEventObjects()

  self._storage_file = storage.StorageFile(self._storage_filename)
  self._storage_file.SetStoreLimit()
def testStorageWriter(self):
  """Test the storage writer."""
  self.assertEqual(len(self._event_objects), 4)

  # The storage writer is normally run in a separate thread.
  # For the purpose of this test it has to be run in sequence,
  # hence the call to WriteEventObjects after all the event objects
  # have been queued up.

  # TODO: add upper queue limit.
  # A timeout is used so that closing the multi processing queue does not
  # block the current process.
  test_queue = multi_process.MultiProcessingQueue(timeout=0.1)
  test_queue_producer = queue.ItemQueueProducer(test_queue)
  test_queue_producer.ProduceItems(self._event_objects)
  test_queue_producer.SignalAbort()

  with tempfile.NamedTemporaryFile() as temp_file:
    storage_writer = storage.FileStorageWriter(test_queue, temp_file)
    storage_writer.WriteEventObjects()

    z_file = zipfile.ZipFile(temp_file, 'r', zipfile.ZIP_DEFLATED)

    expected_z_filename_list = [
        u'plaso_index.000001',
        u'plaso_meta.000001',
        u'plaso_proto.000001',
        u'plaso_timestamps.000001',
        u'serializer.txt']

    z_filename_list = sorted(z_file.namelist())
    self.assertEqual(len(z_filename_list), 5)
    self.assertEqual(z_filename_list, expected_z_filename_list)
def testUniqueDomainExtraction(self):
  """Tests for the unique domains plugin."""
  event_queue = single_process.SingleProcessQueue()
  knowledge_base = self._SetUpKnowledgeBase()

  # Fill the incoming queue with events.
  test_queue_producer = queue.ItemQueueProducer(event_queue)
  events = [
      self._CreateTestEventObject(test_event)
      for test_event in self.TEST_EVENTS]
  test_queue_producer.ProduceItems(events)

  # Set up the plugin.
  analysis_plugin = unique_domains_visited.UniqueDomainsVisitedPlugin(
      event_queue)

  analysis_report_queue_consumer = self._RunAnalysisPlugin(
      analysis_plugin, knowledge_base)
  analysis_reports = self._GetAnalysisReportsFromQueue(
      analysis_report_queue_consumer)

  self.assertEqual(len(analysis_reports), 1)
  report_text = analysis_reports[0].GetString()
  for event_object in self.TEST_EVENTS:
    self.assertIn(event_object.get(u'domain', u''), report_text)
def testViperLookup(self):
  """Tests for the Viper analysis plugin."""
  event_queue = single_process.SingleProcessQueue()
  knowledge_base = self._SetUpKnowledgeBase()

  # Fill the incoming queue with events.
  test_queue_producer = queue.ItemQueueProducer(event_queue)
  events = [
      self._CreateTestEventObject(test_event)
      for test_event in self.TEST_EVENTS]
  test_queue_producer.ProduceItems(events)

  # Set up the plugin.
  analysis_plugin = viper.ViperAnalysisPlugin(event_queue)
  analysis_plugin.SetProtocol(u'http')
  analysis_plugin.SetHost(u'localhost')

  # Run the analysis plugin.
  analysis_report_queue_consumer = self._RunAnalysisPlugin(
      analysis_plugin, knowledge_base)
  analysis_reports = self._GetAnalysisReportsFromQueue(
      analysis_report_queue_consumer)

  self.assertEqual(len(analysis_reports), 1)

  report = analysis_reports[0]
  tags = report.GetTags()
  self.assertEqual(len(tags), 1)

  tag = tags[0]
  self.assertEqual(tag.event_uuid, u'8')

  expected_string = (
      u'File is present in Viper. Projects: "default" Tags "'
      u'rat, darkcomet"')
  self.assertEqual(tag.tags[0], expected_string)
def _ParseFile(self, parser_object, path, knowledge_base_object):
  """Parses a file using the parser object.

  Args:
    parser_object: the parser object.
    path: the path of the file to parse.
    knowledge_base_object: the knowledge base object (instance of
                           KnowledgeBase).

  Returns:
    An event object queue object (instance of Queue).
  """
  event_queue = single_process.SingleProcessQueue()
  event_queue_producer = queue.ItemQueueProducer(event_queue)

  parse_error_queue = single_process.SingleProcessQueue()
  parse_error_queue_producer = queue.ItemQueueProducer(parse_error_queue)

  parser_mediator = parsers_mediator.ParserMediator(
      event_queue_producer, parse_error_queue_producer,
      knowledge_base_object)

  path_spec = path_spec_factory.Factory.NewPathSpec(
      definitions.TYPE_INDICATOR_OS, location=path)
  file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)

  parser_mediator.SetFileEntry(file_entry)
  parser_object.Parse(parser_mediator)
  event_queue.SignalEndOfInput()

  return event_queue
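# Hedged usage sketch, not from the code base: _ParseFile returns the filled
# event queue, which a queue consumer can then drain. SomeParser is a
# placeholder for a concrete parser class and the test file name is a
# made-up fixture; _GetTestFilePath mirrors the helper used in the other
# test snippets.
def testParseFileSketch(self):
  """Sketches driving a parser through _ParseFile."""
  knowledge_base_object = knowledge_base.KnowledgeBase()
  parser_object = SomeParser()  # Placeholder parser class.
  event_queue = self._ParseFile(
      parser_object, self._GetTestFilePath([u'test_file']),
      knowledge_base_object)
  self.assertIsNotNone(event_queue)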
def GetAnalysisPluginsAndEventQueues(self, analysis_plugins_string):
  """Returns a list of analysis plugins and event queue producers.

  Args:
    analysis_plugins_string: comma separated string with names of analysis
                             plugins to load.

  Returns:
    A tuple of two lists, one containing a list of analysis plugins and
    the other a list of event queue producers.
  """
  if not analysis_plugins_string:
    return [], []

  # Start queues and load up plugins.
  event_queue_producers = []
  event_queues = []
  analysis_plugins_list = [
      name.strip() for name in analysis_plugins_string.split(u',')]

  for _ in range(0, len(analysis_plugins_list)):
    # TODO: add upper queue limit.
    analysis_plugin_queue = multi_process.MultiProcessingQueue(timeout=5)
    event_queues.append(analysis_plugin_queue)
    event_queue_producers.append(
        queue.ItemQueueProducer(event_queues[-1]))

  analysis_plugins = analysis_manager.AnalysisPluginManager.LoadPlugins(
      analysis_plugins_list, event_queues)
  analysis_plugins = list(analysis_plugins)

  return analysis_plugins, event_queue_producers
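# Hedged usage sketch, not from the code base: the two returned lists are
# built in the same order, so events pushed through producer N are read by
# plugin N; u'tagging' is only an example plugin name. Signaling end of
# input on each producer mirrors the shutdown pattern in the ProcessStorage
# snippet further below.
def _RunAnalysisPluginsSketch(self):
  """Sketches consuming the plugins and producers pair."""
  analysis_plugins, event_queue_producers = (
      self.GetAnalysisPluginsAndEventQueues(u'tagging'))
  for event_queue_producer in event_queue_producers:
    event_queue_producer.SignalEndOfInput()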
def _RunAnalysisPlugin(
    self, analysis_plugin, knowledge_base_object, output_format=u'text'):
  """Analyzes an event object queue using the plugin object.

  Args:
    analysis_plugin: the analysis plugin object (instance of
                     AnalysisPlugin).
    knowledge_base_object: the knowledge base object (instance of
                           KnowledgeBase).
    output_format: optional output format. The default is 'text'.

  Returns:
    An analysis report queue consumer object (instance of
    TestAnalysisReportQueueConsumer).
  """
  analysis_report_queue = single_process.SingleProcessQueue()
  analysis_report_queue_consumer = TestAnalysisReportQueueConsumer(
      analysis_report_queue)
  analysis_report_queue_producer = queue.ItemQueueProducer(
      analysis_report_queue)

  analysis_mediator = mediator.AnalysisMediator(
      analysis_report_queue_producer, knowledge_base_object,
      output_format=output_format)

  analysis_plugin.RunPlugin(analysis_mediator)
  analysis_report_queue.SignalEndOfInput()

  return analysis_report_queue_consumer
def testVirusTotalLookup(self):
  """Tests for the VirusTotal analysis plugin."""
  event_queue = single_process.SingleProcessQueue()
  knowledge_base = self._SetUpKnowledgeBase()

  # Fill the incoming queue with events.
  test_queue_producer = queue.ItemQueueProducer(event_queue)
  events = [
      self._CreateTestEventObject(test_event)
      for test_event in self.TEST_EVENTS]
  test_queue_producer.ProduceItems(events)

  analysis_plugin = virustotal.VirusTotalAnalysisPlugin(event_queue)
  analysis_plugin.SetAPIKey(self.FAKE_API_KEY)

  # Run the analysis plugin.
  analysis_report_queue_consumer = self._RunAnalysisPlugin(
      analysis_plugin, knowledge_base)
  analysis_reports = self._GetAnalysisReportsFromQueue(
      analysis_report_queue_consumer)

  self.assertEqual(len(analysis_reports), 1)

  report = analysis_reports[0]
  tags = report.GetTags()
  self.assertEqual(len(tags), 1)

  tag = tags[0]
  self.assertEqual(tag.event_uuid, u'8')
  self.assertEqual(tag.tags[0], u'VirusTotal Detections 10')
def CreateParserMediator(self, event_queue=None):
  """Create a parser mediator object.

  Args:
    event_queue: an optional event queue object (instance of Queue).

  Returns:
    A parser mediator object (instance of parsers_mediator.ParserMediator).
  """
  if event_queue is None:
    event_queue = single_process.SingleProcessQueue()
  event_queue_producer = queue.ItemQueueProducer(event_queue)

  parse_error_queue = single_process.SingleProcessQueue()
  parse_error_queue_producer = queue.ItemQueueProducer(parse_error_queue)

  return parsers_mediator.ParserMediator(
      event_queue_producer, parse_error_queue_producer,
      self.knowledge_base_object)
def setUp(self):
  """Sets up the needed objects used throughout the test."""
  knowledge_base = self._SetUpKnowledgeBase()

  analysis_report_queue = single_process.SingleProcessQueue()
  analysis_report_queue_producer = queue.ItemQueueProducer(
      analysis_report_queue)

  self._analysis_mediator = mediator.AnalysisMediator(
      analysis_report_queue_producer, knowledge_base)
def setUp(self):
  """Makes preparations before running an individual test."""
  knowledge_base = self._SetUpKnowledgeBase()

  analysis_report_queue = single_process.SingleProcessQueue()
  analysis_report_queue_producer = queue.ItemQueueProducer(
      analysis_report_queue)

  self._analysis_mediator = mediator.AnalysisMediator(
      analysis_report_queue_producer, knowledge_base)
def _StartAnalysisPlugins(
    self, storage_file_path, analysis_plugins, pre_obj,
    analysis_queue_port=None, analysis_report_incoming_queue=None,
    command_line_arguments=None):
  """Starts the analysis plugins.

  Args:
    storage_file_path: string containing the path of the storage file.
    analysis_plugins: list of analysis plugin objects (instance of
                      AnalysisPlugin) that should be started.
    pre_obj: the preprocessor object (instance of PreprocessObject).
    analysis_queue_port: optional TCP port that the ZeroMQ analysis report
                         queues should use.
    analysis_report_incoming_queue: optional queue (instance of Queue) that
                                    reports should be pushed to, when ZeroMQ
                                    is not in use.
    command_line_arguments: optional string of the command line arguments
                            or None if not set.
  """
  logging.info(u'Starting analysis plugins.')
  self._SetAnalysisPluginProcessInformation(
      storage_file_path, analysis_plugins, pre_obj,
      command_line_arguments=command_line_arguments)

  knowledge_base_object = knowledge_base.KnowledgeBase(pre_obj=pre_obj)
  for analysis_plugin in analysis_plugins:
    if self._use_zeromq:
      analysis_plugin_output_queue = zeromq_queue.ZeroMQPushConnectQueue(
          delay_open=True, port=analysis_queue_port)
    else:
      analysis_plugin_output_queue = analysis_report_incoming_queue

    analysis_report_queue_producer = queue.ItemQueueProducer(
        analysis_plugin_output_queue)

    completion_event = multiprocessing.Event()
    analysis_mediator_object = analysis_mediator.AnalysisMediator(
        analysis_report_queue_producer, knowledge_base_object,
        data_location=self._data_location,
        completion_event=completion_event)
    analysis_process = multiprocessing.Process(
        name=u'Analysis {0:s}'.format(analysis_plugin.plugin_name),
        target=analysis_plugin.RunPlugin,
        args=(analysis_mediator_object,))

    process_info = PsortAnalysisProcess(
        completion_event, analysis_plugin, analysis_process)
    self._analysis_process_info.append(process_info)

    analysis_process.start()
    logging.info(
        u'Plugin: [{0:s}] started.'.format(analysis_plugin.plugin_name))

  logging.info(u'Analysis plugins running')
def BuildParserMediator(self, event_queue=None):
  """Builds a parser mediator object.

  Args:
    event_queue: an optional event queue object (instance of Queue).
                 If no queue is provided a default one will be created.

  Returns:
    A parser mediator object (instance of parsers_mediator.ParserMediator).
  """
  if event_queue is None:
    event_queue = single_process.SingleProcessQueue()
  event_queue_producer = queue.ItemQueueProducer(event_queue)

  parse_error_queue = single_process.SingleProcessQueue()
  parse_error_queue_producer = queue.ItemQueueProducer(parse_error_queue)

  return parsers_mediator.ParserMediator(
      event_queue_producer, parse_error_queue_producer,
      PregCache.knowledge_base_object)
def testWinAnalyzePlugin(self):
  """Test the plugin against mock events."""
  knowledge_base = self._SetUpKnowledgeBase(
      knowledge_base_values={'users': self.WIN_USERS})

  event_queue = single_process.SingleProcessQueue()

  # Fill the incoming queue with events.
  test_queue_producer = queue.ItemQueueProducer(event_queue)
  test_queue_producer.ProduceItems(
      [self._CreateTestEventObject(path) for path in self.WIN_PATHS])
  test_queue_producer.SignalEndOfInput()

  # Initialize plugin.
  analysis_plugin = AnalyzeChromeExtensionTestPlugin(event_queue)

  # Run the analysis plugin.
  analysis_report_queue_consumer = self._RunAnalysisPlugin(
      analysis_plugin, knowledge_base)
  analysis_reports = self._GetAnalysisReportsFromQueue(
      analysis_report_queue_consumer)

  self.assertEqual(len(analysis_reports), 1)

  analysis_report = analysis_reports[0]

  self.assertEqual(analysis_plugin._sep, u'\\')

  # Due to the behavior of the join one additional empty string at the end
  # is needed to create the last empty line.
  expected_text = u'\n'.join([
      u' == USER: dude ==',
      u'  Google Keep - notes and lists [hmjkmjkepdijhoojdojkdfohbdgmmhki]',
      u'',
      u' == USER: frank ==',
      u'  Google Play Music [icppfcnhkcmnfdhfhphakoifcfokfdhg]',
      u'  YouTube [blpcfgokakmgnkcojhhkbfbldkacnbeo]',
      u'',
      u''])

  self.assertEqual(analysis_report.text, expected_text)
  self.assertEqual(analysis_report.plugin_name, 'chrome_extension_test')

  expected_keys = set([u'frank', u'dude'])
  self.assertEqual(set(analysis_report.report_dict.keys()), expected_keys)
def testMacAnalyzerPlugin(self):
  """Test the plugin against mock events."""
  knowledge_base = self._SetUpKnowledgeBase(
      knowledge_base_values={'users': self.MAC_USERS})

  event_queue = single_process.SingleProcessQueue()

  # Fill the incoming queue with events.
  test_queue_producer = queue.ItemQueueProducer(event_queue)
  test_queue_producer.ProduceItems(
      [self._CreateTestEventObject(path) for path in self.MAC_PATHS])
  test_queue_producer.SignalEndOfInput()

  # Initialize plugin.
  analysis_plugin = AnalyzeChromeExtensionTestPlugin(event_queue)

  # Run the analysis plugin.
  analysis_report_queue_consumer = self._RunAnalysisPlugin(
      analysis_plugin, knowledge_base)
  analysis_reports = self._GetAnalysisReportsFromQueue(
      analysis_report_queue_consumer)

  self.assertEqual(len(analysis_reports), 1)

  analysis_report = analysis_reports[0]

  self.assertEqual(analysis_plugin._sep, u'/')

  # Due to the behavior of the join one additional empty string at the end
  # is needed to create the last empty line.
  expected_text = u'\n'.join([
      u' == USER: dude ==',
      u'  Google Drive [apdfllckaahabafndbhieahigkjlhalf]',
      u'',
      u' == USER: frank ==',
      u'  Gmail [pjkljhegncpnkpknbcohdijeoejaedia]',
      u'',
      u''])

  self.assertEqual(analysis_report.text, expected_text)
  self.assertEqual(analysis_report.plugin_name, 'chrome_extension_test')

  expected_keys = set([u'frank', u'dude'])
  self.assertEqual(set(analysis_report.report_dict.keys()), expected_keys)
def GetAnalysisPluginsAndEventQueues(self, analysis_plugins_string):
  """Returns a list of analysis plugins and event queue producers.

  Args:
    analysis_plugins_string: comma separated string with names of analysis
                             plugins to load.

  Returns:
    A tuple of two lists, one containing a list of analysis plugins and
    the other a list of event queue producers.
  """
  if not analysis_plugins_string:
    return [], []

  event_producers = []
  # These are the queues analysis plugins will read from.
  analysis_plugin_input_queues = []
  analysis_plugins_list = [
      name.strip() for name in analysis_plugins_string.split(u',')]

  for _ in range(0, len(analysis_plugins_list)):
    if self._use_zeromq:
      output_queue = zeromq_queue.ZeroMQPushBindQueue()
      # Open the queue so it can bind to a random port, and we can get the
      # port number to use in the input queue.
      output_queue.Open()
      queue_port = output_queue.port
      input_queue = zeromq_queue.ZeroMQPullConnectQueue(
          port=queue_port, delay_open=True)
      analysis_plugin_input_queues.append(input_queue)
    else:
      input_queue = multi_process.MultiProcessingQueue(timeout=5)
      analysis_plugin_input_queues.append(input_queue)
      output_queue = input_queue
    event_producers.append(queue.ItemQueueProducer(output_queue))

  analysis_plugins = analysis_manager.AnalysisPluginManager.LoadPlugins(
      analysis_plugins_list, analysis_plugin_input_queues)
  analysis_plugins = list(analysis_plugins)

  return analysis_plugins, event_producers
def testSyntheticKeysText(self):
  """Test the plugin against mock events."""
  event_queue = single_process.SingleProcessQueue()

  # Fill the incoming queue with events.
  test_queue_producer = queue.ItemQueueProducer(event_queue)
  events = [
      self._CreateTestEventObject(service_event)
      for service_event in self.SERVICE_EVENTS]
  test_queue_producer.ProduceItems(events)
  test_queue_producer.SignalEndOfInput()

  # Initialize plugin.
  analysis_plugin = self._CreateAnalysisPlugin(event_queue, u'text')

  # Run the analysis plugin.
  knowledge_base = self._SetUpKnowledgeBase()
  analysis_report_queue_consumer = self._RunAnalysisPlugin(
      analysis_plugin, knowledge_base)
  analysis_reports = self._GetAnalysisReportsFromQueue(
      analysis_report_queue_consumer)

  self.assertEqual(len(analysis_reports), 1)

  analysis_report = analysis_reports[0]

  expected_text = (
      u'Listing Windows Services\n'
      u'TestbDriver\n'
      u'\tImage Path = C:\\Dell\\testdriver.sys\n'
      u'\tService Type = File System Driver (0x2)\n'
      u'\tStart Type = Auto Start (2)\n'
      u'\tService Dll = \n'
      u'\tObject Name = \n'
      u'\tSources:\n'
      u'\t\tC:\\WINDOWS\\system32\\SYSTEM:'
      u'\\ControlSet001\\services\\TestbDriver\n'
      u'\t\tC:\\WINDOWS\\system32\\SYSTEM:'
      u'\\ControlSet003\\services\\TestbDriver\n\n')

  self.assertEqual(expected_text, analysis_report.text)
  self.assertEqual(analysis_report.plugin_name, 'windows_services')
def testTag(self):
  """Test that the tagging plugin successfully tags events."""
  event_queue = single_process.SingleProcessQueue()

  test_queue_producer = queue.ItemQueueProducer(event_queue)
  events = [
      self._CreateTestEventObject(test_event)
      for test_event in self.TEST_EVENTS]
  test_queue_producer.ProduceItems(events)

  analysis_plugin = tagging.TaggingPlugin(event_queue)
  test_file = self._GetTestFilePath([self.TEST_TAG_FILE_NAME])
  analysis_plugin.SetAndLoadTagFile(test_file)

  # Run the plugin.
  knowledge_base = self._SetUpKnowledgeBase()
  analysis_report_queue_consumer = self._RunAnalysisPlugin(
      analysis_plugin, knowledge_base)
  analysis_reports = self._GetAnalysisReportsFromQueue(
      analysis_report_queue_consumer)

  self.assertEqual(len(analysis_reports), 1)
  report = analysis_reports[0]
  self.assertEqual(len(report.GetTags()), 2)
def _RunAnalysisPlugin(self, analysis_plugin, knowledge_base_object):
  """Analyzes an event object queue using the plugin object.

  Args:
    analysis_plugin: the analysis plugin object (instance of
                     AnalysisPlugin).
    knowledge_base_object: the knowledge base object (instance of
                           KnowledgeBase).

  Returns:
    An analysis report queue consumer object (instance of
    TestAnalysisReportQueueConsumer).
  """
  analysis_report_queue = single_process.SingleProcessQueue()
  analysis_report_queue_consumer = TestAnalysisReportQueueConsumer(
      analysis_report_queue)
  analysis_report_queue_producer = queue.ItemQueueProducer(
      analysis_report_queue)

  analysis_mediator = mediator.AnalysisMediator(
      analysis_report_queue_producer, knowledge_base_object)

  analysis_plugin.RunPlugin(analysis_mediator)

  return analysis_report_queue_consumer
def testStorageWriter(self):
  """Test the storage writer."""
  test_event_objects = test_lib.CreateTestEventObjects()

  # The storage writer is normally run in a separate thread.
  # For the purpose of this test it has to be run in sequence,
  # hence the call to WriteEventObjects after all the event objects
  # have been queued up.

  # TODO: add upper queue limit.
  # A timeout is used so that closing the multi processing queue does not
  # block the current process.
  test_queue = multi_process.MultiProcessingQueue(timeout=0.1)
  test_queue_producer = queue.ItemQueueProducer(test_queue)
  test_queue_producer.ProduceItems(test_event_objects)
  test_queue_producer.SignalAbort()

  with shared_test_lib.TempDirectory() as temp_directory:
    temp_file = os.path.join(temp_directory, u'plaso.db')
    storage_writer = writer.FileStorageWriter(test_queue, temp_file)
    storage_writer.WriteEventObjects()

    zip_file = zipfile.ZipFile(
        temp_file, mode='r', compression=zipfile.ZIP_DEFLATED)

    expected_filename_list = [
        u'plaso_index.000001',
        u'plaso_meta.000001',
        u'plaso_proto.000001',
        u'plaso_timestamps.000001',
        u'serializer.txt']

    filename_list = sorted(zip_file.namelist())
    self.assertEqual(len(filename_list), 5)
    self.assertEqual(filename_list, expected_filename_list)
def testEvents(self):
  """Test the plugin against mock events."""
  event_queue = single_process.SingleProcessQueue()

  # Fill the incoming queue with events.
  test_queue_producer = queue.ItemQueueProducer(event_queue)
  events = [
      self._CreateTestEventObject(event_dict)
      for event_dict in self.EVENTS]
  test_queue_producer.ProduceItems(events)

  # Initialize plugin.
  analysis_plugin = file_hashes.FileHashesPlugin(event_queue)

  # Run the analysis plugin.
  knowledge_base = self._SetUpKnowledgeBase()
  analysis_report_queue_consumer = self._RunAnalysisPlugin(
      analysis_plugin, knowledge_base)
  analysis_reports = self._GetAnalysisReportsFromQueue(
      analysis_report_queue_consumer)

  self.assertEqual(len(analysis_reports), 1)

  analysis_report = analysis_reports[0]

  expected_text = (
      u'Listing file paths and hashes\n'
      u'FAKE:/opt/2hash_file: alternate_test_hash=5 test_hash=4\n'
      u'FAKE:/opt/dfvfs: test_hash=4\n'
      u'FAKE:/opt/no_hash_file:\n'
      u'FAKE:/var/testing directory with space/file.txt: test_hash=4\n'
      u'FAKE:C:\\Windows\\a.file.txt: test_hash=4\n')

  self.assertEqual(expected_text, analysis_report.text)
  self.assertEqual(analysis_report.plugin_name, u'file_hashes')
def ProcessStorage(
    self, output_module, storage_file, analysis_plugins,
    event_queue_producers, deduplicate_events=True,
    preferred_encoding=u'utf-8', time_slice=None, use_time_slicer=False):
  """Processes a plaso storage file.

  Args:
    output_module: an output module (instance of OutputModule).
    storage_file: the storage file object (instance of StorageFile).
    analysis_plugins: list of analysis plugin objects (instance of
                      AnalysisPlugin).
    event_queue_producers: list of event queue producer objects (instance
                           of ItemQueueProducer).
    deduplicate_events: optional boolean value to indicate if the event
                        objects should be deduplicated. The default is
                        True.
    preferred_encoding: optional preferred encoding. The default is
                        "utf-8".
    time_slice: optional time slice object (instance of TimeSlice).
                The default is None.
    use_time_slicer: optional boolean value to indicate the 'time slicer'
                     should be used. The default is False. The 'time
                     slicer' will provide a context of events around an
                     event of interest.

  Returns:
    A counter (instance of collections.Counter) that contains the analysis
    plugin results or None.

  Raises:
    RuntimeError: if a non-recoverable situation is encountered.
  """
  if time_slice:
    if time_slice.event_timestamp:
      pfilter.TimeRangeCache.SetLowerTimestamp(time_slice.start_timestamp)
      pfilter.TimeRangeCache.SetUpperTimestamp(time_slice.end_timestamp)

    elif use_time_slicer:
      self._filter_buffer = bufferlib.CircularBuffer(time_slice.duration)

  with storage_file:
    storage_file.SetStoreLimit(self._filter_object)

    # TODO: allow for single processing.
    # TODO: add upper queue limit.
    analysis_output_queue = multi_process.MultiProcessingQueue(timeout=5)

    if analysis_plugins:
      logging.info(u'Starting analysis plugins.')

      # Within all preprocessing objects, try to get the last one that has
      # time zone information stored in it, the highest chance of it
      # containing the information we are seeking (defaulting to the last
      # one).
      pre_objs = storage_file.GetStorageInformation()
      pre_obj = pre_objs[-1]
      for obj in pre_objs:
        if getattr(obj, u'time_zone_str', u''):
          pre_obj = obj

      # Fill in the collection information.
      pre_obj.collection_information = {}
      if preferred_encoding:
        cmd_line = u' '.join(sys.argv)
        try:
          pre_obj.collection_information[u'cmd_line'] = cmd_line.decode(
              preferred_encoding)
        except UnicodeDecodeError:
          pass
      pre_obj.collection_information[u'file_processed'] = (
          self._storage_file)
      pre_obj.collection_information[u'method'] = (
          u'Running Analysis Plugins')

      analysis_plugin_names = [plugin.NAME for plugin in analysis_plugins]
      pre_obj.collection_information[u'plugins'] = analysis_plugin_names
      time_of_run = timelib.Timestamp.GetNow()
      pre_obj.collection_information[u'time_of_run'] = time_of_run

      pre_obj.counter = collections.Counter()

      # Assign the preprocessing object to the storage.
      # This is normally done in the construction of the storage object,
      # however we cannot do that here since the preprocessing object is
      # stored inside the storage file, so we need to open it first to
      # be able to read it in, before we make changes to it. Thus we need
      # to access this protected member of the class.
      # pylint: disable=protected-access
      storage_file._pre_obj = pre_obj

      knowledge_base_object = knowledge_base.KnowledgeBase(pre_obj=pre_obj)

      # Now we need to start all the plugins.
      for analysis_plugin in analysis_plugins:
        analysis_report_queue_producer = queue.ItemQueueProducer(
            analysis_output_queue)

        completion_event = multiprocessing.Event()
        analysis_mediator_object = analysis_mediator.AnalysisMediator(
            analysis_report_queue_producer, knowledge_base_object,
            data_location=self._data_location,
            completion_event=completion_event)
        analysis_process = multiprocessing.Process(
            name=u'Analysis {0:s}'.format(analysis_plugin.plugin_name),
            target=analysis_plugin.RunPlugin,
            args=(analysis_mediator_object,))

        process_info = PsortAnalysisProcess(
            completion_event, analysis_plugin, analysis_process)
        self._analysis_process_info.append(process_info)

        analysis_process.start()
        logging.info(
            u'Plugin: [{0:s}] started.'.format(
                analysis_plugin.plugin_name))

    else:
      event_queue_producers = []

    output_buffer = output_interface.EventBuffer(
        output_module, deduplicate_events)
    with output_buffer:
      counter = self.ProcessOutput(
          storage_file, output_buffer, my_filter=self._filter_object,
          filter_buffer=self._filter_buffer,
          analysis_queues=event_queue_producers)

    for information in storage_file.GetStorageInformation():
      if hasattr(information, u'counter'):
        counter[u'Stored Events'] += information.counter[u'total']

    if not self._quiet_mode:
      logging.info(u'Output processing is done.')

    # Get all reports and tags from analysis plugins.
    self._ProcessAnalysisPlugins(
        analysis_plugins, analysis_output_queue, storage_file, counter,
        preferred_encoding=preferred_encoding)

  if self._output_file_object:
    self._output_file_object.close()
    self._output_file_object = None

  if self._filter_object and not counter[u'Limited By']:
    counter[u'Filter By Date'] = (
        counter[u'Stored Events'] - counter[u'Events Included'] -
        counter[u'Events Filtered Out'])

  return counter
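# Hedged call sketch for the ProcessStorage method above (placeholders, not
# project code): the storage file and output module are prepared by the
# caller and the producers come from GetAnalysisPluginsAndEventQueues.
#
#   counter = front_end.ProcessStorage(
#       output_module, storage_file, analysis_plugins,
#       event_queue_producers, deduplicate_events=True,
#       preferred_encoding=u'utf-8')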
def _Main(self):
  """The main loop."""
  self._event_queue_producer = queue.ItemQueueProducer(
      self._event_object_queue)
  self._parse_error_queue_producer = queue.ItemQueueProducer(
      self._parse_error_queue)

  parser_mediator = parsers_mediator.ParserMediator(
      self._event_queue_producer, self._parse_error_queue_producer,
      self._knowledge_base)

  # We need a resolver context per process to prevent multi processing
  # issues with file objects stored in images.
  resolver_context = context.Context()

  self._extraction_worker = worker.BaseEventExtractionWorker(
      self._worker_number, self._path_spec_queue,
      self._event_queue_producer, self._parse_error_queue_producer,
      parser_mediator, resolver_context=resolver_context)

  self._extraction_worker.SetEnableDebugOutput(self._enable_debug_output)

  self._extraction_worker.SetEnableProfiling(
      self._enable_profiling,
      profiling_sample_rate=self._profiling_sample_rate,
      profiling_type=self._profiling_type)

  self._extraction_worker.SetProcessArchiveFiles(
      self._process_archive_files)

  if self._filter_object:
    self._extraction_worker.SetFilterObject(self._filter_object)

  if self._mount_path:
    self._extraction_worker.SetMountPath(self._mount_path)

  if self._text_prepend:
    self._extraction_worker.SetTextPrepend(self._text_prepend)

  # We need to initialize the parser and hasher objects after the process
  # has forked otherwise on Windows the "fork" will fail with
  # a PickleError for Python modules that cannot be pickled.
  self._extraction_worker.InitializeParserObjects(
      parser_filter_string=self._parser_filter_string)
  if self._hasher_names_string:
    self._extraction_worker.SetHashers(self._hasher_names_string)

  logging.debug(u'Extraction worker: {0!s} (PID: {1:d}) started'.format(
      self._name, self._pid))

  try:
    self._extraction_worker.Run()

  except Exception as exception:
    logging.warning((
        u'Unhandled exception in extraction worker {0!s} '
        u'(PID: {1:d}).').format(self._name, self._pid))
    logging.exception(exception)

  logging.debug(u'Extraction worker: {0!s} (PID: {1:d}) stopped'.format(
      self._name, self._pid))

  self._path_spec_queue.Close(abort=True)
  self._event_object_queue.Close(abort=True)
  self._parse_error_queue.Close(abort=True)
def ProcessStorage(self, options):
  """Opens a storage file and processes the events within.

  Args:
    options: the command line arguments (instance of argparse.Namespace).

  Returns:
    A counter.

  Raises:
    RuntimeError: if a non-recoverable situation is encountered.
  """
  counter = None

  slice_option = getattr(options, u'slice', None)
  if slice_option:
    timezone = getattr(options, u'timezone', u'UTC')
    if timezone == u'UTC':
      zone = pytz.utc
    else:
      zone = pytz.timezone(timezone)

    timestamp = timelib.Timestamp.FromTimeString(
        slice_option, timezone=zone)

    # Convert number of minutes to microseconds.
    range_operator = self._slice_size * 60 * 1000000

    # Set the time range.
    pfilter.TimeRangeCache.SetLowerTimestamp(timestamp - range_operator)
    pfilter.TimeRangeCache.SetUpperTimestamp(timestamp + range_operator)

  analysis_plugins = getattr(options, u'analysis_plugins', u'')
  if analysis_plugins:
    read_only = False
  else:
    read_only = True

  try:
    storage_file = self.OpenStorageFile(read_only=read_only)
  except IOError as exception:
    raise RuntimeError(
        u'Unable to open storage file: {0:s} with error: {1:s}.'.format(
            self._storage_file_path, exception))

  with storage_file:
    storage_file.SetStoreLimit(self._filter_object)

    if self._output_filename:
      output_stream = self._output_filename
    else:
      output_stream = sys.stdout

    formatter_mediator = self.GetFormatMediator()

    try:
      formatter_mediator.SetPreferredLanguageIdentifier(
          self._preferred_language)
    except (KeyError, TypeError) as exception:
      raise RuntimeError(exception)

    try:
      # TODO: move this into a factory function?
      output_module_class = output_manager.OutputManager.GetOutputClass(
          self._output_format)
      output_module = output_module_class(
          storage_file, formatter_mediator, filehandle=output_stream,
          config=options, filter_use=self._filter_object)
    except IOError as exception:
      raise RuntimeError(
          u'Unable to create output module with error: {0:s}'.format(
              exception))

    if not output_module:
      raise RuntimeError(u'Missing output module.')

    if analysis_plugins:
      logging.info(u'Starting analysis plugins.')

      # Within all preprocessing objects, try to get the last one that has
      # time zone information stored in it, the highest chance of it
      # containing the information we are seeking (defaulting to the last
      # one).
      pre_objs = storage_file.GetStorageInformation()
      pre_obj = pre_objs[-1]
      for obj in pre_objs:
        if getattr(obj, u'time_zone_str', u''):
          pre_obj = obj

      # Fill in the collection information.
      pre_obj.collection_information = {}
      encoding = getattr(pre_obj, u'preferred_encoding', None)
      if encoding:
        cmd_line = u' '.join(sys.argv)
        try:
          pre_obj.collection_information[u'cmd_line'] = cmd_line.decode(
              encoding)
        except UnicodeDecodeError:
          pass
      pre_obj.collection_information[u'file_processed'] = (
          self._storage_file_path)
      pre_obj.collection_information[u'method'] = (
          u'Running Analysis Plugins')
      pre_obj.collection_information[u'plugins'] = analysis_plugins
      time_of_run = timelib.Timestamp.GetNow()
      pre_obj.collection_information[u'time_of_run'] = time_of_run

      pre_obj.counter = collections.Counter()

      # Assign the preprocessing object to the storage.
      # This is normally done in the construction of the storage object,
      # however we cannot do that here since the preprocessing object is
      # stored inside the storage file, so we need to open it first to
      # be able to read it in, before we make changes to it. Thus we need
      # to access this protected member of the class.
      # pylint: disable=protected-access
      storage_file._pre_obj = pre_obj

      # Start queues and load up plugins.
      # TODO: add upper queue limit.
      analysis_output_queue = multi_process.MultiProcessingQueue()
      event_queue_producers = []
      event_queues = []
      analysis_plugins_list = [
          name.strip() for name in analysis_plugins.split(u',')]

      for _ in xrange(0, len(analysis_plugins_list)):
        # TODO: add upper queue limit.
        analysis_plugin_queue = multi_process.MultiProcessingQueue()
        event_queues.append(analysis_plugin_queue)
        event_queue_producers.append(
            queue.ItemQueueProducer(event_queues[-1]))

      knowledge_base_object = knowledge_base.KnowledgeBase()

      analysis_plugins = analysis.LoadPlugins(
          analysis_plugins_list, event_queues, options)

      # Now we need to start all the plugins.
      for analysis_plugin in analysis_plugins:
        analysis_report_queue_producer = queue.ItemQueueProducer(
            analysis_output_queue)
        analysis_context_object = analysis_context.AnalysisContext(
            analysis_report_queue_producer, knowledge_base_object)
        analysis_process = multiprocessing.Process(
            name=u'Analysis {0:s}'.format(analysis_plugin.plugin_name),
            target=analysis_plugin.RunPlugin,
            args=(analysis_context_object,))
        self._analysis_processes.append(analysis_process)

        analysis_process.start()
        logging.info(
            u'Plugin: [{0:s}] started.'.format(
                analysis_plugin.plugin_name))

    else:
      event_queue_producers = []

    deduplicate_events = getattr(options, u'dedup', True)
    output_buffer = output_interface.EventBuffer(
        output_module, deduplicate_events)
    with output_buffer:
      counter = self.ProcessOutput(
          storage_file, output_buffer, my_filter=self._filter_object,
          filter_buffer=self._filter_buffer,
          analysis_queues=event_queue_producers)

    for information in storage_file.GetStorageInformation():
      if hasattr(information, u'counter'):
        counter[u'Stored Events'] += information.counter[u'total']

    if not getattr(options, u'quiet', False):
      logging.info(u'Output processing is done.')

    # Get all reports and tags from analysis plugins.
    if analysis_plugins:
      logging.info(u'Processing data from analysis plugins.')
      for event_queue_producer in event_queue_producers:
        event_queue_producer.SignalEndOfInput()

      # Wait for all analysis plugins to complete.
      for number, analysis_process in enumerate(self._analysis_processes):
        logging.debug(
            u'Waiting for analysis plugin: {0:d} to complete.'.format(
                number))
        if analysis_process.is_alive():
          analysis_process.join(10)
        else:
          logging.warning(u'Plugin {0:d} already stopped.'.format(number))
          analysis_process.terminate()
      logging.debug(u'All analysis plugins are now stopped.')

      # Close the output queue.
      analysis_output_queue.SignalEndOfInput()

      # Go over each output.
      analysis_queue_consumer = PsortAnalysisReportQueueConsumer(
          analysis_output_queue, storage_file, self._filter_expression,
          self.preferred_encoding)
      analysis_queue_consumer.ConsumeItems()

      if analysis_queue_consumer.tags:
        storage_file.StoreTagging(analysis_queue_consumer.tags)

      # TODO: analysis_queue_consumer.anomalies:

      for item, value in analysis_queue_consumer.counter.iteritems():
        counter[item] = value

  if self._filter_object and not counter[u'Limited By']:
    counter[u'Filter By Date'] = (
        counter[u'Stored Events'] - counter[u'Events Included'] -
        counter[u'Events Filtered Out'])

  return counter