def GetAnalysisPluginsAndEventQueues(self, analysis_plugins_string):
    """Loads the requested analysis plugins and creates their event queues.

    For every requested plugin name one multi processing queue is created
    together with a producer that wraps it. The queues are passed to the
    analysis plugin manager when the plugins are loaded; the producers are
    handed back to the caller.

    Args:
      analysis_plugins_string: comma separated string with names of analysis
          plugins to load.

    Returns:
      A tuple of two lists: the loaded analysis plugins and the event queue
      producers (instances of ItemQueueProducer), one producer per requested
      plugin name. Both lists are empty when no plugin names are given.
    """
    if not analysis_plugins_string:
        return [], []

    plugin_names = [
        plugin_name.strip()
        for plugin_name in analysis_plugins_string.split(u',')]

    plugin_queues = []
    producers = []
    for _ in plugin_names:
        # TODO: add upper queue limit.
        plugin_queue = multi_process.MultiProcessingQueue(timeout=5)
        plugin_queues.append(plugin_queue)
        producers.append(queue.ItemQueueProducer(plugin_queue))

    loaded_plugins = analysis_manager.AnalysisPluginManager.LoadPlugins(
        plugin_names, plugin_queues)
    return list(loaded_plugins), producers
def testStorageWriter(self):
    """Tests that the storage writer creates the expected ZIP members."""
    self.assertEqual(len(self._event_objects), 4)

    # The storage writer is normally run in a separate thread.
    # For the purpose of this test it has to be run in sequence,
    # hence the call to WriteEventObjects after all the event objects
    # have been queued up.

    # TODO: add upper queue limit.
    # A timeout is used so that the multi processing queue does not keep
    # blocking the current process.
    test_queue = multi_process.MultiProcessingQueue(timeout=0.1)
    test_queue_producer = queue.ItemQueueProducer(test_queue)
    test_queue_producer.ProduceItems(self._event_objects)
    test_queue_producer.SignalAbort()

    expected_z_filename_list = [
        u'plaso_index.000001',
        u'plaso_meta.000001',
        u'plaso_proto.000001',
        u'plaso_timestamps.000001',
        u'serializer.txt']

    with tempfile.NamedTemporaryFile() as temp_file:
        storage_writer = storage.FileStorageWriter(test_queue, temp_file)
        storage_writer.WriteEventObjects()

        # Use the ZIP file as a context manager so it is closed even when
        # reading the member names fails.
        with zipfile.ZipFile(temp_file, 'r', zipfile.ZIP_DEFLATED) as z_file:
            z_filename_list = sorted(z_file.namelist())

    self.assertEqual(len(z_filename_list), 5)
    self.assertEqual(z_filename_list, expected_z_filename_list)
def setUp(self):
    """Sets up the objects used throughout the test.

    Creates a temporary directory holding a tag input file and a storage
    file that is filled with five test events, then opens the storage file
    for the tests to use.
    """
    self._temp_directory = tempfile.mkdtemp()
    self._storage_filename = os.path.join(self._temp_directory, 'plaso.db')
    self._tag_input_filename = os.path.join(self._temp_directory, 'input1.tag')

    tag_rules = '\n'.join([
        'Test Tag',
        ' filename contains \'/tmp/whoaaaa\'',
        ' parser is \'TestEvent\' and stuff is \'dude\''])

    # The context manager closes the file even when the write fails.
    with open(self._tag_input_filename, 'wb') as tag_input_file:
        tag_input_file.write(tag_rules)

    pfilter.TimeRangeCache.ResetTimeConstraints()

    # TODO: add upper queue limit.
    test_queue = multi_process.MultiProcessingQueue()
    test_queue_producer = queue.ItemQueueProducer(test_queue)
    test_queue_producer.ProduceItems([
        TestEvent(0),
        TestEvent(1000),
        TestEvent(2000000, '/tmp/whoaaaaa'),
        TestEvent(2500000, '/tmp/whoaaaaa'),
        TestEvent(5000000, '/tmp/whoaaaaa', 'dude')])
    test_queue_producer.SignalEndOfInput()

    # The events are written in sequence, after all of them were queued.
    storage_writer = storage.StorageFileWriter(
        test_queue, self._storage_filename)
    storage_writer.WriteEventObjects()

    self._storage_file = storage.StorageFile(self._storage_filename)
    self._storage_file.SetStoreLimit()
def testPushPopItem(self):
    """Tests pushing items onto the queue and popping them off again."""
    expected_number_of_items = len(self._ITEMS)

    item_queue = multi_process.MultiProcessingQueue()
    for queued_item in self._ITEMS:
        item_queue.PushItem(queued_item)

    # Mark the end of input before the consumer starts reading.
    item_queue.SignalEndOfInput()

    consumer = test_lib.TestQueueConsumer(item_queue)
    consumer.ConsumeItems()

    self.assertEqual(consumer.number_of_items, expected_number_of_items)
def testPushPopItem(self):
    """Tests pushing items onto the queue and popping them off again."""
    expected_number_of_items = len(self._ITEMS)

    # No end-of-input is signalled in this test; a timeout is used so that
    # the multi processing queue does not keep blocking the current process.
    item_queue = multi_process.MultiProcessingQueue(timeout=0.1)
    for queued_item in self._ITEMS:
        item_queue.PushItem(queued_item)

    consumer = engine_test_lib.TestQueueConsumer(item_queue)
    consumer.ConsumeItems()

    self.assertEqual(consumer.number_of_items, expected_number_of_items)
def testPushPopItem(self):
    """Tests pushing items onto the queue and popping them off again."""
    item_queue = multi_process.MultiProcessingQueue()
    for queued_item in self._ITEMS:
        item_queue.PushItem(queued_item)

    try:
        self.assertEqual(len(item_queue), len(self._ITEMS))
    except NotImplementedError:
        # On Mac OS X because of broken sem_getvalue()
        # The remainder of the test is skipped in that case.
        return

    item_queue.SignalEndOfInput()

    consumer = test_lib.TestQueueConsumer(item_queue)
    consumer.ConsumeItems()

    self.assertEqual(consumer.number_of_items, len(self._ITEMS))
def GetAnalysisPluginsAndEventQueues(self, analysis_plugins_string):
    """Loads the requested analysis plugins and creates their event queues.

    For every requested plugin name a queue pair is set up: an input queue
    the analysis plugin reads from and an output queue that is wrapped in a
    producer for the caller. With ZeroMQ the output queue binds to a port and
    the input queue connects to that port; otherwise a single multi
    processing queue serves as both.

    Args:
      analysis_plugins_string: comma separated string with names of analysis
          plugins to load.

    Returns:
      A tuple of two lists: the loaded analysis plugins and the event queue
      producers (instances of ItemQueueProducer), one producer per requested
      plugin name. Both lists are empty when no plugin names are given.
    """
    if not analysis_plugins_string:
        return [], []

    plugin_names = [
        plugin_name.strip()
        for plugin_name in analysis_plugins_string.split(u',')]

    producers = []
    # These are the queues analysis plugins will read from.
    plugin_input_queues = []

    for _ in plugin_names:
        if not self._use_zeromq:
            input_queue = multi_process.MultiProcessingQueue(timeout=5)
            output_queue = input_queue
        else:
            output_queue = zeromq_queue.ZeroMQPushBindQueue()
            # Open the queue so it can bind to a random port, and we can get
            # the port number to use in the input queue.
            output_queue.Open()
            input_queue = zeromq_queue.ZeroMQPullConnectQueue(
                port=output_queue.port, delay_open=True)

        plugin_input_queues.append(input_queue)
        producers.append(queue.ItemQueueProducer(output_queue))

    loaded_plugins = analysis_manager.AnalysisPluginManager.LoadPlugins(
        plugin_names, plugin_input_queues)
    return list(loaded_plugins), producers
def testStorageWriter(self):
    """Tests that the ZIP storage file writer creates the expected members."""
    event_objects = test_lib.CreateTestEventObjects()

    # The storage writer is normally run in a separate thread.
    # For the purpose of this test it has to be run in sequence,
    # hence the call to WriteEventObjects after all the event objects
    # have been queued up.

    # TODO: add upper queue limit.
    # A timeout is used so that the multi processing queue does not keep
    # blocking the current process.
    test_queue = multi_process.MultiProcessingQueue(timeout=0.1)
    test_queue_producer = plaso_queue.ItemQueueProducer(test_queue)
    test_queue_producer.ProduceItems(event_objects)
    test_queue_producer.SignalAbort()

    preprocessing_object = event.PreprocessObject()

    expected_filename_list = [
        u'information.dump',
        u'plaso_index.000001',
        u'plaso_proto.000001',
        u'plaso_timestamps.000001',
        u'serializer.txt']

    with shared_test_lib.TempDirectory() as temp_directory:
        temp_file = os.path.join(temp_directory, u'plaso.db')
        storage_writer = zip_file.ZIPStorageFileWriter(
            test_queue, temp_file, preprocessing_object)
        storage_writer.WriteEventObjects()

        # Close the ZIP file before the temporary directory context exits,
        # so no open handle to the storage file is left behind.
        with zipfile.ZipFile(
                temp_file, mode='r',
                compression=zipfile.ZIP_DEFLATED) as storage_file:
            filename_list = sorted(storage_file.namelist())

    self.assertEqual(len(filename_list), 5)
    self.assertEqual(filename_list, expected_filename_list)
def ProcessStorage(self, output_module, storage_file, analysis_plugins,
                   event_queue_producers, deduplicate_events=True,
                   preferred_encoding=u'utf-8', time_slice=None,
                   use_time_slicer=False):
    """Processes a plaso storage file.

    Optionally applies a time slice, starts one process per analysis plugin,
    runs the events from storage through the output module and finally
    collects the analysis plugin results.

    Args:
      output_module: an output module (instance of OutputModule).
      storage_file: the storage file object (instance of StorageFile).
      analysis_plugins: list of analysis plugin objects (instance of
          AnalysisPlugin). If empty, no analysis processes are started and
          event_queue_producers is ignored.
      event_queue_producers: list of event queue producer objects (instance
          of ItemQueueProducer).
      deduplicate_events: optional boolean value to indicate if the event
          objects should be deduplicated. The default is True.
      preferred_encoding: optional preferred encoding. The default is
          "utf-8".
      time_slice: optional time slice object (instance of TimeSlice). The
          default is None.
      use_time_slicer: optional boolean value to indicate the 'time slicer'
          should be used. The default is False. The 'time slicer' will
          provide a context of events around an event of interest.

    Returns:
      The counter returned by ProcessOutput, extended with the number of
      stored events and, when a filter is set and the output was not limited,
      a 'Filter By Date' entry. The counter is also passed to
      _ProcessAnalysisPlugins.

    Raises:
      RuntimeError: if a non-recoverable situation is encountered.
          NOTE(review): not raised directly in this method; presumably
          raised by one of the helpers it calls - confirm.
    """
    if time_slice:
        # NOTE(review): this is a truthiness test, so an event timestamp of
        # 0 is treated the same as no event timestamp - confirm this is
        # intended.
        if time_slice.event_timestamp:
            pfilter.TimeRangeCache.SetLowerTimestamp(
                time_slice.start_timestamp)
            pfilter.TimeRangeCache.SetUpperTimestamp(
                time_slice.end_timestamp)
        elif use_time_slicer:
            self._filter_buffer = bufferlib.CircularBuffer(
                time_slice.duration)

    with storage_file:
        storage_file.SetStoreLimit(self._filter_object)

        # TODO: allow for single processing.
        # TODO: add upper queue limit.
        # Queue on which the analysis plugins publish their reports; it is
        # created even when there are no analysis plugins.
        analysis_output_queue = multi_process.MultiProcessingQueue(
            timeout=5)

        if analysis_plugins:
            logging.info(u'Starting analysis plugins.')
            # Within all preprocessing objects, try to get the last one that
            # has time zone information stored in it, the highest chance of
            # it containing the information we are seeking (defaulting to
            # the last one).
            # NOTE(review): pre_objs[-1] assumes the storage file contains at
            # least one preprocessing object - confirm.
            pre_objs = storage_file.GetStorageInformation()
            pre_obj = pre_objs[-1]
            for obj in pre_objs:
                if getattr(obj, u'time_zone_str', u''):
                    pre_obj = obj

            # Fill in the collection information.
            pre_obj.collection_information = {}
            if preferred_encoding:
                cmd_line = u' '.join(sys.argv)
                try:
                    pre_obj.collection_information[
                        u'cmd_line'] = cmd_line.decode(preferred_encoding)
                except UnicodeDecodeError:
                    # Best effort: the command line is left out of the
                    # collection information if it cannot be decoded.
                    pass
            pre_obj.collection_information[u'file_processed'] = (
                self._storage_file)
            pre_obj.collection_information[
                u'method'] = u'Running Analysis Plugins'
            analysis_plugin_names = [
                plugin.NAME for plugin in analysis_plugins
            ]
            pre_obj.collection_information[
                u'plugins'] = analysis_plugin_names
            time_of_run = timelib.Timestamp.GetNow()
            pre_obj.collection_information[u'time_of_run'] = time_of_run

            pre_obj.counter = collections.Counter()

            # Assign the preprocessing object to the storage.
            # This is normally done in the construction of the storage
            # object, however we cannot do that here since the preprocessing
            # object is stored inside the storage file, so we need to open it
            # first to be able to read it in, before we make changes to it.
            # Thus we need to access this protected member of the class.
            # pylint: disable=protected-access
            storage_file._pre_obj = pre_obj

            knowledge_base_object = knowledge_base.KnowledgeBase(
                pre_obj=pre_obj)

            # Now we need to start all the plugins.
            for analysis_plugin in analysis_plugins:
                # Every plugin gets its own producer, all of which write to
                # the shared analysis output queue.
                analysis_report_queue_producer = queue.ItemQueueProducer(
                    analysis_output_queue)

                completion_event = multiprocessing.Event()
                analysis_mediator_object = analysis_mediator.AnalysisMediator(
                    analysis_report_queue_producer, knowledge_base_object,
                    data_location=self._data_location,
                    completion_event=completion_event)
                analysis_process = multiprocessing.Process(
                    name=u'Analysis {0:s}'.format(
                        analysis_plugin.plugin_name),
                    target=analysis_plugin.RunPlugin,
                    args=(analysis_mediator_object, ))
                # Keep track of the process together with its completion
                # event and plugin.
                process_info = PsortAnalysisProcess(
                    completion_event, analysis_plugin, analysis_process)
                self._analysis_process_info.append(process_info)

                analysis_process.start()
                logging.info(u'Plugin: [{0:s}] started.'.format(
                    analysis_plugin.plugin_name))
        else:
            # Without analysis plugins no events are passed on to analysis
            # queues.
            event_queue_producers = []

        output_buffer = output_interface.EventBuffer(
            output_module, deduplicate_events)
        with output_buffer:
            counter = self.ProcessOutput(
                storage_file, output_buffer, my_filter=self._filter_object,
                filter_buffer=self._filter_buffer,
                analysis_queues=event_queue_producers)

        # Add up the event totals of the storage information objects that
        # have a counter.
        for information in storage_file.GetStorageInformation():
            if hasattr(information, u'counter'):
                counter[u'Stored Events'] += information.counter[u'total']

        if not self._quiet_mode:
            logging.info(u'Output processing is done.')

        # Get all reports and tags from analysis plugins.
        self._ProcessAnalysisPlugins(analysis_plugins, analysis_output_queue,
                                     storage_file, counter,
                                     preferred_encoding=preferred_encoding)

    if self._output_file_object:
        self._output_file_object.close()
        self._output_file_object = None

    # Events that were stored but neither included nor filtered out are
    # attributed to date filtering.
    if self._filter_object and not counter[u'Limited By']:
        counter[u'Filter By Date'] = (counter[u'Stored Events'] -
                                      counter[u'Events Included'] -
                                      counter[u'Events Filtered Out'])

    return counter
def ProcessStorage(self, output_module, storage_file, storage_file_path,
                   analysis_plugins, event_queue_producers,
                   command_line_arguments=None, deduplicate_events=True,
                   preferred_encoding=u'utf-8', time_slice=None,
                   use_time_slicer=False):
    """Processes a plaso storage file.

    Optionally applies a time slice, starts the analysis plugins, runs the
    events from storage through the output module and finally collects the
    analysis plugin results.

    Args:
      output_module: an output module (instance of OutputModule).
      storage_file: the storage file object (instance of StorageFile).
      storage_file_path: string containing the path of the storage file.
      analysis_plugins: list of analysis plugin objects (instance of
          AnalysisPlugin). If empty, no analysis plugins are started and
          event_queue_producers is ignored.
      event_queue_producers: list of event queue producer objects (instance
          of ItemQueueProducer).
      command_line_arguments: optional string of the command line arguments
          or None if not set.
      deduplicate_events: optional boolean value to indicate if the event
          objects should be deduplicated.
      preferred_encoding: optional preferred encoding.
      time_slice: optional time slice object (instance of TimeSlice). When
          it has an event timestamp, only events between its start and end
          timestamp are read from storage.
      use_time_slicer: optional boolean value to indicate the 'time slicer'
          should be used. The 'time slicer' will provide a context of events
          around an event of interest. Only used when time_slice is set and
          has no event timestamp.

    Returns:
      A counter (an instance of collections.Counter) that tracks the number
      of events extracted from storage, and the analysis plugin results.

    Raises:
      RuntimeError: if a non-recoverable situation is encountered.
    """
    # The storage time range is kept in its own variable. Previously the
    # time_slice argument was reset to None right before it was inspected,
    # which made both branches below unreachable and silently ignored the
    # caller's time slice.
    storage_time_slice = None
    if time_slice:
        if time_slice.event_timestamp is not None:
            storage_time_slice = storage_time_range.TimeRange(
                time_slice.start_timestamp, time_slice.end_timestamp)
        elif use_time_slicer:
            self._filter_buffer = bufferlib.CircularBuffer(time_slice.duration)

    with storage_file:
        # TODO: allow for single processing.
        # TODO: add upper queue limit.
        # Queue on which the analysis reports come in. Only the ZeroMQ queue
        # has a port that needs to be passed on to the analysis plugins.
        analysis_queue_port = None
        if self._use_zeromq:
            analysis_report_incoming_queue = zeromq_queue.ZeroMQPullBindQueue(
                delay_open=False, port=None, linger_seconds=5)
            analysis_queue_port = analysis_report_incoming_queue.port
        else:
            analysis_report_incoming_queue = (
                multi_process.MultiProcessingQueue(timeout=5))

        # Fall back to an empty preprocessing object if the storage file
        # does not provide a usable one.
        pre_obj = self._GetLastGoodPreprocess(storage_file)
        if pre_obj is None:
            pre_obj = event.PreprocessObject()

        if analysis_plugins:
            self._StartAnalysisPlugins(
                storage_file_path, analysis_plugins, pre_obj,
                analysis_queue_port=analysis_queue_port,
                analysis_report_incoming_queue=analysis_report_incoming_queue,
                command_line_arguments=command_line_arguments)

            # Assign the preprocessing object to the storage.
            # This is normally done in the construction of the storage
            # object, however we cannot do that here since the preprocessing
            # object is stored inside the storage file, so we need to open it
            # first to be able to read it in, before we make changes to it.
            # Thus we need to access this protected member of the class.
            # pylint: disable=protected-access
            storage_file._pre_obj = pre_obj
        else:
            # Without analysis plugins no events are passed on to analysis
            # queues.
            event_queue_producers = []

        output_buffer = output_event_buffer.EventBuffer(
            output_module, deduplicate_events)
        with output_buffer:
            counter = self.ProcessEventsFromStorage(
                storage_file, output_buffer,
                analysis_queues=event_queue_producers,
                filter_buffer=self._filter_buffer,
                my_filter=self._filter_object,
                time_slice=storage_time_slice)

        # Add up the event totals of the storage information objects that
        # have a counter.
        for information in storage_file.GetStorageInformation():
            if hasattr(information, u'counter'):
                counter[u'Stored Events'] += information.counter[u'total']

        if not self._quiet_mode:
            logging.info(u'Output processing is done.')

        # Get all reports and tags from analysis plugins.
        self._ProcessAnalysisPlugins(
            analysis_plugins, analysis_report_incoming_queue, storage_file,
            counter, preferred_encoding=preferred_encoding)

    # Events that were stored but neither included nor filtered out are
    # attributed to date filtering.
    if self._filter_object and not counter[u'Limited By']:
        counter[u'Filter By Date'] = (
            counter[u'Stored Events'] - counter[u'Events Included'] -
            counter[u'Events Filtered Out'])

    return counter
def ProcessStorage(self, options):
    """Open a storage file and processes the events within.

    Applies the optional time slice, opens the storage file, creates the
    output module, starts one process per requested analysis plugin, runs
    the events through the output module and finally collects the reports
    and tags of the analysis plugins.

    Args:
      options: the command line arguments (instance of argparse.Namespace).
          The attributes slice, timezone, analysis_plugins, dedup and quiet
          are read here, each with a default when missing; the object is
          also passed on to the output module and the analysis plugin
          loader.

    Returns:
      The counter returned by ProcessOutput, extended with the number of
      stored events, the analysis plugin counters and, when a filter is set
      and the output was not limited, a 'Filter By Date' entry.

    Raises:
      RuntimeError: if the storage file cannot be opened, the preferred
          language identifier cannot be set, or the output module cannot be
          created or is missing.
    """
    counter = None

    slice_option = getattr(options, u'slice', None)
    if slice_option:
        timezone = getattr(options, u'timezone', u'UTC')
        if timezone == u'UTC':
            zone = pytz.utc
        else:
            zone = pytz.timezone(timezone)

        timestamp = timelib.Timestamp.FromTimeString(slice_option,
                                                     timezone=zone)

        # Convert number of minutes to microseconds.
        range_operator = self._slice_size * 60 * 1000000

        # Set the time range.
        pfilter.TimeRangeCache.SetLowerTimestamp(timestamp - range_operator)
        pfilter.TimeRangeCache.SetUpperTimestamp(timestamp + range_operator)

    # At this point analysis_plugins is the comma separated option string;
    # it is rebound to the loaded plugins further down.
    analysis_plugins = getattr(options, u'analysis_plugins', u'')
    # The storage file is only opened writable when analysis plugins are
    # requested.
    if analysis_plugins:
        read_only = False
    else:
        read_only = True

    try:
        storage_file = self.OpenStorageFile(read_only=read_only)
    except IOError as exception:
        raise RuntimeError(
            u'Unable to open storage file: {0:s} with error: {1:s}.'.format(
                self._storage_file_path, exception))

    with storage_file:
        storage_file.SetStoreLimit(self._filter_object)

        # Write to the configured output filename, or to stdout when none
        # is set.
        if self._output_filename:
            output_stream = self._output_filename
        else:
            output_stream = sys.stdout

        formatter_mediator = self.GetFormatMediator()

        try:
            formatter_mediator.SetPreferredLanguageIdentifier(
                self._preferred_language)
        except (KeyError, TypeError) as exception:
            raise RuntimeError(exception)

        try:
            # TODO: move this into a factory function?
            output_module_class = output_manager.OutputManager.GetOutputClass(
                self._output_format)
            output_module = output_module_class(
                storage_file, formatter_mediator, filehandle=output_stream,
                config=options, filter_use=self._filter_object)

        except IOError as exception:
            raise RuntimeError(
                u'Unable to create output module with error: {0:s}'.format(
                    exception))

        if not output_module:
            raise RuntimeError(u'Missing output module.')

        if analysis_plugins:
            logging.info(u'Starting analysis plugins.')
            # Within all preprocessing objects, try to get the last one that
            # has time zone information stored in it, the highest chance of
            # it containing the information we are seeking (defaulting to
            # the last one).
            # NOTE(review): pre_objs[-1] assumes the storage file contains at
            # least one preprocessing object - confirm.
            pre_objs = storage_file.GetStorageInformation()
            pre_obj = pre_objs[-1]
            for obj in pre_objs:
                if getattr(obj, u'time_zone_str', u''):
                    pre_obj = obj

            # Fill in the collection information.
            pre_obj.collection_information = {}
            encoding = getattr(pre_obj, u'preferred_encoding', None)
            if encoding:
                cmd_line = u' '.join(sys.argv)
                try:
                    pre_obj.collection_information[u'cmd_line'] = cmd_line.decode(
                        encoding)
                except UnicodeDecodeError:
                    # Best effort: the command line is left out of the
                    # collection information if it cannot be decoded.
                    pass
            pre_obj.collection_information[u'file_processed'] = (
                self._storage_file_path)
            pre_obj.collection_information[u'method'] = u'Running Analysis Plugins'
            # Stores the comma separated option string, not the loaded
            # plugin objects.
            pre_obj.collection_information[u'plugins'] = analysis_plugins
            time_of_run = timelib.Timestamp.GetNow()
            pre_obj.collection_information[u'time_of_run'] = time_of_run

            pre_obj.counter = collections.Counter()

            # Assign the preprocessing object to the storage.
            # This is normally done in the construction of the storage
            # object, however we cannot do that here since the preprocessing
            # object is stored inside the storage file, so we need to open it
            # first to be able to read it in, before we make changes to it.
            # Thus we need to access this protected member of the class.
            # pylint: disable=protected-access
            storage_file._pre_obj = pre_obj

            # Start queues and load up plugins.
            # TODO: add upper queue limit.
            analysis_output_queue = multi_process.MultiProcessingQueue()
            event_queue_producers = []
            event_queues = []
            analysis_plugins_list = [
                name.strip() for name in analysis_plugins.split(u',')]

            # One event queue and one producer per requested plugin name.
            # NOTE(review): xrange only exists in Python 2.
            for _ in xrange(0, len(analysis_plugins_list)):
                # TODO: add upper queue limit.
                analysis_plugin_queue = multi_process.MultiProcessingQueue()
                event_queues.append(analysis_plugin_queue)
                event_queue_producers.append(
                    queue.ItemQueueProducer(event_queues[-1]))

            knowledge_base_object = knowledge_base.KnowledgeBase()

            # From here on analysis_plugins refers to the value returned by
            # LoadPlugins instead of the option string.
            analysis_plugins = analysis.LoadPlugins(
                analysis_plugins_list, event_queues, options)

            # Now we need to start all the plugins.
            for analysis_plugin in analysis_plugins:
                # Every plugin gets its own producer, all of which write to
                # the shared analysis output queue.
                analysis_report_queue_producer = queue.ItemQueueProducer(
                    analysis_output_queue)
                analysis_context_object = analysis_context.AnalysisContext(
                    analysis_report_queue_producer, knowledge_base_object)
                analysis_process = multiprocessing.Process(
                    name=u'Analysis {0:s}'.format(analysis_plugin.plugin_name),
                    target=analysis_plugin.RunPlugin,
                    args=(analysis_context_object,))
                self._analysis_processes.append(analysis_process)

                analysis_process.start()
                logging.info(
                    u'Plugin: [{0:s}] started.'.format(analysis_plugin.plugin_name))
        else:
            # Without analysis plugins no events are passed on to analysis
            # queues.
            event_queue_producers = []

        deduplicate_events = getattr(options, u'dedup', True)
        output_buffer = output_interface.EventBuffer(
            output_module, deduplicate_events)
        with output_buffer:
            counter = self.ProcessOutput(
                storage_file, output_buffer, my_filter=self._filter_object,
                filter_buffer=self._filter_buffer,
                analysis_queues=event_queue_producers)

        # Add up the event totals of the storage information objects that
        # have a counter.
        for information in storage_file.GetStorageInformation():
            if hasattr(information, u'counter'):
                counter[u'Stored Events'] += information.counter[u'total']

        if not getattr(options, u'quiet', False):
            logging.info(u'Output processing is done.')

        # Get all reports and tags from analysis plugins.
        # NOTE(review): analysis_plugins was rebound to the result of
        # LoadPlugins above; if that is a generator this test is always
        # true once plugins were requested - confirm.
        if analysis_plugins:
            logging.info(u'Processing data from analysis plugins.')
            for event_queue_producer in event_queue_producers:
                event_queue_producer.SignalEndOfInput()

            # Wait for all analysis plugins to complete.
            for number, analysis_process in enumerate(self._analysis_processes):
                logging.debug(
                    u'Waiting for analysis plugin: {0:d} to complete.'.format(number))
                if analysis_process.is_alive():
                    # Waits at most 10 seconds; the process may still be
                    # running afterwards.
                    analysis_process.join(10)
                else:
                    logging.warning(u'Plugin {0:d} already stopped.'.format(number))
                    analysis_process.terminate()
            logging.debug(u'All analysis plugins are now stopped.')

            # Close the output queue.
            analysis_output_queue.SignalEndOfInput()

            # Go over each output.
            analysis_queue_consumer = PsortAnalysisReportQueueConsumer(
                analysis_output_queue, storage_file, self._filter_expression,
                self.preferred_encoding)

            analysis_queue_consumer.ConsumeItems()

            if analysis_queue_consumer.tags:
                storage_file.StoreTagging(analysis_queue_consumer.tags)

            # TODO: analysis_queue_consumer.anomalies:

            # Copy the analysis counters into the result, overwriting
            # entries with the same name.
            # NOTE(review): iteritems only exists in Python 2.
            for item, value in analysis_queue_consumer.counter.iteritems():
                counter[item] = value

    # Events that were stored but neither included nor filtered out are
    # attributed to date filtering.
    if self._filter_object and not counter[u'Limited By']:
        counter[u'Filter By Date'] = (
            counter[u'Stored Events'] - counter[u'Events Included'] -
            counter[u'Events Filtered Out'])

    return counter