def testRequestAndBufferedReplyQueues(self):
  """Tests REQ and buffered REP queue pairs."""
  reply_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
      name='requestbufferedreply_replybind', delay_open=False,
      linger_seconds=1)
  request_queue = zeromq_queue.ZeroMQRequestConnectQueue(
      name='requestbufferedreply_requestconnect', delay_open=False,
      port=reply_queue.port, linger_seconds=1)

  self._testItemTransferred(reply_queue, request_queue)

  reply_queue.Close()
  request_queue.Close()

  request_queue = zeromq_queue.ZeroMQRequestBindQueue(
      name='requestbufferedreply_requestbind', delay_open=False,
      linger_seconds=1)
  reply_queue = zeromq_queue.ZeroMQBufferedReplyConnectQueue(
      name='requestbufferedreply_replyconnect', delay_open=False,
      port=request_queue.port, linger_seconds=0)

  self._testItemTransferred(reply_queue, request_queue)

  reply_queue.Close()
  request_queue.Close()
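# The test above relies on a _testItemTransferred helper that is not shown
# in this excerpt. A minimal sketch of such a helper, assuming the plaso
# queue API of PushItem/PopItem and unittest.TestCase assertions; the helper
# body here is this sketch's assumption, not necessarily the project's
# actual implementation:

def _testItemTransferred(self, push_queue, pop_queue):
  """Pushes an item on one queue and expects to pop it from the other."""
  item = 'This is an item going from {0:s} to {1:s}.'.format(
      push_queue.name, pop_queue.name)
  push_queue.PushItem(item)
  popped_item = pop_queue.PopItem()
  self.assertEqual(item, popped_item)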
def testMain(self):
  """Tests the _Main function."""
  output_task_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
      delay_open=True, linger_seconds=0, maximum_items=1,
      name='test output task queue', timeout_seconds=self._QUEUE_TIMEOUT)
  output_task_queue.Open()

  input_task_queue = zeromq_queue.ZeroMQRequestConnectQueue(
      delay_open=True, linger_seconds=0, name='test input task queue',
      port=output_task_queue.port, timeout_seconds=self._QUEUE_TIMEOUT)

  configuration = configurations.ProcessingConfiguration()

  test_process = worker_process.WorkerProcess(
      input_task_queue, None, None, None, None, configuration,
      name='TestWorker')
  test_process.start()

  output_task_queue.PushItem(plaso_queue.QueueAbort(), block=False)
  output_task_queue.Close(abort=True)
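# testMain shuts the worker down by pushing a plaso_queue.QueueAbort
# sentinel rather than killing the process. A minimal sketch of the
# consumer side of that pattern, assuming only PopItem and the QueueAbort
# type; the loop below is illustrative, not the actual
# WorkerProcess._Main:

def _ConsumeUntilAbort(task_queue):
  """Pops items from a queue until a QueueAbort sentinel arrives."""
  while True:
    item = task_queue.PopItem()
    if isinstance(item, plaso_queue.QueueAbort):
      # The producer asked us to shut down cleanly.
      break
    # Process the task item here.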
def testBufferedReplyQueue(self):
  """Tests for the buffered reply queue."""
  test_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
      name='bufferedreply_bind', delay_open=False, linger_seconds=1)
  test_queue.PushItem('This is a test item.')
  test_queue.Close(abort=True)

  with self.assertRaises(errors.QueueAlreadyClosed):
    test_queue.PushItem('This shouldn\'t work')
def testBufferedReplyQueue(self):
  """Tests for the buffered reply queue."""
  test_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(delay_open=False)
  test_queue.PushItem(u'This is a test item.')
  test_queue.Empty()
  test_queue.Close()

  with self.assertRaises(errors.QueueAlreadyClosed):
    test_queue.PushItem(u'This shouldn\'t work')
def testRequestAndBufferedReplyQueues(self):
  """Tests REQ and buffered REP queue pairs."""
  reply_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(delay_open=False)
  request_queue = zeromq_queue.ZeroMQRequestConnectQueue(
      delay_open=False, port=reply_queue.port)

  self._testItemTransferred(reply_queue, request_queue)

  request_queue = zeromq_queue.ZeroMQRequestBindQueue(delay_open=False)
  reply_queue = zeromq_queue.ZeroMQBufferedReplyConnectQueue(
      delay_open=False, port=request_queue.port)

  self._testItemTransferred(reply_queue, request_queue)
def testEmptyBufferedQueues(self):
  """Tests the Empty method for buffered queues."""
  queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
      name='requestbufferedreply_replybind', delay_open=False,
      linger_seconds=1, buffer_max_size=3, timeout_seconds=2,
      buffer_timeout_seconds=1)
  try:
    while True:
      queue.PushItem('item', block=False)
  except errors.QueueFull:
    # Queue is now full.
    pass

  with self.assertRaises(errors.QueueFull):
    queue.PushItem('item', block=False)

  queue.Empty()
  # We should now be able to push another item without an exception.
  queue.PushItem('item')
  queue.Empty()
  queue.Close()
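# The fill-until-full step above generalizes to any bounded producer. A
# small sketch of a reusable fill helper, assuming the same non-blocking
# PushItem API used in the test; the helper name is this sketch's own:

def _FillQueue(queue, item='item'):
  """Pushes items non-blocking until the buffer reports full.

  Returns:
    int: number of items accepted before errors.QueueFull was raised.
  """
  count = 0
  try:
    while True:
      queue.PushItem(item, block=False)
      count += 1
  except errors.QueueFull:
    pass
  return count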
def ProcessSources(
    self, session_identifier, source_path_specs, storage_writer,
    processing_configuration, enable_sigsegv_handler=False,
    number_of_worker_processes=0, status_update_callback=None,
    worker_memory_limit=None):
  """Processes the sources and extracts events.

  Args:
    session_identifier (str): identifier of the session.
    source_path_specs (list[dfvfs.PathSpec]): path specifications of the
        sources to process.
    storage_writer (StorageWriter): storage writer for a session storage.
    processing_configuration (ProcessingConfiguration): processing
        configuration.
    enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV handler
        should be enabled.
    number_of_worker_processes (Optional[int]): number of worker processes.
    status_update_callback (Optional[function]): callback function for status
        updates.
    worker_memory_limit (Optional[int]): maximum amount of memory a worker is
        allowed to consume, where None represents the default memory limit
        and 0 represents no limit.

  Returns:
    ProcessingStatus: processing status.
  """
  if number_of_worker_processes < 1:
    # One worker for each "available" CPU (minus other processes).
    # The number here is derived from the fact that the engine starts up:
    # * A main process.
    #
    # If we want to utilize all CPUs on the system we therefore need to
    # start up workers that amounts to the total number of CPUs - the
    # other processes.
    try:
      cpu_count = multiprocessing.cpu_count() - 1

      if cpu_count <= self._WORKER_PROCESSES_MINIMUM:
        cpu_count = self._WORKER_PROCESSES_MINIMUM

      elif cpu_count >= self._WORKER_PROCESSES_MAXIMUM:
        cpu_count = self._WORKER_PROCESSES_MAXIMUM

    except NotImplementedError:
      logger.error((
          'Unable to determine number of CPUs defaulting to {0:d} worker '
          'processes.').format(self._WORKER_PROCESSES_MINIMUM))
      cpu_count = self._WORKER_PROCESSES_MINIMUM

    number_of_worker_processes = cpu_count

  self._enable_sigsegv_handler = enable_sigsegv_handler
  self._number_of_worker_processes = number_of_worker_processes

  if worker_memory_limit is None:
    self._worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT
  else:
    self._worker_memory_limit = worker_memory_limit

  # Keep track of certain values so we can spawn new extraction workers.
  self._processing_configuration = processing_configuration

  self._debug_output = processing_configuration.debug_output
  self._log_filename = processing_configuration.log_filename
  self._session_identifier = session_identifier
  self._status_update_callback = status_update_callback
  self._storage_writer = storage_writer

  # Set up the task queue.
  if not self._use_zeromq:
    self._task_queue = multi_process_queue.MultiProcessingQueue(
        maximum_number_of_queued_items=self._maximum_number_of_tasks)

  else:
    task_outbound_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
        delay_open=True, linger_seconds=0, maximum_items=1,
        name='main_task_queue',
        timeout_seconds=self._ZEROMQ_NO_WORKER_REQUEST_TIME_SECONDS)
    self._task_queue = task_outbound_queue

    # The ZeroMQ backed queue must be started first, so we can save its
    # port.
    # TODO: raises: attribute-defined-outside-init
    # self._task_queue.name = 'Task queue'
    self._task_queue.Open()
    self._task_queue_port = self._task_queue.port

  self._StartProfiling(self._processing_configuration.profiling)

  self._task_manager.StartProfiling(
      self._processing_configuration.profiling, self._name)

  if self._serializers_profiler:
    storage_writer.SetSerializersProfiler(self._serializers_profiler)

  if self._storage_profiler:
    storage_writer.SetStorageProfiler(self._storage_profiler)

  # Set up the storage writer before the worker processes.
  storage_writer.StartTaskStorage()

  for worker_number in range(number_of_worker_processes):
    # First argument to _StartWorkerProcess is not used.
    extraction_process = self._StartWorkerProcess('', storage_writer)
    if not extraction_process:
      logger.error('Unable to create worker process: {0:d}'.format(
          worker_number))

  self._StartStatusUpdateThread()

  try:
    # Open the storage file after creating the worker processes otherwise
    # the ZIP storage file will remain locked as long as the worker
    # processes are alive.
    storage_writer.Open()
    storage_writer.WriteSessionStart()

    try:
      storage_writer.WritePreprocessingInformation(self.knowledge_base)

      self._ProcessSources(source_path_specs, storage_writer)

    finally:
      storage_writer.WriteSessionCompletion(aborted=self._abort)
      storage_writer.Close()

  finally:
    # Stop the status update thread after close of the storage writer
    # so we include the storage sync to disk in the status updates.
    self._StopStatusUpdateThread()

  if self._serializers_profiler:
    storage_writer.SetSerializersProfiler(None)

  if self._storage_profiler:
    storage_writer.SetStorageProfiler(None)

  self._task_manager.StopProfiling()
  self._StopProfiling()

  try:
    self._StopExtractionProcesses(abort=self._abort)

  except KeyboardInterrupt:
    self._AbortKill()

    # The abort can leave the main process unresponsive
    # due to incorrectly finalized IPC.
    self._KillProcess(os.getpid())

  # The task queue should be closed by _StopExtractionProcesses, this
  # close is a failsafe, primarily due to MultiProcessingQueue's
  # blocking behavior.
  self._task_queue.Close(abort=True)

  if self._processing_status.error_path_specs:
    task_storage_abort = True
  else:
    task_storage_abort = self._abort

  try:
    storage_writer.StopTaskStorage(abort=task_storage_abort)
  except (IOError, OSError) as exception:
    logger.error(
        'Unable to stop task storage with error: {0!s}'.format(exception))

  if self._abort:
    logger.debug('Processing aborted.')
    self._processing_status.aborted = True
  else:
    logger.debug('Processing completed.')

  # Reset values.
  self._enable_sigsegv_handler = None
  self._number_of_worker_processes = None
  self._worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT

  self._processing_configuration = None
  self._session_identifier = None
  self._status_update_callback = None
  self._storage_writer = None

  return self._processing_status
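# The worker-count heuristic in ProcessSources (all CPUs minus one for the
# main process, clamped between a minimum and a maximum) is easy to lift
# out. A standalone sketch, with the bounds as illustrative module
# constants rather than the engine's class attributes; the values 2 and 15
# are this sketch's assumption:

import multiprocessing

_WORKER_PROCESSES_MINIMUM = 2
_WORKER_PROCESSES_MAXIMUM = 15

def _DefaultNumberOfWorkers():
  """Determines a default worker count: available CPUs minus the main process.

  Returns:
    int: number of worker processes, clamped to [minimum, maximum].
  """
  try:
    cpu_count = multiprocessing.cpu_count() - 1
  except NotImplementedError:
    # Fall back to the minimum when the CPU count cannot be determined.
    return _WORKER_PROCESSES_MINIMUM

  return max(_WORKER_PROCESSES_MINIMUM,
             min(cpu_count, _WORKER_PROCESSES_MAXIMUM))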
def ProcessSources(
    self, session, source_path_specs, storage_writer,
    processing_configuration, enable_sigsegv_handler=False,
    status_update_callback=None, storage_file_path=None):
  """Processes the sources and extracts events.

  Args:
    session (Session): session in which the sources are processed.
    source_path_specs (list[dfvfs.PathSpec]): path specifications of the
        sources to process.
    storage_writer (StorageWriter): storage writer for a session storage.
    processing_configuration (ProcessingConfiguration): processing
        configuration.
    enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV handler
        should be enabled.
    status_update_callback (Optional[function]): callback function for status
        updates.
    storage_file_path (Optional[str]): path to the session storage file.

  Returns:
    ProcessingStatus: processing status.
  """
  self._enable_sigsegv_handler = enable_sigsegv_handler

  # Keep track of certain values so we can spawn new extraction workers.
  self._processing_configuration = processing_configuration

  self._debug_output = processing_configuration.debug_output
  self._log_filename = processing_configuration.log_filename
  self._session = session
  self._status_update_callback = status_update_callback
  self._storage_file_path = storage_file_path
  self._storage_writer = storage_writer
  self._task_storage_format = processing_configuration.task_storage_format

  # Set up the task queue.
  task_outbound_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
      delay_open=True, linger_seconds=0, maximum_items=1,
      name='main_task_queue',
      timeout_seconds=self._ZEROMQ_NO_WORKER_REQUEST_TIME_SECONDS)
  self._task_queue = task_outbound_queue

  # The ZeroMQ backed queue must be started first, so we can save its port.
  # TODO: raises: attribute-defined-outside-init
  # self._task_queue.name = 'Task queue'
  self._task_queue.Open()
  self._task_queue_port = self._task_queue.port

  # Set up the task storage before the worker processes.
  self._StartTaskStorage(self._task_storage_format)

  for worker_number in range(self._number_of_worker_processes):
    process_name = 'Worker_{0:02d}'.format(self._last_worker_number)
    worker_process = self._StartWorkerProcess(process_name)
    if not worker_process:
      logger.error('Unable to create worker process: {0:d}'.format(
          worker_number))

  self._StartProfiling(self._processing_configuration.profiling)

  self._task_manager.StartProfiling(
      self._processing_configuration.profiling, self._name)

  if self._serializers_profiler:
    storage_writer.SetSerializersProfiler(self._serializers_profiler)

  if self._storage_profiler:
    storage_writer.SetStorageProfiler(self._storage_profiler)

  self._StartStatusUpdateThread()

  try:
    self._ProcessSources(source_path_specs, storage_writer)

  finally:
    # Stop the status update thread after close of the storage writer
    # so we include the storage sync to disk in the status updates.
    self._StopStatusUpdateThread()

  if self._serializers_profiler:
    storage_writer.SetSerializersProfiler(None)

  if self._storage_profiler:
    storage_writer.SetStorageProfiler(None)

  self._task_manager.StopProfiling()
  self._StopProfiling()

  try:
    self._StopExtractionProcesses(abort=self._abort)

  except KeyboardInterrupt:
    self._AbortKill()

    # The abort can leave the main process unresponsive
    # due to incorrectly finalized IPC.
    self._KillProcess(os.getpid())

  # The task queue should be closed by _StopExtractionProcesses, this
  # close is a failsafe.
  self._task_queue.Close(abort=True)

  if self._processing_status.error_path_specs:
    task_storage_abort = True
  else:
    task_storage_abort = self._abort

  try:
    self._StopTaskStorage(
        self._task_storage_format, abort=task_storage_abort)
  except (IOError, OSError) as exception:
    logger.error(
        'Unable to stop task storage with error: {0!s}'.format(exception))

  if self._abort:
    logger.debug('Processing aborted.')
    self._processing_status.aborted = True
  else:
    logger.debug('Processing completed.')

  # Update the status view one last time.
  self._UpdateStatus()

  # Reset values.
  self._enable_sigsegv_handler = None
  self._processing_configuration = None
  self._session = None
  self._status_update_callback = None
  self._storage_file_path = None
  self._storage_writer = None
  self._task_storage_format = None

  return self._processing_status
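# The main_task_queue above is a buffered REP socket that workers poll
# with REQ sockets: a worker's request effectively means "give me a task"
# and the reply is the task itself. A minimal sketch of that hand-out
# pattern in raw pyzmq, without the buffering, linger and timeout
# machinery the ZeroMQ queue classes add on top:

import zmq

def _ServeOneTask(task):
  """Binds a REP socket, waits for one worker request and replies a task."""
  context = zmq.Context()
  server = context.socket(zmq.REP)
  port = server.bind_to_random_port('tcp://127.0.0.1')

  worker = context.socket(zmq.REQ)
  worker.connect('tcp://127.0.0.1:{0:d}'.format(port))

  worker.send_pyobj('ready')   # Worker asks for work.
  server.recv_pyobj()          # Server sees the request...
  server.send_pyobj(task)      # ...and replies with the task.
  received = worker.recv_pyobj()

  worker.close()
  server.close()
  context.term()
  return received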
def ProcessSources(
    self, session_identifier, source_path_specs, storage_writer,
    enable_sigsegv_handler=False, filter_find_specs=None,
    filter_object=None, hasher_names_string=None, mount_path=None,
    number_of_worker_processes=0, parser_filter_expression=None,
    preferred_year=None, process_archives=False,
    process_compressed_streams=True, status_update_callback=None,
    show_memory_usage=False, temporary_directory=None, text_prepend=None,
    yara_rules_string=None):
  """Processes the sources and extracts event objects.

  Args:
    session_identifier (str): identifier of the session.
    source_path_specs (list[dfvfs.PathSpec]): path specifications of the
        sources to process.
    storage_writer (StorageWriter): storage writer for a session storage.
    enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV handler
        should be enabled.
    filter_find_specs (Optional[list[dfvfs.FindSpec]]): find specifications
        used in path specification extraction.
    filter_object (Optional[objectfilter.Filter]): filter object.
    hasher_names_string (Optional[str]): comma separated string of names
        of hashers to use during processing.
    mount_path (Optional[str]): mount path.
    number_of_worker_processes (Optional[int]): number of worker processes.
    parser_filter_expression (Optional[str]): parser filter expression,
        where None represents all parsers and plugins.
    preferred_year (Optional[int]): preferred year.
    process_archives (Optional[bool]): True if archive files should be
        scanned for file entries.
    process_compressed_streams (Optional[bool]): True if file content in
        compressed streams should be processed.
    show_memory_usage (Optional[bool]): True if memory information should
        be included in status updates.
    status_update_callback (Optional[function]): callback function for status
        updates.
    temporary_directory (Optional[str]): path of the directory for temporary
        files.
    text_prepend (Optional[str]): text to prepend to every event.
    yara_rules_string (Optional[str]): unparsed yara rule definitions.

  Returns:
    ProcessingStatus: processing status.
  """
  if number_of_worker_processes < 1:
    # One worker for each "available" CPU (minus other processes).
    # The number here is derived from the fact that the engine starts up:
    # * A main process.
    #
    # If we want to utilize all CPUs on the system we therefore need to
    # start up workers that amounts to the total number of CPUs - the
    # other processes.
    try:
      cpu_count = multiprocessing.cpu_count() - 1

      if cpu_count <= self._WORKER_PROCESSES_MINIMUM:
        cpu_count = self._WORKER_PROCESSES_MINIMUM

      elif cpu_count >= self._WORKER_PROCESSES_MAXIMUM:
        cpu_count = self._WORKER_PROCESSES_MAXIMUM

    except NotImplementedError:
      logging.error((
          u'Unable to determine number of CPUs defaulting to {0:d} worker '
          u'processes.').format(self._WORKER_PROCESSES_MINIMUM))
      cpu_count = self._WORKER_PROCESSES_MINIMUM

    number_of_worker_processes = cpu_count

  self._enable_sigsegv_handler = enable_sigsegv_handler
  self._number_of_worker_processes = number_of_worker_processes
  self._show_memory_usage = show_memory_usage

  # Keep track of certain values so we can spawn new extraction workers.
  self._filter_find_specs = filter_find_specs
  self._filter_object = filter_object
  self._hasher_names_string = hasher_names_string
  self._mount_path = mount_path
  self._parser_filter_expression = parser_filter_expression
  self._preferred_year = preferred_year
  self._process_archives = process_archives
  self._process_compressed_streams = process_compressed_streams
  self._session_identifier = session_identifier
  self._status_update_callback = status_update_callback
  self._storage_writer = storage_writer
  self._temporary_directory = temporary_directory
  self._text_prepend = text_prepend
  self._yara_rules_string = yara_rules_string

  # Set up the task queue.
  if not self._use_zeromq:
    self._task_queue = multi_process_queue.MultiProcessingQueue(
        maximum_number_of_queued_items=self._maximum_number_of_tasks)

  else:
    task_outbound_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
        delay_open=True, linger_seconds=0, maximum_items=1,
        name=u'main_task_queue',
        timeout_seconds=self._ZEROMQ_NO_WORKER_REQUEST_TIME_SECONDS)
    self._task_queue = task_outbound_queue

    # The ZeroMQ backed queue must be started first, so we can save its
    # port.
    # TODO: raises: attribute-defined-outside-init
    # self._task_queue.name = u'Task queue'
    self._task_queue.Open()
    self._task_queue_port = self._task_queue.port

  self._StartProfiling()

  if self._serializers_profiler:
    storage_writer.SetSerializersProfiler(self._serializers_profiler)

  # Set up the storage writer before the worker processes.
  storage_writer.StartTaskStorage()

  for _ in range(number_of_worker_processes):
    extraction_process = self._StartExtractionWorkerProcess(storage_writer)
    self._StartMonitoringProcess(extraction_process.pid)

  self._StartStatusUpdateThread()

  try:
    # Open the storage file after creating the worker processes otherwise
    # the ZIP storage file will remain locked as long as the worker
    # processes are alive.
    storage_writer.Open()
    storage_writer.WriteSessionStart()

    try:
      storage_writer.WritePreprocessingInformation(self.knowledge_base)

      self._ProcessSources(
          source_path_specs, storage_writer,
          filter_find_specs=filter_find_specs)

    finally:
      storage_writer.WriteSessionCompletion(aborted=self._abort)
      storage_writer.Close()

  finally:
    # Stop the status update thread after close of the storage writer
    # so we include the storage sync to disk in the status updates.
    self._StopStatusUpdateThread()

  if self._serializers_profiler:
    storage_writer.SetSerializersProfiler(None)

  self._StopProfiling()

  try:
    self._StopExtractionProcesses(abort=self._abort)

  except KeyboardInterrupt:
    self._AbortKill()

    # The abort can leave the main process unresponsive
    # due to incorrectly finalized IPC.
    self._KillProcess(os.getpid())

  # The task queue should be closed by _StopExtractionProcesses, this
  # close is a failsafe, primarily due to MultiProcessingQueue's
  # blocking behaviour.
  self._task_queue.Close(abort=True)

  if self._processing_status.error_path_specs:
    task_storage_abort = True
  else:
    task_storage_abort = self._abort

  try:
    storage_writer.StopTaskStorage(abort=task_storage_abort)
  except (IOError, OSError) as exception:
    logging.error(u'Unable to stop task storage with error: {0!s}'.format(
        exception))

  if self._abort:
    logging.debug(u'Processing aborted.')
    self._processing_status.aborted = True
  else:
    logging.debug(u'Processing completed.')

  # Reset values.
  self._enable_sigsegv_handler = None
  self._number_of_worker_processes = None
  self._show_memory_usage = None

  self._filter_find_specs = None
  self._filter_object = None
  self._hasher_names_string = None
  self._mount_path = None
  self._parser_filter_expression = None
  self._preferred_year = None
  self._process_archives = None
  self._process_compressed_streams = None
  self._session_identifier = None
  self._status_update_callback = None
  self._storage_writer = None
  self._text_prepend = None

  return self._processing_status
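# The task storage log calls in the methods above use '{0!s}' rather than
# '{0:s}': exceptions do not implement a string format spec, so on
# Python 3 '{0:s}'.format(OSError(...)) raises TypeError, while '{0!s}'
# converts via str() first. A quick demonstration:

error = OSError('unable to stop task storage')
print('{0!s}'.format(error))      # OK: prints the message.
try:
  print('{0:s}'.format(error))    # Raises TypeError on Python 3.
except TypeError as exception:
  print('format spec failed: {0!s}'.format(exception))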