def _StartWorkerProcess(self, process_name):
  """Creates, starts, monitors and registers a worker process.

  Args:
    process_name (str): process name.

  Returns:
    MultiProcessWorkerProcess: extraction worker process or None if the
        process could not be started.
  """
  logger.debug('Starting worker process {0:s}'.format(process_name))

  queue_name = '{0:s} task queue'.format(process_name)
  task_queue = zeromq_queue.ZeroMQRequestConnectQueue(
      delay_open=True, linger_seconds=0, name=queue_name,
      port=self._task_queue_port,
      timeout_seconds=self._TASK_QUEUE_TIMEOUT_SECONDS)

  process = extraction_process.ExtractionWorkerProcess(
      task_queue, self.collection_filters_helper, self.knowledge_base,
      self._processing_configuration,
      enable_sigsegv_handler=self._enable_sigsegv_handler, name=process_name)

  # Remove all possible log handlers to prevent a child process from logging
  # to the main process log file and garbling the log. The log handlers are
  # recreated after the worker process has been started.
  # Note: iterate over a copy of the handlers list, since removeHandler()
  # mutates logging.root.handlers and iterating the live list would skip
  # every other handler.
  for handler in list(logging.root.handlers):
    logging.root.removeHandler(handler)
    handler.close()

  process.start()

  loggers.ConfigureLogging(
      debug_output=self._debug_output, filename=self._log_filename, mode='a',
      quiet_mode=self._quiet_mode)

  try:
    self._StartMonitoringProcess(process)

  except (IOError, KeyError) as exception:
    pid = process.pid
    logger.error((
        'Unable to monitor replacement worker process: {0:s} '
        '(PID: {1:d}) with error: {2!s}').format(
            process_name, pid, exception))

    self._TerminateProcess(process)
    return None

  self._RegisterProcess(process)

  self._last_worker_number += 1

  return process
def _StartWorkerProcess(self, process_name):
  """Creates, starts, monitors and registers a worker process.

  Args:
    process_name (str): process name.

  Returns:
    MultiProcessWorkerProcess: extraction worker process or None on error.
  """
  plugin = self._analysis_plugins.get(process_name, None)
  if not plugin:
    logger.error('Missing analysis plugin: {0:s}'.format(process_name))
    return None

  # The output queue binds first so its (randomly assigned) port number can
  # be handed to the input queue of the child process.
  output_queue_name = '{0:s} output event queue'.format(process_name)
  output_queue = zeromq_queue.ZeroMQPushBindQueue(
      name=output_queue_name, timeout_seconds=self._QUEUE_TIMEOUT)

  # Open the queue so it can bind to a random port, and we can get the
  # port number to use in the input queue.
  output_queue.Open()

  self._event_queues[process_name] = output_queue

  input_queue_name = '{0:s} input event queue'.format(process_name)
  input_queue = zeromq_queue.ZeroMQPullConnectQueue(
      name=input_queue_name, delay_open=True, port=output_queue.port,
      timeout_seconds=self._QUEUE_TIMEOUT)

  analysis_worker = analysis_process.AnalysisProcess(
      input_queue, self._knowledge_base, self._session, plugin,
      self._processing_configuration, data_location=self._data_location,
      event_filter_expression=self._event_filter_expression,
      name=process_name)

  analysis_worker.start()

  logger.info('Started analysis plugin: {0:s} (PID: {1:d}).'.format(
      process_name, analysis_worker.pid))

  try:
    self._StartMonitoringProcess(analysis_worker)

  except (IOError, KeyError) as error:
    logger.error((
        'Unable to monitor analysis plugin: {0:s} (PID: {1:d}) '
        'with error: {2!s}').format(
            process_name, analysis_worker.pid, error))

    analysis_worker.terminate()
    return None

  self._RegisterProcess(analysis_worker)
  return analysis_worker
def _StartAnalysisProcesses(self, analysis_plugins):
  """Starts the analysis processes.

  Args:
    analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
        should be run and their names.
  """
  logger.info('Starting analysis plugins.')

  for plugin in analysis_plugins.values():
    plugin_name = plugin.NAME

    # Register the plugin before starting its worker process so the engine
    # can look it up by name.
    self._analysis_plugins[plugin_name] = plugin

    started_process = self._StartWorkerProcess(plugin_name)
    if not started_process:
      logger.error('Unable to create analysis process: {0:s}'.format(
          plugin_name))

  logger.info('Analysis plugins running')
def _KillProcess(self, pid):
  """Issues a SIGKILL or equivalent to the process.

  Args:
    pid (int): process identifier (PID).
  """
  if sys.platform.startswith('win'):
    # PROCESS_TERMINATE access right, needed to call TerminateProcess.
    process_terminate = 1
    handle = ctypes.windll.kernel32.OpenProcess(
        process_terminate, False, pid)
    if not handle:
      # OpenProcess returns NULL on failure; calling TerminateProcess with
      # a NULL handle would fail silently, so report the error instead.
      logger.error('Unable to open process: {0:d} to terminate it.'.format(
          pid))
      return

    ctypes.windll.kernel32.TerminateProcess(handle, -1)
    ctypes.windll.kernel32.CloseHandle(handle)

  else:
    try:
      os.kill(pid, signal.SIGKILL)
    except OSError as exception:
      logger.error('Unable to kill process {0:d} with error: {1!s}'.format(
          pid, exception))
def _ProcessPathSpec(self, extraction_worker, parser_mediator, path_spec):
  """Processes a path specification.

  Args:
    extraction_worker (worker.ExtractionWorker): extraction worker.
    parser_mediator (ParserMediator): parser mediator.
    path_spec (dfvfs.PathSpec): path specification.
  """
  filters_helper = self._collection_filters_helper
  find_specs = None
  if filters_helper:
    find_specs = filters_helper.excluded_file_system_find_specs

  self._current_display_name = parser_mediator.GetDisplayNameForPathSpec(
      path_spec)

  try:
    self._CacheFileSystem(path_spec)

    extraction_worker.ProcessPathSpec(
        parser_mediator, path_spec, excluded_find_specs=find_specs)

  except dfvfs_errors.CacheFullError:
    # TODO: signal engine of failure.
    self._abort = True
    logger.error((
        'ABORT: detected cache full error while processing path spec: '
        '{0:s}').format(self._current_display_name))

  except Exception as exception:  # pylint: disable=broad-except
    parser_mediator.ProduceExtractionWarning((
        'unable to process path specification with error: '
        '{0!s}').format(exception), path_spec=path_spec)

    if self._processing_configuration.debug_output:
      logger.warning((
          'Unhandled exception while processing path specification: '
          '{0:s}.').format(self._current_display_name))
      logger.exception(exception)
def _StartProcessStatusRPCServer(self):
  """Starts the process status RPC server."""
  if self._rpc_server:
    return

  self._rpc_server = plaso_xmlrpc.XMLProcessStatusRPCServer(self._GetStatus)

  hostname = 'localhost'

  # Try the PID as port number first otherwise pick something random
  # between 1024 and 60000.
  pid_usable_as_port = 1024 <= self._pid <= 60000
  port = self._pid if pid_usable_as_port else random.randint(1024, 60000)

  if not self._rpc_server.Start(hostname, port):
    # Retry with fresh random ports; port stays 0 when every attempt fails.
    port = 0
    for _ in range(self._NUMBER_OF_RPC_SERVER_START_ATTEMPTS):
      candidate_port = random.randint(1024, 60000)
      if self._rpc_server.Start(hostname, candidate_port):
        port = candidate_port
        break

  if not port:
    logger.error((
        'Unable to start a process status RPC server for {0!s} '
        '(PID: {1:d})').format(self._name, self._pid))
    self._rpc_server = None
    return

  self.rpc_port.value = port

  logger.debug('Process: {0!s} process status RPC server started'.format(
      self._name))
def _CheckStatusAnalysisProcess(self, pid):
  """Checks the status of an analysis process.

  Queries the process via RPC, enforces the worker memory limit, updates
  the processing status and terminates the process when it reports an
  error status.

  Args:
    pid (int): process ID (PID) of a registered analysis process.

  Raises:
    KeyError: if the process is not registered with the engine.
  """
  # TODO: Refactor this method, simplify and separate concerns (monitoring
  # vs management).
  self._RaiseIfNotRegistered(pid)

  if pid in self._completed_analysis_processes:
    # Completed processes are no longer queried; synthesize a status.
    status_indicator = definitions.STATUS_INDICATOR_COMPLETED
    process_status = {'processing_status': status_indicator}
    used_memory = 0

  else:
    process = self._processes_per_pid[pid]

    # _QueryProcessStatus returns None when the process cannot be reached,
    # which is treated as the process not being alive.
    process_status = self._QueryProcessStatus(process)
    if process_status is None:
      process_is_alive = False
    else:
      process_is_alive = True

    process_information = self._process_information_per_pid[pid]
    used_memory = process_information.GetUsedMemory() or 0

    # Enforce the per-worker memory limit, if one was configured.
    if self._worker_memory_limit and used_memory > self._worker_memory_limit:
      logger.warning((
          'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
          'memory limit: {2:d}.').format(
              process.name, pid, self._worker_memory_limit))
      self._KillProcess(pid)

    if isinstance(process_status, dict):
      # A successful RPC response resets the error count for this PID.
      self._rpc_errors_per_pid[pid] = 0
      status_indicator = process_status.get('processing_status', None)

      if status_indicator == definitions.STATUS_INDICATOR_COMPLETED:
        # Remember completed processes so they are not queried again.
        self._completed_analysis_processes.add(pid)

    else:
      rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
      self._rpc_errors_per_pid[pid] = rpc_errors

      # Too many consecutive RPC errors: consider the process dead.
      if rpc_errors > self._MAXIMUM_RPC_ERRORS:
        process_is_alive = False

      if process_is_alive:
        rpc_port = process.rpc_port.value
        logger.warning((
            'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
            'RPC socket: http://localhost:{2:d}').format(
                process.name, pid, rpc_port))

        processing_status_string = 'RPC error'
        status_indicator = definitions.STATUS_INDICATOR_RUNNING
      else:
        processing_status_string = 'killed'
        status_indicator = definitions.STATUS_INDICATOR_KILLED

      process_status = {'processing_status': processing_status_string}

  self._UpdateProcessingStatus(pid, process_status, used_memory)

  # NOTE(review): in the completed branch above "process" is never bound;
  # this is safe only because a COMPLETED status indicator cannot be in
  # ERROR_STATUS_INDICATORS — confirm if the indicator set changes.
  if status_indicator in definitions.ERROR_STATUS_INDICATORS:
    logger.error((
        'Process {0:s} (PID: {1:d}) is not functioning correctly. '
        'Status code: {2!s}.').format(process.name, pid, status_indicator))

    self._TerminateProcessByPid(pid)
def ProcessSources(
    self, source_configurations, storage_writer, session_identifier,
    processing_configuration, enable_sigsegv_handler=False,
    status_update_callback=None, storage_file_path=None):
  """Processes the sources and extract events.

  Args:
    source_configurations (list[SourceConfigurationArtifact]): configurations
        of the sources to process.
    storage_writer (StorageWriter): storage writer for a session storage.
    session_identifier (str): the identifier of the session the tasks are
        part of.
    processing_configuration (ProcessingConfiguration): processing
        configuration.
    enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV handler
        should be enabled.
    status_update_callback (Optional[function]): callback function for status
        updates.
    storage_file_path (Optional[str]): path to the session storage file.

  Returns:
    ProcessingStatus: processing status.
  """
  self._enable_sigsegv_handler = enable_sigsegv_handler

  # Keep track of certain values so we can spawn new extraction workers.
  self._processing_configuration = processing_configuration

  self._debug_output = processing_configuration.debug_output
  self._log_filename = processing_configuration.log_filename
  self._status_update_callback = status_update_callback
  self._storage_file_path = storage_file_path
  self._storage_writer = storage_writer
  self._task_storage_format = processing_configuration.task_storage_format

  # Set up the task queue.
  task_outbound_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
      delay_open=True, linger_seconds=0, maximum_items=1,
      name='main_task_queue',
      timeout_seconds=self._ZEROMQ_NO_WORKER_REQUEST_TIME_SECONDS)
  self._task_queue = task_outbound_queue

  # The ZeroMQ backed queue must be started first, so we can save its port.
  # TODO: raises: attribute-defined-outside-init
  # self._task_queue.name = 'Task queue'
  self._task_queue.Open()
  self._task_queue_port = self._task_queue.port

  # Set up the task storage before the worker processes.
  self._StartTaskStorage(self._task_storage_format)

  # Start the configured number of worker processes; failures are logged
  # but do not abort processing here.
  for worker_number in range(self._number_of_worker_processes):
    process_name = 'Worker_{0:02d}'.format(self._last_worker_number)
    worker_process = self._StartWorkerProcess(process_name)
    if not worker_process:
      logger.error('Unable to create worker process: {0:d}'.format(
          worker_number))

  self._StartProfiling(self._processing_configuration.profiling)
  self._task_manager.StartProfiling(
      self._processing_configuration.profiling, self._name)

  if self._serializers_profiler:
    storage_writer.SetSerializersProfiler(self._serializers_profiler)

  if self._storage_profiler:
    storage_writer.SetStorageProfiler(self._storage_profiler)

  self._StartStatusUpdateThread()

  # Seed the parser counters from any counts already present in the
  # session storage.
  self._parsers_counter = collections.Counter({
      parser_count.name: parser_count
      for parser_count in self._storage_writer.GetAttributeContainers(
          'parser_count')})

  try:
    self._ProcessSources(
        source_configurations, storage_writer, session_identifier)

  finally:
    # Stop the status update thread after close of the storage writer
    # so we include the storage sync to disk in the status updates.
    self._StopStatusUpdateThread()

    if self._serializers_profiler:
      storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      storage_writer.SetStorageProfiler(None)

    self._task_manager.StopProfiling()
    self._StopProfiling()

  try:
    self._StopExtractionProcesses(abort=self._abort)

  except KeyboardInterrupt:
    self._AbortKill()

    # The abort can leave the main process unresponsive
    # due to incorrectly finalized IPC.
    self._KillProcess(os.getpid())

  # The task queue should be closed by _StopExtractionProcesses, this
  # close is a failsafe.
  self._task_queue.Close(abort=True)

  # Treat unprocessed path specifications as a reason to abort the task
  # storage clean up.
  if self._processing_status.error_path_specs:
    task_storage_abort = True
  else:
    task_storage_abort = self._abort

  try:
    self._StopTaskStorage(
        self._task_storage_format, session_identifier,
        abort=task_storage_abort)
  except (IOError, OSError) as exception:
    logger.error('Unable to stop task storage with error: {0!s}'.format(
        exception))

  if self._abort:
    logger.debug('Processing aborted.')
    self._processing_status.aborted = True
  else:
    logger.debug('Processing completed.')

  # Update the status view one last time.
  self._UpdateStatus()

  # Reset values.
  self._enable_sigsegv_handler = None

  self._processing_configuration = None

  self._status_update_callback = None
  self._storage_file_path = None
  self._storage_writer = None
  self._task_storage_format = None

  return self._processing_status
def _MergeTaskStorage(
    self, storage_writer, session_identifier,
    maximum_number_of_containers=0):
  """Merges a task storage with the session storage.

  This function checks all task stores that are ready to merge and updates
  the scheduled tasks. Note that to prevent this function holding up
  the task scheduling loop only the first available task storage is merged.

  Args:
    storage_writer (StorageWriter): storage writer for a session storage
        used to merge task storage.
    session_identifier (str): the identifier of the session the tasks are
        part of.
    maximum_number_of_containers (Optional[int]): maximum number of
        containers to merge, where 0 represent no limit.

  Returns:
    int: number of containers merged.
  """
  if self._processing_profiler:
    self._processing_profiler.StartTiming('merge_check')

  task_identifiers = self._GetProcessedTaskIdentifiers(
      self._task_storage_format, session_identifier)

  for task_identifier in task_identifiers:
    try:
      task = self._task_manager.GetProcessedTaskByIdentifier(task_identifier)

      self._task_manager.SampleTaskStatus(task, 'processed')

      to_merge = self._task_manager.CheckTaskToMerge(task)
      if not to_merge:
        self._RemoveProcessedTaskStorage(self._task_storage_format, task)

        self._task_manager.RemoveTask(task)
        self._task_manager.SampleTaskStatus(task, 'removed_processed')

      else:
        self._PrepareMergeTaskStorage(self._task_storage_format, task)
        self._task_manager.UpdateTaskAsPendingMerge(task)

    except KeyError as exception:
      logger.error(
          'Unable to retrieve task: {0:s} to prepare it to be merged '
          'with error: {1!s}.'.format(task_identifier, exception))
      continue

  if self._processing_profiler:
    self._processing_profiler.StopTiming('merge_check')

  number_of_containers = 0

  task = None
  # While an on-hold merge exists no new task is picked up.
  if not self._storage_merge_reader_on_hold:
    task = self._task_manager.GetTaskPendingMerge(self._merge_task)

  if task or self._storage_merge_reader:
    self._status = definitions.STATUS_INDICATOR_MERGING

    if self._processing_profiler:
      self._processing_profiler.StartTiming('merge')

    if task:
      # Put an in-progress merge on hold to give the new task priority.
      if self._storage_merge_reader:
        self._merge_task_on_hold = self._merge_task
        self._storage_merge_reader_on_hold = self._storage_merge_reader

        self._task_manager.SampleTaskStatus(
            self._merge_task_on_hold, 'merge_on_hold')

      self._merge_task = task
      try:
        self._storage_merge_reader = self._StartMergeTaskStorage(
            self._storage_writer, self._task_storage_format, task)

        self._task_manager.SampleTaskStatus(task, 'merge_started')

      except IOError as exception:
        logger.error((
            'Unable to merge results of task: {0:s} '
            'with error: {1!s}').format(task.identifier, exception))
        self._storage_merge_reader = None

    if self._storage_merge_reader:
      fully_merged = self._storage_merge_reader.MergeAttributeContainers(
          maximum_number_of_containers=maximum_number_of_containers)

      number_of_containers = self._storage_merge_reader.number_of_containers

    else:
      # TODO: Do something more sensible when this happens, perhaps
      # retrying the task once that is implemented. For now, we mark the task
      # as fully merged because we can't continue with it.
      fully_merged = True

    if self._processing_profiler:
      self._processing_profiler.StopTiming('merge')

    if fully_merged:
      # Guard against the failed-to-start-merge path above, in which
      # fully_merged is True but no merge reader exists; dereferencing it
      # unconditionally would raise AttributeError on None.
      if self._storage_merge_reader:
        self._storage_merge_reader.Close()

        # Fold the per-parser event counts of the task into the session
        # counts and persist them.
        for key, value in self._storage_merge_reader.parsers_counter.items():
          parser_count = self._parsers_counter.get(key, None)
          if parser_count:
            parser_count.number_of_events += value
            self._storage_writer.UpdateAttributeContainer(parser_count)
          else:
            parser_count = counts.ParserCount(
                name=key, number_of_events=value)
            self._parsers_counter[key] = parser_count
            self._storage_writer.AddAttributeContainer(parser_count)

      self._RemoveMergeTaskStorage(
          self._task_storage_format, self._merge_task)

      try:
        self._task_manager.CompleteTask(self._merge_task)

      except KeyError as exception:
        logger.error(
            'Unable to complete task: {0:s} with error: {1!s}'.format(
                self._merge_task.identifier, exception))

      if not self._storage_merge_reader_on_hold:
        self._merge_task = None
        self._storage_merge_reader = None
      else:
        # Resume the merge that was put on hold.
        self._merge_task = self._merge_task_on_hold
        self._storage_merge_reader = self._storage_merge_reader_on_hold

        self._merge_task_on_hold = None
        self._storage_merge_reader_on_hold = None

        self._task_manager.SampleTaskStatus(
            self._merge_task, 'merge_resumed')

    self._status = definitions.STATUS_INDICATOR_RUNNING
    self._number_of_produced_events = storage_writer.number_of_events
    self._number_of_produced_sources = storage_writer.number_of_event_sources
    self._number_of_produced_extraction_warnings = (
        storage_writer.number_of_extraction_warnings)

  return number_of_containers
def _MergeAttributeContainer(self, storage_writer, merge_helper, container):
  """Merges an attribute container from a task store into the storage writer.

  Identifiers that referenced containers in the task store are remapped to
  the identifiers assigned by the session store; containers whose referenced
  container cannot be resolved are dropped with an error.

  Args:
    storage_writer (StorageWriter): storage writer.
    merge_helper (ExtractionTaskMergeHelper): helper to merge attribute
        containers.
    container (AttributeContainer): attribute container.
  """
  if container.CONTAINER_TYPE == self._CONTAINER_TYPE_EVENT:
    # Remap the task-store event data identifier to the session-store one.
    event_data_identifier = container.GetEventDataIdentifier()
    event_data_lookup_key = event_data_identifier.CopyToString()

    event_data_identifier = merge_helper.GetAttributeContainerIdentifier(
        event_data_lookup_key)

    if event_data_identifier:
      container.SetEventDataIdentifier(event_data_identifier)
    else:
      identifier = container.GetIdentifier()
      identifier_string = identifier.CopyToString()

      # TODO: store this as a merge warning so this is preserved
      # in the storage file.
      logger.error((
          'Unable to merge event attribute container: {0:s} since '
          'corresponding event data: {1:s} could not be found.').format(
              identifier_string, event_data_lookup_key))
      return

  elif container.CONTAINER_TYPE == self._CONTAINER_TYPE_EVENT_DATA:
    # Remap the optional event data stream identifier.
    event_data_stream_identifier = container.GetEventDataStreamIdentifier()
    event_data_stream_lookup_key = None
    if event_data_stream_identifier:
      event_data_stream_lookup_key = (
          event_data_stream_identifier.CopyToString())

      event_data_stream_identifier = (
          merge_helper.GetAttributeContainerIdentifier(
              event_data_stream_lookup_key))

    if event_data_stream_identifier:
      container.SetEventDataStreamIdentifier(event_data_stream_identifier)
    elif event_data_stream_lookup_key:
      # The event data referenced a stream that was not (yet) merged.
      identifier = container.GetIdentifier()
      identifier_string = identifier.CopyToString()

      # TODO: store this as a merge warning so this is preserved
      # in the storage file.
      logger.error((
          'Unable to merge event data attribute container: {0:s} since '
          'corresponding event data stream: {1:s} could not be '
          'found.').format(identifier_string, event_data_stream_lookup_key))
      return

  elif container.CONTAINER_TYPE == 'windows_eventlog_message_string':
    # Remap the Windows EventLog message file identifier.
    message_file_identifier = container.GetMessageFileIdentifier()
    message_file_lookup_key = message_file_identifier.CopyToString()

    message_file_identifier = merge_helper.GetAttributeContainerIdentifier(
        message_file_lookup_key)
    if message_file_identifier:
      container.SetMessageFileIdentifier(message_file_identifier)
    else:
      identifier = container.GetIdentifier()
      identifier_string = identifier.CopyToString()

      # TODO: store this as a merge warning so this is preserved
      # in the storage file.
      logger.error((
          'Unable to merge Windows EventLog message string attribute '
          'container: {0:s} since corresponding Windows EventLog message '
          'file: {1:s} could not be found.').format(
              identifier_string, message_file_lookup_key))
      return

  lookup_key = None
  if container.CONTAINER_TYPE in (
      self._CONTAINER_TYPE_EVENT_DATA,
      self._CONTAINER_TYPE_EVENT_DATA_STREAM,
      'windows_eventlog_message_file'):
    # Preserve the lookup key before adding it to the attribute container
    # store.
    identifier = container.GetIdentifier()
    lookup_key = identifier.CopyToString()

  storage_writer.AddAttributeContainer(container)

  if lookup_key:
    # Record the task-store to session-store identifier mapping so later
    # containers referencing this one can be remapped.
    identifier = container.GetIdentifier()
    merge_helper.SetAttributeContainerIdentifier(lookup_key, identifier)

  if container.CONTAINER_TYPE == self._CONTAINER_TYPE_EVENT:
    # event_data_lookup_key was set in the event branch above.
    parser_name = merge_helper.event_data_parser_mappings.get(
        event_data_lookup_key, 'N/A')
    self._parsers_counter[parser_name] += 1
    self._parsers_counter['total'] += 1

    self._number_of_produced_events += 1

  elif container.CONTAINER_TYPE == self._CONTAINER_TYPE_EVENT_DATA:
    # Remember which parser produced this event data for event counting.
    parser_name = container.parser.split('/')[-1]
    merge_helper.event_data_parser_mappings[lookup_key] = parser_name

  elif container.CONTAINER_TYPE == self._CONTAINER_TYPE_EVENT_SOURCE:
    self._number_of_produced_sources += 1
def _Main(self):
  """The main loop.

  Consumes tasks from the task queue and processes them with the event
  extraction worker until the queue closes, is empty or a QueueAbort is
  dequeued. Any unhandled exception aborts the worker.
  """
  # We need a resolver context per process to prevent multi processing
  # issues with file objects stored in images.
  self._resolver_context = context.Context()

  # Register any configured credentials with the dfVFS resolver key chain.
  for credential_configuration in self._processing_configuration.credentials:
    resolver.Resolver.key_chain.SetCredential(
        credential_configuration.path_spec,
        credential_configuration.credential_type,
        credential_configuration.credential_data)

  self._parser_mediator = self._CreateParserMediator(
      self._knowledge_base, self._resolver_context,
      self._processing_configuration)

  # We need to initialize the parser and hasher objects after the process
  # has forked otherwise on Windows the "fork" will fail with
  # a PickleError for Python modules that cannot be pickled.
  self._extraction_worker = worker.EventExtractionWorker(
      parser_filter_expression=(
          self._processing_configuration.parser_filter_expression))

  self._extraction_worker.SetExtractionConfiguration(
      self._processing_configuration.extraction)

  self._parser_mediator.StartProfiling(
      self._processing_configuration.profiling, self._name,
      self._process_information)
  self._StartProfiling(self._processing_configuration.profiling)

  if self._analyzers_profiler:
    self._extraction_worker.SetAnalyzersProfiler(self._analyzers_profiler)

  if self._processing_profiler:
    self._extraction_worker.SetProcessingProfiler(self._processing_profiler)

  logger.debug('Worker: {0!s} (PID: {1:d}) started.'.format(
      self._name, self._pid))

  self._status = definitions.STATUS_INDICATOR_RUNNING

  try:
    logger.debug(
        '{0!s} (PID: {1:d}) started monitoring task queue.'.format(
            self._name, self._pid))

    while not self._abort:
      try:
        task = self._task_queue.PopItem()
      except (errors.QueueClose, errors.QueueEmpty) as exception:
        logger.debug(
            'ConsumeItems exiting with exception: {0!s}.'.format(
                type(exception)))
        break

      # A QueueAbort marker signals the worker to stop consuming.
      if isinstance(task, plaso_queue.QueueAbort):
        logger.debug('ConsumeItems exiting, dequeued QueueAbort object.')
        break

      self._ProcessTask(task)

    logger.debug(
        '{0!s} (PID: {1:d}) stopped monitoring task queue.'.format(
            self._name, self._pid))

  # All exceptions need to be caught here to prevent the process
  # from being killed by an uncaught exception.
  except Exception as exception:  # pylint: disable=broad-except
    logger.warning(
        'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
            self._name, self._pid))
    logger.exception(exception)

    self._abort = True

  # Tear down profilers and release per-process state.
  if self._analyzers_profiler:
    self._extraction_worker.SetAnalyzersProfiler(None)

  if self._processing_profiler:
    self._extraction_worker.SetProcessingProfiler(None)

  self._StopProfiling()
  self._parser_mediator.StopProfiling()

  self._extraction_worker = None
  self._file_system_cache = []
  self._parser_mediator = None
  self._resolver_context = None

  if self._abort:
    self._status = definitions.STATUS_INDICATOR_ABORTED
  else:
    self._status = definitions.STATUS_INDICATOR_COMPLETED

  logger.debug('Worker: {0!s} (PID: {1:d}) stopped.'.format(
      self._name, self._pid))

  try:
    self._task_queue.Close(abort=self._abort)
  except errors.QueueAlreadyClosed:
    logger.error('Queue for {0:s} was already closed.'.format(self.name))
def _GetEventDataContentIdentifier(self, event_data, event_data_stream): """Retrieves the event data content identifier. Args: event_data (EventData): event data. event_data_stream (EventDataStream): event data stream. Returns: str: identifier of the event data content. """ event_data_identifier = event_data.GetIdentifier() lookup_key = event_data_identifier.CopyToString() content_identifier = self._event_data_content_identifier_cache.get( lookup_key, None) if not content_identifier: event_attributes = list(event_data.GetAttributes()) if event_data_stream: event_data_stream_attributes = event_data_stream.GetAttributes( ) event_attributes.extend(event_data_stream_attributes) attributes = ['data_type: {0:s}'.format(event_data.data_type)] for attribute_name, attribute_value in sorted(event_attributes): if (attribute_name in self._IDENTIFIER_EXCLUDED_ATTRIBUTES or attribute_value is None): continue # Note that support for event_data.pathspec is kept for backwards # compatibility. The current value is event_data_stream.path_spec. if attribute_name in ('path_spec', 'pathspec'): attribute_value = attribute_value.comparable elif isinstance(attribute_value, dict): attribute_value = sorted(attribute_value.items()) elif isinstance(attribute_value, set): attribute_value = sorted(list(attribute_value)) elif isinstance(attribute_value, bytes): attribute_value = repr(attribute_value) try: attribute_string = '{0:s}: {1!s}'.format( attribute_name, attribute_value) except UnicodeDecodeError: logger.error('Failed to decode attribute {0:s}'.format( attribute_name)) attributes.append(attribute_string) content_identifier = ', '.join(attributes) if len(self._event_data_content_identifier_cache) >= ( self._MAXIMUM_CACHED_IDENTIFIERS): self._event_data_content_identifier_cache.popitem(last=True) self._event_data_content_identifier_cache[ lookup_key] = content_identifier self._event_data_content_identifier_cache.move_to_end(lookup_key, last=False) return content_identifier
def _CheckStatusWorkerProcess(self, pid):
  """Checks the status of a worker process.

  If a worker process is not responding the process is terminated and
  a replacement process is started.

  Args:
    pid (int): process ID (PID) of a registered worker process.

  Raises:
    KeyError: if the process is not registered with the engine.
  """
  # TODO: Refactor this method, simplify and separate concerns (monitoring
  # vs management).
  self._RaiseIfNotRegistered(pid)

  process = self._processes_per_pid[pid]

  # _QueryProcessStatus returns None when the process cannot be reached,
  # which is treated as the process not being alive.
  process_status = self._QueryProcessStatus(process)
  if process_status is None:
    process_is_alive = False
  else:
    process_is_alive = True

  process_information = self._process_information_per_pid[pid]
  used_memory = process_information.GetUsedMemory() or 0

  # Enforce the per-worker memory limit, if one was configured.
  if self._worker_memory_limit and used_memory > self._worker_memory_limit:
    logger.warning((
        'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
        'memory limit: {2:d}.').format(
            process.name, pid, self._worker_memory_limit))
    self._KillProcess(pid)

  if isinstance(process_status, dict):
    # A successful RPC response resets the error count for this PID.
    self._rpc_errors_per_pid[pid] = 0
    status_indicator = process_status.get('processing_status', None)

  else:
    rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
    self._rpc_errors_per_pid[pid] = rpc_errors

    # Too many consecutive RPC errors: consider the process dead.
    if rpc_errors > self._MAXIMUM_RPC_ERRORS:
      process_is_alive = False

    if process_is_alive:
      rpc_port = process.rpc_port.value
      logger.warning((
          'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
          'RPC socket: http://localhost:{2:d}').format(
              process.name, pid, rpc_port))

      processing_status_string = 'RPC error'
      status_indicator = definitions.STATUS_INDICATOR_RUNNING
    else:
      processing_status_string = 'killed'
      status_indicator = definitions.STATUS_INDICATOR_KILLED

    process_status = {'processing_status': processing_status_string}

  self._UpdateProcessingStatus(pid, process_status, used_memory)

  # _UpdateProcessingStatus can also change the status of the worker,
  # So refresh the status if applicable.
  for worker_status in self._processing_status.workers_status:
    if worker_status.pid == pid:
      status_indicator = worker_status.status
      break

  if status_indicator in definitions.ERROR_STATUS_INDICATORS:
    logger.error((
        'Process {0:s} (PID: {1:d}) is not functioning correctly. '
        'Status code: {2!s}.').format(process.name, pid, status_indicator))

    self._TerminateProcessByPid(pid)

    # Try a limited number of times to start a replacement worker with the
    # same name, waiting between attempts.
    replacement_process = None
    for replacement_process_attempt in range(
        self._MAXIMUM_REPLACEMENT_RETRIES):
      logger.info((
          'Attempt: {0:d} to start replacement worker process for '
          '{1:s}').format(replacement_process_attempt + 1, process.name))

      replacement_process = self._StartWorkerProcess(process.name)
      if replacement_process:
        break

      time.sleep(self._REPLACEMENT_WORKER_RETRY_DELAY)

    if not replacement_process:
      logger.error(
          'Unable to create replacement worker process for: {0:s}'.format(
              process.name))
def _Main(self):
  """The main loop.

  Consumes events from the event queue and feeds them to the analysis
  plugin, writing results to a per-task SQLite storage that is finalized
  when the loop ends.
  """
  self._StartProfiling(self._processing_configuration.profiling)

  logger.debug('Analysis plugin: {0!s} (PID: {1:d}) started'.format(
      self._name, self._pid))

  # Creating the threading event in the constructor will cause a pickle
  # error on Windows when an analysis process is created.
  self._foreman_status_wait_event = threading.Event()

  self._status = definitions.STATUS_INDICATOR_ANALYZING

  task = tasks.Task()
  task.storage_format = definitions.STORAGE_FORMAT_SQLITE
  # TODO: temporary solution.
  task.identifier = self._analysis_plugin.plugin_name

  self._task = task

  task_storage_writer = self._storage_factory.CreateTaskStorageWriter(
      definitions.STORAGE_FORMAT_SQLITE)

  if self._serializers_profiler:
    task_storage_writer.SetSerializersProfiler(self._serializers_profiler)

  if self._storage_profiler:
    task_storage_writer.SetStorageProfiler(self._storage_profiler)

  storage_file_path = self._GetTaskStorageFilePath(
      definitions.STORAGE_FORMAT_SQLITE, task)
  task_storage_writer.Open(path=storage_file_path)

  self._analysis_mediator = self._CreateAnalysisMediator(
      self._session, self._knowledge_base, self._processing_configuration,
      self._data_location)
  self._analysis_mediator.SetStorageWriter(task_storage_writer)

  # TODO: set event_filter_expression in mediator.

  task_storage_writer.AddAttributeContainer(task)

  try:
    logger.debug(
        '{0!s} (PID: {1:d}) started monitoring event queue.'.format(
            self._name, self._pid))

    while not self._abort:
      try:
        queued_object = self._event_queue.PopItem()

      except (errors.QueueClose, errors.QueueEmpty) as exception:
        logger.debug(
            'ConsumeItems exiting with exception {0!s}.'.format(
                type(exception)))
        break

      # A QueueAbort marker signals the plugin process to stop consuming.
      if isinstance(queued_object, plaso_queue.QueueAbort):
        logger.debug('ConsumeItems exiting, dequeued QueueAbort object.')
        break

      self._ProcessEvent(self._analysis_mediator, *queued_object)

      self._number_of_consumed_events += 1

    logger.debug(
        '{0!s} (PID: {1:d}) stopped monitoring event queue.'.format(
            self._name, self._pid))

    # Only produce the analysis report when processing was not aborted.
    if not self._abort:
      self._status = definitions.STATUS_INDICATOR_REPORTING

      self._analysis_mediator.ProduceAnalysisReport(self._analysis_plugin)

  # All exceptions need to be caught here to prevent the process
  # from being killed by an uncaught exception.
  except Exception as exception:  # pylint: disable=broad-except
    logger.warning(
        'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
            self._name, self._pid))
    logger.exception(exception)

    self._abort = True

  finally:
    # Persist the (possibly aborted) task state and close the task storage
    # even when processing failed.
    task.aborted = self._abort
    task_storage_writer.UpdateAttributeContainer(task)

    task_storage_writer.Close()

    if self._serializers_profiler:
      task_storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      task_storage_writer.SetStorageProfiler(None)

  try:
    self._FinalizeTaskStorageWriter(
        definitions.STORAGE_FORMAT_SQLITE, task)
  except IOError as exception:
    logger.warning(
        'Unable to finalize task storage with error: {0!s}'.format(
            exception))

  if self._abort:
    self._status = definitions.STATUS_INDICATOR_ABORTED
  else:
    self._status = definitions.STATUS_INDICATOR_COMPLETED

  # Give the foreman a bounded amount of time to observe the final status
  # before the process tears down.
  logger.debug('Wait for foreman status wait event')
  self._foreman_status_wait_event.clear()
  self._foreman_status_wait_event.wait(self._FOREMAN_STATUS_WAIT)

  logger.debug('Analysis plugin: {0!s} (PID: {1:d}) stopped'.format(
      self._name, self._pid))

  self._StopProfiling()

  self._analysis_mediator = None
  self._foreman_status_wait_event = None
  self._task = None

  try:
    self._event_queue.Close(abort=self._abort)
  except errors.QueueAlreadyClosed:
    logger.error('Queue for {0:s} was already closed.'.format(self.name))
def _MergeTaskStorage(self, storage_writer, session_identifier):
  """Merges a task storage with the session storage.

  This function checks all task stores that are ready to merge and updates
  the scheduled tasks. Note that to prevent this function holding up
  the task scheduling loop only the first available task storage is merged.

  Args:
    storage_writer (StorageWriter): storage writer for a session storage
        used to merge task storage.
    session_identifier (str): the identifier of the session the tasks are
        part of.
  """
  if self._processing_profiler:
    self._processing_profiler.StartTiming('merge_check')

  task_identifiers = self._GetProcessedTaskIdentifiers(
      self._task_storage_format, session_identifier)

  for task_identifier in task_identifiers:
    try:
      task = self._task_manager.GetProcessedTaskByIdentifier(task_identifier)

      self._task_manager.SampleTaskStatus(task, 'processed')

      to_merge = self._task_manager.CheckTaskToMerge(task)
      if not to_merge:
        # The task produced no results to merge; discard its storage and
        # remove it from the task manager.
        self._RemoveProcessedTaskStorage(self._task_storage_format, task)

        self._task_manager.RemoveTask(task)
        self._task_manager.SampleTaskStatus(task, 'removed_processed')

      else:
        self._PrepareMergeTaskStorage(self._task_storage_format, task)
        self._task_manager.UpdateTaskAsPendingMerge(task)

    except KeyError as exception:
      logger.error(
          'Unable to retrieve task: {0:s} to prepare it to be merged '
          'with error: {1!s}.'.format(task_identifier, exception))
      continue

  if self._processing_profiler:
    self._processing_profiler.StopTiming('merge_check')

  # Only pick up a new task to merge when there is no merge on hold;
  # otherwise the on-hold merge is resumed below once the current one
  # is fully merged.
  task = None
  if not self._task_merge_helper_on_hold:
    task = self._task_manager.GetTaskPendingMerge(self._merge_task)

  if task or self._task_merge_helper:
    self._status = definitions.STATUS_INDICATOR_MERGING

    if self._processing_profiler:
      self._processing_profiler.StartTiming('merge')

    if task:
      if self._task_merge_helper:
        # A new task preempts the in-progress merge; park the current
        # merge so it can be resumed later.
        self._merge_task_on_hold = self._merge_task
        self._task_merge_helper_on_hold = self._task_merge_helper

        self._task_manager.SampleTaskStatus(
            self._merge_task_on_hold, 'merge_on_hold')

      self._merge_task = task
      try:
        task_storage_reader = self._GetMergeTaskStorage(
            self._task_storage_format, task)

        self._task_merge_helper = merge_helpers.ExtractionTaskMergeHelper(
            task_storage_reader, task.identifier)

        self._task_manager.SampleTaskStatus(task, 'merge_started')

      except IOError as exception:
        logger.error((
            'Unable to merge results of task: {0:s} '
            'with error: {1!s}').format(task.identifier, exception))
        self._task_merge_helper = None

    if self._task_merge_helper:
      merge_duration = time.time()

      number_of_containers = self._MergeAttributeContainers(
          storage_writer, self._task_merge_helper,
          maximum_number_of_containers=self._maximum_number_of_containers)

      merge_duration = time.time() - merge_duration

      fully_merged = self._task_merge_helper.fully_merged

      if merge_duration > 0.0 and number_of_containers > 0:
        # Limit the number of attribute containers from a single task-based
        # storage file that are merged per loop to keep tasks flowing.
        containers_per_second = number_of_containers / merge_duration
        maximum_number_of_containers = int(0.5 * containers_per_second)

        if fully_merged:
          # Only grow the limit when the task finished merging, to avoid
          # shrinking throughput based on a partial merge sample.
          self._maximum_number_of_containers = max(
              self._maximum_number_of_containers,
              maximum_number_of_containers)

        else:
          self._maximum_number_of_containers = maximum_number_of_containers

    else:
      # TODO: Do something more sensible when this happens, perhaps
      # retrying the task once that is implemented. For now, we mark the task
      # as fully merged because we can't continue with it.
      fully_merged = True

    if self._processing_profiler:
      self._processing_profiler.StopTiming('merge')

    if fully_merged:
      # NOTE(review): if _GetMergeTaskStorage raised IOError above,
      # _task_merge_helper is None while fully_merged is True, so this
      # Close() call would raise AttributeError — confirm whether that
      # path is reachable and intended.
      self._task_merge_helper.Close()

      self._RemoveMergeTaskStorage(
          self._task_storage_format, self._merge_task)

      try:
        self._task_manager.CompleteTask(self._merge_task)

      except KeyError as exception:
        logger.error(
            'Unable to complete task: {0:s} with error: {1!s}'.format(
                self._merge_task.identifier, exception))

      if not self._task_merge_helper_on_hold:
        self._merge_task = None
        self._task_merge_helper = None
      else:
        # Resume the merge that was parked when this task preempted it.
        self._merge_task = self._merge_task_on_hold
        self._task_merge_helper = self._task_merge_helper_on_hold

        self._merge_task_on_hold = None
        self._task_merge_helper_on_hold = None

        self._task_manager.SampleTaskStatus(
            self._merge_task, 'merge_resumed')

    self._status = definitions.STATUS_INDICATOR_RUNNING
def _UpdateProcessingStatus(self, pid, process_status, used_memory):
  """Updates the processing status of a worker process.

  Args:
    pid (int): process identifier (PID) of the worker process.
    process_status (dict[str, object]): status values received from
        the worker process.
    used_memory (int): size of used memory in bytes.

  Raises:
    KeyError: if the process is not registered with the engine.
  """
  self._RaiseIfNotRegistered(pid)

  # Nothing to update when the worker did not report any status values.
  if not process_status:
    return

  process = self._processes_per_pid[pid]

  status_indicator = process_status.get('processing_status', None)

  self._RaiseIfNotMonitored(pid)

  display_name = process_status.get('display_name', '')

  # Retrieve the event, report and source counters in one sweep; a missing
  # counter is reported as None.
  counter_keys = (
      'number_of_consumed_event_tags', 'number_of_produced_event_tags',
      'number_of_consumed_events', 'number_of_produced_events',
      'number_of_consumed_reports', 'number_of_produced_reports',
      'number_of_consumed_sources', 'number_of_produced_sources')

  (number_of_consumed_event_tags, number_of_produced_event_tags,
   number_of_consumed_events, number_of_produced_events,
   number_of_consumed_reports, number_of_produced_reports,
   number_of_consumed_sources, number_of_produced_sources) = [
       process_status.get(counter_key, None) for counter_key in counter_keys]

  if status_indicator != definitions.STATUS_INDICATOR_IDLE:
    last_activity_timestamp = process_status.get(
        'last_activity_timestamp', 0.0)

    if last_activity_timestamp:
      # A worker that has not reported activity before the deadline is
      # flagged as not responding.
      activity_deadline = last_activity_timestamp + self._worker_timeout
      if time.time() > activity_deadline:
        logger.error((
            'Process {0:s} (PID: {1:d}) has not reported activity within '
            'the timeout period.').format(process.name, pid))
        status_indicator = definitions.STATUS_INDICATOR_NOT_RESPONDING

  self._processing_status.UpdateWorkerStatus(
      process.name, status_indicator, pid, used_memory, display_name,
      number_of_consumed_sources, number_of_produced_sources,
      number_of_consumed_events, number_of_produced_events,
      number_of_consumed_event_tags, number_of_produced_event_tags,
      number_of_consumed_reports, number_of_produced_reports)
def AnalyzeEvents(
    self, session, knowledge_base_object, storage_writer, data_location,
    analysis_plugins, processing_configuration, event_filter=None,
    event_filter_expression=None, status_update_callback=None,
    storage_file_path=None):
  """Analyzes events in a Plaso storage.

  Args:
    session (Session): session in which the events are analyzed.
    knowledge_base_object (KnowledgeBase): contains information from
        the source data needed for processing.
    storage_writer (StorageWriter): storage writer.
    data_location (str): path to the location that data files should
        be loaded from.
    analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
        should be run and their names.
    processing_configuration (ProcessingConfiguration): processing
        configuration.
    event_filter (Optional[EventObjectFilter]): event filter.
    event_filter_expression (Optional[str]): event filter expression.
    status_update_callback (Optional[function]): callback function for status
        updates.
    storage_file_path (Optional[str]): path to the session storage file.

  Returns:
    ProcessingStatus: processing status.

  Raises:
    KeyboardInterrupt: if a keyboard interrupt was raised.
    ValueError: if analysis plugins are missing.
  """
  if not analysis_plugins:
    raise ValueError('Missing analysis plugins')

  abort_kill = False
  keyboard_interrupt = False
  queue_full = False

  self._analysis_plugins = {}
  self._data_location = data_location
  self._event_filter_expression = event_filter_expression
  self._events_status = processing_status.EventsStatus()
  self._knowledge_base = knowledge_base_object
  self._processing_configuration = processing_configuration
  self._session = session
  self._status_update_callback = status_update_callback
  self._storage_file_path = storage_file_path

  # Index previously stored event label counts by label, so the counts
  # gathered in this run can be added to them after analysis.
  stored_event_labels_counter = {}
  if storage_writer.HasAttributeContainers('event_label_count'):
    stored_event_labels_counter = {
        event_label_count.label: event_label_count
        for event_label_count in storage_writer.GetAttributeContainers(
            'event_label_count')}

  self._event_labels_counter = collections.Counter()

  if storage_writer.HasAttributeContainers('parser_count'):
    parsers_counter = {
        parser_count.name: parser_count.number_of_events
        for parser_count in storage_writer.GetAttributeContainers(
            'parser_count')}
    # NOTE(review): assumes a 'total' parser count container is always
    # stored — raises KeyError otherwise; confirm against the writer side.
    total_number_of_events = parsers_counter['total']
  else:
    # Fall back to summing the per-session parser totals stored in older
    # format storage files.
    total_number_of_events = 0
    for stored_session in storage_writer.GetSessions():
      total_number_of_events += stored_session.parsers_counter['total']

  self._events_status.total_number_of_events = total_number_of_events

  # Set up the storage writer before the analysis processes.
  self._StartTaskStorage(definitions.STORAGE_FORMAT_SQLITE)

  self._StartAnalysisProcesses(analysis_plugins)

  self._StartProfiling(self._processing_configuration.profiling)

  # Start the status update thread after open of the storage writer
  # so we don't have to clean up the thread if the open fails.
  self._StartStatusUpdateThread()

  try:
    self._AnalyzeEvents(
        storage_writer, analysis_plugins, event_filter=event_filter)

    # Fold the event label counts gathered during analysis into the
    # previously stored counts, creating new containers where needed.
    for key, value in self._event_labels_counter.items():
      event_label_count = stored_event_labels_counter.get(key, None)
      if event_label_count:
        event_label_count.number_of_events += value
        storage_writer.UpdateAttributeContainer(event_label_count)
      else:
        event_label_count = counts.EventLabelCount(
            label=key, number_of_events=value)
        storage_writer.AddAttributeContainer(event_label_count)

    self._status = definitions.STATUS_INDICATOR_FINALIZING

  except errors.QueueFull:
    queue_full = True
    self._abort = True

  except KeyboardInterrupt:
    # Re-raised after cleanup below, so shutdown still runs.
    keyboard_interrupt = True
    self._abort = True

  finally:
    self._processing_status.aborted = self._abort
    session.aborted = self._abort

    # Stop the status update thread after close of the storage writer
    # so we include the storage sync to disk in the status updates.
    self._StopStatusUpdateThread()

  self._StopProfiling()

  # Update the status view one last time before the analysis processes are
  # stopped.
  self._UpdateStatus()

  if queue_full:
    # TODO: handle abort on queue full more elegant.
    abort_kill = True
  else:
    try:
      self._StopAnalysisProcesses(abort=self._abort)

    except KeyboardInterrupt:
      keyboard_interrupt = True
      abort_kill = True

  if abort_kill:
    self._AbortKill()

    # The abort can leave the main process unresponsive
    # due to incorrectly finalized IPC.
    self._KillProcess(os.getpid())

  try:
    self._StopTaskStorage(
        definitions.STORAGE_FORMAT_SQLITE, session.identifier,
        abort=self._abort)
  except (IOError, OSError) as exception:
    logger.error(
        'Unable to stop task storage with error: {0!s}'.format(exception))

  if self._abort:
    logger.debug('Analysis aborted.')
  else:
    logger.debug('Analysis completed.')

  # Update the status view one last time.
  self._UpdateStatus()

  # Reset values.
  self._analysis_plugins = {}
  self._data_location = None
  self._event_filter_expression = None
  self._knowledge_base = None
  self._processing_configuration = None
  self._session = None
  self._status_update_callback = None
  self._storage_file_path = None

  if keyboard_interrupt:
    raise KeyboardInterrupt

  return self._processing_status
def __init__(
    self, maximum_number_of_tasks=None, number_of_worker_processes=0,
    worker_memory_limit=None, worker_timeout=None):
  """Initializes an extraction multi-process engine.

  Args:
    maximum_number_of_tasks (Optional[int]): maximum number of concurrent
        tasks, where 0 represents no limit.
    number_of_worker_processes (Optional[int]): number of worker processes.
    worker_memory_limit (Optional[int]): maximum amount of memory a worker is
        allowed to consume, where None represents the default memory limit
        and 0 represents no limit.
    worker_timeout (Optional[float]): number of minutes before a worker
        process that is not providing status updates is considered inactive,
        where None or 0.0 represents the default timeout.
  """
  if maximum_number_of_tasks is None:
    maximum_number_of_tasks = self._MAXIMUM_NUMBER_OF_TASKS

  if number_of_worker_processes < 1:
    # Default to one worker per "available" CPU: the engine starts up a
    # main process of its own, so one CPU is reserved for it and the
    # resulting count is clamped to the supported minimum and maximum.
    try:
      available_cpus = multiprocessing.cpu_count() - 1
      available_cpus = min(
          max(available_cpus, self._WORKER_PROCESSES_MINIMUM),
          self._WORKER_PROCESSES_MAXIMUM)

    except NotImplementedError:
      logger.error((
          'Unable to determine number of CPUs defaulting to {0:d} worker '
          'processes.').format(self._WORKER_PROCESSES_MINIMUM))
      available_cpus = self._WORKER_PROCESSES_MINIMUM

    number_of_worker_processes = available_cpus

  if worker_memory_limit is None:
    worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT

  # None and 0.0 both fall back to the default timeout.
  worker_timeout = worker_timeout or definitions.DEFAULT_WORKER_TIMEOUT

  super(ExtractionMultiProcessEngine, self).__init__()

  # Worker and task bookkeeping.
  self._enable_sigsegv_handler = False
  self._last_worker_number = 0
  self._maximum_number_of_containers = 50
  self._maximum_number_of_tasks = maximum_number_of_tasks
  self._merge_task = None
  self._merge_task_on_hold = None

  # Consumed and produced attribute container counters.
  self._number_of_consumed_events = 0
  self._number_of_consumed_event_tags = 0
  self._number_of_consumed_extraction_warnings = 0
  self._number_of_consumed_reports = 0
  self._number_of_consumed_sources = 0
  self._number_of_produced_events = 0
  self._number_of_produced_event_tags = 0
  self._number_of_produced_extraction_warnings = 0
  self._number_of_produced_reports = 0
  self._number_of_produced_sources = 0

  self._number_of_worker_processes = number_of_worker_processes
  self._parsers_counter = None
  self._path_spec_extractor = extractors.PathSpecExtractor()
  self._resolver_context = context.Context()
  self._status = definitions.STATUS_INDICATOR_IDLE
  self._storage_merge_reader = None
  self._storage_merge_reader_on_hold = None
  self._task_manager = task_manager.TaskManager()
  self._task_queue = None
  self._task_queue_port = None
  self._task_storage_format = None
  self._worker_memory_limit = worker_memory_limit
  self._worker_timeout = worker_timeout