def _ProcessPathSpec(self, extraction_worker, parser_mediator, path_spec):
    """Processes a single path specification with the extraction worker.

    The display name of the path specification is stored in
    _current_display_name before the extraction worker is invoked. A dfVFS
    cache full error sets the abort flag; every other exception is handed
    to the parser mediator as an extraction error.

    Args:
      extraction_worker (worker.ExtractionWorker): extraction worker.
      parser_mediator (ParserMediator): parser mediator.
      path_spec (dfvfs.PathSpec): path specification.
    """
    self._current_display_name = (
        parser_mediator.GetDisplayNameForPathSpec(path_spec))

    try:
        extraction_worker.ProcessPathSpec(parser_mediator, path_spec)

    except dfvfs_errors.CacheFullError:
        # TODO: signal engine of failure.
        self._abort = True
        abort_message = (
            'ABORT: detected cache full error while processing path spec: '
            '{0:s}').format(self._current_display_name)
        logger.error(abort_message)

    except Exception as exception:  # pylint: disable=broad-except
        error_message = (
            'unable to process path specification with error: '
            '{0!s}').format(exception)
        parser_mediator.ProduceExtractionError(
            error_message, path_spec=path_spec)

        # The traceback is only logged when debug output was requested.
        if not self._processing_configuration.debug_output:
            return

        debug_message = (
            'Unhandled exception while processing path specification: '
            '{0:s}.').format(self._current_display_name)
        logger.warning(debug_message)
        logger.exception(exception)
def __init__(self, maximum_number_of_queued_items=0, timeout=None):
    """Initializes a multi-processing queue.

    A requested size larger than the platform semaphore maximum is reduced
    to that maximum and a warning is logged.

    Args:
      maximum_number_of_queued_items (Optional[int]): maximum number of
          queued items, where 0 represents no limit.
      timeout (Optional[float]): number of seconds for the get to time out,
          where None will block until a new item is put onto the queue.
    """
    super(MultiProcessingQueue, self).__init__()
    self._timeout = timeout

    # The largest queue the platform supports is only exposed through
    # an internal multiprocessing value.
    # pylint: disable=no-member,protected-access
    platform_limit = _multiprocessing.SemLock.SEM_VALUE_MAX
    # pylint: enable=no-member,protected-access

    # maxsize is the number of items allowed to be queued, where 0
    # represents unlimited.
    queue_size = maximum_number_of_queued_items
    if queue_size > platform_limit:
        warning_message = (
            'Requested maximum queue size: {0:d} is larger than the maximum '
            'size supported by the system. Defaulting to: {1:d}').format(
                queue_size, platform_limit)
        logger.warning(warning_message)
        queue_size = platform_limit

    # This queue appears not to be FIFO.
    self._queue = multiprocessing.Queue(maxsize=queue_size)
def _ProcessPathSpec(self, extraction_worker, parser_mediator, path_spec):
    """Runs the extraction worker on one path specification.

    The display name is recorded in _current_display_name first. A dfVFS
    cache full error marks the process for abort, while any other exception
    is reported through the parser mediator as an extraction warning.

    Args:
      extraction_worker (worker.ExtractionWorker): extraction worker.
      parser_mediator (ParserMediator): parser mediator.
      path_spec (dfvfs.PathSpec): path specification.
    """
    display_name = parser_mediator.GetDisplayNameForPathSpec(path_spec)
    self._current_display_name = display_name

    try:
        extraction_worker.ProcessPathSpec(parser_mediator, path_spec)

    except dfvfs_errors.CacheFullError:
        # TODO: signal engine of failure.
        self._abort = True
        logger.error(
            'ABORT: detected cache full error while processing path spec: '
            '{0:s}'.format(self._current_display_name))

    except Exception as exception:  # pylint: disable=broad-except
        warning_text = (
            'unable to process path specification with error: '
            '{0!s}').format(exception)
        parser_mediator.ProduceExtractionWarning(
            warning_text, path_spec=path_spec)

        debug_output = self._processing_configuration.debug_output
        if debug_output:
            logger.warning(
                'Unhandled exception while processing path specification: '
                '{0:s}.'.format(self._current_display_name))
            logger.exception(exception)
def _AbortTerminate(self):
    """Terminates every registered process that is still alive.

    Each live process is sent a SIGTERM or equivalent via terminate().
    """
    for pid, process in self._processes_per_pid.items():
        if process.is_alive():
            message = 'Terminating process: {0:s} (PID: {1:d}).'.format(
                process.name, pid)
            logger.warning(message)
            process.terminate()
def _AbortKill(self):
    """Kills every registered process that is still alive.

    Each live process is passed to _KillProcess, which is expected to send
    a SIGKILL or equivalent.
    """
    for pid, process in self._processes_per_pid.items():
        if process.is_alive():
            message = 'Killing process: {0:s} (PID: {1:d}).'.format(
                process.name, pid)
            logger.warning(message)
            self._KillProcess(pid)
def _TerminateProcess(self, process):
    """Terminates a process and kills it when it does not exit in time.

    Args:
      process (MultiProcessBaseProcess): process to terminate.
    """
    pid = process.pid

    logger.warning('Terminating process: (PID: {0:d}).'.format(pid))
    process.terminate()

    # Give the process _PROCESS_JOIN_TIMEOUT seconds to exit on its own.
    process.join(timeout=self._PROCESS_JOIN_TIMEOUT)
    if not process.is_alive():
        return

    logger.warning('Killing process: (PID: {0:d}).'.format(pid))
    self._KillProcess(pid)
def CallFunction(self):
    """Calls the function via RPC.

    Returns:
      object: the value returned by the RPC function or None when there is
          no proxy, the proxy has no such function or the call failed with
          a handled error.
    """
    proxy = self._xmlrpc_proxy
    if proxy is None:
        return None

    rpc_call = getattr(proxy, self._RPC_FUNCTION_NAME, None)
    if rpc_call is None:
        return None

    try:
        result = rpc_call()  # pylint: disable=not-callable
    except (expat.ExpatError, SocketServer.socket.error,
            xmlrpclib.Fault) as exception:
        logger.warning(
            'Unable to make RPC call with error: {0!s}'.format(exception))
        result = None

    return result
def CallFunction(self):
    """Invokes the configured RPC function on the XML RPC proxy.

    Returns:
      object: the result of the RPC call, or None if no proxy is set, the
          proxy does not expose the function or a handled error occurred.
    """
    rpc_call = None
    if self._xmlrpc_proxy is not None:
        rpc_call = getattr(
            self._xmlrpc_proxy, self._RPC_FUNCTION_NAME, None)

    if rpc_call is None:
        return None

    handled_errors = None
    try:
        return rpc_call()  # pylint: disable=not-callable
    except Exception as exception:  # pylint: disable=broad-except
        # The handled error types are only resolved once a call has failed.
        handled_errors = (
            expat.ExpatError, SocketServer.socket.error, xmlrpclib.Fault)
        if not isinstance(exception, handled_errors):
            raise

        logger.warning('Unable to make RPC call with error: {0!s}'.format(
            exception))
        return None
def _ProcessEvent(self, mediator, event):
    """Passes an event to the analysis plugin.

    Any exception raised by the plugin signals an abort. The exception is
    only logged when debug output is enabled.

    Args:
      mediator (AnalysisMediator): mediates interactions between analysis
          plugins and other components, such as storage and dfvfs.
      event (EventObject): event.
    """
    try:
        self._analysis_plugin.ExamineEvent(mediator, event)

    except Exception as exception:  # pylint: disable=broad-except
        self.SignalAbort()

        # TODO: write analysis error.

        if not self._debug_output:
            return

        logger.warning('Unhandled exception while processing event object.')
        logger.exception(exception)
def _StopExtractionProcesses(self, abort=False):
    """Stops the extraction processes.

    The task queue is emptied, one QueueAbort item per registered process is
    pushed without blocking, the processes are joined and the queue is
    closed. Any process that is still alive afterwards is killed.

    Args:
      abort (bool): True to indicate the stop is issued on abort.
    """
    logger.debug('Stopping extraction processes.')
    self._StopMonitoringProcesses()

    # Note that multiprocessing.Queue is very sensitive regarding
    # blocking on either a get or a put. So we try to prevent using
    # any blocking behavior.

    if abort:
        # Signal all the processes to abort.
        self._AbortTerminate()

    logger.debug('Emptying task queue.')
    self._task_queue.Empty()

    # Wake the processes to make sure that they are not blocking
    # waiting for new items on the queue.
    for _ in self._processes_per_pid:
        try:
            self._task_queue.PushItem(plaso_queue.QueueAbort(), block=False)
        except errors.QueueFull:
            # The push is best effort, a full queue is only reported.
            logger.warning(
                'Task queue full, unable to push abort message.')

    # Try waiting for the processes to exit normally.
    self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
    self._task_queue.Close(abort=abort)

    if not abort:
        # Check if the processes are still alive and terminate them if
        # necessary. Note that the queue is closed a second time here, now
        # with abort=True.
        self._AbortTerminate()
        self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
        self._task_queue.Close(abort=True)

    # Kill any lingering processes.
    self._AbortKill()
def _ProcessEvent(self, mediator, event, event_data, event_data_stream):
    """Passes an event and its data to the analysis plugin.

    Exceptions raised by the plugin are logged and not propagated.

    Args:
      mediator (AnalysisMediator): mediates interactions between analysis
          plugins and other components, such as storage and dfvfs.
      event (EventObject): event.
      event_data (EventData): event data.
      event_data_stream (EventDataStream): event data stream.
    """
    arguments = (mediator, event, event_data, event_data_stream)

    try:
        self._analysis_plugin.ExamineEvent(*arguments)

    except Exception as exception:  # pylint: disable=broad-except
        # TODO: write analysis error and change logger to debug only.
        warning_message = 'Unhandled exception while processing event object.'
        logger.warning(warning_message)
        logger.exception(exception)
def Open(self, hostname, port):
    """Opens a RPC communication channel to the server.

    Args:
      hostname (str): hostname or IP address to connect to for requests.
      port (int): port to connect to for requests.

    Returns:
      bool: True if the communication channel was established.
    """
    server_url = 'http://{0:s}:{1:d}'.format(hostname, port)

    try:
        self._xmlrpc_proxy = xmlrpclib.ServerProxy(
            server_url, allow_none=True)
    except SocketServer.socket.error as exception:
        # The exception is formatted with !s since an exception object does
        # not support the "s" format specification and would raise
        # a TypeError instead of logging the warning.
        logger.warning((
            'Unable to connect to RPC server on {0:s}:{1:d} with error: '
            '{2!s}').format(hostname, port, exception))
        return False

    return True
def _StopExtractionProcesses(self, abort=False):
    """Stops the extraction processes.

    The sequence is: stop monitoring, empty the task queue, push one
    QueueAbort item per registered process without blocking, join the
    processes, close the queue and finally kill whatever is still alive.

    Args:
      abort (bool): True to indicate the stop is issued on abort.
    """
    logger.debug('Stopping extraction processes.')
    self._StopMonitoringProcesses()

    # Note that multiprocessing.Queue is very sensitive regarding
    # blocking on either a get or a put. So we try to prevent using
    # any blocking behavior.

    if abort:
        # Signal all the processes to abort.
        self._AbortTerminate()

    logger.debug('Emptying task queue.')
    self._task_queue.Empty()

    # Wake the processes to make sure that they are not blocking
    # waiting for new items on the queue.
    for _ in self._processes_per_pid:
        try:
            self._task_queue.PushItem(plaso_queue.QueueAbort(), block=False)
        except errors.QueueFull:
            # A full queue is reported but does not stop the shutdown.
            logger.warning('Task queue full, unable to push abort message.')

    # Try waiting for the processes to exit normally.
    self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
    self._task_queue.Close(abort=abort)

    if not abort:
        # Check if the processes are still alive and terminate them if
        # necessary. The queue is closed again, this time with abort=True.
        self._AbortTerminate()
        self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
        self._task_queue.Close(abort=True)

    # Kill any lingering processes.
    self._AbortKill()
def _Open(self, hostname, port):
    """Opens the RPC communication channel for clients.

    An XML RPC server is bound to the hostname and port and the callback is
    registered under the RPC function name.

    Args:
      hostname (str): hostname or IP address to connect to for requests.
      port (int): port to connect to for requests.

    Returns:
      bool: True if the communication channel was successfully opened.
    """
    server_address = (hostname, port)

    try:
        rpc_server = SimpleXMLRPCServer.SimpleXMLRPCServer(
            server_address, logRequests=False, allow_none=True)
    except SocketServer.socket.error as exception:
        warning_message = (
            'Unable to bind a RPC server on {0:s}:{1:d} with error: '
            '{2!s}').format(hostname, port, exception)
        logger.warning(warning_message)
        return False

    self._xmlrpc_server = rpc_server
    rpc_server.register_function(self._callback, self._RPC_FUNCTION_NAME)
    return True
def _Open(self, hostname, port):
    """Binds the XML RPC server that clients connect to.

    Args:
      hostname (str): hostname or IP address to connect to for requests.
      port (int): port to connect to for requests.

    Returns:
      bool: True if the communication channel was successfully opened.
    """
    is_open = True

    try:
        self._xmlrpc_server = SimpleXMLRPCServer.SimpleXMLRPCServer(
            (hostname, port), logRequests=False, allow_none=True)
    except SocketServer.socket.error as exception:
        logger.warning(
            'Unable to bind a RPC server on {0:s}:{1:d} with error: '
            '{2!s}'.format(hostname, port, exception))
        is_open = False

    # The callback is only registered when the server could be bound.
    if is_open:
        self._xmlrpc_server.register_function(
            self._callback, self._RPC_FUNCTION_NAME)

    return is_open
def UpdateTaskAsPendingMerge(self, task):
    """Moves a task to the set of tasks that are pending merge.

    The task is removed from the queued, processing and abandoned tasks it
    was part of and its processing time is updated.

    Args:
      task (Task): task.

    Raises:
      KeyError: if the task was not queued, processing or abandoned.
    """
    identifier = task.identifier

    with self._lock:
        is_processing = identifier in self._tasks_processing
        is_abandoned = identifier in self._tasks_abandoned
        is_queued = identifier in self._tasks_queued

        if not is_queued and not is_abandoned and not is_processing:
            raise KeyError(
                'Status of task {0:s} is unknown.'.format(identifier))

        self._tasks_pending_merge.PushTask(task)

        task.UpdateProcessingTime()
        self._UpdateLastestProcessingTime(task)

        # Drop the task from every collection that currently tracks it.
        tracked_tasks = (
            (is_queued, self._tasks_queued),
            (is_processing, self._tasks_processing),
            (is_abandoned, self._tasks_abandoned))
        for is_tracked, tasks_per_identifier in tracked_tasks:
            if is_tracked:
                del tasks_per_identifier[task.identifier]

    if not is_abandoned:
        logger.debug('Task {0:s} is pending merge.'.format(
            task.identifier))
    else:
        logger.warning(
            'Previously abandoned task {0:s} is now pending merge.'.format(
                task.identifier))
def Open(self, hostname, port):
    """Creates the XML RPC proxy used to send requests to the server.

    Args:
      hostname (str): hostname or IP address to connect to for requests.
      port (int): port to connect to for requests.

    Returns:
      bool: True if the communication channel was established.
    """
    server_url = 'http://{0:s}:{1:d}'.format(hostname, port)

    try:
        proxy = xmlrpclib.ServerProxy(server_url, allow_none=True)
    except SocketServer.socket.error as exception:
        warning_message = (
            'Unable to connect to RPC server on {0:s}:{1:d} with error: '
            '{2!s}').format(hostname, port, exception)
        logger.warning(warning_message)
        return False

    self._xmlrpc_proxy = proxy
    return True
def _Main(self):
    """The main loop of the analysis process.

    Events are popped from the event queue and passed to the analysis
    plugin until the queue is closed or empty, a QueueAbort item is dequeued
    or the abort flag is set. Unless aborted, the analysis report is
    produced afterwards. The task storage is always completed and closed.
    """
    self._StartProfiling(self._processing_configuration.profiling)

    if self._serializers_profiler:
        self._storage_writer.SetSerializersProfiler(
            self._serializers_profiler)

    if self._storage_profiler:
        self._storage_writer.SetStorageProfiler(self._storage_profiler)

    logger.debug('Analysis plugin: {0!s} (PID: {1:d}) started'.format(
        self._name, self._pid))

    # Creating the threading event in the constructor will cause a pickle
    # error on Windows when an analysis process is created.
    self._foreman_status_wait_event = threading.Event()
    self._status = definitions.PROCESSING_STATUS_ANALYZING

    task = tasks.Task()
    # TODO: temporary solution.
    task.identifier = self._analysis_plugin.plugin_name

    self._task = task

    storage_writer = self._storage_writer.CreateTaskStorage(task)

    if self._serializers_profiler:
        storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
        storage_writer.SetStorageProfiler(self._storage_profiler)

    storage_writer.Open()

    self._analysis_mediator = analysis_mediator.AnalysisMediator(
        storage_writer, self._knowledge_base,
        data_location=self._data_location)

    # TODO: set event_filter_expression in mediator.

    storage_writer.WriteTaskStart()

    try:
        logger.debug(
            '{0!s} (PID: {1:d}) started monitoring event queue.'.format(
                self._name, self._pid))

        while not self._abort:
            try:
                event = self._event_queue.PopItem()

            except (errors.QueueClose, errors.QueueEmpty) as exception:
                # The exception type is formatted with !s since a type does
                # not support the "s" format specification; the resulting
                # TypeError would be caught below and turn a regular queue
                # close into an abort.
                logger.debug(
                    'ConsumeItems exiting with exception {0!s}.'.format(
                        type(exception)))
                break

            if isinstance(event, plaso_queue.QueueAbort):
                logger.debug(
                    'ConsumeItems exiting, dequeued QueueAbort object.')
                break

            self._ProcessEvent(self._analysis_mediator, event)

            self._number_of_consumed_events += 1

            if self._guppy_memory_profiler:
                self._guppy_memory_profiler.Sample()

        logger.debug(
            '{0!s} (PID: {1:d}) stopped monitoring event queue.'.format(
                self._name, self._pid))

        if not self._abort:
            self._status = definitions.PROCESSING_STATUS_REPORTING

            self._analysis_mediator.ProduceAnalysisReport(
                self._analysis_plugin)

    # All exceptions need to be caught here to prevent the process
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
        logger.warning(
            'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
                self._name, self._pid))
        logger.exception(exception)

        self._abort = True

    finally:
        storage_writer.WriteTaskCompletion(aborted=self._abort)

        storage_writer.Close()

        if self._serializers_profiler:
            storage_writer.SetSerializersProfiler(None)

        if self._storage_profiler:
            storage_writer.SetStorageProfiler(None)

    try:
        self._storage_writer.FinalizeTaskStorage(task)
    except IOError as exception:
        # Finalizing the task storage is best effort, the failure is
        # recorded instead of silently discarded.
        logger.debug(
            'Unable to finalize task storage with error: {0!s}'.format(
                exception))

    if self._abort:
        self._status = definitions.PROCESSING_STATUS_ABORTED
    else:
        self._status = definitions.PROCESSING_STATUS_COMPLETED

    # Wait up to _FOREMAN_STATUS_WAIT seconds on the foreman status event
    # before the process state is torn down.
    self._foreman_status_wait_event.wait(self._FOREMAN_STATUS_WAIT)

    logger.debug('Analysis plugin: {0!s} (PID: {1:d}) stopped'.format(
        self._name, self._pid))

    if self._serializers_profiler:
        self._storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
        self._storage_writer.SetStorageProfiler(None)

    self._StopProfiling()

    self._analysis_mediator = None
    self._foreman_status_wait_event = None
    self._storage_writer = None
    self._task = None

    try:
        self._event_queue.Close(abort=self._abort)
    except errors.QueueAlreadyClosed:
        logger.error('Queue for {0:s} was already closed.'.format(
            self.name))
def _CheckStatusWorkerProcess(self, pid):
    """Checks the status of a worker process.

    If a worker process is not responding the process is terminated and
    a replacement process is started. Starting the replacement is attempted
    up to _MAXIMUM_REPLACEMENT_RETRIES times, with a delay of
    _REPLACEMENT_WORKER_RETRY_DELAY seconds after every failed attempt.

    Args:
      pid (int): process ID (PID) of a registered worker process.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
    # TODO: Refactor this method, simplify and separate concerns (monitoring
    # vs management).
    self._RaiseIfNotRegistered(pid)

    process = self._processes_per_pid[pid]

    process_status = self._QueryProcessStatus(process)
    process_is_alive = process_status is not None

    process_information = self._process_information_per_pid[pid]
    used_memory = process_information.GetUsedMemory() or 0

    if self._worker_memory_limit and used_memory > self._worker_memory_limit:
        logger.warning((
            'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
            'memory limit: {2:d}.').format(
                process.name, pid, self._worker_memory_limit))
        self._KillProcess(pid)

    if isinstance(process_status, dict):
        # A valid status response resets the RPC error count.
        self._rpc_errors_per_pid[pid] = 0
        status_indicator = process_status.get('processing_status', None)

    else:
        rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
        self._rpc_errors_per_pid[pid] = rpc_errors

        # Too many consecutive RPC errors are treated as a dead process.
        if rpc_errors > self._MAXIMUM_RPC_ERRORS:
            process_is_alive = False

        if process_is_alive:
            rpc_port = process.rpc_port.value
            logger.warning((
                'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
                'RPC socket: http://localhost:{2:d}').format(
                    process.name, pid, rpc_port))

            processing_status_string = 'RPC error'
            status_indicator = definitions.PROCESSING_STATUS_RUNNING
        else:
            processing_status_string = 'killed'
            status_indicator = definitions.PROCESSING_STATUS_KILLED

        process_status = {
            'processing_status': processing_status_string}

    self._UpdateProcessingStatus(pid, process_status, used_memory)

    # _UpdateProcessingStatus can also change the status of the worker,
    # so refresh the status if applicable.
    for worker_status in self._processing_status.workers_status:
        if worker_status.pid == pid:
            status_indicator = worker_status.status
            break

    if status_indicator not in definitions.PROCESSING_ERROR_STATUS:
        return

    logger.error((
        'Process {0:s} (PID: {1:d}) is not functioning correctly. '
        'Status code: {2!s}.').format(process.name, pid, status_indicator))

    self._TerminateProcessByPid(pid)

    logger.info('Starting replacement worker process for {0:s}'.format(
        process.name))

    replacement_process = None
    for _ in range(self._MAXIMUM_REPLACEMENT_RETRIES):
        replacement_process = self._StartWorkerProcess(
            process.name, self._storage_writer)

        # Stop as soon as a replacement was started, so that only one
        # replacement process is created.
        if replacement_process:
            break

        # Wait before the next attempt to start a replacement.
        time.sleep(self._REPLACEMENT_WORKER_RETRY_DELAY)

    if not replacement_process:
        logger.error(
            'Unable to create replacement worker process for: {0:s}'.format(
                process.name))
def _GetEventIdentifiers(self, event, event_data, event_data_stream):
    """Retrieves different identifiers of the event.

    The event data attributes and values can be represented as a string and
    used for sorting and uniquely identifying events. This function
    determines multiple identifiers:
    * an identifier of the attributes and values without the timestamp
      description (or usage). This is referred to as the MACB group
      identifier.
    * an identifier of the attributes and values including the timestamp
      description (or usage). This is referred to as the event content
      identifier.

    The identifier without the timestamp description can be used to group
    events that have the same MACB (modification, access, change, birth)
    timestamps. The PsortEventHeap will store these events individually and
    relies on PsortMultiProcessEngine to do the actual grouping of events.

    Attributes without a value and attributes listed in
    _IDENTIFIER_EXCLUDED_ATTRIBUTES are not part of the identifiers. An
    attribute that cannot be converted into a string is logged and skipped.

    Args:
      event (EventObject): event.
      event_data (EventData): event data.
      event_data_stream (EventDataStream): event data stream.

    Returns:
      tuple: containing:

        str: identifier of the event MACB group or None if the event cannot
            be grouped.
        str: identifier of the event content.
    """
    attributes = ['data_type: {0:s}'.format(event_data.data_type)]

    event_attributes = list(event_data.GetAttributes())
    if event_data_stream:
        event_attributes.extend(event_data_stream.GetAttributes())

    is_filestat = event_data.data_type == 'fs:stat'

    for attribute_name, attribute_value in sorted(event_attributes):
        # The filestat parser operates on file entry level and has no event
        # data stream with a path specification. Therefore we need filename
        # and inode to make sure events of different file entries are
        # considered unique.
        is_filestat_key = (
            is_filestat and attribute_name in ('filename', 'inode'))

        if (not is_filestat_key and
                attribute_name in self._IDENTIFIER_EXCLUDED_ATTRIBUTES):
            continue

        if not attribute_value:
            continue

        # Note that support for event_data.pathspec is kept for backwards
        # compatibility. The current value is event_data_stream.path_spec.
        if attribute_name in ('path_spec', 'pathspec'):
            attribute_value = attribute_value.comparable

        elif isinstance(attribute_value, dict):
            attribute_value = sorted(attribute_value.items())

        elif isinstance(attribute_value, set):
            attribute_value = sorted(attribute_value)

        elif isinstance(attribute_value, bytes):
            attribute_value = repr(attribute_value)

        try:
            attribute_string = '{0:s}: {1!s}'.format(
                attribute_name, attribute_value)
        except UnicodeDecodeError:
            logger.error('Failed to decode attribute {0:s}'.format(
                attribute_name))
            # Skip the attribute, otherwise the string of the previous
            # attribute would be added a second time.
            continue

        attributes.append(attribute_string)

    # The 'atime', 'ctime', 'crtime', 'mtime' are included for backwards
    # compatibility with the filestat parser.
    if event.timestamp_desc in (
            'atime', 'ctime', 'crtime', 'mtime',
            definitions.TIME_DESCRIPTION_LAST_ACCESS,
            definitions.TIME_DESCRIPTION_CHANGE,
            definitions.TIME_DESCRIPTION_CREATION,
            definitions.TIME_DESCRIPTION_MODIFICATION):
        macb_group_identifier = ', '.join(attributes)
    else:
        macb_group_identifier = None

    timestamp_desc = event.timestamp_desc
    if timestamp_desc is None:
        logger.warning('Missing timestamp_desc attribute')
        timestamp_desc = definitions.TIME_DESCRIPTION_UNKNOWN

    # The content identifier is the attribute strings prefixed with the
    # timestamp description.
    attributes.insert(0, timestamp_desc)
    content_identifier = ', '.join(attributes)

    return macb_group_identifier, content_identifier
def _CheckStatusAnalysisProcess(self, pid):
    """Checks the status of an analysis process.

    A process that was previously seen as completed is reported as completed
    without being queried again. Otherwise the process status is queried,
    the process is killed when it exceeds the worker memory limit and RPC
    errors are counted. A process with an error status is terminated.

    Args:
      pid (int): process ID (PID) of a registered analysis process.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
    # TODO: Refactor this method, simplify and separate concerns (monitoring
    # vs management).
    self._RaiseIfNotRegistered(pid)

    if pid in self._completed_analysis_processes:
        # The process already reported completion, do not query it again.
        status_indicator = definitions.STATUS_INDICATOR_COMPLETED
        process_status = {
            'processing_status': status_indicator}
        used_memory = 0

    else:
        process = self._processes_per_pid[pid]

        process_status = self._QueryProcessStatus(process)
        if process_status is None:
            process_is_alive = False
        else:
            process_is_alive = True

        process_information = self._process_information_per_pid[pid]
        # A missing memory value is treated as 0.
        used_memory = process_information.GetUsedMemory() or 0

        if self._worker_memory_limit and used_memory > self._worker_memory_limit:
            logger.warning((
                'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
                'memory limit: {2:d}.').format(
                    process.name, pid, self._worker_memory_limit))
            self._KillProcess(pid)

        if isinstance(process_status, dict):
            # A valid status response resets the RPC error count.
            self._rpc_errors_per_pid[pid] = 0
            status_indicator = process_status.get('processing_status', None)

            if status_indicator == definitions.STATUS_INDICATOR_COMPLETED:
                self._completed_analysis_processes.add(pid)

        else:
            rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
            self._rpc_errors_per_pid[pid] = rpc_errors

            # Too many RPC errors are treated as a process that is not alive.
            if rpc_errors > self._MAXIMUM_RPC_ERRORS:
                process_is_alive = False

            if process_is_alive:
                rpc_port = process.rpc_port.value
                logger.warning((
                    'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
                    'RPC socket: http://localhost:{2:d}').format(
                        process.name, pid, rpc_port))

                processing_status_string = 'RPC error'
                status_indicator = definitions.STATUS_INDICATOR_RUNNING
            else:
                processing_status_string = 'killed'
                status_indicator = definitions.STATUS_INDICATOR_KILLED

            # Replace the unusable status response with a synthetic one.
            process_status = {
                'processing_status': processing_status_string}

    self._UpdateProcessingStatus(pid, process_status, used_memory)

    # NOTE(review): process is only assigned in the else branch above. This
    # presumably relies on the completed status indicator not being part of
    # ERROR_STATUS_INDICATORS - confirm.
    if status_indicator in definitions.ERROR_STATUS_INDICATORS:
        logger.error((
            'Process {0:s} (PID: {1:d}) is not functioning correctly. '
            'Status code: {2!s}.').format(
                process.name, pid, status_indicator))

        self._TerminateProcessByPid(pid)
def _CheckStatusAnalysisProcess(self, pid):
    """Checks the status of an analysis process.

    A process that was previously seen as completed is reported as completed
    without being queried again. Otherwise the process status is queried,
    the process is killed when it exceeds the worker memory limit and RPC
    errors are counted. A process with an error status is terminated.

    Args:
      pid (int): process ID (PID) of a registered analysis process.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
    # TODO: Refactor this method, simplify and separate concerns (monitoring
    # vs management).
    self._RaiseIfNotRegistered(pid)

    if pid in self._completed_analysis_processes:
        # The process already reported completion, do not query it again.
        status_indicator = definitions.PROCESSING_STATUS_COMPLETED
        process_status = {
            'processing_status': status_indicator}
        used_memory = 0

    else:
        process = self._processes_per_pid[pid]

        process_status = self._QueryProcessStatus(process)
        if process_status is None:
            process_is_alive = False
        else:
            process_is_alive = True

        process_information = self._process_information_per_pid[pid]
        # A missing memory value is treated as 0.
        used_memory = process_information.GetUsedMemory() or 0

        if self._worker_memory_limit and used_memory > self._worker_memory_limit:
            logger.warning((
                'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
                'memory limit: {2:d}.').format(
                    process.name, pid, self._worker_memory_limit))
            self._KillProcess(pid)

        if isinstance(process_status, dict):
            # A valid status response resets the RPC error count.
            self._rpc_errors_per_pid[pid] = 0
            status_indicator = process_status.get('processing_status', None)

            if status_indicator == definitions.PROCESSING_STATUS_COMPLETED:
                self._completed_analysis_processes.add(pid)

        else:
            rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
            self._rpc_errors_per_pid[pid] = rpc_errors

            # Too many RPC errors are treated as a process that is not alive.
            if rpc_errors > self._MAXIMUM_RPC_ERRORS:
                process_is_alive = False

            if process_is_alive:
                rpc_port = process.rpc_port.value
                logger.warning((
                    'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
                    'RPC socket: http://localhost:{2:d}').format(
                        process.name, pid, rpc_port))

                processing_status_string = 'RPC error'
                status_indicator = definitions.PROCESSING_STATUS_RUNNING
            else:
                processing_status_string = 'killed'
                status_indicator = definitions.PROCESSING_STATUS_KILLED

            # Replace the unusable status response with a synthetic one.
            process_status = {
                'processing_status': processing_status_string}

    self._UpdateProcessingStatus(pid, process_status, used_memory)

    # NOTE(review): process is only assigned in the else branch above. This
    # presumably relies on the completed processing status not being part of
    # PROCESSING_ERROR_STATUS - confirm.
    if status_indicator in definitions.PROCESSING_ERROR_STATUS:
        logger.error((
            'Process {0:s} (PID: {1:d}) is not functioning correctly. '
            'Status code: {2!s}.').format(
                process.name, pid, status_indicator))

        self._TerminateProcessByPid(pid)
def _Main(self):
    """The main loop of the worker process.

    The parser mediator and extraction worker are created, after which
    tasks are popped from the task queue and processed until the queue is
    closed or empty, a QueueAbort item is dequeued or the abort flag is set.
    """
    # We need a resolver context per process to prevent multi processing
    # issues with file objects stored in images.
    resolver_context = context.Context()

    for credential_configuration in self._processing_configuration.credentials:
        resolver.Resolver.key_chain.SetCredential(
            credential_configuration.path_spec,
            credential_configuration.credential_type,
            credential_configuration.credential_data)

    self._parser_mediator = parsers_mediator.ParserMediator(
        None, self._knowledge_base,
        preferred_year=self._processing_configuration.preferred_year,
        resolver_context=resolver_context,
        temporary_directory=self._processing_configuration.temporary_directory)

    self._parser_mediator.SetEventExtractionConfiguration(
        self._processing_configuration.event_extraction)

    self._parser_mediator.SetInputSourceConfiguration(
        self._processing_configuration.input_source)

    # We need to initialize the parser and hasher objects after the process
    # has forked otherwise on Windows the "fork" will fail with
    # a PickleError for Python modules that cannot be pickled.
    self._extraction_worker = worker.EventExtractionWorker(
        parser_filter_expression=(
            self._processing_configuration.parser_filter_expression))

    self._extraction_worker.SetExtractionConfiguration(
        self._processing_configuration.extraction)

    self._parser_mediator.StartProfiling(
        self._processing_configuration.profiling, self._name,
        self._process_information)
    self._StartProfiling(self._processing_configuration.profiling)

    if self._processing_profiler:
        self._extraction_worker.SetProcessingProfiler(
            self._processing_profiler)

    if self._serializers_profiler:
        self._storage_writer.SetSerializersProfiler(
            self._serializers_profiler)

    if self._storage_profiler:
        self._storage_writer.SetStorageProfiler(self._storage_profiler)

    logger.debug('Worker: {0!s} (PID: {1:d}) started.'.format(
        self._name, self._pid))

    self._status = definitions.PROCESSING_STATUS_RUNNING

    try:
        logger.debug(
            '{0!s} (PID: {1:d}) started monitoring task queue.'.format(
                self._name, self._pid))

        while not self._abort:
            try:
                task = self._task_queue.PopItem()

            except (errors.QueueClose, errors.QueueEmpty) as exception:
                # The exception type is formatted with !s since a type does
                # not support the "s" format specification; the resulting
                # TypeError would be caught below and turn a regular queue
                # close into an abort.
                logger.debug(
                    'ConsumeItems exiting with exception {0!s}.'.format(
                        type(exception)))
                break

            if isinstance(task, plaso_queue.QueueAbort):
                logger.debug(
                    'ConsumeItems exiting, dequeued QueueAbort object.')
                break

            self._ProcessTask(task)

        logger.debug(
            '{0!s} (PID: {1:d}) stopped monitoring task queue.'.format(
                self._name, self._pid))

    # All exceptions need to be caught here to prevent the process
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
        logger.warning(
            'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
                self._name, self._pid))
        logger.exception(exception)

        self._abort = True

    if self._processing_profiler:
        self._extraction_worker.SetProcessingProfiler(None)

    if self._serializers_profiler:
        self._storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
        self._storage_writer.SetStorageProfiler(None)

    self._StopProfiling()
    self._parser_mediator.StopProfiling()

    self._extraction_worker = None
    self._parser_mediator = None
    self._storage_writer = None

    if self._abort:
        self._status = definitions.PROCESSING_STATUS_ABORTED
    else:
        self._status = definitions.PROCESSING_STATUS_COMPLETED

    logger.debug('Worker: {0!s} (PID: {1:d}) stopped.'.format(
        self._name, self._pid))

    try:
        self._task_queue.Close(abort=self._abort)
    except errors.QueueAlreadyClosed:
        logger.error('Queue for {0:s} was already closed.'.format(self.name))
def _Main(self):
    """The main loop of the worker process.

    The parser mediator and extraction worker are created, after which
    tasks are popped from the task queue and processed until the queue is
    closed or empty, a QueueAbort item is dequeued or the abort flag is set.
    """
    # We need a resolver context per process to prevent multi processing
    # issues with file objects stored in images.
    resolver_context = context.Context()

    for credential_configuration in self._processing_configuration.credentials:
        resolver.Resolver.key_chain.SetCredential(
            credential_configuration.path_spec,
            credential_configuration.credential_type,
            credential_configuration.credential_data)

    self._parser_mediator = parsers_mediator.ParserMediator(
        None, self._knowledge_base,
        artifacts_filter_helper=self._artifacts_filter_helper,
        preferred_year=self._processing_configuration.preferred_year,
        resolver_context=resolver_context,
        temporary_directory=self._processing_configuration.temporary_directory)

    self._parser_mediator.SetEventExtractionConfiguration(
        self._processing_configuration.event_extraction)

    self._parser_mediator.SetInputSourceConfiguration(
        self._processing_configuration.input_source)

    # We need to initialize the parser and hasher objects after the process
    # has forked otherwise on Windows the "fork" will fail with
    # a PickleError for Python modules that cannot be pickled.
    self._extraction_worker = worker.EventExtractionWorker(
        parser_filter_expression=(
            self._processing_configuration.parser_filter_expression))

    self._extraction_worker.SetExtractionConfiguration(
        self._processing_configuration.extraction)

    self._parser_mediator.StartProfiling(
        self._processing_configuration.profiling, self._name,
        self._process_information)
    self._StartProfiling(self._processing_configuration.profiling)

    if self._processing_profiler:
        self._extraction_worker.SetProcessingProfiler(
            self._processing_profiler)

    if self._serializers_profiler:
        self._storage_writer.SetSerializersProfiler(
            self._serializers_profiler)

    if self._storage_profiler:
        self._storage_writer.SetStorageProfiler(self._storage_profiler)

    logger.debug('Worker: {0!s} (PID: {1:d}) started.'.format(
        self._name, self._pid))

    self._status = definitions.STATUS_INDICATOR_RUNNING

    try:
        logger.debug(
            '{0!s} (PID: {1:d}) started monitoring task queue.'.format(
                self._name, self._pid))

        while not self._abort:
            try:
                task = self._task_queue.PopItem()

            except (errors.QueueClose, errors.QueueEmpty) as exception:
                # The exception type is formatted with !s since a type does
                # not support the "s" format specification; the resulting
                # TypeError would be caught below and turn a regular queue
                # close into an abort.
                logger.debug(
                    'ConsumeItems exiting with exception {0!s}.'.format(
                        type(exception)))
                break

            if isinstance(task, plaso_queue.QueueAbort):
                logger.debug(
                    'ConsumeItems exiting, dequeued QueueAbort object.')
                break

            self._ProcessTask(task)

        logger.debug(
            '{0!s} (PID: {1:d}) stopped monitoring task queue.'.format(
                self._name, self._pid))

    # All exceptions need to be caught here to prevent the process
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
        logger.warning(
            'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
                self._name, self._pid))
        logger.exception(exception)

        self._abort = True

    if self._processing_profiler:
        self._extraction_worker.SetProcessingProfiler(None)

    if self._serializers_profiler:
        self._storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
        self._storage_writer.SetStorageProfiler(None)

    self._StopProfiling()
    self._parser_mediator.StopProfiling()

    self._extraction_worker = None
    self._parser_mediator = None
    self._storage_writer = None

    if self._abort:
        self._status = definitions.STATUS_INDICATOR_ABORTED
    else:
        self._status = definitions.STATUS_INDICATOR_COMPLETED

    logger.debug('Worker: {0!s} (PID: {1:d}) stopped.'.format(
        self._name, self._pid))

    try:
        self._task_queue.Close(abort=self._abort)
    except errors.QueueAlreadyClosed:
        logger.error('Queue for {0:s} was already closed.'.format(self.name))
def _Main(self):
  """The main loop.

  Starts profiling, creates and opens a task storage writer for the
  analysis plugin and then consumes events from the event queue until
  the process is aborted, the queue is closed or empty, or a QueueAbort
  object is dequeued. When not aborted an analysis report is produced.
  Afterwards the task storage is completed, closed and finalized, the
  status is set to aborted or completed, and the event queue is closed.
  """
  self._StartProfiling(self._processing_configuration.profiling)

  if self._serializers_profiler:
    self._storage_writer.SetSerializersProfiler(self._serializers_profiler)

  if self._storage_profiler:
    self._storage_writer.SetStorageProfiler(self._storage_profiler)

  logger.debug('Analysis plugin: {0!s} (PID: {1:d}) started'.format(
      self._name, self._pid))

  # Creating the threading event in the constructor will cause a pickle
  # error on Windows when an analysis process is created.
  self._foreman_status_wait_event = threading.Event()
  self._status = definitions.PROCESSING_STATUS_ANALYZING

  task = tasks.Task()
  # TODO: temporary solution.
  task.identifier = self._analysis_plugin.plugin_name

  self._task = task

  storage_writer = self._storage_writer.CreateTaskStorage(task)

  if self._serializers_profiler:
    storage_writer.SetSerializersProfiler(self._serializers_profiler)

  if self._storage_profiler:
    storage_writer.SetStorageProfiler(self._storage_profiler)

  storage_writer.Open()

  self._analysis_mediator = analysis_mediator.AnalysisMediator(
      storage_writer, self._knowledge_base,
      data_location=self._data_location)

  # TODO: set event_filter_expression in mediator.

  storage_writer.WriteTaskStart()

  try:
    logger.debug(
        '{0!s} (PID: {1:d}) started monitoring event queue.'.format(
            self._name, self._pid))

    while not self._abort:
      try:
        event = self._event_queue.PopItem()

      except (errors.QueueClose, errors.QueueEmpty) as exception:
        # Note that "!s" is used instead of ":s" since a type object does
        # not support a format specification and would raise a TypeError,
        # which in turn would be caught below and mark the process as
        # aborted.
        logger.debug('ConsumeItems exiting with exception {0!s}.'.format(
            type(exception)))
        break

      if isinstance(event, plaso_queue.QueueAbort):
        logger.debug('ConsumeItems exiting, dequeued QueueAbort object.')
        break

      self._ProcessEvent(self._analysis_mediator, event)

      self._number_of_consumed_events += 1

      if self._guppy_memory_profiler:
        self._guppy_memory_profiler.Sample()

    logger.debug(
        '{0!s} (PID: {1:d}) stopped monitoring event queue.'.format(
            self._name, self._pid))

    if not self._abort:
      self._status = definitions.PROCESSING_STATUS_REPORTING

      self._analysis_mediator.ProduceAnalysisReport(self._analysis_plugin)

  # All exceptions need to be caught here to prevent the process
  # from being killed by an uncaught exception.
  except Exception as exception:  # pylint: disable=broad-except
    logger.warning(
        'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
            self._name, self._pid))
    logger.exception(exception)

    self._abort = True

  finally:
    storage_writer.WriteTaskCompletion(aborted=self._abort)

    storage_writer.Close()

    if self._serializers_profiler:
      storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      storage_writer.SetStorageProfiler(None)

  try:
    self._storage_writer.FinalizeTaskStorage(task)
  except IOError as exception:
    # Finalizing the task storage remains best-effort, but the failure is
    # logged instead of being silently ignored.
    logger.warning(
        'Unable to finalize task storage with error: {0!s}'.format(
            exception))

  if self._abort:
    self._status = definitions.PROCESSING_STATUS_ABORTED
  else:
    self._status = definitions.PROCESSING_STATUS_COMPLETED

  # Wait up to _FOREMAN_STATUS_WAIT on the event before tearing down.
  self._foreman_status_wait_event.wait(self._FOREMAN_STATUS_WAIT)

  logger.debug('Analysis plugin: {0!s} (PID: {1:d}) stopped'.format(
      self._name, self._pid))

  if self._serializers_profiler:
    self._storage_writer.SetSerializersProfiler(None)

  if self._storage_profiler:
    self._storage_writer.SetStorageProfiler(None)

  self._StopProfiling()

  self._analysis_mediator = None
  self._foreman_status_wait_event = None
  self._storage_writer = None
  self._task = None

  try:
    self._event_queue.Close(abort=self._abort)
  except errors.QueueAlreadyClosed:
    logger.error('Queue for {0:s} was already closed.'.format(self.name))