def ProduceExtractionWarning(self, message, path_spec=None):
  """Produces an extraction warning.

  Args:
    message (str): message of the warning.
    path_spec (Optional[dfvfs.PathSpec]): path specification, where None
        will use the path specification of current file entry set in the
        mediator.

  Raises:
    RuntimeError: when storage writer is not set.
  """
  if not self._storage_writer:
    raise RuntimeError('Storage writer not set.')

  # Fall back to the path specification of the current file entry when no
  # explicit path specification was provided.
  if not path_spec:
    current_file_entry = self._file_entry
    if current_file_entry:
      path_spec = current_file_entry.path_spec

  warning = warnings.ExtractionWarning(
      message=message, parser_chain=self.GetParserChain(),
      path_spec=path_spec)
  self._storage_writer.AddWarning(warning)

  self._number_of_warnings += 1
  self.last_activity_timestamp = time.time()
def testGetAttributeNames(self):
  """Tests the GetAttributeNames function."""
  attribute_container = warnings.ExtractionWarning()

  expected_attribute_names = ['message', 'parser_chain', 'path_spec']

  attribute_names = sorted(attribute_container.GetAttributeNames())

  self.assertEqual(attribute_names, expected_attribute_names)
def testAddWarning(self):
  """Tests the AddWarning function."""
  warning = warnings.ExtractionWarning(message='Test extraction warning')

  with shared_test_lib.TempDirectory() as temp_directory:
    temp_path = os.path.join(temp_directory, 'plaso.sqlite')

    test_store = sqlite_file.SQLiteStorageFile()
    test_store.Open(path=temp_path, read_only=False)

    test_store.AddWarning(warning)

    test_store.Close()
def testAddExtractionWarning(self):
  """Tests the AddExtractionWarning function."""
  test_session = sessions.Session()
  extraction_warning = warnings.ExtractionWarning(
      message='Test extraction warning')

  test_writer = fake_writer.FakeStorageWriter(test_session)
  test_writer.Open()

  test_writer.AddExtractionWarning(extraction_warning)

  test_writer.Close()

  # Adding a warning to a closed storage writer is expected to fail.
  with self.assertRaises(IOError):
    test_writer.AddExtractionWarning(extraction_warning)
def _GetExtractionErrorsAsWarnings(self):
  """Retrieves errors from the store, and converts them to warnings.

  This method is for backwards compatibility with pre-20190309 storage
  format stores which used ExtractionError attribute containers.

  Yields:
    ExtractionWarning: extraction warnings.
  """
  for extraction_error in self._GetAttributeContainers(
      self._CONTAINER_TYPE_EXTRACTION_ERROR):
    # Copy the error attributes onto a warning container, since both
    # containers share the same attribute names.
    error_attributes = extraction_error.CopyToDict()

    warning = warnings.ExtractionWarning()
    warning.CopyFromDict(error_attributes)
    yield warning
def testGetWarnings(self):
  """Tests the GetWarnings function."""
  warning = warnings.ExtractionWarning(message='Test extraction warning')

  with shared_test_lib.TempDirectory() as temp_directory:
    temp_path = os.path.join(temp_directory, 'plaso.sqlite')

    # Write a single warning to a new storage file.
    test_store = sqlite_file.SQLiteStorageFile()
    test_store.Open(path=temp_path, read_only=False)
    test_store.AddWarning(warning)
    test_store.Close()

    # Re-open the storage file and read the warning back.
    test_store = sqlite_file.SQLiteStorageFile()
    test_store.Open(path=temp_path)

    retrieved_warnings = list(test_store.GetWarnings())
    self.assertEqual(len(retrieved_warnings), 1)

    test_store.Close()
def _ScheduleTasks(self, storage_writer):
  """Schedules tasks.

  Args:
    storage_writer (StorageWriter): storage writer for a session storage.
  """
  logger.debug('Task scheduler started')

  self._status = definitions.STATUS_INDICATOR_RUNNING

  # TODO: make tasks persistent.

  # TODO: protect task scheduler loop by catch all and
  # handle abort path.

  event_source_heap = _EventSourceHeap()

  self._FillEventSourceHeap(
      storage_writer, event_source_heap, start_with_first=True)

  event_source = event_source_heap.PopEventSource()

  task = None
  # Keep scheduling while there are event sources to process or tasks that
  # have not completed.
  while event_source or self._task_manager.HasPendingTasks():
    if self._abort:
      break

    try:
      # Retrying a previously failed task takes priority over creating a
      # new task from the next event source.
      if not task:
        task = self._task_manager.CreateRetryTask()

      if not task and event_source:
        task = self._task_manager.CreateTask(self._session_identifier)
        task.file_entry_type = event_source.file_entry_type
        task.path_spec = event_source.path_spec
        event_source = None

        self._number_of_consumed_sources += 1

        if self._guppy_memory_profiler:
          self._guppy_memory_profiler.Sample()

      if task:
        if self._ScheduleTask(task):
          logger.debug(
              'Scheduled task {0:s} for path specification {1:s}'.format(
                  task.identifier, task.path_spec.comparable))

          self._task_manager.SampleTaskStatus(task, 'scheduled')

          task = None

        else:
          # Keep the task around so scheduling is retried on the next
          # iteration of the loop.
          self._task_manager.SampleTaskStatus(task, 'schedule_attempted')

      self._MergeTaskStorage(storage_writer)

      if not event_source_heap.IsFull():
        self._FillEventSourceHeap(storage_writer, event_source_heap)

      # Only pop the next event source once the current task has been
      # scheduled and the current event source consumed.
      if not task and not event_source:
        event_source = event_source_heap.PopEventSource()

    except KeyboardInterrupt:
      self._abort = True

      self._processing_status.aborted = True
      if self._status_update_callback:
        self._status_update_callback(self._processing_status)

  # Record a warning for every task that could not be processed and surface
  # its path specification as an error.
  for task in self._task_manager.GetFailedTasks():
    warning = warnings.ExtractionWarning(
        message='Worker failed to process path specification',
        path_spec=task.path_spec)
    self._storage_writer.AddWarning(warning)
    self._processing_status.error_path_specs.append(task.path_spec)

  self._status = definitions.STATUS_INDICATOR_IDLE

  if self._abort:
    logger.debug('Task scheduler aborted')
  else:
    logger.debug('Task scheduler stopped')
def _ScheduleTasks(self, storage_writer, session_identifier):
  """Schedules tasks.

  Args:
    storage_writer (StorageWriter): storage writer for a session storage.
    session_identifier (str): the identifier of the session the tasks are
        part of.
  """
  logger.debug('Task scheduler started')

  self._status = definitions.STATUS_INDICATOR_RUNNING

  # TODO: make tasks persistent.

  # TODO: protect task scheduler loop by catch all and
  # handle abort path.

  event_source_heap = _EventSourceHeap()

  self._FillEventSourceHeap(
      storage_writer, event_source_heap, start_with_first=True)

  event_source = event_source_heap.PopEventSource()

  task = None
  has_pending_tasks = True

  # Keep scheduling while there are event sources to process or tasks that
  # have not completed.
  while event_source or has_pending_tasks:
    if self._abort:
      break

    try:
      # Retrying a previously failed task takes priority over creating a
      # new task from the next event source.
      if not task:
        task = self._task_manager.CreateRetryTask()

      if not task and event_source:
        task = self._task_manager.CreateTask(
            session_identifier, storage_format=self._task_storage_format)
        task.file_entry_type = event_source.file_entry_type
        task.path_spec = event_source.path_spec
        event_source = None

        self._number_of_consumed_sources += 1

      if task:
        if not self._ScheduleTask(task):
          # Keep the task around so scheduling is retried on the next
          # iteration of the loop.
          self._task_manager.SampleTaskStatus(task, 'schedule_attempted')

        else:
          path_spec_string = self._GetPathSpecificationString(task.path_spec)
          logger.debug(
              'Scheduled task: {0:s} for path specification: {1:s}'.format(
                  task.identifier, path_spec_string.replace('\n', ' ')))

          self._task_manager.SampleTaskStatus(task, 'scheduled')
          task = None

      # Limit the number of attribute containers from a single task-based
      # storage file that are merged per loop to keep tasks flowing.
      merge_duration = time.time()

      number_of_containers = self._MergeTaskStorage(
          storage_writer, session_identifier,
          maximum_number_of_containers=self._maximum_number_of_containers)

      merge_duration = time.time() - merge_duration

      # Adapt the merge limit to the observed merge throughput, targeting
      # roughly half a second of merging per loop iteration.
      if merge_duration > 0.0 and number_of_containers > 0:
        containers_per_second = number_of_containers / merge_duration
        self._maximum_number_of_containers = int(
            0.5 * containers_per_second)

      if not event_source_heap.IsFull():
        self._FillEventSourceHeap(storage_writer, event_source_heap)
      else:
        logger.debug('Event source heap is full.')

      # Only pop the next event source once the current task has been
      # scheduled and the current event source consumed.
      if not task and not event_source:
        event_source = event_source_heap.PopEventSource()

      has_pending_tasks = self._task_manager.HasPendingTasks()

    except KeyboardInterrupt:
      if self._debug_output:
        traceback.print_exc()
      self._abort = True

      self._processing_status.aborted = True
      if self._status_update_callback:
        self._status_update_callback(self._processing_status)

  # Record a warning for every task that could not be processed and surface
  # its path specification as an error.
  for task in self._task_manager.GetFailedTasks():
    warning = warnings.ExtractionWarning(
        message='Worker failed to process path specification',
        path_spec=task.path_spec)
    self._storage_writer.AddAttributeContainer(warning)
    self._processing_status.error_path_specs.append(task.path_spec)

  self._status = definitions.STATUS_INDICATOR_IDLE

  if self._abort:
    logger.debug('Task scheduler aborted')
  else:
    logger.debug('Task scheduler stopped')