def testProcessSources(self):
  """Tests the ProcessSources function."""
  test_artifacts_path = self._GetTestFilePath(['artifacts'])
  self._SkipIfPathNotExists(test_artifacts_path)

  test_file_path = self._GetTestFilePath(['ímynd.dd'])
  self._SkipIfPathNotExists(test_file_path)

  test_engine = extraction_engine.SingleProcessEngine()
  resolver_context = context.Context()

  os_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)
  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
      parent=os_path_spec)

  source_configuration = artifacts.SourceConfigurationArtifact(
      path_spec=source_path_spec)

  session = sessions.Session()

  configuration = configurations.ProcessingConfiguration()
  configuration.parser_filter_expression = 'filestat'

  storage_writer = fake_writer.FakeStorageWriter()
  storage_writer.Open()

  try:
    test_engine.PreprocessSources(
        test_artifacts_path, None, [source_path_spec], session,
        storage_writer)

    processing_status = test_engine.ProcessSources(
        [source_configuration], storage_writer, resolver_context,
        configuration)

    parsers_counter = collections.Counter({
        parser_count.name: parser_count.number_of_events
        for parser_count in storage_writer.GetAttributeContainers(
            'parser_count')})

  finally:
    storage_writer.Close()

  self.assertFalse(processing_status.aborted)

  self.assertEqual(storage_writer.number_of_events, 15)
  self.assertEqual(storage_writer.number_of_extraction_warnings, 0)
  self.assertEqual(storage_writer.number_of_recovery_warnings, 0)

  expected_parsers_counter = collections.Counter({
      'filestat': 15,
      'total': 15})
  self.assertEqual(parsers_counter, expected_parsers_counter)

def testGetAttributeNames(self):
  """Tests the GetAttributeNames function."""
  attribute_container = artifacts.SourceConfigurationArtifact()

  expected_attribute_names = [
      'mount_path', 'path_spec', 'system_configuration']

  attribute_names = sorted(attribute_container.GetAttributeNames())

  self.assertEqual(attribute_names, expected_attribute_names)

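# Minimal usage sketch (not part of the original test file): path_spec is set
# via the initializer and mount_path by direct assignment, mirroring the
# attribute names checked above. The location and mount path values are
# hypothetical stand-ins.
os_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_OS, location='/tmp/ímynd.dd')
source_configuration = artifacts.SourceConfigurationArtifact(
    path_spec=os_path_spec)
source_configuration.mount_path = '/mnt/source'
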
def GetSourceConfigurationArtifacts(self, session_identifier=None):
  """Retrieves the knowledge base as source configuration artifacts.

  Args:
    session_identifier (Optional[str]): session identifier, where None
        represents the active session.

  Returns:
    list[SourceConfigurationArtifact]: source configuration artifacts.
  """
  source_configuration = artifacts.SourceConfigurationArtifact()

  # TODO: set path_spec
  source_configuration.system_configuration = (
      self._GetSystemConfigurationArtifact(
          session_identifier=session_identifier))

  return [source_configuration]

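# Hedged usage sketch: persisting the source configurations returned above
# with a storage writer, mirroring how other attribute containers are written
# via AddAttributeContainer elsewhere in this section. The knowledge_base,
# session and storage_writer objects are assumed to be set up by the caller.
for source_configuration in knowledge_base.GetSourceConfigurationArtifacts(
    session_identifier=session.identifier):
  storage_writer.AddAttributeContainer(source_configuration)
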
def testProcessSources(self):
  """Tests the PreprocessSources and ProcessSources functions."""
  artifacts_path = shared_test_lib.GetTestFilePath(['artifacts'])
  self._SkipIfPathNotExists(artifacts_path)

  test_engine = extraction_engine.ExtractionMultiProcessEngine(
      maximum_number_of_tasks=100)

  test_file_path = self._GetTestFilePath(['ímynd.dd'])
  self._SkipIfPathNotExists(test_file_path)

  os_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)
  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
      parent=os_path_spec)

  source_configuration = artifacts.SourceConfigurationArtifact(
      path_spec=source_path_spec)

  session = sessions.Session()

  configuration = configurations.ProcessingConfiguration()
  configuration.parser_filter_expression = 'filestat'
  configuration.task_storage_format = definitions.STORAGE_FORMAT_SQLITE

  with shared_test_lib.TempDirectory() as temp_directory:
    temp_file = os.path.join(temp_directory, 'storage.plaso')
    storage_writer = sqlite_writer.SQLiteStorageFileWriter()
    storage_writer.Open(path=temp_file)

    try:
      test_engine.PreprocessSources(
          artifacts_path, None, [source_path_spec], session, storage_writer)

      processing_status = test_engine.ProcessSources(
          [source_configuration], storage_writer, session.identifier,
          configuration, storage_file_path=temp_directory)

      number_of_events = storage_writer.GetNumberOfAttributeContainers(
          'event')
      number_of_extraction_warnings = (
          storage_writer.GetNumberOfAttributeContainers(
              'extraction_warning'))
      number_of_recovery_warnings = (
          storage_writer.GetNumberOfAttributeContainers(
              'recovery_warning'))

      parsers_counter = collections.Counter({
          parser_count.name: parser_count.number_of_events
          for parser_count in storage_writer.GetAttributeContainers(
              'parser_count')})

    finally:
      storage_writer.Close()

  self.assertFalse(processing_status.aborted)

  self.assertEqual(number_of_events, 15)
  self.assertEqual(number_of_extraction_warnings, 0)
  self.assertEqual(number_of_recovery_warnings, 0)

  expected_parsers_counter = collections.Counter({
      'filestat': 15,
      'total': 15})
  self.assertEqual(parsers_counter, expected_parsers_counter)

def _ProcessSources(self, session, storage_writer):
  """Processes the sources and extracts events.

  Args:
    session (Session): session in which the sources are processed.
    storage_writer (StorageWriter): storage writer for a session storage.

  Returns:
    ProcessingStatus: processing status.

  Raises:
    BadConfigOption: if an invalid collection filter was specified.
  """
  is_archive = False
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    is_archive = self._IsArchiveFile(self._source_path_specs[0])
    if is_archive:
      self._source_type = definitions.SOURCE_TYPE_ARCHIVE

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    if not self._process_archives or not is_archive:
      single_process_mode = True

  if single_process_mode:
    extraction_engine = single_extraction_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_extraction_engine.ExtractionMultiProcessEngine(
        number_of_worker_processes=self._number_of_extraction_workers,
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine, session, storage_writer)

  self._expanded_parser_filter_expression = (
      self._GetExpandedParserFilterExpression(
          extraction_engine.knowledge_base))

  enabled_parser_names = self._expanded_parser_filter_expression.split(',')
  number_of_enabled_parsers = len(enabled_parser_names)

  force_parser = False
  if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and
      not is_archive and number_of_enabled_parsers == 1):
    force_parser = True

    self._extract_winevt_resources = False

  elif ('winevt' not in enabled_parser_names and
        'winevtx' not in enabled_parser_names):
    self._extract_winevt_resources = False

  elif (self._extract_winevt_resources and
        'pe' not in enabled_parser_names):
    logger.warning(
        'A Windows EventLog parser is enabled in combination with '
        'extraction of Windows EventLog resources, but the Portable '
        'Executable (PE) parser is disabled. Therefore Windows EventLog '
        'resources cannot be extracted.')
    self._extract_winevt_resources = False

  configuration = self._CreateExtractionProcessingConfiguration()

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  session_configuration = self._CreateExtractionSessionConfiguration(
      session, enabled_parser_names)
  storage_writer.AddAttributeContainer(session_configuration)

  source_configurations = []
  for path_spec in self._source_path_specs:
    source_configuration = artifacts.SourceConfigurationArtifact(
        path_spec=path_spec)
    source_configurations.append(source_configuration)

  # TODO: improve to detect more than one system configuration.
  # TODO: improve to add volumes to the system configuration.
  system_configuration = (
      extraction_engine.knowledge_base.GetSystemConfigurationArtifact())
  storage_writer.AddAttributeContainer(system_configuration)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        source_configurations, storage_writer, self._resolver_context,
        configuration, force_parser=force_parser,
        status_update_callback=status_update_callback)

  else:
    logger.debug('Starting extraction in multi process mode.')

    # The following overrides are needed because pylint 2.6.0 gets confused
    # about which ProcessSources to check against.
    # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
    processing_status = extraction_engine.ProcessSources(
        source_configurations, storage_writer, session.identifier,
        configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        status_update_callback=status_update_callback,
        storage_file_path=self._storage_file_path)

  return processing_status
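
# Hedged sketch of driving _ProcessSources from within another method of the
# same tool class (hence the use of self); it reuses the session and SQLite
# storage writer patterns from the tests above. The storage path is a
# hypothetical example and the tool is assumed to already hold source path
# specs and parser settings.
session = sessions.Session()

storage_writer = sqlite_writer.SQLiteStorageFileWriter()
storage_writer.Open(path='/tmp/storage.plaso')

try:
  processing_status = self._ProcessSources(session, storage_writer)
finally:
  storage_writer.Close()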