def testProcessSources(self):
  """Tests the ProcessSources function."""
  test_artifacts_path = self._GetTestFilePath(['artifacts'])
  self._SkipIfPathNotExists(test_artifacts_path)

  test_file_path = self._GetTestFilePath(['ímynd.dd'])
  self._SkipIfPathNotExists(test_file_path)

  test_engine = extraction_engine.SingleProcessEngine()
  resolver_context = context.Context()

  os_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)
  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
      parent=os_path_spec)

  source_configuration = artifacts.SourceConfigurationArtifact(
      path_spec=source_path_spec)

  session = sessions.Session()

  configuration = configurations.ProcessingConfiguration()
  configuration.parser_filter_expression = 'filestat'

  storage_writer = fake_writer.FakeStorageWriter()
  storage_writer.Open()

  try:
    test_engine.PreprocessSources(
        test_artifacts_path, None, [source_path_spec], session,
        storage_writer)

    processing_status = test_engine.ProcessSources(
        [source_configuration], storage_writer, resolver_context,
        configuration)

    parsers_counter = collections.Counter({
        parser_count.name: parser_count.number_of_events
        for parser_count in storage_writer.GetAttributeContainers(
            'parser_count')})

  finally:
    storage_writer.Close()

  self.assertFalse(processing_status.aborted)

  self.assertEqual(storage_writer.number_of_events, 15)
  self.assertEqual(storage_writer.number_of_extraction_warnings, 0)
  self.assertEqual(storage_writer.number_of_recovery_warnings, 0)

  expected_parsers_counter = collections.Counter({
      'filestat': 15,
      'total': 15})
  self.assertEqual(parsers_counter, expected_parsers_counter)
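# Note on the variant above: per-parser event counts are read back from the
# storage writer as 'parser_count' attribute containers. Each container
# carries a parser name and the number of events that parser produced, and
# the dict comprehension folds these into a collections.Counter so the totals
# can be compared against expected_parsers_counter in a single assertion.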
def testProcessSources(self):
  """Tests the ProcessSources function."""
  test_artifacts_path = self._GetTestFilePath(['artifacts'])
  self._SkipIfPathNotExists(test_artifacts_path)

  test_file_path = self._GetTestFilePath(['ímynd.dd'])
  self._SkipIfPathNotExists(test_file_path)

  registry = artifacts_registry.ArtifactDefinitionsRegistry()
  reader = artifacts_reader.YamlArtifactsReader()
  registry.ReadFromDirectory(reader, test_artifacts_path)

  test_engine = extraction_engine.SingleProcessEngine()
  resolver_context = context.Context()

  os_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)
  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
      parent=os_path_spec)

  session = sessions.Session()

  configuration = configurations.ProcessingConfiguration()
  configuration.parser_filter_expression = 'filestat'

  storage_writer = fake_writer.FakeStorageWriter()
  storage_writer.Open()

  try:
    test_engine.PreprocessSources(
        registry, [source_path_spec], session, storage_writer)

    test_engine.ProcessSources(
        session, [source_path_spec], storage_writer, resolver_context,
        configuration)

  finally:
    storage_writer.Close()

  self.assertEqual(storage_writer.number_of_events, 15)
  self.assertEqual(storage_writer.number_of_extraction_warnings, 0)
  self.assertEqual(storage_writer.number_of_recovery_warnings, 0)

  expected_parsers_counter = collections.Counter({
      'filestat': 15,
      'total': 15})
  self.assertEqual(session.parsers_counter, expected_parsers_counter)
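# A sketch of the module level imports the two test variants above rely on,
# inferred from the aliases used in the code. The module paths are
# assumptions based on those identifiers, not confirmed by this file:
#
#   import collections
#
#   from artifacts import reader as artifacts_reader
#   from artifacts import registry as artifacts_registry
#
#   from dfvfs.lib import definitions as dfvfs_definitions
#   from dfvfs.path import factory as path_spec_factory
#   from dfvfs.resolver import context
#
#   from plaso.containers import artifacts
#   from plaso.containers import sessions
#   from plaso.engine import configurations
#   from plaso.single_process import extraction_engine
#   from plaso.storage.fake import writer as fake_writer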
def _ProcessSources(self, session, storage_writer):
  """Processes the sources and extracts events.

  Args:
    session (Session): session in which the sources are processed.
    storage_writer (StorageWriter): storage writer for a session storage.

  Returns:
    ProcessingStatus: processing status.

  Raises:
    BadConfigOption: if an invalid collection filter was specified.
  """
  is_archive = False
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    is_archive = self._IsArchiveFile(self._source_path_specs[0])
    if is_archive:
      self._source_type = definitions.SOURCE_TYPE_ARCHIVE

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    if not self._process_archives or not is_archive:
      single_process_mode = True

  if single_process_mode:
    extraction_engine = single_extraction_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_extraction_engine.ExtractionMultiProcessEngine(
        number_of_worker_processes=self._number_of_extraction_workers,
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine, session, storage_writer)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  session.enabled_parser_names = (
      configuration.parser_filter_expression.split(','))
  session.parser_filter_expression = self._parser_filter_expression

  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  # TODO: set mount path in knowledge base with
  # extraction_engine.knowledge_base.SetMountPath()
  extraction_engine.knowledge_base.SetTextPrepend(self._text_prepend)

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  # TODO: decouple session and storage writer?
  session.source_configurations = (
      extraction_engine.knowledge_base.GetSourceConfigurationArtifacts())
  storage_writer.WriteSessionConfiguration(session)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  if single_process_mode:
    force_parser = False
    number_of_parsers = len(
        configuration.parser_filter_expression.split(','))
    if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and
        not is_archive and number_of_parsers == 1):
      force_parser = True

    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer,
        self._resolver_context, configuration, force_parser=force_parser,
        status_update_callback=status_update_callback)

  else:
    logger.debug('Starting extraction in multi process mode.')

    # The following overrides are needed because pylint 2.6.0 gets confused
    # about which ProcessSources to check against.
    # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer, configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        status_update_callback=status_update_callback,
        storage_file_path=self._storage_file_path)

  return processing_status
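# For reference, a parser filter expression is a comma separated list of
# enabled parser names, as in this minimal sketch (hypothetical values):
#
#   configuration.parser_filter_expression = 'filestat,winevt,winevtx'
#   session.enabled_parser_names = (
#       configuration.parser_filter_expression.split(','))
#   # session.enabled_parser_names == ['filestat', 'winevt', 'winevtx']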
def _ProcessSources(self, session, storage_writer):
  """Processes the sources and extracts events.

  Args:
    session (Session): session in which the sources are processed.
    storage_writer (StorageWriter): storage writer for a session storage.

  Returns:
    ProcessingStatus: processing status.

  Raises:
    BadConfigOption: if an invalid collection filter was specified.
  """
  is_archive = False
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    is_archive = self._IsArchiveFile(self._source_path_specs[0])
    if is_archive:
      self._source_type = definitions.SOURCE_TYPE_ARCHIVE

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    if not self._process_archives or not is_archive:
      single_process_mode = True

  if single_process_mode:
    extraction_engine = single_extraction_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_extraction_engine.ExtractionMultiProcessEngine(
        number_of_worker_processes=self._number_of_extraction_workers,
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine, session, storage_writer)

  self._expanded_parser_filter_expression = (
      self._GetExpandedParserFilterExpression(
          extraction_engine.knowledge_base))

  enabled_parser_names = self._expanded_parser_filter_expression.split(',')
  number_of_enabled_parsers = len(enabled_parser_names)

  force_parser = False
  if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and
      not is_archive and number_of_enabled_parsers == 1):
    force_parser = True

    self._extract_winevt_resources = False

  elif ('winevt' not in enabled_parser_names and
        'winevtx' not in enabled_parser_names):
    self._extract_winevt_resources = False

  elif (self._extract_winevt_resources and
        'pe' not in enabled_parser_names):
    logger.warning(
        'A Windows EventLog parser is enabled in combination with '
        'extraction of Windows EventLog resources, but the Portable '
        'Executable (PE) parser is disabled. Therefore Windows EventLog '
        'resources cannot be extracted.')

    self._extract_winevt_resources = False

  configuration = self._CreateExtractionProcessingConfiguration()

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  session_configuration = self._CreateExtractionSessionConfiguration(
      session, enabled_parser_names)
  storage_writer.AddAttributeContainer(session_configuration)

  source_configurations = []
  for path_spec in self._source_path_specs:
    source_configuration = artifacts.SourceConfigurationArtifact(
        path_spec=path_spec)
    source_configurations.append(source_configuration)

  # TODO: improve to detect more than one system configuration.
  # TODO: improve to add volumes to system configuration.
  system_configuration = (
      extraction_engine.knowledge_base.GetSystemConfigurationArtifact())
  storage_writer.AddAttributeContainer(system_configuration)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        source_configurations, storage_writer, self._resolver_context,
        configuration, force_parser=force_parser,
        status_update_callback=status_update_callback)

  else:
    logger.debug('Starting extraction in multi process mode.')

    # The following overrides are needed because pylint 2.6.0 gets confused
    # about which ProcessSources to check against.
    # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
    processing_status = extraction_engine.ProcessSources(
        source_configurations, storage_writer, session.identifier,
        configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        status_update_callback=status_update_callback,
        storage_file_path=self._storage_file_path)

  return processing_status
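# A sketch of the module level imports the two _ProcessSources variants above
# rely on, inferred from the aliases used in the code. The module paths are
# assumptions based on those identifiers, not confirmed by this file:
#
#   from dfvfs.lib import definitions as dfvfs_definitions
#
#   from plaso.cli import logger
#   from plaso.containers import artifacts
#   from plaso.lib import definitions
#   from plaso.lib import errors
#   from plaso.multi_process import extraction_engine as (
#       multi_extraction_engine)
#   from plaso.single_process import extraction_engine as (
#       single_extraction_engine)
#
# Note that Python import aliases cannot actually be parenthesized; the two
# wrapped aliases above would each be written on a single line in real code.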