def _CreateEngine(self, single_process_mode):
  """Creates an engine based on the front end settings.

  Args:
    single_process_mode (bool): True if the front-end should run in
        single process mode.

  Returns:
    BaseEngine: engine.
  """
  if single_process_mode:
    engine = single_process.SingleProcessEngine(
        debug_output=self._debug_mode,
        enable_profiling=self._enable_profiling,
        profiling_directory=self._profiling_directory,
        profiling_sample_rate=self._profiling_sample_rate,
        profiling_type=self._profiling_type)
  else:
    engine = multi_process_engine.TaskMultiProcessEngine(
        debug_output=self._debug_mode,
        enable_profiling=self._enable_profiling,
        profiling_directory=self._profiling_directory,
        profiling_sample_rate=self._profiling_sample_rate,
        profiling_type=self._profiling_type,
        use_zeromq=self._use_zeromq)

  return engine
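
# Hedged, self-contained sketch of the selection pattern _CreateEngine
# implements: pick an engine implementation from a boolean mode flag and
# forward the shared settings. The class names here are illustrative
# stand-ins, not plaso's real engine classes.
class IllustrativeSingleProcessEngine(object):
  def __init__(self, **settings):
    self.settings = settings


class IllustrativeMultiProcessEngine(IllustrativeSingleProcessEngine):
  pass


def create_engine(single_process_mode, **settings):
  if single_process_mode:
    return IllustrativeSingleProcessEngine(**settings)
  return IllustrativeMultiProcessEngine(**settings)


engine_object = create_engine(True, debug_output=False, enable_profiling=False)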
def testProcessSources(self):
  """Tests the ProcessSources function."""
  test_artifacts_path = self._GetTestFilePath(['artifacts'])
  self._SkipIfPathNotExists(test_artifacts_path)

  test_file_path = self._GetTestFilePath(['ímynd.dd'])
  self._SkipIfPathNotExists(test_file_path)

  registry = artifacts_registry.ArtifactDefinitionsRegistry()
  reader = artifacts_reader.YamlArtifactsReader()
  registry.ReadFromDirectory(reader, test_artifacts_path)

  test_engine = single_process.SingleProcessEngine()
  resolver_context = context.Context()
  session = sessions.Session()

  os_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file_path)
  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
      parent=os_path_spec)

  test_engine.PreprocessSources(registry, [source_path_spec])

  storage_writer = fake_writer.FakeStorageWriter(session)

  configuration = configurations.ProcessingConfiguration()
  configuration.parser_filter_expression = 'filestat'

  test_engine.ProcessSources(
      [source_path_spec], storage_writer, resolver_context, configuration)

  self.assertEqual(storage_writer.number_of_events, 15)
def testProcessSources(self):
  """Tests the ProcessSources function."""
  test_engine = single_process.SingleProcessEngine()
  resolver_context = context.Context()
  session = sessions.Session()

  source_path = self._GetTestFilePath([u'ímynd.dd'])
  os_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
      parent=os_path_spec)

  test_engine.PreprocessSources([source_path_spec])

  storage_writer = fake_storage.FakeStorageWriter(session)

  configuration = configurations.ProcessingConfiguration()
  configuration.parser_filter_expression = u'filestat'

  test_engine.ProcessSources(
      [source_path_spec], storage_writer, resolver_context, configuration)

  self.assertEqual(len(storage_writer.events), 15)
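
# Hedged sketch of the dfVFS path specification chain the tests above
# build: an OS path spec pointing at the image file, with a TSK path spec
# layered on top addressing the file system embedded in the image.
# Assumes dfvfs is installed; u'ímynd.dd' stands in for a test image path.
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory

os_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_OS, location=u'ímynd.dd')
tsk_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
    parent=os_path_spec)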
def _InitializeSingleProcessModeEngine(self):
  """Initializes the single process mode engine.

  Returns:
    The engine object (instance of Engine).
  """
  engine = single_process.SingleProcessEngine(self._queue_size)
  engine.SetEnableDebugOutput(self._debug_mode)
  engine.SetEnableProfiling(
      self._enable_profiling,
      profiling_sample_rate=self._profiling_sample_rate,
      profiling_type=self._profiling_type)
  engine.SetProcessArchiveFiles(self._process_archive_files)

  if self._filter_object:
    engine.SetFilterObject(self._filter_object)

  if self._mount_path:
    engine.SetMountPath(self._mount_path)

  if self._text_prepend:
    engine.SetTextPrepend(self._text_prepend)

  # TODO: add support to handle multiple partitions.
  engine.SetSource(
      self.GetSourcePathSpec(), resolver_context=self._resolver_context)

  return engine
def testCreateExtractionWorker(self):
  """Tests the CreateExtractionWorker function."""
  test_engine = single_process.SingleProcessEngine(
      maximum_number_of_queued_items=100)

  test_extraction_worker = test_engine._CreateExtractionWorker(0)
  self.assertIsNotNone(test_extraction_worker)
  self.assertIsInstance(
      test_extraction_worker,
      single_process.SingleProcessEventExtractionWorker)
def testCreateCollector(self):
  """Tests the CreateCollector function."""
  resolver_context = context.Context()
  test_engine = single_process.SingleProcessEngine(
      maximum_number_of_queued_items=100)

  test_collector = test_engine._CreateCollector(
      filter_find_specs=None, include_directory_stat=False,
      resolver_context=resolver_context)
  self.assertIsNotNone(test_collector)
  self.assertIsInstance(test_collector, single_process.SingleProcessCollector)
def _CreateEngine(self, single_process_mode):
  """Creates an engine based on the front end settings.

  Args:
    single_process_mode (bool): True if the front-end should run in
        single process mode.

  Returns:
    BaseEngine: engine.
  """
  if single_process_mode:
    engine = single_process.SingleProcessEngine()
  else:
    engine = multi_process_engine.TaskMultiProcessEngine(
        use_zeromq=self._use_zeromq)

  return engine
def testEngine(self):
  """Tests the engine functionality."""
  resolver_context = context.Context()
  test_engine = single_process.SingleProcessEngine(
      maximum_number_of_queued_items=25000)
  self.assertIsNotNone(test_engine)

  source_path = os.path.join(self._TEST_DATA_PATH, u'ímynd.dd')
  os_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
      parent=os_path_spec)

  test_engine.SetSource(source_path_spec, resolver_context=resolver_context)

  self.assertFalse(test_engine.SourceIsDirectory())
  self.assertFalse(test_engine.SourceIsFile())
  self.assertTrue(test_engine.SourceIsStorageMediaImage())

  test_searcher = test_engine.GetSourceFileSystemSearcher(
      resolver_context=resolver_context)
  self.assertIsNotNone(test_searcher)
  self.assertIsInstance(
      test_searcher, file_system_searcher.FileSystemSearcher)

  test_engine.PreprocessSource('Windows')

  test_collector = test_engine.CreateCollector(
      False, vss_stores=None, filter_find_specs=None,
      resolver_context=resolver_context)
  self.assertIsNotNone(test_collector)
  self.assertIsInstance(test_collector, single_process.SingleProcessCollector)

  test_extraction_worker = test_engine.CreateExtractionWorker(0)
  self.assertIsNotNone(test_extraction_worker)
  self.assertIsInstance(
      test_extraction_worker,
      single_process.SingleProcessEventExtractionWorker)
def testProcessSources(self):
  """Tests the PreprocessSource and ProcessSources functions."""
  test_engine = single_process.SingleProcessEngine(
      maximum_number_of_queued_items=100)

  source_path = os.path.join(self._TEST_DATA_PATH, u'ímynd.dd')
  os_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
      parent=os_path_spec)

  test_engine.PreprocessSource([source_path_spec], u'Windows')

  parser_filter_string = u'filestat'

  storage_writer = test_lib.TestStorageWriter(test_engine.event_object_queue)
  test_engine.ProcessSources(
      [source_path_spec], storage_writer,
      parser_filter_string=parser_filter_string)

  self.assertEqual(len(storage_writer.event_objects), 15)
def testGetSourceFileSystem(self):
  """Tests the GetSourceFileSystem function."""
  resolver_context = context.Context()
  test_engine = single_process.SingleProcessEngine(
      maximum_number_of_queued_items=100)

  source_path = os.path.join(self._TEST_DATA_PATH, u'ímynd.dd')
  os_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
  source_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
      parent=os_path_spec)

  test_file_system, test_mount_point = test_engine.GetSourceFileSystem(
      source_path_spec, resolver_context=resolver_context)

  self.assertIsNotNone(test_file_system)
  self.assertIsInstance(test_file_system, file_system.FileSystem)

  self.assertIsNotNone(test_mount_point)
  self.assertIsInstance(test_mount_point, path_spec.PathSpec)

  test_file_system.Close()
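
# Hedged sketch (an assumption about dfVFS's resolver API, not taken from
# the snippets above) of roughly what GetSourceFileSystem wraps: resolving
# a FileSystem object directly from a path specification chain. Again,
# u'ímynd.dd' is a stand-in for a test image path.
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import context as dfvfs_context
from dfvfs.resolver import resolver as dfvfs_resolver

resolver_context = dfvfs_context.Context()
os_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_OS, location=u'ímynd.dd')
tsk_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
    parent=os_path_spec)
file_system = dfvfs_resolver.Resolver.OpenFileSystem(
    tsk_path_spec, resolver_context=resolver_context)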
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  This is a stripped down copy of tools/log2timeline.py that doesn't
  support the full set of flags. The defaults for these are hard coded in
  the constructor of this class.

  Raises:
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path)

  scan_context = self.ScanSource()
  source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, source_type, filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write(u'\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write(u'Processing started.\n')

  session = engine.BaseEngine.CreateSession(
      command_line_arguments=self._command_line_arguments,
      filter_file=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  storage_writer = storage_zip_file.ZIPStorageFileWriter(
      session, self._storage_file_path)

  configuration = self._CreateProcessingConfiguration()

  single_process_mode = self._single_process_mode
  if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine(
        use_zeromq=self._use_zeromq)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if (self._force_preprocessing or
      source_type in self._SOURCE_TYPES_TO_PREPROCESS):
    self._PreprocessSources(extraction_engine)

  if not configuration.parser_filter_expression:
    operating_system = extraction_engine.knowledge_base.GetValue(
        u'operating_system')
    operating_system_product = extraction_engine.knowledge_base.GetValue(
        u'operating_system_product')
    operating_system_version = extraction_engine.knowledge_base.GetValue(
        u'operating_system_version')
    parser_filter_expression = (
        self._parsers_manager.GetPresetForOperatingSystem(
            operating_system, operating_system_product,
            operating_system_version))

    if parser_filter_expression:
      logging.info(
          u'Parser filter expression changed to: {0:s}'.format(
              parser_filter_expression))

    configuration.parser_filter_expression = parser_filter_expression

    session.enabled_parser_names = list(
        self._parsers_manager.GetParserAndPluginNames(
            parser_filter_expression=configuration.parser_filter_expression))
    session.parser_filter_expression = configuration.parser_filter_expression

  if session.preferred_time_zone:
    try:
      extraction_engine.knowledge_base.SetTimeZone(
          session.preferred_time_zone)
    except ValueError:
      # pylint: disable=protected-access
      logging.warning(
          u'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
              session.preferred_time_zone,
              extraction_engine.knowledge_base._time_zone.zone))

  filter_find_specs = None
  if configuration.filter_file:
    environment_variables = (
        extraction_engine.knowledge_base.GetEnvironmentVariables())
    filter_find_specs = frontend_utils.BuildFindSpecsFromFile(
        configuration.filter_file,
        environment_variables=environment_variables)

  processing_status = None
  if single_process_mode:
    logging.debug(u'Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        self._source_path_specs, storage_writer, self._resolver_context,
        configuration, filter_find_specs=filter_find_specs,
        status_update_callback=status_update_callback)

  else:
    logging.debug(u'Starting extraction in multi process mode.')

    processing_status = extraction_engine.ProcessSources(
        session.identifier, self._source_path_specs, storage_writer,
        configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        filter_find_specs=filter_find_specs,
        number_of_worker_processes=self._number_of_extraction_workers,
        status_update_callback=status_update_callback)

  self._status_view.PrintExtractionSummary(processing_status)
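
# Hedged, self-contained sketch of the fallback performed above when no
# parser filter expression is configured: look up a preset from the
# operating system values stored in the knowledge base. The mapping below
# is illustrative, not plaso's real preset table; get_preset_for_os mirrors
# the role of GetPresetForOperatingSystem.
_ILLUSTRATIVE_PRESETS = {
    (u'Windows', u'Windows 7'): u'win7',
    (u'Windows', u'Windows XP'): u'winxp',
    (u'Linux', None): u'linux',
}


def get_preset_for_os(operating_system, operating_system_product=None):
  preset = _ILLUSTRATIVE_PRESETS.get(
      (operating_system, operating_system_product))
  if not preset:
    preset = _ILLUSTRATIVE_PRESETS.get((operating_system, None))
  # None means no preset; plaso then runs with all parsers and plugins.
  return preset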
def ProcessSources(
    self, source_path_specs, source_type, enable_sigsegv_handler=False,
    filter_file=None, hasher_names_string=None, parser_filter_string=None,
    preferred_encoding=u'utf-8', single_process_mode=False,
    status_update_callback=None,
    storage_serializer_format=definitions.SERIALIZER_FORMAT_PROTOBUF,
    timezone=pytz.UTC):
  """Processes the sources.

  Args:
    source_path_specs: list of path specifications (instances of
        dfvfs.PathSpec) to process.
    source_type: the dfVFS source type definition.
    enable_sigsegv_handler: optional boolean value to indicate the
        SIGSEGV handler should be enabled. The default is False.
    filter_file: optional path to a file that contains find
        specifications. The default is None.
    hasher_names_string: optional comma separated string of names of
        hashers to enable. The default is None.
    parser_filter_string: optional parser filter string. The default is
        None.
    preferred_encoding: optional preferred encoding. The default is UTF-8.
    single_process_mode: optional boolean value to indicate if the
        front-end should run in single process mode. The default is False.
    status_update_callback: optional callback function for status updates.
        The default is None.
    storage_serializer_format: optional storage serializer format. The
        default is protobuf.
    timezone: optional preferred timezone. The default is UTC.

  Returns:
    The processing status (instance of ProcessingStatus) or None.

  Raises:
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  # If the source is a directory or a storage media image
  # run pre-processing.
  # TODO: move source_scanner.SourceScannerContext.SOURCE_TYPE_
  # to definitions.SOURCE_TYPE_.
  if source_type in [
      source_scanner.SourceScannerContext.SOURCE_TYPE_DIRECTORY,
      source_scanner.SourceScannerContext.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
      source_scanner.SourceScannerContext.SOURCE_TYPE_STORAGE_MEDIA_IMAGE]:
    self.SetEnablePreprocessing(True)
  else:
    self.SetEnablePreprocessing(False)

  self._CheckStorageFile(self._storage_file_path)

  self._single_process_mode = single_process_mode
  # TODO: move source_scanner.SourceScannerContext.SOURCE_TYPE_
  # to definitions.SOURCE_TYPE_.
  if source_type == source_scanner.SourceScannerContext.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    self._single_process_mode = True

  if self._single_process_mode:
    self._engine = single_process.SingleProcessEngine(self._queue_size)
  else:
    self._engine = multi_process.MultiProcessEngine(
        maximum_number_of_queued_items=self._queue_size)

  self._engine.SetEnableDebugOutput(self._debug_mode)
  self._engine.SetEnableProfiling(
      self._enable_profiling,
      profiling_sample_rate=self._profiling_sample_rate,
      profiling_type=self._profiling_type)

  pre_obj = self._PreprocessSource(source_path_specs, source_type)

  self._operating_system = getattr(pre_obj, u'guessed_os', None)

  if not parser_filter_string:
    guessed_os = self._operating_system
    os_version = getattr(pre_obj, u'osversion', u'')
    parser_filter_string = self._GetParserFilterPreset(
        os_guess=guessed_os, os_version=os_version)

    if parser_filter_string:
      logging.info(
          u'Parser filter expression changed to: {0:s}'.format(
              parser_filter_string))

  self._parser_names = []
  for _, parser_class in parsers_manager.ParsersManager.GetParsers(
      parser_filter_string=parser_filter_string):
    self._parser_names.append(parser_class.NAME)

  if u'filestat' in self._parser_names:
    include_directory_stat = True
  else:
    include_directory_stat = False

  self._hasher_names = []
  hasher_manager = hashers_manager.HashersManager
  for hasher_name in hasher_manager.GetHasherNamesFromString(
      hasher_names_string=hasher_names_string):
    self._hasher_names.append(hasher_name)

  self._PreprocessSetTimezone(pre_obj, timezone=timezone)

  if filter_file:
    filter_find_specs = engine_utils.BuildFindSpecsFromFile(
        filter_file, pre_obj=pre_obj)
  else:
    filter_find_specs = None

  self._PreprocessSetCollectionInformation(
      pre_obj, source_type, self._engine, filter_file=filter_file,
      parser_filter_string=parser_filter_string,
      preferred_encoding=preferred_encoding)

  if self._output_module:
    storage_writer = storage.BypassStorageWriter(
        self._engine.event_object_queue, self._storage_file_path,
        output_module_string=self._output_module, pre_obj=pre_obj)
  else:
    storage_writer = storage.FileStorageWriter(
        self._engine.event_object_queue, self._storage_file_path,
        buffer_size=self._buffer_size, pre_obj=pre_obj,
        serializer_format=storage_serializer_format)

  storage_writer.SetEnableProfiling(
      self._enable_profiling, profiling_type=self._profiling_type)

  processing_status = None
  try:
    if self._single_process_mode:
      logging.debug(u'Starting extraction in single process mode.')

      processing_status = self._engine.ProcessSources(
          source_path_specs, storage_writer,
          filter_find_specs=filter_find_specs,
          filter_object=self._filter_object,
          hasher_names_string=hasher_names_string,
          include_directory_stat=include_directory_stat,
          mount_path=self._mount_path,
          parser_filter_string=parser_filter_string,
          process_archive_files=self._process_archive_files,
          resolver_context=self._resolver_context,
          status_update_callback=status_update_callback,
          text_prepend=self._text_prepend)

    else:
      logging.debug(u'Starting extraction in multi process mode.')

      # TODO: pass number_of_extraction_workers.
      processing_status = self._engine.ProcessSources(
          source_path_specs, storage_writer,
          enable_sigsegv_handler=enable_sigsegv_handler,
          filter_find_specs=filter_find_specs,
          filter_object=self._filter_object,
          hasher_names_string=hasher_names_string,
          include_directory_stat=include_directory_stat,
          mount_path=self._mount_path,
          parser_filter_string=parser_filter_string,
          process_archive_files=self._process_archive_files,
          status_update_callback=status_update_callback,
          show_memory_usage=self._show_worker_memory_information,
          text_prepend=self._text_prepend)

  except KeyboardInterrupt:
    self._CleanUpAfterAbort()
    raise errors.UserAbort

  # TODO: check if this still works and if still needed.
  except Exception as exception:
    if not self._single_process_mode:
      raise

    # The tool should generally not be run in single process mode
    # for other reasons than to debug. Hence the general error
    # catching.
    logging.error(
        u'An uncaught exception occurred: {0:s}.\n{1:s}'.format(
            exception, traceback.format_exc()))
    if self._debug_mode:
      pdb.post_mortem()

  return processing_status
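
# Hedged, generic sketch of the abort-handling pattern used above: catch
# KeyboardInterrupt, run cleanup, then re-raise as a tool-specific error
# so callers can tell a user abort from a crash. This UserAbort class is a
# stand-in for plaso's errors.UserAbort.
class UserAbort(Exception):
  """Raised when the user initiated an abort."""


def process_with_abort_handling(process_callable, cleanup_callable):
  try:
    return process_callable()
  except KeyboardInterrupt:
    cleanup_callable()
    raise UserAbort()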
def setUp(self):
  """Makes preparations before running an individual test."""
  self._test_engine = single_process.SingleProcessEngine(
      maximum_number_of_queued_items=100)
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  This is a stripped down copy of tools/log2timeline.py that doesn't
  support the full set of flags. The defaults for these are hard coded in
  the constructor of this class.

  Raises:
    BadConfigOption: if the storage file path is invalid or the storage
        format not supported or an invalid collection filter was
        specified.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  scan_context = self.ScanSource(self._source_path)
  source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, source_type,
      artifact_filters=self._artifact_filters,
      filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  session = engine.BaseEngine.CreateSession(
      artifact_filter_names=self._artifact_filters,
      command_line_arguments=self._command_line_arguments,
      filter_file_path=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format, session, self._storage_file_path)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(self._storage_format))

  single_process_mode = self._single_process_mode
  if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine()

  # If the source is a directory or a storage media image
  # run pre-processing.
  if source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  self._SetExtractionParsersAndPlugins(configuration, session)
  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  processing_status = None
  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        self._source_path_specs, storage_writer, self._resolver_context,
        configuration, status_update_callback=status_update_callback)

  else:
    logger.debug('Starting extraction in multi process mode.')

    processing_status = extraction_engine.ProcessSources(
        session.identifier, self._source_path_specs, storage_writer,
        configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        number_of_worker_processes=self._number_of_extraction_workers,
        status_update_callback=status_update_callback)

  self._status_view.PrintExtractionSummary(processing_status)
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  Raises:
    BadConfigOption: if the storage file path is invalid or the storage
        format not supported or an invalid collection filter was
        specified.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  scan_context = self.ScanSource(self._source_path)
  self._source_type = scan_context.source_type

  is_archive = False
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    is_archive = self._IsArchiveFile(self._source_path_specs[0])
    if is_archive:
      self._source_type = definitions.SOURCE_TYPE_ARCHIVE

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, self._source_type,
      artifact_filters=self._artifact_filters,
      filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  session = engine.BaseEngine.CreateSession(
      artifact_filter_names=self._artifact_filters,
      command_line_arguments=self._command_line_arguments,
      debug_mode=self._debug_mode,
      filter_file_path=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year,
      text_prepend=self._text_prepend)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format, session, self._storage_file_path)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(self._storage_format))

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    if not self._process_archives or not is_archive:
      single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine(
        number_of_worker_processes=self._number_of_extraction_workers,
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

  # If the source is a storage media image or device, or directory
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  session.enabled_parser_names = (
      configuration.parser_filter_expression.split(','))
  session.parser_filter_expression = self._parser_filter_expression

  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  # TODO: set mount path in knowledge base with
  # extraction_engine.knowledge_base.SetMountPath()
  extraction_engine.knowledge_base.SetTextPrepend(self._text_prepend)

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  processing_status = None
  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer,
        self._resolver_context, configuration,
        status_update_callback=status_update_callback)

  else:
    logger.debug('Starting extraction in multi process mode.')

    # The following overrides are needed because pylint 2.6.0 gets
    # confused about which ProcessSources to check against.
    # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer, configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        status_update_callback=status_update_callback)

  self._status_view.PrintExtractionSummary(processing_status)
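
# Hedged sketch of the mode-selection rule in the function above: a lone
# file only benefits from multiple worker processes when it is an archive
# that will be expanded into many contained files. Names are illustrative.
def should_use_single_process(source_is_file, is_archive, process_archives):
  if not source_is_file:
    return False
  # A plain single file gains nothing from multiprocessing, and neither
  # does an archive that will not be expanded.
  return not process_archives or not is_archive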
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  Raises:
    BadConfigOption: if the storage file path is invalid.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  scan_context = self.ScanSource(self._source_path)
  self._source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, self._source_type, filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  session = engine.BaseEngine.CreateSession(
      command_line_arguments=self._command_line_arguments,
      debug_mode=self._debug_mode,
      filter_file=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  if self._storage_format == definitions.STORAGE_FORMAT_SQLITE:
    storage_writer = storage_sqlite_file.SQLiteStorageFileWriter(
        session, self._storage_file_path)
  else:
    storage_writer = storage_zip_file.ZIPStorageFileWriter(
        session, self._storage_file_path)

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine(
        use_zeromq=self._use_zeromq)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration()

  if not configuration.parser_filter_expression:
    operating_system = extraction_engine.knowledge_base.GetValue(
        'operating_system')
    operating_system_product = extraction_engine.knowledge_base.GetValue(
        'operating_system_product')
    operating_system_version = extraction_engine.knowledge_base.GetValue(
        'operating_system_version')
    parser_filter_expression = (
        parsers_manager.ParsersManager.GetPresetForOperatingSystem(
            operating_system, operating_system_product,
            operating_system_version))

    if parser_filter_expression:
      logging.info('Parser filter expression changed to: {0:s}'.format(
          parser_filter_expression))

    configuration.parser_filter_expression = parser_filter_expression

    names_generator = parsers_manager.ParsersManager.GetParserAndPluginNames(
        parser_filter_expression=parser_filter_expression)

    session.enabled_parser_names = list(names_generator)
    session.parser_filter_expression = parser_filter_expression

  # Note session.preferred_time_zone will default to UTC but
  # self._preferred_time_zone is None when not set.
  if self._preferred_time_zone:
    try:
      extraction_engine.knowledge_base.SetTimeZone(self._preferred_time_zone)
    except ValueError:
      # pylint: disable=protected-access
      logging.warning(
          'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
              self._preferred_time_zone,
              extraction_engine.knowledge_base._time_zone.zone))

  filter_find_specs = None
  if configuration.filter_file:
    environment_variables = (
        extraction_engine.knowledge_base.GetEnvironmentVariables())
    filter_file_object = filter_file.FilterFile(configuration.filter_file)
    filter_find_specs = filter_file_object.BuildFindSpecs(
        environment_variables=environment_variables)

  processing_status = None
  if single_process_mode:
    logging.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        self._source_path_specs, storage_writer, self._resolver_context,
        configuration, filter_find_specs=filter_find_specs,
        status_update_callback=status_update_callback)

  else:
    logging.debug('Starting extraction in multi process mode.')

    processing_status = extraction_engine.ProcessSources(
        session.identifier, self._source_path_specs, storage_writer,
        configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        filter_find_specs=filter_find_specs,
        number_of_worker_processes=self._number_of_extraction_workers,
        status_update_callback=status_update_callback,
        worker_memory_limit=self._worker_memory_limit)

  self._status_view.PrintExtractionSummary(processing_status)
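
# Hedged, self-contained sketch of the time zone fallback above, using
# pytz directly; the knowledge base's SetTimeZone wraps similar logic and
# raises ValueError for unsupported names.
import pytz


def resolve_time_zone(time_zone_name, default_time_zone=pytz.UTC):
  # Return the requested time zone, falling back to the default when the
  # name is not known to pytz.
  try:
    return pytz.timezone(time_zone_name)
  except pytz.UnknownTimeZoneError:
    return default_time_zone


resolve_time_zone('Europe/Amsterdam')  # a valid zone
resolve_time_zone('Mars/Olympus_Mons')  # falls back to UTC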
def ProcessSources(
    self, source_path_specs, source_type, command_line_arguments=None,
    enable_sigsegv_handler=False, filter_file=None,
    hasher_names_string=None, number_of_extraction_workers=0,
    preferred_encoding=u'utf-8', parser_filter_expression=None,
    single_process_mode=False, status_update_callback=None,
    timezone=pytz.UTC):
  """Processes the sources.

  Args:
    source_path_specs: list of path specifications (instances of
        dfvfs.PathSpec) to process.
    source_type: the dfVFS source type definition.
    command_line_arguments: optional string of the command line arguments
        or None if not set.
    enable_sigsegv_handler: optional boolean value to indicate the
        SIGSEGV handler should be enabled.
    filter_file: optional path to a file that contains find
        specifications.
    hasher_names_string: optional comma separated string of names of
        hashers to enable.
    number_of_extraction_workers: the number of extraction workers to
        run. If 0, the number will be selected automatically.
    preferred_encoding: optional preferred encoding.
    parser_filter_expression: optional string containing the parser
        filter expression, where None represents all parsers and plugins.
    single_process_mode: optional boolean value to indicate if the
        front-end should run in single process mode.
    status_update_callback: optional callback function for status updates.
    timezone: optional preferred timezone.

  Returns:
    The processing status (instance of ProcessingStatus) or None.

  Raises:
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  # If the source is a directory or a storage media image
  # run pre-processing.
  if source_type in [
      dfvfs_definitions.SOURCE_TYPE_DIRECTORY,
      dfvfs_definitions.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
      dfvfs_definitions.SOURCE_TYPE_STORAGE_MEDIA_IMAGE]:
    self.SetEnablePreprocessing(True)
  else:
    self.SetEnablePreprocessing(False)

  self._CheckStorageFile(self._storage_file_path)

  self._single_process_mode = single_process_mode
  if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    self._single_process_mode = True

  if self._single_process_mode:
    self._engine = single_process.SingleProcessEngine(self._queue_size)
  else:
    self._engine = multi_process.MultiProcessEngine(
        maximum_number_of_queued_items=self._queue_size,
        use_zeromq=self._use_zeromq)

  self._engine.SetEnableDebugOutput(self._debug_mode)
  self._engine.SetEnableProfiling(
      self._enable_profiling,
      profiling_sample_rate=self._profiling_sample_rate,
      profiling_type=self._profiling_type)

  pre_obj = self._PreprocessSources(source_path_specs, source_type)

  self._operating_system = getattr(pre_obj, u'guessed_os', None)

  if not parser_filter_expression:
    guessed_os = self._operating_system
    os_version = getattr(pre_obj, u'osversion', u'')
    parser_filter_expression = self._GetParserFilterPreset(
        os_guess=guessed_os, os_version=os_version)

    if parser_filter_expression:
      logging.info(
          u'Parser filter expression changed to: {0:s}'.format(
              parser_filter_expression))

  self._parser_names = []
  for _, parser_class in parsers_manager.ParsersManager.GetParsers(
      parser_filter_expression=parser_filter_expression):
    self._parser_names.append(parser_class.NAME)

  self._hasher_names = []
  hasher_manager = hashers_manager.HashersManager
  for hasher_name in hasher_manager.GetHasherNamesFromString(
      hasher_names_string=hasher_names_string):
    self._hasher_names.append(hasher_name)

  self._PreprocessSetTimezone(pre_obj, timezone=timezone)

  if filter_file:
    filter_find_specs = engine_utils.BuildFindSpecsFromFile(
        filter_file, pre_obj=pre_obj)
  else:
    filter_find_specs = None

  # TODO: deprecate the need for this function.
  self._PreprocessSetCollectionInformation(pre_obj)

  session_start = self._CreateSessionStart(
      command_line_arguments=command_line_arguments, filter_file=filter_file,
      parser_filter_expression=parser_filter_expression,
      preferred_encoding=preferred_encoding)

  storage_writer = storage_zip_file.ZIPStorageFileWriter(
      self._storage_file_path, pre_obj, buffer_size=self._buffer_size)
  storage_writer.SetEnableProfiling(
      self._enable_profiling, profiling_type=self._profiling_type)

  storage_writer.Open()
  storage_writer.WriteSessionStart(session_start)

  processing_status = None
  try:
    if self._single_process_mode:
      logging.debug(u'Starting extraction in single process mode.')

      processing_status = self._engine.ProcessSources(
          source_path_specs, storage_writer,
          filter_find_specs=filter_find_specs,
          filter_object=self._filter_object,
          hasher_names_string=hasher_names_string,
          mount_path=self._mount_path,
          parser_filter_expression=parser_filter_expression,
          process_archive_files=self._process_archive_files,
          resolver_context=self._resolver_context,
          status_update_callback=status_update_callback,
          text_prepend=self._text_prepend)

    else:
      logging.debug(u'Starting extraction in multi process mode.')

      # TODO: pass number_of_extraction_workers.
      processing_status = self._engine.ProcessSources(
          source_path_specs, storage_writer,
          enable_sigsegv_handler=enable_sigsegv_handler,
          filter_find_specs=filter_find_specs,
          filter_object=self._filter_object,
          hasher_names_string=hasher_names_string,
          mount_path=self._mount_path,
          number_of_extraction_workers=number_of_extraction_workers,
          parser_filter_expression=parser_filter_expression,
          process_archive_files=self._process_archive_files,
          status_update_callback=status_update_callback,
          show_memory_usage=self._show_worker_memory_information,
          text_prepend=self._text_prepend)

  except KeyboardInterrupt:
    self._CleanUpAfterAbort()
    raise errors.UserAbort

  # TODO: check if this still works and if still needed.
  except Exception as exception:  # pylint: disable=broad-except
    if not self._single_process_mode:
      raise

    # The tool should generally not be run in single process mode
    # for other reasons than to debug. Hence the general error
    # catching.
    logging.error(
        u'An uncaught exception occurred: {0:s}.\n{1:s}'.format(
            exception, traceback.format_exc()))
    if self._debug_mode:
      pdb.post_mortem()

  return processing_status
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  Raises:
    BadConfigOption: if the storage file path is invalid or the storage
        format not supported.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  scan_context = self.ScanSource(self._source_path)
  self._source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, self._source_type, filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  session = engine.BaseEngine.CreateSession(
      command_line_arguments=self._command_line_arguments,
      debug_mode=self._debug_mode,
      filter_file=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format, session, self._storage_file_path)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(self._storage_format))

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine(
        use_zeromq=self._use_zeromq)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  self._SetExtractionParsersAndPlugins(configuration, session)
  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  filter_find_specs = None
  if configuration.filter_file:
    environment_variables = (
        extraction_engine.knowledge_base.GetEnvironmentVariables())
    filter_file_object = filter_file.FilterFile(configuration.filter_file)
    filter_find_specs = filter_file_object.BuildFindSpecs(
        environment_variables=environment_variables)

  processing_status = None
  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        self._source_path_specs, storage_writer, self._resolver_context,
        configuration, filter_find_specs=filter_find_specs,
        status_update_callback=status_update_callback)

  else:
    logger.debug('Starting extraction in multi process mode.')

    processing_status = extraction_engine.ProcessSources(
        session.identifier, self._source_path_specs, storage_writer,
        configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        filter_find_specs=filter_find_specs,
        number_of_worker_processes=self._number_of_extraction_workers,
        status_update_callback=status_update_callback,
        worker_memory_limit=self._worker_memory_limit)

  self._status_view.PrintExtractionSummary(processing_status)
def _StartSingleThread(self, options):
  """Starts everything up in a single process.

  This should not normally be used, since running the tool in a single
  process buffers everything in memory until the storage is called.

  To make it clear: this starts up the collection and completes it before
  calling the worker that extracts all EventObjects and stores them in
  memory. When that is done, the storage function is called to drain the
  buffer. Hence the tool's excessive use of memory in this mode, and the
  reason it is not recommended except for debugging (mostly to get into
  the debugger).

  This is therefore mostly useful during debugging sessions for some
  limited parsing.

  Args:
    options: the command line arguments (instance of argparse.Namespace).
  """
  self._engine = single_process.SingleProcessEngine(self._queue_size)
  self._engine.SetEnableDebugOutput(self._debug_mode)
  self._engine.SetEnableProfiling(
      self._enable_profiling,
      profiling_sample_rate=self._profiling_sample_rate)
  self._engine.SetProcessArchiveFiles(self._process_archive_files)

  if self._filter_object:
    self._engine.SetFilterObject(self._filter_object)

  if self._mount_path:
    self._engine.SetMountPath(self._mount_path)

  if self._text_prepend:
    self._engine.SetTextPrepend(self._text_prepend)

  # TODO: add support to handle multiple partitions.
  self._engine.SetSource(
      self.GetSourcePathSpec(), resolver_context=self._resolver_context)

  logging.debug(u'Starting preprocessing.')
  pre_obj = self.PreprocessSource(options)
  logging.debug(u'Preprocessing done.')

  # TODO: make sure parsers option is not set by preprocessing.
  parser_filter_string = getattr(options, 'parsers', '')

  self._parser_names = []
  for _, parser_class in parsers_manager.ParsersManager.GetParsers(
      parser_filter_string=parser_filter_string):
    self._parser_names.append(parser_class.NAME)

  self._PreprocessSetCollectionInformation(options, pre_obj)

  if 'filestat' in self._parser_names:
    include_directory_stat = True
  else:
    include_directory_stat = False

  filter_file = getattr(options, 'file_filter', None)
  if filter_file:
    filter_find_specs = engine_utils.BuildFindSpecsFromFile(
        filter_file, pre_obj=pre_obj)
  else:
    filter_find_specs = None

  self._collector = self._engine.CreateCollector(
      include_directory_stat, vss_stores=self._vss_stores,
      filter_find_specs=filter_find_specs,
      resolver_context=self._resolver_context)

  self._DebugPrintCollector(options)

  if self._output_module:
    storage_writer = storage.BypassStorageWriter(
        self._engine.storage_queue, self._storage_file_path,
        output_module_string=self._output_module, pre_obj=pre_obj)
  else:
    storage_writer = storage.StorageFileWriter(
        self._engine.storage_queue, self._storage_file_path,
        buffer_size=self._buffer_size, pre_obj=pre_obj,
        serializer_format=self._storage_serializer_format)

  hasher_names_string = getattr(options, u'hashers', u'')

  try:
    self._engine.ProcessSource(
        self._collector, storage_writer,
        parser_filter_string=parser_filter_string,
        hasher_names_string=hasher_names_string)
  except KeyboardInterrupt:
    self._CleanUpAfterAbort()
    raise errors.UserAbort(u'Process source aborted.')
  finally:
    self._resolver_context.Empty()