def testProcessSources(self):
  """Tests the PreprocessSources and ProcessSources function."""
  # Cap the number of concurrent tasks so the test stays bounded.
  engine_object = task_engine.TaskMultiProcessEngine(
      maximum_number_of_tasks=100)

  # Build a TSK path specification on top of the test storage media image.
  image_path = self._GetTestFilePath([u'ímynd.dd'])
  parent_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=image_path)
  tsk_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
      parent=parent_path_spec)

  engine_object.PreprocessSources([tsk_path_spec])

  test_session = sessions.Session()

  # Write extraction results to a storage file in a temporary directory.
  with shared_test_lib.TempDirectory() as temp_directory:
    storage_file_path = os.path.join(temp_directory, u'storage.plaso')
    writer = storage_zip_file.ZIPStorageFileWriter(
        test_session, storage_file_path)

    engine_object.ProcessSources(
        test_session.identifier, [tsk_path_spec], writer,
        parser_filter_expression=u'filestat')
def testProcessSources(self):
  """Tests the PreprocessSources and ProcessSources function."""
  # Load the artifact definitions needed by preprocessing.
  artifact_registry = artifacts_registry.ArtifactDefinitionsRegistry()
  yaml_reader = artifacts_reader.YamlArtifactsReader()
  definitions_path = shared_test_lib.GetTestFilePath(['artifacts'])
  artifact_registry.ReadFromDirectory(yaml_reader, definitions_path)

  # Cap the number of concurrent tasks so the test stays bounded.
  engine_object = task_engine.TaskMultiProcessEngine(
      maximum_number_of_tasks=100)

  # Build a TSK path specification on top of the test storage media image.
  image_path = self._GetTestFilePath(['ímynd.dd'])
  parent_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=image_path)
  tsk_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location='/',
      parent=parent_path_spec)

  engine_object.PreprocessSources(artifact_registry, [tsk_path_spec])

  test_session = sessions.Session()

  processing_configuration = configurations.ProcessingConfiguration()
  processing_configuration.parser_filter_expression = 'filestat'

  # Write extraction results to a storage file in a temporary directory.
  with shared_test_lib.TempDirectory() as temp_directory:
    storage_file_path = os.path.join(temp_directory, 'storage.plaso')
    writer = storage_zip_file.ZIPStorageFileWriter(
        test_session, storage_file_path)

    engine_object.ProcessSources(
        test_session.identifier, [tsk_path_spec], writer,
        processing_configuration)
def _CreateEngine(self, single_process_mode):
  """Creates an engine based on the front end settings.

  Args:
    single_process_mode (bool): True if the front-end should run in single
        process mode.

  Returns:
    BaseEngine: engine.
  """
  # Debug and profiling settings are common to both engine types.
  engine_options = {
      'debug_output': self._debug_mode,
      'enable_profiling': self._enable_profiling,
      'profiling_directory': self._profiling_directory,
      'profiling_sample_rate': self._profiling_sample_rate,
      'profiling_type': self._profiling_type}

  if single_process_mode:
    return single_process.SingleProcessEngine(**engine_options)

  # The multi-process engine additionally supports ZeroMQ-based queues.
  return multi_process_engine.TaskMultiProcessEngine(
      use_zeromq=self._use_zeromq, **engine_options)
def _CreateEngine(self, single_process_mode):
  """Creates an engine based on the front end settings.

  Args:
    single_process_mode (bool): True if the front-end should run in single
        process mode.

  Returns:
    BaseEngine: engine.
  """
  if single_process_mode:
    return single_process.SingleProcessEngine()

  # The multi-process engine additionally supports ZeroMQ-based queues.
  return multi_process_engine.TaskMultiProcessEngine(
      use_zeromq=self._use_zeromq)
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  Raises:
    BadConfigOption: if the storage file path is invalid or the storage
        format not supported.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  # Determine the source type (file, directory, storage media image, ...)
  # before deciding how to process it.
  scan_context = self.ScanSource(self._source_path)
  self._source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, self._source_type, filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  # The session records the command line options used for this extraction.
  session = engine.BaseEngine.CreateSession(
      command_line_arguments=self._command_line_arguments,
      debug_mode=self._debug_mode,
      filter_file=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format, session, self._storage_file_path)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(self._storage_format))

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine(
        use_zeromq=self._use_zeromq)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  self._SetExtractionParsersAndPlugins(configuration, session)
  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  # When a filter file was provided, translate it into dfVFS find
  # specifications that restrict which files are collected.
  filter_find_specs = None
  if configuration.filter_file:
    environment_variables = (
        extraction_engine.knowledge_base.GetEnvironmentVariables())
    filter_file_object = filter_file.FilterFile(configuration.filter_file)
    filter_find_specs = filter_file_object.BuildFindSpecs(
        environment_variables=environment_variables)

  processing_status = None
  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        self._source_path_specs, storage_writer, self._resolver_context,
        configuration, filter_find_specs=filter_find_specs,
        status_update_callback=status_update_callback)

  else:
    logger.debug('Starting extraction in multi process mode.')

    # NOTE(review): the multi-process ProcessSources takes the session
    # identifier and worker tuning options instead of a resolver context.
    processing_status = extraction_engine.ProcessSources(
        session.identifier, self._source_path_specs, storage_writer,
        configuration, enable_sigsegv_handler=self._enable_sigsegv_handler,
        filter_find_specs=filter_find_specs,
        number_of_worker_processes=self._number_of_extraction_workers,
        status_update_callback=status_update_callback,
        worker_memory_limit=self._worker_memory_limit)

  self._status_view.PrintExtractionSummary(processing_status)
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  Raises:
    BadConfigOption: if the storage file path is invalid or the storage
        format not supported or an invalid collection filter was specified.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  scan_context = self.ScanSource(self._source_path)
  self._source_type = scan_context.source_type

  # A single file source that is an archive is reclassified so it can be
  # processed as an archive below.
  is_archive = False
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    is_archive = self._IsArchiveFile(self._source_path_specs[0])
    if is_archive:
      self._source_type = definitions.SOURCE_TYPE_ARCHIVE

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, self._source_type,
      artifact_filters=self._artifact_filters,
      filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  # The session records the command line options used for this extraction.
  session = engine.BaseEngine.CreateSession(
      artifact_filter_names=self._artifact_filters,
      command_line_arguments=self._command_line_arguments,
      debug_mode=self._debug_mode,
      filter_file_path=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year,
      text_prepend=self._text_prepend)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format, session, self._storage_file_path)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(
            self._storage_format))

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # Only use multiple processes for a single file source when it is an
    # archive and archive processing was requested.
    if not self._process_archives or not is_archive:
      single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine(
        number_of_worker_processes=self._number_of_extraction_workers,
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

  # If the source is a storage media image or device, or directory
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  session.enabled_parser_names = (
      configuration.parser_filter_expression.split(','))
  session.parser_filter_expression = self._parser_filter_expression

  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  # TODO: set mount path in knowledge base with
  # extraction_engine.knowledge_base.SetMountPath()
  extraction_engine.knowledge_base.SetTextPrepend(self._text_prepend)

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  processing_status = None
  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer,
        self._resolver_context, configuration,
        status_update_callback=status_update_callback)

  else:
    logger.debug('Starting extraction in multi process mode.')

    # The following overrides are needed because pylint 2.6.0 gets confused
    # about which ProcessSources to check against.
    # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer, configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        status_update_callback=status_update_callback)

  self._status_view.PrintExtractionSummary(processing_status)
def ExtractEventsFromSources(self):
  """Processes the sources and extract events.

  This is a stripped down copy of tools/log2timeline.py that doesn't support
  the full set of flags. The defaults for these are hard coded in the
  constructor of this class.

  Raises:
    BadConfigOption: if the storage file path is invalid or the storage
        format not supported or an invalid collection filter was specified.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  # Determine the source type (file, directory, storage media image, ...)
  # before deciding how to process it.
  scan_context = self.ScanSource(self._source_path)
  source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, source_type,
      artifact_filters=self._artifact_filters,
      filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  # The session records the command line options used for this extraction.
  session = engine.BaseEngine.CreateSession(
      artifact_filter_names=self._artifact_filters,
      command_line_arguments=self._command_line_arguments,
      filter_file_path=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format, session, self._storage_file_path)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(
            self._storage_format))

  single_process_mode = self._single_process_mode
  if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine()

  # If the source is a directory or a storage media image
  # run pre-processing.
  if source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  self._SetExtractionParsersAndPlugins(configuration, session)
  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  processing_status = None
  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        self._source_path_specs, storage_writer, self._resolver_context,
        configuration,
        status_update_callback=status_update_callback)

  else:
    logger.debug('Starting extraction in multi process mode.')

    # NOTE(review): the multi-process ProcessSources takes the session
    # identifier and worker count instead of a resolver context.
    processing_status = extraction_engine.ProcessSources(
        session.identifier, self._source_path_specs, storage_writer,
        configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        number_of_worker_processes=self._number_of_extraction_workers,
        status_update_callback=status_update_callback)

  self._status_view.PrintExtractionSummary(processing_status)
def ExtractEventsFromSources(self):
  """Processes the sources and extract events.

  This is a stripped down copy of tools/log2timeline.py that doesn't support
  the full set of flags. The defaults for these are hard coded in the
  constructor of this class.

  Raises:
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path)

  # Determine the source type (file, directory, storage media image, ...)
  # before deciding how to process it.
  scan_context = self.ScanSource()
  source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, source_type, filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write(u'\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write(u'Processing started.\n')

  # The session records the command line options used for this extraction.
  session = engine.BaseEngine.CreateSession(
      command_line_arguments=self._command_line_arguments,
      filter_file=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  storage_writer = storage_zip_file.ZIPStorageFileWriter(
      session, self._storage_file_path)

  configuration = self._CreateProcessingConfiguration()

  single_process_mode = self._single_process_mode
  if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine(
        use_zeromq=self._use_zeromq)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if (self._force_preprocessing or
      source_type in self._SOURCE_TYPES_TO_PREPROCESS):
    self._PreprocessSources(extraction_engine)

  # When no parser filter expression was configured, derive one from the
  # operating system information collected during pre-processing.
  if not configuration.parser_filter_expression:
    operating_system = extraction_engine.knowledge_base.GetValue(
        u'operating_system')
    operating_system_product = extraction_engine.knowledge_base.GetValue(
        u'operating_system_product')
    operating_system_version = extraction_engine.knowledge_base.GetValue(
        u'operating_system_version')
    parser_filter_expression = (
        self._parsers_manager.GetPresetForOperatingSystem(
            operating_system, operating_system_product,
            operating_system_version))

    if parser_filter_expression:
      logging.info(
          u'Parser filter expression changed to: {0:s}'.format(
              parser_filter_expression))

    configuration.parser_filter_expression = parser_filter_expression
    session.enabled_parser_names = list(
        self._parsers_manager.GetParserAndPluginNames(
            parser_filter_expression=configuration.parser_filter_expression))
    session.parser_filter_expression = configuration.parser_filter_expression

  if session.preferred_time_zone:
    try:
      extraction_engine.knowledge_base.SetTimeZone(
          session.preferred_time_zone)
    except ValueError:
      # pylint: disable=protected-access
      logging.warning(
          u'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
              session.preferred_time_zone,
              extraction_engine.knowledge_base._time_zone.zone))

  # When a filter file was provided, translate it into dfVFS find
  # specifications that restrict which files are collected.
  filter_find_specs = None
  if configuration.filter_file:
    environment_variables = (
        extraction_engine.knowledge_base.GetEnvironmentVariables())
    filter_find_specs = frontend_utils.BuildFindSpecsFromFile(
        configuration.filter_file,
        environment_variables=environment_variables)

  processing_status = None
  if single_process_mode:
    logging.debug(u'Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        self._source_path_specs, storage_writer, self._resolver_context,
        configuration, filter_find_specs=filter_find_specs,
        status_update_callback=status_update_callback)

  else:
    logging.debug(u'Starting extraction in multi process mode.')

    # NOTE(review): the multi-process ProcessSources takes the session
    # identifier and worker count instead of a resolver context.
    processing_status = extraction_engine.ProcessSources(
        session.identifier, self._source_path_specs, storage_writer,
        configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        filter_find_specs=filter_find_specs,
        number_of_worker_processes=self._number_of_extraction_workers,
        status_update_callback=status_update_callback)

  self._status_view.PrintExtractionSummary(processing_status)
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  Raises:
    BadConfigOption: if the storage file path is invalid.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  # Determine the source type (file, directory, storage media image, ...)
  # before deciding how to process it.
  scan_context = self.ScanSource(self._source_path)
  self._source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, self._source_type, filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  # The session records the command line options used for this extraction.
  session = engine.BaseEngine.CreateSession(
      command_line_arguments=self._command_line_arguments,
      debug_mode=self._debug_mode,
      filter_file=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  if self._storage_format == definitions.STORAGE_FORMAT_SQLITE:
    storage_writer = storage_sqlite_file.SQLiteStorageFileWriter(
        session, self._storage_file_path)
  else:
    storage_writer = storage_zip_file.ZIPStorageFileWriter(
        session, self._storage_file_path)

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine(
        use_zeromq=self._use_zeromq)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration()

  # When no parser filter expression was configured, derive one from the
  # operating system information collected during pre-processing.
  if not configuration.parser_filter_expression:
    operating_system = extraction_engine.knowledge_base.GetValue(
        'operating_system')
    operating_system_product = extraction_engine.knowledge_base.GetValue(
        'operating_system_product')
    operating_system_version = extraction_engine.knowledge_base.GetValue(
        'operating_system_version')
    parser_filter_expression = (
        parsers_manager.ParsersManager.GetPresetForOperatingSystem(
            operating_system, operating_system_product,
            operating_system_version))

    if parser_filter_expression:
      logging.info('Parser filter expression changed to: {0:s}'.format(
          parser_filter_expression))
      configuration.parser_filter_expression = parser_filter_expression

  # Read the expression back from the configuration instead of the
  # branch-local parser_filter_expression, which is unbound when the
  # expression was already set by the user (previously a NameError).
  names_generator = parsers_manager.ParsersManager.GetParserAndPluginNames(
      parser_filter_expression=configuration.parser_filter_expression)

  session.enabled_parser_names = list(names_generator)
  session.parser_filter_expression = configuration.parser_filter_expression

  # Note session.preferred_time_zone will default to UTC but
  # self._preferred_time_zone is None when not set.
  if self._preferred_time_zone:
    try:
      extraction_engine.knowledge_base.SetTimeZone(self._preferred_time_zone)
    except ValueError:
      # pylint: disable=protected-access
      logging.warning(
          'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
              self._preferred_time_zone,
              extraction_engine.knowledge_base._time_zone.zone))

  # When a filter file was provided, translate it into dfVFS find
  # specifications that restrict which files are collected.
  filter_find_specs = None
  if configuration.filter_file:
    environment_variables = (
        extraction_engine.knowledge_base.GetEnvironmentVariables())
    filter_file_object = filter_file.FilterFile(configuration.filter_file)
    filter_find_specs = filter_file_object.BuildFindSpecs(
        environment_variables=environment_variables)

  processing_status = None
  if single_process_mode:
    logging.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        self._source_path_specs, storage_writer, self._resolver_context,
        configuration, filter_find_specs=filter_find_specs,
        status_update_callback=status_update_callback)

  else:
    logging.debug('Starting extraction in multi process mode.')

    processing_status = extraction_engine.ProcessSources(
        session.identifier, self._source_path_specs, storage_writer,
        configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        filter_find_specs=filter_find_specs,
        number_of_worker_processes=self._number_of_extraction_workers,
        status_update_callback=status_update_callback,
        worker_memory_limit=self._worker_memory_limit)

  self._status_view.PrintExtractionSummary(processing_status)