def testPenguinPipelineLocalWithTuner(self):
    """Runs the penguin pipeline with tuning enabled and checks MLMD state."""
    pipeline = penguin_pipeline_local._create_pipeline(
        pipeline_name=self._pipeline_name,
        data_root=self._data_root,
        module_file=self._module_file,
        accuracy_threshold=0.1,
        serving_model_dir=self._serving_model_dir,
        pipeline_root=self._pipeline_root,
        metadata_path=self._metadata_path,
        enable_tuning=True,
        examplegen_input_config=None,
        examplegen_range_config=None,
        resolver_range_config=None,
        beam_pipeline_args=[])
    LocalDagRunner().run(pipeline)

    # The run should have produced a served model and an MLMD database.
    self.assertTrue(fileio.exists(self._serving_model_dir))
    self.assertTrue(fileio.exists(self._metadata_path))

    expected_execution_count = 10  # 9 components + 1 resolver
    metadata_config = metadata.sqlite_metadata_connection_config(
        self._metadata_path)
    with metadata.Metadata(metadata_config) as metadata_handler:
        num_artifacts = len(metadata_handler.store.get_artifacts())
        num_executions = len(metadata_handler.store.get_executions())
        # Every execution emits at least one artifact.
        self.assertGreaterEqual(num_artifacts, num_executions)
        self.assertEqual(expected_execution_count, num_executions)

    self.assertPipelineExecution(True)
def testPenguinPipelineLocalWithTuner(self):
    """Runs the local penguin pipeline with tuning and verifies MLMD counts."""
    # TODO(b/180723394): Parameterize this test when Flax supports tuning.
    module_file = self._module_file_name('keras')
    pipeline = penguin_pipeline_local._create_pipeline(
        pipeline_name=self._pipeline_name,
        data_root=self._data_root,
        module_file=module_file,
        accuracy_threshold=0.1,
        serving_model_dir=self._serving_model_dir,
        pipeline_root=self._pipeline_root,
        metadata_path=self._metadata_path,
        user_provided_schema_path=None,
        enable_tuning=True,
        enable_bulk_inferrer=False,
        examplegen_input_config=None,
        examplegen_range_config=None,
        resolver_range_config=None,
        beam_pipeline_args=self._make_beam_pipeline_args())
    LocalDagRunner().run(pipeline)

    # The run should have produced a served model and an MLMD database.
    self.assertTrue(fileio.exists(self._serving_model_dir))
    self.assertTrue(fileio.exists(self._metadata_path))

    expected_execution_count = 10  # 9 components + 1 resolver
    metadata_config = metadata.sqlite_metadata_connection_config(
        self._metadata_path)
    store = mlmd.MetadataStore(metadata_config)
    num_artifacts = len(store.get_artifacts())
    num_executions = len(store.get_executions())
    # Every execution emits at least one artifact.
    self.assertGreaterEqual(num_artifacts, num_executions)
    self.assertEqual(expected_execution_count, num_executions)

    self._assertPipelineExecution(has_tuner=True)
def testPenguinPipelineLocalWithImporter(self, model_framework):
    """Runs the pipeline with a user-provided schema (importer instead of SchemaGen)."""
    module_file = self._module_file_name(model_framework)
    pipeline = penguin_pipeline_local._create_pipeline(
        pipeline_name=self._pipeline_name,
        data_root=self._data_root,
        module_file=module_file,
        accuracy_threshold=0.1,
        serving_model_dir=self._serving_model_dir,
        pipeline_root=self._pipeline_root,
        metadata_path=self._metadata_path,
        user_provided_schema_path=self._schema_path,
        enable_tuning=False,
        enable_bulk_inferrer=False,
        examplegen_input_config=None,
        examplegen_range_config=None,
        resolver_range_config=None,
        beam_pipeline_args=[])
    LocalDagRunner().run(pipeline)

    # The run should have produced a served model and an MLMD database.
    self.assertTrue(fileio.exists(self._serving_model_dir))
    self.assertTrue(fileio.exists(self._metadata_path))

    expected_execution_count = 9  # 7 components + 1 resolver + 1 importer
    metadata_config = metadata.sqlite_metadata_connection_config(
        self._metadata_path)
    store = mlmd.MetadataStore(metadata_config)
    num_artifacts = len(store.get_artifacts())
    num_executions = len(store.get_executions())
    # Every execution emits at least one artifact.
    self.assertGreaterEqual(num_artifacts, num_executions)
    self.assertEqual(expected_execution_count, num_executions)

    self._assertPipelineExecution(has_schema_gen=False)
def testPenguinPipelineLocal(self, model_framework):
    """Runs the pipeline three times and verifies component result caching."""
    if model_framework == 'tfdf_experimental':
        # Skip if TFDF is not available or incompatible.
        try:
            importlib.import_module('tensorflow_decision_forests')
        except (ImportError, tf.errors.NotFoundError):
            self.skipTest('TensorflowDecisionForests is not available')
    module_file = self._module_file_name(model_framework)
    pipeline = penguin_pipeline_local._create_pipeline(
        pipeline_name=self._pipeline_name,
        data_root=self._data_root,
        module_file=module_file,
        accuracy_threshold=0.1,
        serving_model_dir=self._serving_model_dir,
        pipeline_root=self._pipeline_root,
        metadata_path=self._metadata_path,
        user_provided_schema_path=None,
        enable_tuning=False,
        enable_bulk_inferrer=False,
        examplegen_input_config=None,
        examplegen_range_config=None,
        resolver_range_config=None,
        beam_pipeline_args=self._make_beam_pipeline_args(),
        enable_transform_input_cache=False)

    logging.info('Starting the first pipeline run.')
    LocalDagRunner().run(pipeline)

    # The run should have produced a served model and an MLMD database.
    self.assertTrue(fileio.exists(self._serving_model_dir))
    self.assertTrue(fileio.exists(self._metadata_path))

    expected_execution_count = 8  # 7 components + 1 resolver
    metadata_config = metadata.sqlite_metadata_connection_config(
        self._metadata_path)
    store = mlmd.MetadataStore(metadata_config)
    num_artifacts = len(store.get_artifacts())
    num_executions = len(store.get_executions())
    # Every execution emits at least one artifact.
    self.assertGreaterEqual(num_artifacts, num_executions)
    self.assertEqual(expected_execution_count, num_executions)

    self._assertPipelineExecution()

    logging.info('Starting the second pipeline run. All components except '
                 'Evaluator and Pusher will use cached results.')
    LocalDagRunner().run(pipeline)

    # Artifact count is increased by 3 caused by Evaluator and Pusher.
    self.assertLen(store.get_artifacts(), num_artifacts + 3)
    num_artifacts = len(store.get_artifacts())
    self.assertLen(store.get_executions(), expected_execution_count * 2)

    logging.info('Starting the third pipeline run. '
                 'All components will use cached results.')
    LocalDagRunner().run(pipeline)

    # Asserts cache execution: artifact count is unchanged.
    self.assertLen(store.get_artifacts(), num_artifacts)
    self.assertLen(store.get_executions(), expected_execution_count * 3)
def testPenguinPipelineLocal(self, model_framework):
    """Runs the pipeline three times and verifies component result caching."""
    module_file = self._module_file_name(model_framework)
    pipeline = penguin_pipeline_local._create_pipeline(
        pipeline_name=self._pipeline_name,
        data_root=self._data_root,
        module_file=module_file,
        accuracy_threshold=0.1,
        serving_model_dir=self._serving_model_dir,
        pipeline_root=self._pipeline_root,
        metadata_path=self._metadata_path,
        enable_tuning=False,
        examplegen_input_config=None,
        examplegen_range_config=None,
        resolver_range_config=None,
        beam_pipeline_args=[])

    logging.info('Starting the first pipeline run.')
    LocalDagRunner().run(pipeline)

    # The run should have produced a served model and an MLMD database.
    self.assertTrue(fileio.exists(self._serving_model_dir))
    self.assertTrue(fileio.exists(self._metadata_path))

    expected_execution_count = 9  # 8 components + 1 resolver
    metadata_config = metadata.sqlite_metadata_connection_config(
        self._metadata_path)
    with metadata.Metadata(metadata_config) as metadata_handler:
        num_artifacts = len(metadata_handler.store.get_artifacts())
        num_executions = len(metadata_handler.store.get_executions())
        # Every execution emits at least one artifact.
        self.assertGreaterEqual(num_artifacts, num_executions)
        self.assertEqual(expected_execution_count, num_executions)

    self._assertPipelineExecution(False)

    logging.info('Starting the second pipeline run. All components except '
                 'Evaluator and Pusher will use cached results.')
    LocalDagRunner().run(pipeline)

    with metadata.Metadata(metadata_config) as metadata_handler:
        # Artifact count is increased by 3 caused by Evaluator and Pusher.
        self.assertLen(metadata_handler.store.get_artifacts(),
                       num_artifacts + 3)
        num_artifacts = len(metadata_handler.store.get_artifacts())
        self.assertLen(metadata_handler.store.get_executions(),
                       expected_execution_count * 2)

    logging.info('Starting the third pipeline run. '
                 'All components will use cached results.')
    LocalDagRunner().run(pipeline)

    # Asserts cache execution.
    with metadata.Metadata(metadata_config) as metadata_handler:
        # Artifact count is unchanged.
        self.assertLen(metadata_handler.store.get_artifacts(), num_artifacts)
        self.assertLen(metadata_handler.store.get_executions(),
                       expected_execution_count * 3)
def testPenguinPipelineLocalConditionalWithoutPusher(self):
    """Verifies that Pusher is skipped when the model fails evaluation."""
    module_file = self._module_file_name('keras')
    pipeline = penguin_pipeline_local._create_pipeline(
        pipeline_name=self._pipeline_name,
        data_root=self._data_root,
        module_file=module_file,
        accuracy_threshold=1.0,  # Model evaluation will fail with 1.0 threshold
        serving_model_dir=self._serving_model_dir,
        pipeline_root=self._pipeline_root,
        metadata_path=self._metadata_path,
        user_provided_schema_path=None,
        enable_tuning=False,
        enable_bulk_inferrer=False,
        examplegen_input_config=None,
        examplegen_range_config=None,
        resolver_range_config=None,
        beam_pipeline_args=self._make_beam_pipeline_args(),
        enable_transform_input_cache=False)

    logging.info('Starting the first pipeline run.')
    LocalDagRunner().run(pipeline)

    self.assertTrue(fileio.exists(self._metadata_path))

    expected_execution_count = 7  # Without pusher because evaluation fails
    metadata_config = metadata.sqlite_metadata_connection_config(
        self._metadata_path)
    store = mlmd.MetadataStore(metadata_config)
    num_artifacts = len(store.get_artifacts())
    num_executions = len(store.get_executions())
    # Every execution emits at least one artifact.
    self.assertGreaterEqual(num_artifacts, num_executions)
    self.assertEqual(expected_execution_count, num_executions)

    self._assertPipelineExecution(has_pusher=False)

    logging.info('Starting the second pipeline run. All components except '
                 'Evaluator will use cached results. Pusher will not run.')
    LocalDagRunner().run(pipeline)

    # Artifact count stays the same, because no new blessed model,
    # hence no new evaluation and no new pushed model.
    self.assertLen(store.get_artifacts(), num_artifacts)
    self.assertLen(store.get_executions(), expected_execution_count * 2)

    logging.info('Starting the third pipeline run. '
                 'All components will use cached results.')
    LocalDagRunner().run(pipeline)

    # Asserts cache execution: artifact count is unchanged.
    self.assertLen(store.get_artifacts(), num_artifacts)
    self.assertLen(store.get_executions(), expected_execution_count * 3)
def run_pipeline(examplegen_range_config):
    """Runs the pipeline once with the given ExampleGen range config.

    Closes over `self`, `examplegen_input_config` and `resolver_range_config`
    from the enclosing test scope.
    """
    pipeline = penguin_pipeline_local._create_pipeline(
        pipeline_name=self._pipeline_name,
        data_root=self._data_root_span,
        module_file=self._module_file,
        accuracy_threshold=0.1,
        serving_model_dir=self._serving_model_dir,
        pipeline_root=self._pipeline_root,
        metadata_path=self._metadata_path,
        enable_tuning=False,
        examplegen_input_config=examplegen_input_config,
        examplegen_range_config=examplegen_range_config,
        resolver_range_config=resolver_range_config,
        beam_pipeline_args=[])
    LocalDagRunner().run(pipeline)
def run_pipeline(examplegen_range_config):
    """Runs the pipeline once with the given ExampleGen range config.

    Closes over `self`, `module_file`, `examplegen_input_config` and
    `resolver_range_config` from the enclosing test scope.
    """
    pipeline = penguin_pipeline_local._create_pipeline(
        pipeline_name=self._pipeline_name,
        data_root=self._data_root_span,
        module_file=module_file,
        accuracy_threshold=0.1,
        serving_model_dir=self._serving_model_dir,
        pipeline_root=self._pipeline_root,
        metadata_path=self._metadata_path,
        user_provided_schema_path=None,
        enable_tuning=False,
        enable_bulk_inferrer=False,
        examplegen_input_config=examplegen_input_config,
        examplegen_range_config=examplegen_range_config,
        resolver_range_config=resolver_range_config,
        beam_pipeline_args=self._make_beam_pipeline_args(),
        enable_transform_input_cache=True)
    LocalDagRunner().run(pipeline)
def testPenguinPipelineLocalWithBulkInferrer(self, model_framework):
    """Runs the pipeline with BulkInferrer enabled and checks MLMD state."""
    if model_framework == 'tfdf_experimental':
        # Skip if TFDF is not available or incompatible.
        try:
            importlib.import_module('tensorflow_decision_forests')
        except (ImportError, tf.errors.NotFoundError):
            self.skipTest('TensorflowDecisionForests is not available')
    module_file = self._module_file_name(model_framework)
    pipeline = penguin_pipeline_local._create_pipeline(
        pipeline_name=self._pipeline_name,
        data_root=self._data_root,
        module_file=module_file,
        accuracy_threshold=0.1,
        serving_model_dir=self._serving_model_dir,
        pipeline_root=self._pipeline_root,
        metadata_path=self._metadata_path,
        user_provided_schema_path=None,
        enable_tuning=False,
        enable_bulk_inferrer=True,
        examplegen_input_config=None,
        examplegen_range_config=None,
        resolver_range_config=None,
        beam_pipeline_args=[],
        enable_transform_input_cache=False)
    LocalDagRunner().run(pipeline)

    # The run should have produced a served model and an MLMD database.
    self.assertTrue(fileio.exists(self._serving_model_dir))
    self.assertTrue(fileio.exists(self._metadata_path))

    expected_execution_count = 10  # 9 components + 1 resolver
    metadata_config = metadata.sqlite_metadata_connection_config(
        self._metadata_path)
    store = mlmd.MetadataStore(metadata_config)
    num_artifacts = len(store.get_artifacts())
    num_executions = len(store.get_executions())
    # Every execution emits at least one artifact.
    self.assertGreaterEqual(num_artifacts, num_executions)
    self.assertEqual(expected_execution_count, num_executions)

    self._assertPipelineExecution(has_bulk_inferrer=True)