def testPenguinPipelineLocal(self, make_warmup):
  """Runs the infra-validated penguin pipeline three times and checks caching.

  Run 1 executes every component; run 2 re-executes only Evaluator and
  Pusher (artifact count grows by 3); run 3 is fully cached (artifact
  count unchanged). Execution count grows by the full component count on
  every run because cached executions are still recorded in MLMD.

  Args:
    make_warmup: Forwarded to `_create_pipeline`; whether the exported
      model should include a warmup request.
  """

  def _run_pipeline():
    """Builds and runs one identical copy of the pipeline.

    The construction arguments must be identical across runs so the
    second and third runs can hit the MLMD cache.
    """
    LocalDagRunner().run(
        penguin_pipeline_local_infraval._create_pipeline(
            pipeline_name=self._pipeline_name,
            data_root=self._data_root,
            module_file=self._module_file,
            accuracy_threshold=0.1,
            serving_model_dir=self._serving_model_dir,
            pipeline_root=self._pipeline_root,
            metadata_path=self._metadata_path,
            user_provided_schema_path=self._schema_path,
            beam_pipeline_args=[],
            make_warmup=make_warmup))

  # First run: every component executes.
  _run_pipeline()

  self.assertTrue(fileio.exists(self._serving_model_dir))
  self.assertTrue(fileio.exists(self._metadata_path))
  expected_execution_count = 10  # 9 components + 1 resolver

  metadata_config = metadata.sqlite_metadata_connection_config(
      self._metadata_path)
  with metadata.Metadata(metadata_config) as m:
    artifact_count = len(m.store.get_artifacts())
    execution_count = len(m.store.get_executions())
    # Each execution produces at least one artifact.
    self.assertGreaterEqual(artifact_count, execution_count)
    self.assertEqual(expected_execution_count, execution_count)

  self._assertPipelineExecution()
  self._assertInfraValidatorPassed()

  # Runs pipeline the second time.
  _run_pipeline()

  # All executions but Evaluator and Pusher are cached.
  with metadata.Metadata(metadata_config) as m:
    # Artifact count is increased by 3 caused by Evaluator and Pusher.
    self.assertLen(m.store.get_artifacts(), artifact_count + 3)
    artifact_count = len(m.store.get_artifacts())
    self.assertLen(m.store.get_executions(), expected_execution_count * 2)

  # Runs pipeline the third time.
  _run_pipeline()

  # Asserts cache execution.
  with metadata.Metadata(metadata_config) as m:
    # Artifact count is unchanged.
    self.assertLen(m.store.get_artifacts(), artifact_count)
    self.assertLen(m.store.get_executions(), expected_execution_count * 3)
# Tail of _create_pipeline(): these are keyword arguments of a pipeline
# constructor call whose opening is outside this view — TODO confirm the
# enclosing `return pipeline.Pipeline(` against the full file.
components=[
    example_gen,
    statistics_gen,
    schema_gen,
    example_validator,
    transform,
    trainer,
    model_resolver,
    evaluator,
    infra_validator,
    pusher,
],
# Caching is on: repeated runs with identical inputs may reuse prior results.
enable_cache=True,
# MLMD is backed by a local SQLite file at `metadata_path`.
metadata_connection_config=metadata.sqlite_metadata_connection_config(
    metadata_path),
beam_pipeline_args=beam_pipeline_args)


# To run this pipeline from the python CLI:
# $python penguin_pipeline_local_infraval.py
if __name__ == '__main__':
  absl.logging.set_verbosity(absl.logging.INFO)
  # Build the pipeline from module-level `_...` configuration constants and
  # execute it synchronously in-process with the local runner.
  LocalDagRunner().run(
      _create_pipeline(pipeline_name=_pipeline_name,
                       pipeline_root=_pipeline_root,
                       data_root=_data_root,
                       module_file=_module_file,
                       serving_model_dir=_serving_model_dir,
                       metadata_path=_metadata_path,
                       beam_pipeline_args=_beam_pipeline_args))
def testPenguinPipelineSklearnLocal(self):
  """Runs the sklearn penguin pipeline three times and checks caching.

  Run 1 executes every component; run 2 re-executes only Evaluator and
  Pusher (artifact count grows by 3); run 3 is fully cached (artifact
  count unchanged). Execution count grows by the full component count on
  every run because cached executions are still recorded in MLMD.
  """

  def _run_pipeline():
    """Builds and runs one identical copy of the pipeline.

    The construction arguments must be identical across runs so the
    second and third runs can hit the MLMD cache.
    """
    LocalDagRunner().run(
        penguin_pipeline_sklearn_local._create_pipeline(
            pipeline_name=self._pipeline_name,
            pipeline_root=self._pipeline_root,
            data_root=self._data_root,
            trainer_module_file=self._trainer_module_file,
            evaluator_module_file=self._evaluator_module_file,
            serving_model_dir=self._serving_model_dir,
            metadata_path=self._metadata_path,
            beam_pipeline_args=[]))

  # First run: every component executes.
  _run_pipeline()

  self.assertTrue(fileio.exists(self._serving_model_dir))
  self.assertTrue(fileio.exists(self._metadata_path))
  expected_execution_count = 8  # 7 components + 1 resolver

  metadata_config = metadata.sqlite_metadata_connection_config(
      self._metadata_path)
  with metadata.Metadata(metadata_config) as m:
    artifact_count = len(m.store.get_artifacts())
    execution_count = len(m.store.get_executions())
    # Each execution produces at least one artifact.
    self.assertGreaterEqual(artifact_count, execution_count)
    self.assertEqual(expected_execution_count, execution_count)

  self.assertPipelineExecution()

  # Runs pipeline the second time.
  _run_pipeline()

  with metadata.Metadata(metadata_config) as m:
    # Artifact count is increased by 3 caused by Evaluator and Pusher.
    self.assertEqual(artifact_count + 3, len(m.store.get_artifacts()))
    artifact_count = len(m.store.get_artifacts())
    self.assertEqual(expected_execution_count * 2,
                     len(m.store.get_executions()))

  # Runs pipeline the third time.
  _run_pipeline()

  # Asserts cache execution.
  with metadata.Metadata(metadata_config) as m:
    # Artifact count is unchanged.
    self.assertEqual(artifact_count, len(m.store.get_artifacts()))
    self.assertEqual(expected_execution_count * 3,
                     len(m.store.get_executions()))