Example #1
    def testPenguinPipelineLocal(self, make_warmup):
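        # `make_warmup` comes from the test's parameterization (decorator not
        # shown in this snippet). LocalDagRunner, fileio, and metadata are
        # assumed to be imported from tfx.orchestration.local.local_dag_runner,
        # tfx.dsl.io.fileio, and tfx.orchestration.metadata.
        # First run: every pipeline node executes and is recorded in MLMD.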
        LocalDagRunner().run(
            penguin_pipeline_local_infraval._create_pipeline(
                pipeline_name=self._pipeline_name,
                data_root=self._data_root,
                module_file=self._module_file,
                accuracy_threshold=0.1,
                serving_model_dir=self._serving_model_dir,
                pipeline_root=self._pipeline_root,
                metadata_path=self._metadata_path,
                user_provided_schema_path=self._schema_path,
                beam_pipeline_args=[],
                make_warmup=make_warmup))

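        # Verify that the pipeline wrote its outputs and that MLMD recorded
        # exactly one execution per pipeline node.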
        self.assertTrue(fileio.exists(self._serving_model_dir))
        self.assertTrue(fileio.exists(self._metadata_path))
        expected_execution_count = 10  # 9 components + 1 resolver
        metadata_config = metadata.sqlite_metadata_connection_config(
            self._metadata_path)
        with metadata.Metadata(metadata_config) as m:
            artifact_count = len(m.store.get_artifacts())
            execution_count = len(m.store.get_executions())
            self.assertGreaterEqual(artifact_count, execution_count)
            self.assertEqual(expected_execution_count, execution_count)

        self._assertPipelineExecution()
        self._assertInfraValidatorPassed()

        # Runs pipeline the second time.
        LocalDagRunner().run(
            penguin_pipeline_local_infraval._create_pipeline(
                pipeline_name=self._pipeline_name,
                data_root=self._data_root,
                module_file=self._module_file,
                accuracy_threshold=0.1,
                serving_model_dir=self._serving_model_dir,
                pipeline_root=self._pipeline_root,
                metadata_path=self._metadata_path,
                user_provided_schema_path=self._schema_path,
                beam_pipeline_args=[],
                make_warmup=make_warmup))

        # All executions but Evaluator and Pusher are cached.
        with metadata.Metadata(metadata_config) as m:
            # Artifact count increases by 3 because Evaluator and Pusher are
            # re-executed and emit new artifacts.
            self.assertLen(m.store.get_artifacts(), artifact_count + 3)
            artifact_count = len(m.store.get_artifacts())
            self.assertLen(m.store.get_executions(),
                           expected_execution_count * 2)

        # Runs pipeline the third time.
        LocalDagRunner().run(
            penguin_pipeline_local_infraval._create_pipeline(
                pipeline_name=self._pipeline_name,
                data_root=self._data_root,
                module_file=self._module_file,
                accuracy_threshold=0.1,
                serving_model_dir=self._serving_model_dir,
                pipeline_root=self._pipeline_root,
                metadata_path=self._metadata_path,
                user_provided_schema_path=self._schema_path,
                beam_pipeline_args=[],
                make_warmup=make_warmup))

        # Asserts cache execution.
        with metadata.Metadata(metadata_config) as m:
            # Artifact count is unchanged.
            self.assertLen(m.store.get_artifacts(), artifact_count)
            self.assertLen(m.store.get_executions(),
                           expected_execution_count * 3)
Example #2
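        # Tail of _create_pipeline: these are keyword arguments to what is
        # presumably a pipeline.Pipeline(...) constructor whose opening lines
        # are not included in this snippet.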
        components=[
            example_gen,
            statistics_gen,
            schema_gen,
            example_validator,
            transform,
            trainer,
            model_resolver,
            evaluator,
            infra_validator,
            pusher,
        ],
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path),
        beam_pipeline_args=beam_pipeline_args)


# To run this pipeline from the command line:
#   $ python penguin_pipeline_local_infraval.py
if __name__ == '__main__':
    absl.logging.set_verbosity(absl.logging.INFO)
    LocalDagRunner().run(
        _create_pipeline(pipeline_name=_pipeline_name,
                         pipeline_root=_pipeline_root,
                         data_root=_data_root,
                         module_file=_module_file,
                         serving_model_dir=_serving_model_dir,
                         metadata_path=_metadata_path,
                         beam_pipeline_args=_beam_pipeline_args))
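
    # Separate fragment: the analogous end-to-end test for the sklearn-based
    # penguin pipeline (penguin_pipeline_sklearn_local).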
    def testPenguinPipelineSklearnLocal(self):
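        # First run: all nodes execute; subsequent runs should hit the cache.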
        LocalDagRunner().run(
            penguin_pipeline_sklearn_local._create_pipeline(
                pipeline_name=self._pipeline_name,
                pipeline_root=self._pipeline_root,
                data_root=self._data_root,
                trainer_module_file=self._trainer_module_file,
                evaluator_module_file=self._evaluator_module_file,
                serving_model_dir=self._serving_model_dir,
                metadata_path=self._metadata_path,
                beam_pipeline_args=[]))

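        # Check that the pipeline outputs exist before inspecting the
        # MLMD store.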
        self.assertTrue(fileio.exists(self._serving_model_dir))
        self.assertTrue(fileio.exists(self._metadata_path))
        expected_execution_count = 8  # 7 components + 1 resolver
        metadata_config = metadata.sqlite_metadata_connection_config(
            self._metadata_path)
        with metadata.Metadata(metadata_config) as m:
            artifact_count = len(m.store.get_artifacts())
            execution_count = len(m.store.get_executions())
            self.assertGreaterEqual(artifact_count, execution_count)
            self.assertEqual(expected_execution_count, execution_count)

        self.assertPipelineExecution()

        # Runs pipeline the second time.
        LocalDagRunner().run(
            penguin_pipeline_sklearn_local._create_pipeline(
                pipeline_name=self._pipeline_name,
                pipeline_root=self._pipeline_root,
                data_root=self._data_root,
                trainer_module_file=self._trainer_module_file,
                evaluator_module_file=self._evaluator_module_file,
                serving_model_dir=self._serving_model_dir,
                metadata_path=self._metadata_path,
                beam_pipeline_args=[]))

        with metadata.Metadata(metadata_config) as m:
            # Artifact count increases by 3 because Evaluator and Pusher are
            # re-executed and emit new artifacts.
            self.assertEqual(artifact_count + 3, len(m.store.get_artifacts()))
            artifact_count = len(m.store.get_artifacts())
            self.assertEqual(expected_execution_count * 2,
                             len(m.store.get_executions()))

        # Runs pipeline the third time.
        LocalDagRunner().run(
            penguin_pipeline_sklearn_local._create_pipeline(
                pipeline_name=self._pipeline_name,
                pipeline_root=self._pipeline_root,
                data_root=self._data_root,
                trainer_module_file=self._trainer_module_file,
                evaluator_module_file=self._evaluator_module_file,
                serving_model_dir=self._serving_model_dir,
                metadata_path=self._metadata_path,
                beam_pipeline_args=[]))

        # Asserts cache execution.
        with metadata.Metadata(metadata_config) as m:
            # Artifact count is unchanged.
            self.assertEqual(artifact_count, len(m.store.get_artifacts()))
            self.assertEqual(expected_execution_count * 3,
                             len(m.store.get_executions()))