def testPenguinPipelineLocalWithTuner(self):
        """Runs the penguin pipeline locally with tuning enabled.

        After the run, checks that the serving model directory and the
        metadata file exist, that the metadata store holds at least as many
        artifacts as executions, and that exactly 10 executions were
        recorded. Finishes by calling `self.assertPipelineExecution(True)`.
        """
        runner = LocalDagRunner()
        tuner_pipeline = penguin_pipeline_local._create_pipeline(
            pipeline_name=self._pipeline_name,
            data_root=self._data_root,
            module_file=self._module_file,
            accuracy_threshold=0.1,
            serving_model_dir=self._serving_model_dir,
            pipeline_root=self._pipeline_root,
            metadata_path=self._metadata_path,
            enable_tuning=True,
            examplegen_input_config=None,
            examplegen_range_config=None,
            resolver_range_config=None,
            beam_pipeline_args=[])
        runner.run(tuner_pipeline)

        # Both outputs must be present on disk after the run.
        for required_path in (self._serving_model_dir, self._metadata_path):
            self.assertTrue(fileio.exists(required_path))

        # 9 components + 1 resolver
        expected_executions = 10
        connection_config = metadata.sqlite_metadata_connection_config(
            self._metadata_path)
        with metadata.Metadata(connection_config) as mlmd_handle:
            num_artifacts = len(mlmd_handle.store.get_artifacts())
            num_executions = len(mlmd_handle.store.get_executions())
            self.assertGreaterEqual(num_artifacts, num_executions)
            self.assertEqual(expected_executions, num_executions)

        self.assertPipelineExecution(True)
Ejemplo n.º 2
0
  def testPenguinPipelineLocalWithTuner(self):
    """Runs the Keras penguin pipeline locally with tuning enabled.

    After the run, checks that the serving model directory and the metadata
    file exist, that the metadata store holds at least as many artifacts as
    executions, and that exactly 10 executions were recorded. Finishes by
    calling `self._assertPipelineExecution(has_tuner=True)`.
    """
    # TODO(b/180723394): Parameterize this test when Flax supports tuning.
    keras_module_file = self._module_file_name('keras')
    runner = LocalDagRunner()
    tuner_pipeline = penguin_pipeline_local._create_pipeline(
        pipeline_name=self._pipeline_name,
        data_root=self._data_root,
        module_file=keras_module_file,
        accuracy_threshold=0.1,
        serving_model_dir=self._serving_model_dir,
        pipeline_root=self._pipeline_root,
        metadata_path=self._metadata_path,
        user_provided_schema_path=None,
        enable_tuning=True,
        enable_bulk_inferrer=False,
        examplegen_input_config=None,
        examplegen_range_config=None,
        resolver_range_config=None,
        beam_pipeline_args=self._make_beam_pipeline_args())
    runner.run(tuner_pipeline)

    # Both outputs must be present on disk after the run.
    for required_path in (self._serving_model_dir, self._metadata_path):
      self.assertTrue(fileio.exists(required_path))

    # 9 components + 1 resolver
    expected_executions = 10
    connection_config = metadata.sqlite_metadata_connection_config(
        self._metadata_path)
    mlmd_store = mlmd.MetadataStore(connection_config)
    num_artifacts = len(mlmd_store.get_artifacts())
    num_executions = len(mlmd_store.get_executions())
    self.assertGreaterEqual(num_artifacts, num_executions)
    self.assertEqual(expected_executions, num_executions)

    self._assertPipelineExecution(has_tuner=True)
Ejemplo n.º 3
0
  def testPenguinPipelineLocalWithImporter(self, model_framework):
    """Runs the penguin pipeline locally with a user-provided schema path.

    After the run, checks that the serving model directory and the metadata
    file exist, that the metadata store holds at least as many artifacts as
    executions, and that exactly 9 executions were recorded. Finishes by
    calling `self._assertPipelineExecution(has_schema_gen=False)`.

    Args:
      model_framework: Framework name forwarded to `self._module_file_name`
        to pick the module file for the pipeline.
    """
    framework_module_file = self._module_file_name(model_framework)
    runner = LocalDagRunner()
    importer_pipeline = penguin_pipeline_local._create_pipeline(
        pipeline_name=self._pipeline_name,
        data_root=self._data_root,
        module_file=framework_module_file,
        accuracy_threshold=0.1,
        serving_model_dir=self._serving_model_dir,
        pipeline_root=self._pipeline_root,
        metadata_path=self._metadata_path,
        user_provided_schema_path=self._schema_path,
        enable_tuning=False,
        enable_bulk_inferrer=False,
        examplegen_input_config=None,
        examplegen_range_config=None,
        resolver_range_config=None,
        beam_pipeline_args=[])
    runner.run(importer_pipeline)

    # Both outputs must be present on disk after the run.
    for required_path in (self._serving_model_dir, self._metadata_path):
      self.assertTrue(fileio.exists(required_path))

    # 7 components + 1 resolver + 1 importer
    expected_executions = 9
    connection_config = metadata.sqlite_metadata_connection_config(
        self._metadata_path)
    mlmd_store = mlmd.MetadataStore(connection_config)
    num_artifacts = len(mlmd_store.get_artifacts())
    num_executions = len(mlmd_store.get_executions())
    self.assertGreaterEqual(num_artifacts, num_executions)
    self.assertEqual(expected_executions, num_executions)

    self._assertPipelineExecution(has_schema_gen=False)
Ejemplo n.º 4
0
    def testPenguinPipelineLocal(self, model_framework):
        """Runs the penguin pipeline three times and checks cache behavior.

        The first run is checked for output files and for 8 recorded
        executions. The second run is expected to add 3 artifacts and double
        the execution count. The third run is expected to leave the artifact
        count unchanged and triple the execution count.

        The test is skipped for 'tfdf_experimental' when
        `tensorflow_decision_forests` cannot be imported.

        Args:
            model_framework: Framework name forwarded to
                `self._module_file_name` to pick the module file.
        """
        if model_framework == 'tfdf_experimental':
            # Skip if TFDF is not available or incompatible.
            try:
                importlib.import_module('tensorflow_decision_forests')
            except (ImportError, tf.errors.NotFoundError):
                self.skipTest('TensorflowDecisionForests is not available')

        framework_module_file = self._module_file_name(model_framework)
        penguin_pipeline = penguin_pipeline_local._create_pipeline(
            pipeline_name=self._pipeline_name,
            data_root=self._data_root,
            module_file=framework_module_file,
            accuracy_threshold=0.1,
            serving_model_dir=self._serving_model_dir,
            pipeline_root=self._pipeline_root,
            metadata_path=self._metadata_path,
            user_provided_schema_path=None,
            enable_tuning=False,
            enable_bulk_inferrer=False,
            examplegen_input_config=None,
            examplegen_range_config=None,
            resolver_range_config=None,
            beam_pipeline_args=self._make_beam_pipeline_args(),
            enable_transform_input_cache=False)

        # --- Run 1: everything executes for real. ---
        logging.info('Starting the first pipeline run.')
        LocalDagRunner().run(penguin_pipeline)

        for required_path in (self._serving_model_dir, self._metadata_path):
            self.assertTrue(fileio.exists(required_path))

        # 7 components + 1 resolver
        executions_per_run = 8
        connection_config = metadata.sqlite_metadata_connection_config(
            self._metadata_path)
        mlmd_store = mlmd.MetadataStore(connection_config)
        num_artifacts = len(mlmd_store.get_artifacts())
        num_executions = len(mlmd_store.get_executions())
        self.assertGreaterEqual(num_artifacts, num_executions)
        self.assertEqual(executions_per_run, num_executions)

        self._assertPipelineExecution()

        # --- Run 2: only Evaluator and Pusher are expected to re-execute. ---
        logging.info('Starting the second pipeline run. All components except '
                     'Evaluator and Pusher will use cached results.')
        LocalDagRunner().run(penguin_pipeline)

        # Evaluator and Pusher are expected to contribute 3 new artifacts.
        self.assertLen(mlmd_store.get_artifacts(), num_artifacts + 3)
        num_artifacts = len(mlmd_store.get_artifacts())
        self.assertLen(mlmd_store.get_executions(), executions_per_run * 2)

        # --- Run 3: fully cached. ---
        logging.info('Starting the third pipeline run. '
                     'All components will use cached results.')
        LocalDagRunner().run(penguin_pipeline)

        # A fully cached run records executions but no new artifacts.
        self.assertLen(mlmd_store.get_artifacts(), num_artifacts)
        self.assertLen(mlmd_store.get_executions(), executions_per_run * 3)
    def testPenguinPipelineLocal(self, model_framework):
        """Runs the penguin pipeline three times and checks cache behavior.

        The first run is checked for output files and for 9 recorded
        executions. The second run is expected to add 3 artifacts and double
        the execution count. The third run is expected to leave the artifact
        count unchanged and triple the execution count. Each check opens its
        own `metadata.Metadata` context on the same connection config.

        Args:
            model_framework: Framework name forwarded to
                `self._module_file_name` to pick the module file.
        """
        framework_module_file = self._module_file_name(model_framework)
        penguin_pipeline = penguin_pipeline_local._create_pipeline(
            pipeline_name=self._pipeline_name,
            data_root=self._data_root,
            module_file=framework_module_file,
            accuracy_threshold=0.1,
            serving_model_dir=self._serving_model_dir,
            pipeline_root=self._pipeline_root,
            metadata_path=self._metadata_path,
            enable_tuning=False,
            examplegen_input_config=None,
            examplegen_range_config=None,
            resolver_range_config=None,
            beam_pipeline_args=[])

        # --- Run 1: everything executes for real. ---
        logging.info('Starting the first pipeline run.')
        LocalDagRunner().run(penguin_pipeline)

        for required_path in (self._serving_model_dir, self._metadata_path):
            self.assertTrue(fileio.exists(required_path))

        # 8 components + 1 resolver
        executions_per_run = 9
        connection_config = metadata.sqlite_metadata_connection_config(
            self._metadata_path)
        with metadata.Metadata(connection_config) as mlmd_handle:
            num_artifacts = len(mlmd_handle.store.get_artifacts())
            num_executions = len(mlmd_handle.store.get_executions())
            self.assertGreaterEqual(num_artifacts, num_executions)
            self.assertEqual(executions_per_run, num_executions)

        self._assertPipelineExecution(False)

        # --- Run 2: only Evaluator and Pusher are expected to re-execute. ---
        logging.info('Starting the second pipeline run. All components except '
                     'Evaluator and Pusher will use cached results.')
        LocalDagRunner().run(penguin_pipeline)

        with metadata.Metadata(connection_config) as mlmd_handle:
            # Evaluator and Pusher are expected to contribute 3 new artifacts.
            self.assertLen(mlmd_handle.store.get_artifacts(),
                           num_artifacts + 3)
            num_artifacts = len(mlmd_handle.store.get_artifacts())
            self.assertLen(mlmd_handle.store.get_executions(),
                           executions_per_run * 2)

        # --- Run 3: fully cached. ---
        logging.info('Starting the third pipeline run. '
                     'All components will use cached results.')
        LocalDagRunner().run(penguin_pipeline)

        # A fully cached run records executions but no new artifacts.
        with metadata.Metadata(connection_config) as mlmd_handle:
            self.assertLen(mlmd_handle.store.get_artifacts(), num_artifacts)
            self.assertLen(mlmd_handle.store.get_executions(),
                           executions_per_run * 3)
Ejemplo n.º 6
0
    def testPenguinPipelineLocalConditionalWithoutPusher(self):
        """Runs the Keras pipeline with a 1.0 accuracy threshold, three times.

        The first run is checked for the metadata file and for 7 recorded
        executions. The second and third runs are each expected to leave the
        artifact count unchanged while the execution count grows to two and
        three times the first-run count.
        """
        keras_module_file = self._module_file_name('keras')
        penguin_pipeline = penguin_pipeline_local._create_pipeline(
            pipeline_name=self._pipeline_name,
            data_root=self._data_root,
            module_file=keras_module_file,
            # Model evaluation will fail with 1.0 threshold
            accuracy_threshold=1.0,
            serving_model_dir=self._serving_model_dir,
            pipeline_root=self._pipeline_root,
            metadata_path=self._metadata_path,
            user_provided_schema_path=None,
            enable_tuning=False,
            enable_bulk_inferrer=False,
            examplegen_input_config=None,
            examplegen_range_config=None,
            resolver_range_config=None,
            beam_pipeline_args=self._make_beam_pipeline_args(),
            enable_transform_input_cache=False)

        # --- Run 1 ---
        logging.info('Starting the first pipeline run.')
        LocalDagRunner().run(penguin_pipeline)

        self.assertTrue(fileio.exists(self._metadata_path))

        # Without pusher because evaluation fails
        executions_per_run = 7
        connection_config = metadata.sqlite_metadata_connection_config(
            self._metadata_path)
        mlmd_store = mlmd.MetadataStore(connection_config)
        num_artifacts = len(mlmd_store.get_artifacts())
        num_executions = len(mlmd_store.get_executions())
        self.assertGreaterEqual(num_artifacts, num_executions)
        self.assertEqual(executions_per_run, num_executions)

        self._assertPipelineExecution(has_pusher=False)

        # --- Run 2 ---
        logging.info('Starting the second pipeline run. All components except '
                     'Evaluator will use cached results. Pusher will not run.')
        LocalDagRunner().run(penguin_pipeline)

        # No newly blessed model means no new evaluation artifact and no new
        # pushed model, so the artifact count must not move.
        self.assertLen(mlmd_store.get_artifacts(), num_artifacts)
        self.assertLen(mlmd_store.get_executions(), executions_per_run * 2)

        # --- Run 3: fully cached. ---
        logging.info('Starting the third pipeline run. '
                     'All components will use cached results.')
        LocalDagRunner().run(penguin_pipeline)

        # A fully cached run records executions but no new artifacts.
        self.assertLen(mlmd_store.get_artifacts(), num_artifacts)
        self.assertLen(mlmd_store.get_executions(), executions_per_run * 3)
 def run_pipeline(examplegen_range_config):
     """Builds the penguin pipeline over the span data root and runs it.

     `self`, `examplegen_input_config` and `resolver_range_config` are not
     parameters; they are read from the surrounding scope.

     Args:
       examplegen_range_config: Forwarded unchanged to `_create_pipeline`
         as its `examplegen_range_config` argument.
     """
     runner = LocalDagRunner()
     span_pipeline = penguin_pipeline_local._create_pipeline(
         pipeline_name=self._pipeline_name,
         data_root=self._data_root_span,
         module_file=self._module_file,
         accuracy_threshold=0.1,
         serving_model_dir=self._serving_model_dir,
         pipeline_root=self._pipeline_root,
         metadata_path=self._metadata_path,
         enable_tuning=False,
         examplegen_input_config=examplegen_input_config,
         examplegen_range_config=examplegen_range_config,
         resolver_range_config=resolver_range_config,
         beam_pipeline_args=[])
     runner.run(span_pipeline)
Ejemplo n.º 8
0
 def run_pipeline(examplegen_range_config):
     """Builds the penguin pipeline over the span data root and runs it.

     `self`, `module_file`, `examplegen_input_config` and
     `resolver_range_config` are not parameters; they are read from the
     surrounding scope. The transform input cache is enabled.

     Args:
       examplegen_range_config: Forwarded unchanged to `_create_pipeline`
         as its `examplegen_range_config` argument.
     """
     runner = LocalDagRunner()
     span_pipeline = penguin_pipeline_local._create_pipeline(
         pipeline_name=self._pipeline_name,
         data_root=self._data_root_span,
         module_file=module_file,
         accuracy_threshold=0.1,
         serving_model_dir=self._serving_model_dir,
         pipeline_root=self._pipeline_root,
         metadata_path=self._metadata_path,
         user_provided_schema_path=None,
         enable_tuning=False,
         enable_bulk_inferrer=False,
         examplegen_input_config=examplegen_input_config,
         examplegen_range_config=examplegen_range_config,
         resolver_range_config=resolver_range_config,
         beam_pipeline_args=self._make_beam_pipeline_args(),
         enable_transform_input_cache=True)
     runner.run(span_pipeline)
Ejemplo n.º 9
0
    def testPenguinPipelineLocalWithBulkInferrer(self, model_framework):
        """Runs the penguin pipeline locally with the bulk inferrer enabled.

        After the run, checks that the serving model directory and the
        metadata file exist, that the metadata store holds at least as many
        artifacts as executions, and that exactly 10 executions were
        recorded. Finishes by calling
        `self._assertPipelineExecution(has_bulk_inferrer=True)`.

        The test is skipped for 'tfdf_experimental' when
        `tensorflow_decision_forests` cannot be imported.

        Args:
            model_framework: Framework name forwarded to
                `self._module_file_name` to pick the module file.
        """
        if model_framework == 'tfdf_experimental':
            # Skip if TFDF is not available or incompatible.
            try:
                importlib.import_module('tensorflow_decision_forests')
            except (ImportError, tf.errors.NotFoundError):
                self.skipTest('TensorflowDecisionForests is not available')

        framework_module_file = self._module_file_name(model_framework)
        runner = LocalDagRunner()
        inferrer_pipeline = penguin_pipeline_local._create_pipeline(
            pipeline_name=self._pipeline_name,
            data_root=self._data_root,
            module_file=framework_module_file,
            accuracy_threshold=0.1,
            serving_model_dir=self._serving_model_dir,
            pipeline_root=self._pipeline_root,
            metadata_path=self._metadata_path,
            user_provided_schema_path=None,
            enable_tuning=False,
            enable_bulk_inferrer=True,
            examplegen_input_config=None,
            examplegen_range_config=None,
            resolver_range_config=None,
            beam_pipeline_args=[],
            enable_transform_input_cache=False)
        runner.run(inferrer_pipeline)

        # Both outputs must be present on disk after the run.
        for required_path in (self._serving_model_dir, self._metadata_path):
            self.assertTrue(fileio.exists(required_path))

        # 9 components + 1 resolver
        expected_executions = 10
        connection_config = metadata.sqlite_metadata_connection_config(
            self._metadata_path)
        mlmd_store = mlmd.MetadataStore(connection_config)
        num_artifacts = len(mlmd_store.get_artifacts())
        num_executions = len(mlmd_store.get_executions())
        self.assertGreaterEqual(num_artifacts, num_executions)
        self.assertEqual(expected_executions, num_executions)

        self._assertPipelineExecution(has_bulk_inferrer=True)