def testRecordLatestBeamPipeline(self, mock_get_latest_executions,
                                     mock_metadata, mock_config):
        """Records Beam pipeline outputs for the latest execution.

        Patches _get_paths so the recorder copies from the fixture src_uri,
        then verifies the collaborators were invoked and the file landed in
        dest_uri.
        """
        get_paths_patcher = mock.patch.object(
            pipeline_recorder_utils, '_get_paths', return_value=self.paths)
        with get_paths_patcher as mock_get_paths:
            # run_id=None together with pipeline_name exercises the
            # "latest execution" lookup path.
            pipeline_recorder_utils.record_pipeline(
                output_dir=self._base_dir,
                metadata_db_uri=self.metadata_db_uri,
                host=None,
                port=None,
                pipeline_name=self.pipeline_name,
                run_id=None)

            # Every collaborator must have been exercised.
            mock_config.assert_called_with(self.metadata_db_uri)
            for mocked in (mock_metadata, mock_get_paths,
                           mock_get_latest_executions):
                mocked.assert_called()

            # test.txt should have been copied from src_uri to dest_uri.
            copied = tf.io.gfile.listdir(self.dest_uri)
            self.assertLen(copied, 1)
            copied_path = os.path.join(self.dest_uri, copied[0])
            self.assertEqual(io_utils.read_string_file(copied_path),
                             self.content)
# Example no. 2 (scraped-snippet separator; score: 0)
    def testRecordBeamPipelineRunId(self, mock_metadata, mock_config):
        """Records Beam pipeline outputs for an explicitly given run_id.

        Patches both _get_execution_dict and _get_paths, runs the recorder,
        and checks that the fixture file was copied to dest_uri.
        """
        with mock.patch.object(
                pipeline_recorder_utils, '_get_execution_dict',
                return_value=self.execution_dict) as mock_get_execution_dict:
            with mock.patch.object(
                    pipeline_recorder_utils, '_get_paths',
                    return_value=self.paths) as mock_get_paths:
                # An explicit run_id means no "latest execution" lookup.
                pipeline_recorder_utils.record_pipeline(
                    output_dir=self._base_dir,
                    metadata_db_uri=self.metadata_db_uri,
                    run_id=self.run_id)

                mock_config.assert_called_with(self.metadata_db_uri)
                mock_metadata.assert_called()
                mock_get_execution_dict.assert_called()
                mock_get_paths.assert_called()

                # test.txt should have been copied from src_uri to dest_uri.
                copied = fileio.listdir(self.dest_uri)
                self.assertLen(copied, 1)
                self.assertEqual(
                    io_utils.read_string_file(
                        os.path.join(self.dest_uri, copied[0])),
                    self.content)
# Example no. 3 (scraped-snippet separator; score: 0)
    def setUp(self):
        """Runs the taxi pipeline once and records its outputs for stub runs.

        Builds a real Beam taxi pipeline, executes it with LocalDagRunner,
        records the resulting artifacts into self._recorded_output_dir, and
        prepares a second pipeline (self.taxi_pipeline) that the tests run
        with stub executors against the recorded data.
        """
        # NOTE(review): modernized the Python-2-style
        # super(TaxiPipelineRegressionEndToEndTest, self).setUp() to the
        # zero-argument form already used elsewhere in this file.
        super().setUp()
        self._test_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)
        self._pipeline_name = 'beam_stub_test'
        # This example assumes that the taxi data and taxi utility function are
        # stored in tfx/examples/chicago_taxi_pipeline. Feel free to customize
        # this as needed.
        taxi_root = os.path.dirname(taxi_pipeline_beam.__file__)
        self._data_root = os.path.join(taxi_root, 'data', 'simple')
        self._module_file = os.path.join(taxi_root, 'taxi_utils.py')
        self._serving_model_dir = os.path.join(self._test_dir, 'serving_model')
        self._pipeline_root = os.path.join(self._test_dir, 'tfx', 'pipelines',
                                           self._pipeline_name)
        # Metadata path for recording successful pipeline run.
        self._recorded_mlmd_path = os.path.join(self._test_dir, 'tfx',
                                                'record', 'metadata.db')
        # Metadata path for stub pipeline runs.
        self._metadata_path = os.path.join(self._test_dir, 'tfx', 'metadata',
                                           self._pipeline_name, 'metadata.db')
        self._recorded_output_dir = os.path.join(self._test_dir, 'testdata')

        # Runs the pipeline and record to self._recorded_output_dir
        record_taxi_pipeline = taxi_pipeline_beam._create_pipeline(  # pylint:disable=protected-access
            pipeline_name=self._pipeline_name,
            data_root=self._data_root,
            module_file=self._module_file,
            serving_model_dir=self._serving_model_dir,
            pipeline_root=self._pipeline_root,
            metadata_path=self._recorded_mlmd_path,
            beam_pipeline_args=[])

        local_dag_runner.LocalDagRunner().run(record_taxi_pipeline)

        pipeline_recorder_utils.record_pipeline(
            output_dir=self._recorded_output_dir,
            metadata_db_uri=self._recorded_mlmd_path,
            pipeline_name=self._pipeline_name)

        # Pipeline instance the tests will re-run with stub executors; note it
        # uses a separate metadata DB (self._metadata_path).
        self.taxi_pipeline = taxi_pipeline_beam._create_pipeline(  # pylint:disable=protected-access
            pipeline_name=self._pipeline_name,
            data_root=self._data_root,
            module_file=self._module_file,
            serving_model_dir=self._serving_model_dir,
            pipeline_root=self._pipeline_root,
            metadata_path=self._metadata_path,
            beam_pipeline_args=[])
    def setUp(self):
        """Runs the IMDB pipeline once and records its outputs for stub runs.

        Builds a real Beam IMDB pipeline, executes it with BeamDagRunner,
        records the resulting artifacts into self._recorded_output_dir, and
        prepares a second pipeline (self.imdb_pipeline) for stub-executor runs.
        """
        super().setUp()
        self._test_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)
        self._pipeline_name = 'imdb_stub_test'
        # This example assumes that the imdb data and imdb utility function are
        # stored in tfx/examples/imdb. Feel free to customize this as needed.
        imdb_root = os.path.dirname(imdb_pipeline_native_keras.__file__)
        self._data_root = os.path.join(imdb_root, 'data')
        self._module_file = os.path.join(imdb_root,
                                         'imdb_utils_native_keras.py')
        self._serving_model_dir = os.path.join(self._test_dir, 'serving_model')
        self._pipeline_root = os.path.join(self._test_dir, 'pipelines',
                                           self._pipeline_name)
        # Metadata path for recording successful pipeline run.
        self._recorded_mlmd_path = os.path.join(self._test_dir, 'record',
                                                'metadata.db')
        # Metadata path for stub pipeline runs.
        self._metadata_path = os.path.join(self._test_dir, 'metadata',
                                           self._pipeline_name, 'metadata.db')
        self._recorded_output_dir = os.path.join(self._test_dir, 'testdata')

        # Build and run the real pipeline once so its MLMD store and artifacts
        # can be recorded below.
        record_imdb_pipeline = imdb_pipeline_native_keras._create_pipeline(  # pylint:disable=protected-access
            pipeline_name=self._pipeline_name,
            data_root=self._data_root,
            module_file=self._module_file,
            serving_model_dir=self._serving_model_dir,
            pipeline_root=self._pipeline_root,
            metadata_path=self._recorded_mlmd_path,
            beam_pipeline_args=[])

        BeamDagRunner().run(record_imdb_pipeline)

        pipeline_recorder_utils.record_pipeline(
            output_dir=self._recorded_output_dir,
            metadata_db_uri=self._recorded_mlmd_path,
            pipeline_name=self._pipeline_name)

        # Run pipeline with stub executors.
        self.imdb_pipeline = imdb_pipeline_native_keras._create_pipeline(  # pylint:disable=protected-access
            pipeline_name=self._pipeline_name,
            data_root=self._data_root,
            module_file=self._module_file,
            serving_model_dir=self._serving_model_dir,
            pipeline_root=self._pipeline_root,
            metadata_path=self._metadata_path,
            beam_pipeline_args=[])
# Example no. 5 (scraped-snippet separator; score: 0)
  def testRecordLatestKfpPipeline(self, mock_get_latest_executions):
    """Records KFP pipeline outputs for the latest execution."""
    get_paths_patcher = mock.patch.object(
        pipeline_recorder_utils, '_get_paths', return_value=self.paths)
    with get_paths_patcher as mock_get_paths:
      # No run_id supplied, so the recorder resolves the latest executions.
      pipeline_recorder_utils.record_pipeline(
          output_dir=self._base_dir,
          host=self.host,
          port=self.port,
          pipeline_name=self.pipeline_name)
      mock_get_latest_executions.assert_called()
      mock_get_paths.assert_called()

      # test.txt should have been copied from src_uri to dest_uri.
      copied = fileio.listdir(self.dest_uri)
      self.assertLen(copied, 1)
      self.assertEqual(
          io_utils.read_string_file(os.path.join(self.dest_uri, copied[0])),
          self.content)
# Example no. 6 (scraped-snippet separator; score: 0)
    def testRecordKfpPipelineRunId(self):
        """Tests recording KFP pipeline outputs given a run_id."""
        with mock.patch.object(pipeline_recorder_utils, '_get_execution_dict',
                               return_value=self.execution_dict
                               ) as mock_get_execution_dict,\
            mock.patch.object(pipeline_recorder_utils, '_get_paths',
                              return_value=self.paths) as mock_get_paths:
            # host/port (no metadata_db_uri) presumably selects the KFP code
            # path; an explicit run_id skips the latest-execution lookup.
            pipeline_recorder_utils.record_pipeline(output_dir=self._base_dir,
                                                    host=self.host,
                                                    port=self.port,
                                                    run_id=self.run_id)

            mock_get_execution_dict.assert_called()
            mock_get_paths.assert_called()

            # Verifying that test.txt has been copied from src_uri to dest_uri
            files = tf.io.gfile.listdir(self.dest_uri)
            self.assertLen(files, 1)
            self.assertEqual(
                io_utils.read_string_file(os.path.join(self.dest_uri,
                                                       files[0])),
                self.content)
# Example no. 7 (scraped-snippet separator; score: 0)
def main(unused_argv):
    """CLI entry point: records pipeline outputs per command-line flags."""
    # Keyword arguments mirror record_pipeline's positional order:
    # (output_dir, metadata_db_uri, host, port, pipeline_name, run_id).
    pipeline_recorder_utils.record_pipeline(
        output_dir=FLAGS.output_dir,
        metadata_db_uri=FLAGS.metadata_db_uri,
        host=FLAGS.host,
        port=FLAGS.port,
        pipeline_name=FLAGS.pipeline_name,
        run_id=FLAGS.run_id)
    def testTaxiPipelineBeam(self):
        """End-to-end: record a real taxi pipeline run, replay it with stub
        executors, and verify the stubs reproduced the recorded artifacts.
        """
        # Runs the pipeline and record to self._recorded_output_dir
        record_taxi_pipeline = taxi_pipeline_beam._create_pipeline(  # pylint:disable=protected-access
            pipeline_name=self._pipeline_name,
            data_root=self._data_root,
            module_file=self._module_file,
            serving_model_dir=self._serving_model_dir,
            pipeline_root=self._pipeline_root,
            metadata_path=self._recorded_mlmd_path,
            beam_pipeline_args=[])
        BeamDagRunner().run(record_taxi_pipeline)
        pipeline_recorder_utils.record_pipeline(
            output_dir=self._recorded_output_dir,
            metadata_db_uri=self._recorded_mlmd_path,
            host=None,
            port=None,
            pipeline_name=self._pipeline_name,
            run_id=None)

        # Run pipeline with stub executors.
        taxi_pipeline = taxi_pipeline_beam._create_pipeline(  # pylint:disable=protected-access
            pipeline_name=self._pipeline_name,
            data_root=self._data_root,
            module_file=self._module_file,
            serving_model_dir=self._serving_model_dir,
            pipeline_root=self._pipeline_root,
            metadata_path=self._metadata_path,
            beam_pipeline_args=[])

        # Stub every component except the resolver, which produces no new
        # artifacts of its own.
        model_resolver_id = 'ResolverNode.latest_blessed_model_resolver'
        stubbed_component_ids = [
            component.id for component in taxi_pipeline.components
            if component.id != model_resolver_id
        ]

        stub_launcher = stub_component_launcher.get_stub_launcher_class(
            test_data_dir=self._recorded_output_dir,
            stubbed_component_ids=stubbed_component_ids,
            stubbed_component_map={})
        stub_pipeline_config = pipeline_config.PipelineConfig(
            supported_launcher_classes=[
                stub_launcher,
            ])
        BeamDagRunner(config=stub_pipeline_config).run(taxi_pipeline)

        self.assertTrue(tf.io.gfile.exists(self._metadata_path))

        metadata_config = metadata.sqlite_metadata_connection_config(
            self._metadata_path)

        # Verify that recorded files are successfully copied to the output uris.
        with metadata.Metadata(metadata_config) as m:
            artifacts = m.store.get_artifacts()
            artifact_count = len(artifacts)
            executions = m.store.get_executions()
            execution_count = len(executions)
            # Artifact count is greater by 3 due to extra artifacts produced by
            # Evaluator(blessing and evaluation), Trainer(model and model_run) and
            # Transform(example, graph, cache) minus Resolver which doesn't generate
            # new artifact.
            self.assertEqual(artifact_count, execution_count + 3)
            self.assertLen(taxi_pipeline.components, execution_count)

            # For each (non-resolver) execution, every OUTPUT artifact must be
            # byte-identical to the recorded artifact directory laid out as
            # <recorded_output_dir>/<component_id>/<output_key>/<index>.
            for execution in executions:
                component_id = execution.properties[
                    metadata._EXECUTION_TYPE_KEY_COMPONENT_ID].string_value  # pylint: disable=protected-access
                if component_id == 'ResolverNode.latest_blessed_model_resolver':
                    continue
                eid = [execution.id]
                events = m.store.get_events_by_execution_ids(eid)
                output_events = [
                    x for x in events
                    if x.type == metadata_store_pb2.Event.OUTPUT
                ]
                for event in output_events:
                    steps = event.path.steps
                    # The first path step holds the output key name.
                    self.assertTrue(steps[0].HasField('key'))
                    name = steps[0].key
                    artifacts = m.store.get_artifacts_by_id(
                        [event.artifact_id])
                    for idx, artifact in enumerate(artifacts):
                        self.assertDirectoryEqual(
                            artifact.uri,
                            os.path.join(self._recorded_output_dir,
                                         component_id, name, str(idx)))