def testRecordLatestBeamPipeline(self, mock_get_latest_executions,
                                 mock_metadata, mock_config):
  """Tests recording Beam pipeline outputs for the latest execution.

  `_get_paths` is patched so the recorder copies from the prepared
  src_uri/dest_uri pair set up by the fixture.
  """
  with mock.patch.object(
      pipeline_recorder_utils, '_get_paths',
      return_value=self.paths) as mock_get_paths:
    pipeline_recorder_utils.record_pipeline(
        output_dir=self._base_dir,
        metadata_db_uri=self.metadata_db_uri,
        host=None,
        port=None,
        pipeline_name=self.pipeline_name,
        run_id=None)
    mock_config.assert_called_with(self.metadata_db_uri)
    mock_metadata.assert_called()
    mock_get_paths.assert_called()
    mock_get_latest_executions.assert_called()

    # Verifying that test.txt has been copied from src_uri to dest_uri.
    # Use fileio.listdir for consistency with the other recorder tests
    # (was tf.io.gfile.listdir).
    files = fileio.listdir(self.dest_uri)
    self.assertLen(files, 1)
    self.assertEqual(
        io_utils.read_string_file(os.path.join(self.dest_uri, files[0])),
        self.content)
def testRecordBeamPipelineRunId(self, mock_metadata, mock_config):
  """Tests recording Beam pipeline outputs given a run_id."""
  patch_execution_dict = mock.patch.object(
      pipeline_recorder_utils, '_get_execution_dict',
      return_value=self.execution_dict)
  patch_paths = mock.patch.object(
      pipeline_recorder_utils, '_get_paths', return_value=self.paths)
  with patch_execution_dict as mock_get_execution_dict, \
      patch_paths as mock_get_paths:
    pipeline_recorder_utils.record_pipeline(
        output_dir=self._base_dir,
        metadata_db_uri=self.metadata_db_uri,
        run_id=self.run_id)
    mock_config.assert_called_with(self.metadata_db_uri)
    mock_metadata.assert_called()
    mock_get_execution_dict.assert_called()
    mock_get_paths.assert_called()

    # test.txt must have been copied from src_uri to dest_uri with its
    # content intact.
    copied = fileio.listdir(self.dest_uri)
    self.assertLen(copied, 1)
    self.assertEqual(
        io_utils.read_string_file(os.path.join(self.dest_uri, copied[0])),
        self.content)
def setUp(self):
  """Runs and records a real taxi pipeline whose outputs feed the stub run."""
  # Python 3 zero-argument form, consistent with the other setUp in this
  # test suite (was super(TaxiPipelineRegressionEndToEndTest, self)).
  super().setUp()
  self._test_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  self._pipeline_name = 'beam_stub_test'
  # This example assumes that the taxi data and taxi utility function are
  # stored in tfx/examples/chicago_taxi_pipeline. Feel free to customize
  # this as needed.
  taxi_root = os.path.dirname(taxi_pipeline_beam.__file__)
  self._data_root = os.path.join(taxi_root, 'data', 'simple')
  self._module_file = os.path.join(taxi_root, 'taxi_utils.py')
  self._serving_model_dir = os.path.join(self._test_dir, 'serving_model')
  self._pipeline_root = os.path.join(self._test_dir, 'tfx', 'pipelines',
                                     self._pipeline_name)
  # Metadata path for recording successful pipeline run.
  self._recorded_mlmd_path = os.path.join(self._test_dir, 'tfx', 'record',
                                          'metadata.db')
  # Metadata path for stub pipeline runs.
  self._metadata_path = os.path.join(self._test_dir, 'tfx', 'metadata',
                                     self._pipeline_name, 'metadata.db')
  self._recorded_output_dir = os.path.join(self._test_dir, 'testdata')

  # Runs the pipeline and record to self._recorded_output_dir.
  record_taxi_pipeline = taxi_pipeline_beam._create_pipeline(  # pylint:disable=protected-access
      pipeline_name=self._pipeline_name,
      data_root=self._data_root,
      module_file=self._module_file,
      serving_model_dir=self._serving_model_dir,
      pipeline_root=self._pipeline_root,
      metadata_path=self._recorded_mlmd_path,
      beam_pipeline_args=[])
  local_dag_runner.LocalDagRunner().run(record_taxi_pipeline)
  pipeline_recorder_utils.record_pipeline(
      output_dir=self._recorded_output_dir,
      metadata_db_uri=self._recorded_mlmd_path,
      pipeline_name=self._pipeline_name)

  # Pipeline instance the stub-executor tests will run against a fresh
  # metadata store.
  self.taxi_pipeline = taxi_pipeline_beam._create_pipeline(  # pylint:disable=protected-access
      pipeline_name=self._pipeline_name,
      data_root=self._data_root,
      module_file=self._module_file,
      serving_model_dir=self._serving_model_dir,
      pipeline_root=self._pipeline_root,
      metadata_path=self._metadata_path,
      beam_pipeline_args=[])
def setUp(self):
  """Runs and records a real imdb pipeline whose outputs feed the stub run."""
  super().setUp()
  base_output = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                               self.get_temp_dir())
  self._test_dir = os.path.join(base_output, self._testMethodName)
  self._pipeline_name = 'imdb_stub_test'
  # This example assumes that the imdb data and imdb utility function are
  # stored in tfx/examples/imdb. Feel free to customize this as needed.
  imdb_root = os.path.dirname(imdb_pipeline_native_keras.__file__)
  self._data_root = os.path.join(imdb_root, 'data')
  self._module_file = os.path.join(imdb_root, 'imdb_utils_native_keras.py')
  self._serving_model_dir = os.path.join(self._test_dir, 'serving_model')
  self._pipeline_root = os.path.join(self._test_dir, 'pipelines',
                                     self._pipeline_name)
  # Metadata path for recording successful pipeline run.
  self._recorded_mlmd_path = os.path.join(self._test_dir, 'record',
                                          'metadata.db')
  # Metadata path for stub pipeline.
  self._metadata_path = os.path.join(self._test_dir, 'metadata',
                                     self._pipeline_name, 'metadata.db')
  self._recorded_output_dir = os.path.join(self._test_dir, 'testdata')

  recording_pipeline = imdb_pipeline_native_keras._create_pipeline(  # pylint:disable=protected-access
      pipeline_name=self._pipeline_name,
      data_root=self._data_root,
      module_file=self._module_file,
      serving_model_dir=self._serving_model_dir,
      pipeline_root=self._pipeline_root,
      metadata_path=self._recorded_mlmd_path,
      beam_pipeline_args=[])
  BeamDagRunner().run(recording_pipeline)
  pipeline_recorder_utils.record_pipeline(
      output_dir=self._recorded_output_dir,
      metadata_db_uri=self._recorded_mlmd_path,
      pipeline_name=self._pipeline_name)

  # Run pipeline with stub executors.
  self.imdb_pipeline = imdb_pipeline_native_keras._create_pipeline(  # pylint:disable=protected-access
      pipeline_name=self._pipeline_name,
      data_root=self._data_root,
      module_file=self._module_file,
      serving_model_dir=self._serving_model_dir,
      pipeline_root=self._pipeline_root,
      metadata_path=self._metadata_path,
      beam_pipeline_args=[])
def testRecordLatestKfpPipeline(self, mock_get_latest_executions):
  """Tests recording KFP pipeline outputs for the latest execution."""
  patch_paths = mock.patch.object(
      pipeline_recorder_utils, '_get_paths', return_value=self.paths)
  with patch_paths as mock_get_paths:
    pipeline_recorder_utils.record_pipeline(
        output_dir=self._base_dir,
        host=self.host,
        port=self.port,
        pipeline_name=self.pipeline_name)
    mock_get_paths.assert_called()
    mock_get_latest_executions.assert_called()

    # The single recorded file must land in dest_uri with its content intact.
    copied = fileio.listdir(self.dest_uri)
    self.assertLen(copied, 1)
    self.assertEqual(
        io_utils.read_string_file(os.path.join(self.dest_uri, copied[0])),
        self.content)
def testRecordKfpPipelineRunId(self):
  """Tests recording KFP pipeline outputs given a run_id.

  Both `_get_execution_dict` and `_get_paths` are patched so the recorder
  copies from the prepared src_uri/dest_uri pair set up by the fixture.
  """
  with mock.patch.object(
      pipeline_recorder_utils, '_get_execution_dict',
      return_value=self.execution_dict) as mock_get_execution_dict, \
      mock.patch.object(
          pipeline_recorder_utils, '_get_paths',
          return_value=self.paths) as mock_get_paths:
    pipeline_recorder_utils.record_pipeline(
        output_dir=self._base_dir,
        host=self.host,
        port=self.port,
        run_id=self.run_id)
    mock_get_execution_dict.assert_called()
    mock_get_paths.assert_called()

    # Verifying that test.txt has been copied from src_uri to dest_uri.
    # Use fileio.listdir for consistency with the other recorder tests
    # (was tf.io.gfile.listdir).
    files = fileio.listdir(self.dest_uri)
    self.assertLen(files, 1)
    self.assertEqual(
        io_utils.read_string_file(os.path.join(self.dest_uri, files[0])),
        self.content)
def main(unused_argv):
  """Entry point: records pipeline outputs according to command-line flags."""
  # Keyword arguments mirror record_pipeline's signature (same order the
  # positional call used), keeping this robust and self-documenting.
  pipeline_recorder_utils.record_pipeline(
      output_dir=FLAGS.output_dir,
      metadata_db_uri=FLAGS.metadata_db_uri,
      host=FLAGS.host,
      port=FLAGS.port,
      pipeline_name=FLAGS.pipeline_name,
      run_id=FLAGS.run_id)
def testTaxiPipelineBeam(self):
  """Runs a real pipeline, records it, then replays it with stub executors."""
  # Runs the pipeline and record to self._recorded_output_dir.
  record_taxi_pipeline = taxi_pipeline_beam._create_pipeline(  # pylint:disable=protected-access
      pipeline_name=self._pipeline_name,
      data_root=self._data_root,
      module_file=self._module_file,
      serving_model_dir=self._serving_model_dir,
      pipeline_root=self._pipeline_root,
      metadata_path=self._recorded_mlmd_path,
      beam_pipeline_args=[])
  BeamDagRunner().run(record_taxi_pipeline)
  pipeline_recorder_utils.record_pipeline(
      output_dir=self._recorded_output_dir,
      metadata_db_uri=self._recorded_mlmd_path,
      host=None,
      port=None,
      pipeline_name=self._pipeline_name,
      run_id=None)

  # Run pipeline with stub executors.
  taxi_pipeline = taxi_pipeline_beam._create_pipeline(  # pylint:disable=protected-access
      pipeline_name=self._pipeline_name,
      data_root=self._data_root,
      module_file=self._module_file,
      serving_model_dir=self._serving_model_dir,
      pipeline_root=self._pipeline_root,
      metadata_path=self._metadata_path,
      beam_pipeline_args=[])

  # Stub every component except the model resolver, which must run for real.
  model_resolver_id = 'ResolverNode.latest_blessed_model_resolver'
  stubbed_component_ids = [
      component.id
      for component in taxi_pipeline.components
      if component.id != model_resolver_id
  ]
  stub_launcher = stub_component_launcher.get_stub_launcher_class(
      test_data_dir=self._recorded_output_dir,
      stubbed_component_ids=stubbed_component_ids,
      stubbed_component_map={})
  stub_pipeline_config = pipeline_config.PipelineConfig(
      supported_launcher_classes=[stub_launcher])
  BeamDagRunner(config=stub_pipeline_config).run(taxi_pipeline)

  self.assertTrue(tf.io.gfile.exists(self._metadata_path))
  metadata_config = metadata.sqlite_metadata_connection_config(
      self._metadata_path)

  # Verify that recorded files are successfully copied to the output uris.
  with metadata.Metadata(metadata_config) as m:
    artifact_count = len(m.store.get_artifacts())
    executions = m.store.get_executions()
    execution_count = len(executions)
    # Artifact count is greater by 3 due to extra artifacts produced by
    # Evaluator(blessing and evaluation), Trainer(model and model_run) and
    # Transform(example, graph, cache) minus Resolver which doesn't generate
    # new artifact.
    self.assertEqual(artifact_count, execution_count + 3)
    self.assertLen(taxi_pipeline.components, execution_count)

    for execution in executions:
      component_id = execution.properties[
          metadata._EXECUTION_TYPE_KEY_COMPONENT_ID].string_value  # pylint: disable=protected-access
      if component_id == 'ResolverNode.latest_blessed_model_resolver':
        continue
      events = m.store.get_events_by_execution_ids([execution.id])
      output_events = [
          event for event in events
          if event.type == metadata_store_pb2.Event.OUTPUT
      ]
      for event in output_events:
        steps = event.path.steps
        self.assertTrue(steps[0].HasField('key'))
        name = steps[0].key
        output_artifacts = m.store.get_artifacts_by_id([event.artifact_id])
        for idx, artifact in enumerate(output_artifacts):
          self.assertDirectoryEqual(
              artifact.uri,
              os.path.join(self._recorded_output_dir, component_id, name,
                           str(idx)))