def setUp(self):
  """Sets up source/output dirs, I/O artifact dicts and exec properties."""
  # Fix: the original omitted the base-class setUp call, which unittest /
  # tf.test.TestCase relies on to initialize per-test fixtures.
  super().setUp()
  self._source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  self._output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  tf.gfile.MakeDirs(self._output_data_dir)

  # Input artifacts: an exported model and an (empty) blessing.
  self._model_export = standard_artifacts.Model()
  self._model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current/')
  self._model_blessing = standard_artifacts.ModelBlessing()
  self._input_dict = {
      'model_export': [self._model_export],
      'model_blessing': [self._model_blessing],
  }

  # Output artifact: where the executor records the pushed model.
  self._model_push = standard_artifacts.PushedModel()
  self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
  tf.gfile.MakeDirs(self._model_push.uri)
  self._output_dict = {
      'model_push': [self._model_push],
  }

  self._serving_model_dir = os.path.join(self._output_data_dir,
                                         'serving_model_dir')
  tf.gfile.MakeDirs(self._serving_model_dir)
  # push_destination is passed as a JSON-serialized PushDestination proto.
  self._exec_properties = {
      'push_destination':
          json_format.MessageToJson(
              pusher_pb2.PushDestination(
                  filesystem=pusher_pb2.PushDestination.Filesystem(
                      base_directory=self._serving_model_dir))),
  }
  self._executor = executor.Executor()
def setUp(self):
  """Builds the artifact dicts and exec properties used by the tests."""
  super(ExecutorTest, self).setUp()
  module_dir = os.path.dirname(os.path.dirname(__file__))
  self._source_data_dir = os.path.join(module_dir, 'testdata')
  output_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                               self.get_temp_dir())
  self._output_data_dir = os.path.join(output_root, self._testMethodName)
  tf.io.gfile.makedirs(self._output_data_dir)

  # Inputs: exported model plus an (empty) blessing artifact.
  self._model_export = standard_artifacts.Model()
  self._model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current')
  self._model_blessing = standard_artifacts.ModelBlessing()
  self._input_dict = {
      executor.MODEL_KEY: [self._model_export],
      executor.MODEL_BLESSING_KEY: [self._model_blessing],
  }

  # Output: destination artifact for the pushed model.
  self._model_push = standard_artifacts.PushedModel()
  self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
  tf.io.gfile.makedirs(self._model_push.uri)
  self._output_dict = {executor.PUSHED_MODEL_KEY: [self._model_push]}

  self._serving_model_dir = os.path.join(self._output_data_dir,
                                         'serving_model_dir')
  tf.io.gfile.makedirs(self._serving_model_dir)

  # push_destination is passed as a JSON-serialized PushDestination proto.
  destination = pusher_pb2.PushDestination(
      filesystem=pusher_pb2.PushDestination.Filesystem(
          base_directory=self._serving_model_dir))
  self._exec_properties = {
      'push_destination':
          json_format.MessageToJson(
              destination, preserving_proto_field_name=True),
  }
  self._executor = executor.Executor()
def setUp(self):
  """Prepares input/output artifact dicts and exec properties for the tests."""
  super(ExecutorTest, self).setUp()
  module_dir = os.path.dirname(os.path.dirname(__file__))
  self._source_data_dir = os.path.join(module_dir, 'testdata')
  output_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                               self.get_temp_dir())
  self._output_data_dir = os.path.join(output_root, self._testMethodName)
  fileio.makedirs(self._output_data_dir)

  # Inputs: exported model plus an (empty) blessing artifact.
  self._model_export = standard_artifacts.Model()
  self._model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current')
  self._model_blessing = standard_artifacts.ModelBlessing()
  self._input_dict = {
      MODEL_KEY: [self._model_export],
      MODEL_BLESSING_KEY: [self._model_blessing],
  }

  # Output: destination artifact for the pushed model.
  self._model_push = standard_artifacts.PushedModel()
  self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
  fileio.makedirs(self._model_push.uri)
  self._output_dict = {PUSHED_MODEL_KEY: [self._model_push]}

  self._serving_model_dir = os.path.join(self._output_data_dir,
                                         'serving_model_dir')
  fileio.makedirs(self._serving_model_dir)

  # Exec properties are delegated to a helper so subclasses can override.
  self._exec_properties = self._MakeExecProperties()
  self._executor = executor.Executor()
def setUp(self):
  """Sets up artifact dicts and AI Platform exec properties for the tests."""
  # Fix: the original omitted the base-class setUp call, which unittest /
  # tf.test.TestCase relies on to initialize per-test fixtures.
  super().setUp()
  self._source_data_dir = os.path.join(
      os.path.dirname(
          os.path.dirname(os.path.dirname(os.path.dirname(__file__)))),
      'components', 'testdata')
  self._output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  tf.gfile.MakeDirs(self._output_data_dir)

  # Input artifacts: exported model and an (empty) blessing.
  self._model_export = standard_artifacts.Model()
  self._model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current/')
  self._model_blessing = standard_artifacts.ModelBlessing()
  self._input_dict = {
      'model_export': [self._model_export],
      'model_blessing': [self._model_blessing],
  }

  # Output artifact for the pushed model.
  self._model_push = standard_artifacts.PushedModel()
  self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
  tf.gfile.MakeDirs(self._model_push.uri)
  self._output_dict = {
      'model_push': [self._model_push],
  }

  # AI Platform serving args are passed via custom_config.
  self._exec_properties = {
      'custom_config': {
          'ai_platform_serving_args': {
              'model_name': 'model_name',
              'project_id': 'project_id'
          },
      },
  }
  self._executor = Executor()
def setUp(self):
  """Creates artifact dicts and exec properties (CAIP and Vertex variants)."""
  super().setUp()
  repo_root = os.path.dirname(
      os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
  self._source_data_dir = os.path.join(repo_root, 'components', 'testdata')
  output_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                               self.get_temp_dir())
  self._output_data_dir = os.path.join(output_root, self._testMethodName)
  fileio.makedirs(self._output_data_dir)

  # Inputs: exported model plus an (empty) blessing artifact.
  self._model_export = standard_artifacts.Model()
  self._model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current')
  self._model_blessing = standard_artifacts.ModelBlessing()
  self._input_dict = {
      standard_component_specs.MODEL_KEY: [self._model_export],
      standard_component_specs.MODEL_BLESSING_KEY: [self._model_blessing],
  }

  # Output: destination artifact for the pushed model.
  self._model_push = standard_artifacts.PushedModel()
  self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
  fileio.makedirs(self._model_push.uri)
  self._output_dict = {
      standard_component_specs.PUSHED_MODEL_KEY: [self._model_push],
  }

  # Dict format of exec_properties. custom_config needs to be serialized
  # before being passed into Do function.
  self._exec_properties = {
      'custom_config': {
          constants.SERVING_ARGS_KEY: {
              'model_name': 'model_name',
              'project_id': 'project_id'
          },
      },
      'push_destination': None,
  }

  self._container_image_uri_vertex = 'gcr.io/path/to/container'
  # Dict format of exec_properties for Vertex. custom_config needs to be
  # serialized before being passed into Do function.
  self._exec_properties_vertex = {
      'custom_config': {
          constants.SERVING_ARGS_KEY: {
              'endpoint_name': 'endpoint_name',
              'project_id': 'project_id',
          },
          constants.VERTEX_CONTAINER_IMAGE_URI_KEY:
              self._container_image_uri_vertex,
          constants.VERTEX_REGION_KEY: 'us-central1',
          constants.ENABLE_VERTEX_KEY: True,
      },
      'push_destination': None,
  }
  self._executor = executor.Executor()
def testConstruct(self):
  """Checks BulkInferrer construction and its inference_result output type."""
  examples = standard_artifacts.Examples()
  model = standard_artifacts.Model()
  model_blessing = standard_artifacts.ModelBlessing()
  pushed_model = standard_artifacts.PushedModel()
  # Fix: the original was missing the comma between the model_blessing and
  # pushed_model keyword arguments, which is a syntax error.
  bulk_inferrer = component.BulkInferrer(
      examples=channel_utils.as_channel([examples]),
      model=channel_utils.as_channel([model]),
      model_blessing=channel_utils.as_channel([model_blessing]),
      pushed_model=channel_utils.as_channel([pushed_model]))
  self.assertEqual('InferenceResult',
                   bulk_inferrer.outputs['inference_result'].type_name)
def __init__(
    self,
    model: Optional[types.Channel] = None,
    push_destination: Optional[Union[pusher_pb2.PushDestination,
                                     Dict[Text, Any]]] = None,
    custom_config: Optional[Dict[Text, Any]] = None,
    custom_executor_spec: Optional[executor_spec.ExecutorSpec] = None,
    output: Optional[types.Channel] = None,
    instance_name: Optional[Text] = None,
    enable_cache: Optional[bool] = None):
  """Construct a Pusher component.

  Args:
    model: A Channel of type `standard_artifacts.Model`, usually produced by
      a Trainer component.
    push_destination: A pusher_pb2.PushDestination instance, providing info
      for tensorflow serving to load models. Optional if executor_class
      doesn't require push_destination. If any field is provided as a
      RuntimeParameter, push_destination should be constructed as a dict
      with the same field names as PushDestination proto message.
    custom_config: A dict which contains the deployment job parameters to be
      passed to cloud-based training platforms. The [Kubeflow example](
      https://github.com/tensorflow/tfx/blob/master/tfx/examples/chicago_taxi_pipeline/taxi_pipeline_kubeflow.py#L211)
      contains an example how this can be used by custom executors.
    custom_executor_spec: Optional custom executor spec.
    output: Optional output `standard_artifacts.PushedModel` channel with
      result of push.
    instance_name: Optional unique instance name. Necessary if multiple
      Pusher components are declared in the same pipeline.
    enable_cache: Optional boolean to indicate if cache is enabled for the
      Pusher component. If not specified, defaults to the value specified
      for pipeline's enable_cache parameter.

  Raises:
    ValueError: If push_destination is not provided and no
      custom_executor_spec is supplied.
  """
  # Fix: parameters defaulting to None are annotated Optional[...] rather
  # than relying on implicit Optional (disallowed by PEP 484).
  output = output or types.Channel(
      type=standard_artifacts.PushedModel,
      artifacts=[standard_artifacts.PushedModel()])
  if push_destination is None and not custom_executor_spec:
    raise ValueError('push_destination is required unless a '
                     'custom_executor_spec is supplied that does not require '
                     'it.')
  spec = AlwaysPusherSpec(
      model=model,
      push_destination=push_destination,
      custom_config=custom_config,
      pushed_model=output)
  super(AlwaysPusher, self).__init__(
      spec=spec,
      custom_executor_spec=custom_executor_spec,
      instance_name=instance_name,
      enable_cache=enable_cache)
def __init__(
    self,
    model: Optional[types.Channel] = None,
    model_blessing: Optional[types.Channel] = None,
    push_destination: Optional[Union[pusher_pb2.PushDestination,
                                     Dict[Text, Any]]] = None,
    custom_config: Optional[Dict[Text, Any]] = None,
    custom_executor_spec: Optional[executor_spec.ExecutorSpec] = None,
    model_push: Optional[types.Channel] = None,
    model_export: Optional[types.Channel] = None,
    instance_name: Optional[Text] = None):
  """Construct a Pusher component.

  Args:
    model: A Channel of type `standard_artifacts.Model`, usually produced by
      a Trainer component.
    model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
      usually produced by a ModelValidator component. _required_
    push_destination: A pusher_pb2.PushDestination instance, providing info
      for tensorflow serving to load models. Optional if executor_class
      doesn't require push_destination. If any field is provided as a
      RuntimeParameter, push_destination should be constructed as a dict
      with the same field names as PushDestination proto message.
    custom_config: A dict which contains the deployment job parameters to be
      passed to cloud-based training platforms. The [Kubeflow example](
      https://github.com/tensorflow/tfx/blob/master/tfx/examples/chicago_taxi_pipeline/taxi_pipeline_kubeflow.py#L211)
      contains an example how this can be used by custom executors.
    custom_executor_spec: Optional custom executor spec.
    model_push: Optional output 'ModelPushPath' channel with result of push.
    model_export: Backwards compatibility alias for the 'model' argument.
    instance_name: Optional unique instance name. Necessary if multiple
      Pusher components are declared in the same pipeline.

  Raises:
    ValueError: If push_destination is not provided and no
      custom_executor_spec is supplied.
  """
  # Fix: parameters defaulting to None are annotated Optional[...] rather
  # than relying on implicit Optional (disallowed by PEP 484).
  # 'model_export' is the deprecated alias; prefer 'model' when both given.
  model = model or model_export
  model_push = model_push or types.Channel(
      type=standard_artifacts.PushedModel,
      artifacts=[standard_artifacts.PushedModel()])
  if push_destination is None and not custom_executor_spec:
    raise ValueError(
        'push_destination is required unless a '
        'custom_executor_spec is supplied that does not require '
        'it.')
  spec = PusherSpec(
      model_export=model,
      model_blessing=model_blessing,
      push_destination=push_destination,
      custom_config=custom_config,
      model_push=model_push)
  super(Pusher, self).__init__(
      spec=spec,
      custom_executor_spec=custom_executor_spec,
      instance_name=instance_name)
def setUp(self):
  """Builds artifact dicts, BigQuery exec properties and service mocks."""
  super(ExecutorTest, self).setUp()
  repo_root = os.path.dirname(
      os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
  self._source_data_dir = os.path.join(repo_root, 'components', 'testdata')
  output_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                               self.get_temp_dir())
  self._output_data_dir = os.path.join(output_root, self._testMethodName)
  tf.io.gfile.makedirs(self._output_data_dir)

  # Inputs: exported model plus an (empty) blessing artifact.
  self._model_export = standard_artifacts.Model()
  self._model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current/')
  self._model_blessing = standard_artifacts.ModelBlessing()
  self._input_dict = {
      'model_export': [self._model_export],
      'model_blessing': [self._model_blessing],
  }

  # Output: destination artifact for the pushed model.
  self._model_push = standard_artifacts.PushedModel()
  self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
  tf.io.gfile.makedirs(self._model_push.uri)
  self._output_dict = {'model_push': [self._model_push]}

  # BigQuery ML serving args are passed via custom_config.
  self._exec_properties = {
      'custom_config': {
          'bigquery_serving_args': {
              'model_name': 'model_name',
              'project_id': 'project_id',
              'bq_dataset_id': 'bq_dataset_id',
          },
      },
      'push_destination': None,
  }
  self._executor = Executor()

  # Setting up Mock for external services; stopall runs at test cleanup.
  self.addCleanup(mock.patch.stopall)
  self.mock_bq = mock.patch.object(bigquery, 'Client', autospec=True).start()
  self.mock_check_blessing = mock.patch.object(
      Executor, 'CheckBlessing', autospec=True).start()
  self.mock_path_utils = mock.patch.object(
      path_utils,
      'serving_model_path',
      return_value='gs://test_model_path',
      autospec=True).start()
def __init__(
    self,
    model_export: Optional[types.Channel] = None,
    model_blessing: Optional[types.Channel] = None,
    push_destination: Optional[pusher_pb2.PushDestination] = None,
    custom_config: Optional[Dict[Text, Any]] = None,
    executor_class: Optional[Type[base_executor.BaseExecutor]] = None,
    model_push: Optional[types.Channel] = None,
    model: Optional[types.Channel] = None,
    name: Optional[Text] = None):
  """Construct a Pusher component.

  Args:
    model_export: A Channel of 'ModelExportPath' type, usually produced by
      Trainer component (required).
    model_blessing: A Channel of 'ModelBlessingPath' type, usually produced
      by ModelValidator component (required).
    push_destination: A pusher_pb2.PushDestination instance, providing info
      for tensorflow serving to load models. Optional if executor_class
      doesn't require push_destination.
    custom_config: A dict which contains the deployment job parameters to be
      passed to Google Cloud ML Engine. For the full set of parameters
      supported by Google Cloud ML Engine, refer to
      https://cloud.google.com/ml-engine/reference/rest/v1/projects.models
    executor_class: Optional custom python executor class.
    model_push: Optional output 'ModelPushPath' channel with result of push.
    model: Forwards compatibility alias for the 'model_export' argument.
    name: Optional unique name. Necessary if multiple Pusher components are
      declared in the same pipeline.

  Raises:
    ValueError: If push_destination is not provided and no executor_class
      is supplied.
  """
  # Fixes: parameters defaulting to None are annotated Optional[...] rather
  # than relying on implicit Optional (PEP 484); the docstring for 'model'
  # referenced a nonexistent 'model_exports' argument.
  model_export = model_export or model
  model_push = model_push or types.Channel(
      type=standard_artifacts.PushedModel,
      artifacts=[standard_artifacts.PushedModel()])
  if push_destination is None and not executor_class:
    raise ValueError(
        'push_destination is required unless a custom '
        'executor_class is supplied that does not require '
        'it.')
  spec = PusherSpec(
      model_export=model_export,
      model_blessing=model_blessing,
      push_destination=push_destination,
      custom_config=custom_config,
      model_push=model_push)
  super(Pusher, self).__init__(
      spec=spec, custom_executor_class=executor_class, name=name)
def setUp(self):
  """Builds artifact dicts and AI Platform exec properties for the tests."""
  super(ExecutorTest, self).setUp()
  repo_root = os.path.dirname(
      os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
  self._source_data_dir = os.path.join(repo_root, 'components', 'testdata')
  output_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                               self.get_temp_dir())
  self._output_data_dir = os.path.join(output_root, self._testMethodName)
  fileio.makedirs(self._output_data_dir)

  # Inputs: exported model plus an (empty) blessing artifact.
  self._model_export = standard_artifacts.Model()
  self._model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current')
  self._model_blessing = standard_artifacts.ModelBlessing()
  self._input_dict = {
      standard_component_specs.MODEL_KEY: [self._model_export],
      standard_component_specs.MODEL_BLESSING_KEY: [self._model_blessing],
  }

  # Output: destination artifact for the pushed model.
  self._model_push = standard_artifacts.PushedModel()
  self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
  fileio.makedirs(self._model_push.uri)
  self._output_dict = {
      standard_component_specs.PUSHED_MODEL_KEY: [self._model_push],
  }

  # Dict format of exec_properties. custom_config needs to be serialized
  # before being passed into Do function.
  self._exec_properties = {
      'custom_config': {
          executor.SERVING_ARGS_KEY: {
              'model_name': 'model_name',
              'project_id': 'project_id'
          },
      },
      'push_destination': None,
  }
  self._executor = executor.Executor()
def setUp(self):
  """Builds input artifacts and an executor Context for BulkInferrer tests."""
  super(ExecutorTest, self).setUp()
  module_dir = os.path.dirname(os.path.dirname(__file__))
  self._source_data_dir = os.path.join(module_dir, 'testdata')
  output_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                               self.get_temp_dir())
  self._output_data_dir = os.path.join(output_root, self._testMethodName)
  self.component_id = 'test_component'

  # Create input dict.
  self._examples = standard_artifacts.Examples()
  self._examples.uri = os.path.join(self._source_data_dir, 'csv_example_gen')
  self._examples.split_names = artifact_utils.encode_split_names(
      ['unlabelled'])
  self._model = standard_artifacts.Model()
  self._model.uri = os.path.join(self._source_data_dir, 'trainer/current')
  self._model_blessing = standard_artifacts.ModelBlessing()
  self._model_blessing.uri = os.path.join(self._source_data_dir,
                                          'model_validator/blessed')
  self._model_blessing.set_int_custom_property('blessed', 1)
  self._pushed_model = standard_artifacts.PushedModel()
  self._pushed_model.uri = os.path.join(self._source_data_dir,
                                        'pusher/pushed')
  self._pushed_model.set_int_custom_property('pushed', 1)

  # Output artifact: inference results land under prediction_logs.
  self._inference_result = standard_artifacts.InferenceResult()
  self._prediction_log_dir = os.path.join(self._output_data_dir,
                                          'prediction_logs')
  self._inference_result.uri = self._prediction_log_dir

  # Create context
  self._tmp_dir = os.path.join(self._output_data_dir, '.temp')
  self._context = executor.Executor.Context(
      tmp_dir=self._tmp_dir, unique_id='2')
def setUp(self):
  """Builds artifact dicts and AI Platform exec properties for the tests."""
  super(ExecutorTest, self).setUp()
  repo_root = os.path.dirname(
      os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
  self._source_data_dir = os.path.join(repo_root, 'components', 'testdata')
  output_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                               self.get_temp_dir())
  self._output_data_dir = os.path.join(output_root, self._testMethodName)
  tf.io.gfile.makedirs(self._output_data_dir)

  # Inputs: exported model plus an (empty) blessing artifact.
  self._model_export = standard_artifacts.Model()
  self._model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current')
  self._model_blessing = standard_artifacts.ModelBlessing()
  self._input_dict = {
      'model_export': [self._model_export],
      'model_blessing': [self._model_blessing],
  }

  # Output: destination artifact for the pushed model.
  self._model_push = standard_artifacts.PushedModel()
  self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
  tf.io.gfile.makedirs(self._model_push.uri)
  self._output_dict = {'model_push': [self._model_push]}

  # AI Platform serving args are passed via custom_config.
  self._exec_properties = {
      'custom_config': {
          executor.SERVING_ARGS_KEY: {
              'model_name': 'model_name',
              'project_id': 'project_id'
          },
      },
      'push_destination': None,
  }
  self._executor = executor.Executor()
def __init__(
    self,
    model: Optional[types.Channel] = None,
    model_blessing: Optional[types.Channel] = None,
    infra_blessing: Optional[types.Channel] = None,
    push_destination: Optional[Union[pusher_pb2.PushDestination,
                                     Dict[Text, Any]]] = None,
    custom_config: Optional[Dict[Text, Any]] = None,
    custom_executor_spec: Optional[executor_spec.ExecutorSpec] = None,
    output: Optional[types.Channel] = None,
    model_export: Optional[types.Channel] = None,
    instance_name: Optional[Text] = None,
    enable_cache: Optional[bool] = None):
  """Construct a Pusher component.

  Args:
    model: A Channel of type `standard_artifacts.Model`, usually produced by
      a Trainer component.
    model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
      usually produced by a ModelValidator component. _required_
    infra_blessing: An optional Channel of type
      `standard_artifacts.InfraBlessing`, usually produced from an
      InfraValidator component.
    push_destination: A pusher_pb2.PushDestination instance, providing info
      for tensorflow serving to load models. Optional if executor_class
      doesn't require push_destination. If any field is provided as a
      RuntimeParameter, push_destination should be constructed as a dict
      with the same field names as PushDestination proto message.
    custom_config: A dict which contains the deployment job parameters to be
      passed to cloud-based training platforms. The [Kubeflow example](
      https://github.com/tensorflow/tfx/blob/6ff57e36a7b65818d4598d41e584a42584d361e6/tfx/examples/chicago_taxi_pipeline/taxi_pipeline_kubeflow_gcp.py#L278-L285)
      contains an example how this can be used by custom executors.
    custom_executor_spec: Optional custom executor spec.
    output: Optional output `standard_artifacts.PushedModel` channel with
      result of push.
    model_export: Backwards compatibility alias for the 'model' argument.
    instance_name: Optional unique instance name. Necessary if multiple
      Pusher components are declared in the same pipeline.
    enable_cache: Optional boolean to indicate if cache is enabled for the
      Pusher component. If not specified, defaults to the value specified
      for pipeline's enable_cache parameter.

  Raises:
    ValueError: If push_destination is not provided and no
      custom_executor_spec is supplied.
  """
  # Fix: parameters defaulting to None are annotated Optional[...] rather
  # than relying on implicit Optional (disallowed by PEP 484).
  if model_export:
    absl.logging.warning(
        'The "model_export" argument to the Pusher component has '
        'been renamed to "model" and is deprecated. Please update your '
        'usage as support for this argument will be removed soon.')
    model = model_export
  output = output or types.Channel(
      type=standard_artifacts.PushedModel,
      artifacts=[standard_artifacts.PushedModel()])
  if push_destination is None and not custom_executor_spec:
    raise ValueError(
        'push_destination is required unless a '
        'custom_executor_spec is supplied that does not require '
        'it.')
  spec = PusherSpec(
      model=model,
      model_blessing=model_blessing,
      infra_blessing=infra_blessing,
      push_destination=push_destination,
      custom_config=custom_config,
      pushed_model=output)
  super(Pusher, self).__init__(
      spec=spec,
      custom_executor_spec=custom_executor_spec,
      instance_name=instance_name,
      enable_cache=enable_cache)
def test_scaffold_assets_with_source(self, output_file_format: str,
                                     artifacts: bool):
  """Scaffolds a model card from TFMA/TFDV/model sources and checks contents.

  Parameterized over the TFMA output file format and over whether sources are
  supplied as MLMD artifacts (`artifacts=True`) or as filesystem paths.
  """
  # When exercising the artifact path, back the test with an in-memory
  # (fake_database) MLMD store; otherwise no store is used at all.
  if artifacts:
    connection_config = metadata_store_pb2.ConnectionConfig()
    connection_config.fake_database.SetInParent()
    mlmd_store = mlmd.MetadataStore(connection_config)
  else:
    mlmd_store = None

  train_dataset_name = 'Dataset-Split-train'
  train_features = ['feature_name1']
  eval_dataset_name = 'Dataset-Split-eval'
  eval_features = ['feature_name2']

  # Write TFMA and TFDV fixtures under tmpdir (and register them in the
  # MLMD store when one is in use).
  tfma_path = os.path.join(self.tmpdir, 'tfma')
  tfdv_path = os.path.join(self.tmpdir, 'tfdv')
  pushed_model_path = os.path.join(self.tmpdir, 'pushed_model')
  self._write_tfma(tfma_path, output_file_format, mlmd_store)
  self._write_tfdv(tfdv_path, train_dataset_name, train_features,
                   eval_dataset_name, eval_features, mlmd_store)

  # Build the three sources either from MLMD artifacts or from raw paths.
  if artifacts:
    model_evaluation_artifacts = mlmd_store.get_artifacts_by_type(
        standard_artifacts.ModelEvaluation.TYPE_NAME)
    example_statistics_artifacts = mlmd_store.get_artifacts_by_type(
        standard_artifacts.ExampleStatistics.TYPE_NAME)
    pushed_model_artifact = standard_artifacts.PushedModel()
    pushed_model_artifact.uri = pushed_model_path
    tfma_src = src.TfmaSource(
        model_evaluation_artifacts=model_evaluation_artifacts,
        metrics_exclude=['average_loss'])
    tfdv_src = src.TfdvSource(
        example_statistics_artifacts=example_statistics_artifacts,
        features_include=['feature_name1'])
    model_src = src.ModelSource(pushed_model_artifact=pushed_model_artifact)
  else:
    tfma_src = src.TfmaSource(
        eval_result_paths=[tfma_path], metrics_exclude=['average_loss'])
    tfdv_src = src.TfdvSource(
        dataset_statistics_paths=[tfdv_path],
        features_include=['feature_name1'])
    model_src = src.ModelSource(pushed_model_path=pushed_model_path)

  # Scaffold the model card from the assembled sources.
  mc = model_card_toolkit.ModelCardToolkit(source=src.Source(
      tfma=tfma_src, tfdv=tfdv_src, model=model_src)).scaffold_assets()

  # The TFMA source should yield the example_count metric (minus the
  # excluded average_loss) and exactly one graphic.
  with self.subTest(name='quantitative_analysis'):
    list_to_proto = lambda lst: [x.to_proto() for x in lst]
    expected_performance_metrics = [
        model_card.PerformanceMetric(
            type='post_export_metrics/example_count', value='2.0')
    ]
    self.assertCountEqual(
        list_to_proto(mc.quantitative_analysis.performance_metrics),
        list_to_proto(expected_performance_metrics))
    self.assertLen(mc.quantitative_analysis.graphics.collection, 1)

  # The TFDV source should produce stats for both splits, but only the
  # included feature ('feature_name1') gets a graphic.
  with self.subTest(name='model_parameters.data'):
    self.assertLen(mc.model_parameters.data, 2)  # train and eval
    for dataset in mc.model_parameters.data:
      for graphic in dataset.graphics.collection:
        self.assertIsNotNone(
            graphic.image,
            msg=f'No image found for graphic: {dataset.name} {graphic.name}')
        graphic.image = None  # ignore graphic.image for below assertions
    self.assertIn(
        model_card.Dataset(
            name=train_dataset_name,
            graphics=model_card.GraphicsCollection(collection=[
                model_card.Graphic(name='counts | feature_name1')
            ])), mc.model_parameters.data)
    self.assertNotIn(
        model_card.Dataset(
            name=eval_dataset_name,
            graphics=model_card.GraphicsCollection(collection=[
                model_card.Graphic(name='counts | feature_name2')
            ])), mc.model_parameters.data)

  # The model source should carry the pushed model's path through.
  with self.subTest(name='model_details.path'):
    self.assertEqual(mc.model_details.path, pushed_model_path)