Example #1
def make_client_factory(
    model: standard_artifacts.Model,
    serving_spec: infra_validator_pb2.ServingSpec) -> ClientFactory:
  """Creates ClientFactory from Model artifact and ServingSpec configuration.

  Note that for each `serving_binary` in ServingSpec there is a corresponding
  ModelServerClient class (a one-to-one mapping).

  Args:
    model: A `Model` artifact.
    serving_spec: A `ServingSpec` configuration.

  Returns:
    A ModelServerClient factory function that takes a Text endpoint as an argument
    and returns a ModelServerClient.
  """
  serving_binary = serving_spec.WhichOneof('serving_binary')
  if not serving_binary:
    raise ValueError('serving_binary must be set.')

  if serving_binary == TENSORFLOW_SERVING:
    model_name = os.path.basename(
        os.path.dirname(path_utils.serving_model_path(model.uri)))
    return functools.partial(
        tensorflow_serving_client.TensorFlowServingClient,
        model_name=model_name)
  else:
    raise NotImplementedError('{} is not supported'.format(serving_binary))
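
The factory returned here is just functools.partial binding model_name onto the client constructor, so the caller later supplies only the endpoint. A minimal, self-contained sketch of the same pattern with a stand-in client class (the class and values below are illustrative, not part of TFX):

import functools


class EchoClient:
  """Stand-in for a ModelServerClient-style class."""

  def __init__(self, endpoint: str, model_name: str):
    self.endpoint = endpoint
    self.model_name = model_name


# Bind model_name up front; callers later supply only the endpoint.
make_client = functools.partial(EchoClient, model_name='chicago-taxi')
client = make_client('localhost:8500')
assert (client.endpoint, client.model_name) == ('localhost:8500', 'chicago-taxi')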
Example #2
    def GetModelPath(self, input_dict: Dict[str, List[types.Artifact]]) -> str:
        """Get input model path to push.

    Pusher can push various types of artifacts as long as they contain a model.
    This method decides which artifact type was given to the Pusher and
    extracts the real model path. Subclasses of the Pusher Executor should use
    this method to acquire the source model path.

    Args:
      input_dict: A dictionary of artifacts that is given as the first argument
          to the Executor.Do() method.
    Returns:
      A resolved input model path.
    Raises:
      RuntimeError: If no model path is found from input_dict.
    """
        # Check input_dict['model'] first.
        models = input_dict.get(standard_component_specs.MODEL_KEY)
        if models:
            model = artifact_utils.get_single_instance(models)
            return path_utils.serving_model_path(
                model.uri, path_utils.is_old_model_artifact(model))

        # Fall back to input_dict['infra_blessing'].
        blessed_models = input_dict.get(
            standard_component_specs.INFRA_BLESSING_KEY)
        if not blessed_models:
            # Should not reach here; Pusher.__init__ prohibits creating a component
            # without either a model or an infra_blessing input.
            raise RuntimeError('Pusher has no model input.')
        model = artifact_utils.get_single_instance(blessed_models)
        if not model.get_int_custom_property(_INFRA_BLESSING_MODEL_FLAG_KEY):
            raise RuntimeError('InfraBlessing does not contain a model. Check '
                               'request_spec.make_warmup is set to True.')
        return path_utils.stamped_model_path(model.uri)
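
GetModelPath prefers the 'model' input and only falls back to 'infra_blessing' when no model artifact is present. A minimal sketch of that resolution order, using plain dicts of path strings instead of TFX artifacts (the helper below is hypothetical, not TFX API):

from typing import Dict, List


def resolve_model_path(inputs: Dict[str, List[str]]) -> str:
  """Return the single path under 'model', else fall back to 'infra_blessing'."""
  for key in ('model', 'infra_blessing'):
    values = inputs.get(key)
    if values:
      if len(values) != 1:
        raise ValueError('Expected exactly one artifact for %r.' % key)
      return values[0]
  raise RuntimeError('Pusher has no model input.')


assert resolve_model_path({'infra_blessing': ['/tmp/blessing']}) == '/tmp/blessing'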
Example #3
  def testDoSkippedModelCreation(self, mock_runner, mock_run_model_inference,
                                 _):
    input_dict = {
        'examples': [self._examples],
        'model': [self._model],
        'model_blessing': [self._model_blessing],
    }
    output_dict = {
        'inference_result': [self._inference_result],
    }
    ai_platform_serving_args = {
        'model_name': 'model_name',
        'project_id': 'project_id'
    }
    # Create exec properties.
    exec_properties = {
        'data_spec':
            proto_utils.proto_to_json(bulk_inferrer_pb2.DataSpec()),
        'custom_config':
            json_utils.dumps(
                {executor.SERVING_ARGS_KEY: ai_platform_serving_args}),
    }
    mock_runner.get_service_name_and_api_version.return_value = ('ml', 'v1')
    mock_runner.create_model_for_aip_prediction_if_not_exist.return_value = False

    # Run executor.
    bulk_inferrer = executor.Executor(self._context)
    bulk_inferrer.Do(input_dict, output_dict, exec_properties)

    ai_platform_prediction_model_spec = (
        model_spec_pb2.AIPlatformPredictionModelSpec(
            project_id='project_id',
            model_name='model_name',
            version_name=self._model_version))
    ai_platform_prediction_model_spec.use_serialization_config = True
    inference_endpoint = model_spec_pb2.InferenceSpecType()
    inference_endpoint.ai_platform_prediction_model_spec.CopyFrom(
        ai_platform_prediction_model_spec)
    mock_run_model_inference.assert_called_once_with(mock.ANY, mock.ANY,
                                                     mock.ANY, mock.ANY,
                                                     mock.ANY,
                                                     inference_endpoint)
    executor_class_path = '%s.%s' % (bulk_inferrer.__class__.__module__,
                                     bulk_inferrer.__class__.__name__)
    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
      job_labels = telemetry_utils.make_labels_dict()
    mock_runner.deploy_model_for_aip_prediction.assert_called_once_with(
        serving_path=path_utils.serving_model_path(self._model.uri),
        model_version_name=mock.ANY,
        ai_platform_serving_args=ai_platform_serving_args,
        labels=job_labels,
        api=mock.ANY,
        skip_model_endpoint_creation=True,
        set_default=False)
    mock_runner.delete_model_from_aip_if_exists.assert_called_once_with(
        model_version_name=mock.ANY,
        ai_platform_serving_args=ai_platform_serving_args,
        api=mock.ANY,
        delete_model_endpoint=False)
Example #4
    def _PrepareModelPath(
            self, model: types.Artifact,
            serving_spec: infra_validator_pb2.ServingSpec) -> str:
        model_path = path_utils.serving_model_path(
            model.uri, path_utils.is_old_model_artifact(model))
        serving_binary = serving_spec.WhichOneof('serving_binary')
        if serving_binary == _TENSORFLOW_SERVING:
            # TensorFlow Serving requires the model to be stored in its own
            # directory layout (flavor). If the current model_path does not
            # conform to that flavor, we need to copy it to a temporary path.
            try:
                # Check whether current model_path conforms to the tensorflow serving
                # model path flavor. (Parsed without exception)
                tf_serving_flavor.parse_model_path(
                    model_path, expected_model_name=serving_spec.model_name)
            except ValueError:
                # Copy the model to comply with the tensorflow serving model path
                # flavor.
                temp_model_path = tf_serving_flavor.make_model_path(
                    model_base_path=self._get_tmp_dir(),
                    model_name=serving_spec.model_name,
                    version=int(time.time()))
                io_utils.copy_dir(src=model_path, dst=temp_model_path)
                self._AddCleanup(io_utils.delete_dir,
                                 self._context.get_tmp_path())
                return temp_model_path

        return model_path
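
The TensorFlow Serving path "flavor" referred to above is the <model_base_path>/<model_name>/<version> directory layout that the model server expects. A hypothetical construction of such a path with the standard library, mirroring what make_model_path produces (the base path and model name below are made up):

import os
import time

model_base_path = '/tmp/infra_validation'  # assumed temporary directory
model_name = 'chicago-taxi'                # should match ServingSpec.model_name
version = int(time.time())                 # any integer version works

model_path = os.path.join(model_base_path, model_name, str(version))
print(model_path)  # e.g. /tmp/infra_validation/chicago-taxi/1700000000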
Example #5
    def setUp(self):
        super(LocalDockerRunnerTest, self).setUp()

        base_dir = os.path.join(
            os.path.dirname(  # components/
                os.path.dirname(  # infra_validator/
                    os.path.dirname(__file__))),  # model_server_runners/
            'testdata')
        self._model = standard_artifacts.Model()
        self._model.uri = os.path.join(base_dir, 'trainer', 'current')
        self._model_name = 'chicago-taxi'
        self._model_path = path_utils.serving_model_path(self._model.uri)

        # Mock docker.DockerClient
        patcher = mock.patch('docker.DockerClient')
        self._docker_client = patcher.start().return_value
        self.addCleanup(patcher.stop)

        self._serving_spec = _create_serving_spec({
            'tensorflow_serving': {
                'tags': ['1.15.0']
            },
            'local_docker': {},
            'model_name': self._model_name,
        })
        self._serving_binary = serving_bins.parse_serving_binaries(
            self._serving_spec)[0]
        patcher = mock.patch.object(self._serving_binary, 'MakeClient')
        self._model_server_client = patcher.start().return_value
        self.addCleanup(patcher.stop)
Example #6
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]):
        """Overrides the tfx_pusher_executor.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - model_export: exported model from trainer.
        - model_blessing: model blessing path from model_validator.
      output_dict: Output dict from key to a list of artifacts, including:
        - model_push: A list of 'ModelPushPath' artifact of size one. It will
          include the model in this push execution if the model was pushed.
      exec_properties: Mostly a passthrough input dict for
        tfx.components.Pusher.executor.  custom_config.ai_platform_serving_args
        is consumed by this class.  For the full set of parameters supported by
        Google Cloud AI Platform, refer to
        https://cloud.google.com/ml-engine/docs/tensorflow/deploying-models#creating_a_model_version.

    Returns:
      None
    Raises:
      ValueError: if ai_platform_serving_args is not in
        exec_properties.custom_config.
      RuntimeError: if the Google Cloud AI Platform training job failed.
    """
        self._log_startup(input_dict, output_dict, exec_properties)
        if not self.CheckBlessing(input_dict, output_dict):
            return

        model_export = artifact_utils.get_single_instance(
            input_dict['model_export'])
        model_export_uri = model_export.uri
        model_blessing_uri = artifact_utils.get_single_uri(
            input_dict['model_blessing'])
        model_push = artifact_utils.get_single_instance(
            output_dict['model_push'])
        # TODO(jyzhao): should this be in driver or executor.
        if not tf.gfile.Exists(os.path.join(model_blessing_uri, 'BLESSED')):
            model_push.set_int_custom_property('pushed', 0)
            tf.logging.info('Model on %s was not blessed', model_blessing_uri)
            return

        exec_properties_copy = exec_properties.copy()
        custom_config = exec_properties_copy.pop('custom_config', {})
        ai_platform_serving_args = custom_config['ai_platform_serving_args']

        # Deploy the model.
        model_path = path_utils.serving_model_path(model_export_uri)
        # Note: we do not have a logical model version right now. This
        # model_version is a timestamp mapped to trainer's exporter.
        model_version = os.path.basename(model_path)
        if ai_platform_serving_args is not None:
            runner.deploy_model_for_cmle_serving(model_path, model_version,
                                                 ai_platform_serving_args)

        # Make sure artifacts are populated in a standard way by calling
        # tfx.pusher.executor.Executor.Do().
        exec_properties_copy['push_destination'] = exec_properties.get(
            'push_destination') or self._make_local_temp_destination()
        super(Executor, self).Do(input_dict, output_dict, exec_properties_copy)
Example #7
  def setUp(self):
    super(ExecutorTest, self).setUp()

    # Setup Mocks

    patcher = mock.patch.object(request_builder, 'build_requests')
    self.build_requests_mock = patcher.start()
    self.addCleanup(patcher.stop)

    # Setup directories

    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    base_output_dir = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                     self.get_temp_dir())
    output_data_dir = os.path.join(base_output_dir, self._testMethodName)

    # Setup input_dict.

    self._model = standard_artifacts.Model()
    self._model.uri = os.path.join(source_data_dir, 'trainer', 'current')
    self._model_path = path_utils.serving_model_path(self._model.uri)
    examples = standard_artifacts.Examples()
    examples.uri = os.path.join(source_data_dir, 'transform',
                                'transformed_examples', 'eval')
    examples.split_names = artifact_utils.encode_split_names(['eval'])

    self._input_dict = {
        'model': [self._model],
        'examples': [examples],
    }
    self._blessing = standard_artifacts.InfraBlessing()
    self._blessing.uri = os.path.join(output_data_dir, 'blessing')
    self._output_dict = {'blessing': [self._blessing]}
    temp_dir = os.path.join(output_data_dir, '.temp')
    self._context = executor.Executor.Context(tmp_dir=temp_dir, unique_id='1')
    self._serving_spec = _make_serving_spec({
        'tensorflow_serving': {
            'tags': ['1.15.0']
        },
        'local_docker': {},
        'model_name': 'chicago-taxi',
    })
    self._serving_binary = serving_bins.parse_serving_binaries(
        self._serving_spec)[0]
    self._validation_spec = _make_validation_spec({
        'max_loading_time_seconds': 10,
        'num_tries': 3
    })
    self._request_spec = _make_request_spec({
        'tensorflow_serving': {
            'rpc_kind': 'CLASSIFY'
        },
        'max_examples': 1
    })
    self._exec_properties = {
        'serving_spec': json_format.MessageToJson(self._serving_spec),
        'validation_spec': json_format.MessageToJson(self._validation_spec),
        'request_spec': json_format.MessageToJson(self._request_spec),
    }
Example #8
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]):
        """Overrides the tfx_pusher_executor.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - model_export: exported model from trainer.
        - model_blessing: model blessing path from model_validator.
      output_dict: Output dict from key to a list of artifacts, including:
        - model_push: A list of 'ModelPushPath' artifact of size one. It will
          include the model in this push execution if the model was pushed.
      exec_properties: Mostly a passthrough input dict for
        tfx.components.Pusher.executor.  custom_config.ai_platform_serving_args
        is consumed by this class.  For the full set of parameters supported by
        Google Cloud AI Platform, refer to
        https://cloud.google.com/ml-engine/docs/tensorflow/deploying-models#creating_a_model_version.

    Returns:
      None
    Raises:
      ValueError:
        If ai_platform_serving_args is not in exec_properties.custom_config.
        If the serving model path does not start with gs://.
      RuntimeError: if the Google Cloud AI Platform training job failed.
    """
        self._log_startup(input_dict, output_dict, exec_properties)
        model_push = artifact_utils.get_single_instance(
            output_dict[tfx_pusher_executor.PUSHED_MODEL_KEY])
        if not self.CheckBlessing(input_dict):
            model_push.set_int_custom_property('pushed', 0)
            return

        model_export = artifact_utils.get_single_instance(
            input_dict[tfx_pusher_executor.MODEL_KEY])
        model_export_uri = model_export.uri

        exec_properties_copy = exec_properties.copy()
        custom_config = exec_properties_copy.pop('custom_config', {})
        ai_platform_serving_args = custom_config[SERVING_ARGS_KEY]
        if not ai_platform_serving_args:
            raise ValueError(
                '\'ai_platform_serving_args\' is missing in \'custom_config\'')
        # Deploy the model.
        model_path = path_utils.serving_model_path(model_export_uri)
        # Note: we do not have a logical model version right now. This
        # model_version is a timestamp mapped to trainer's exporter.
        model_version = os.path.basename(model_path)
        executor_class_path = '%s.%s' % (self.__class__.__module__,
                                         self.__class__.__name__)
        runner.deploy_model_for_aip_prediction(
            model_path,
            model_version,
            ai_platform_serving_args,
            executor_class_path,
        )

        model_push.set_int_custom_property('pushed', 1)
        model_push.set_string_custom_property('pushed_model', model_path)
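
In both pusher examples, the version name is simply the basename of the serving model path, i.e. the timestamped export directory produced by the trainer. A tiny illustration with a made-up path:

import os

model_path = '/pipelines/my_pipeline/Trainer/model/7/serving_model_dir/1600000000'  # hypothetical
model_version = os.path.basename(model_path)
assert model_version == '1600000000'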
Example #9
  def __init__(self, model: standard_artifacts.Model, image_uri: Text,
               config: infra_validator_pb2.LocalDockerConfig,
               client_factory: factory.ClientFactory):
    self._model_dir = os.path.dirname(
        path_utils.serving_model_path(model.uri))
    self._image_uri = image_uri
    self._docker = self._MakeDockerClientFromConfig(config)
    self._client_factory = client_factory
    self._container = None
    self._client = None
Example #10
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs batch inference on a given model with given input examples.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - examples: examples for inference.
        - model: exported model.
        - model_blessing: model blessing result, optional.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: bulk inference results.
      exec_properties: A dict of execution properties.
        - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
        - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        if 'examples' not in input_dict:
            raise ValueError('\'examples\' is missing in input dict.')
        if 'inference_result' not in output_dict:
            raise ValueError('\'inference_result\' is missing in output dict.')
        output = artifact_utils.get_single_instance(
            output_dict['inference_result'])
        if 'model' not in input_dict:
            raise ValueError('Input models are not valid, a model '
                             'needs to be specified.')
        if 'model_blessing' in input_dict:
            model_blessing = artifact_utils.get_single_instance(
                input_dict['model_blessing'])
            if not model_utils.is_model_blessed(model_blessing):
                output.set_int_custom_property('inferred', 0)
                logging.info('Model on %s was not blessed', model_blessing.uri)
                return
        else:
            logging.info(
                'Model blessing is not provided, exported model will be '
                'used.')

        model = artifact_utils.get_single_instance(input_dict['model'])
        model_path = path_utils.serving_model_path(model.uri)
        logging.info('Use exported model from %s.', model_path)

        data_spec = bulk_inferrer_pb2.DataSpec()
        json_format.Parse(exec_properties['data_spec'], data_spec)
        if self._run_model_inference(
                data_spec, input_dict['examples'], output.uri,
                self._get_inference_spec(model_path, exec_properties)):
            output.set_int_custom_property('inferred', 1)
        else:
            output.set_int_custom_property('inferred', 0)
Example #11
    def testKerasModelPath(self):
        # Create folders based on Keras based Trainer output model directory.
        output_uri = os.path.join(self.get_temp_dir(), 'model_dir')
        serving_model_path = path_utils.serving_model_dir(output_uri)
        serving_model = os.path.join(serving_model_path, 'saved_model.pb')
        io_utils.write_string_file(serving_model, 'testing')

        # Test retrieving model folder.
        self.assertEqual(serving_model_path,
                         path_utils.eval_model_path(output_uri))
        self.assertEqual(serving_model_path,
                         path_utils.serving_model_path(output_uri))
Example #12
  def _CreateKubernetesRunner(self, k8s_config_dict=None):
    self._serving_spec = infra_validator_pb2.ServingSpec()
    json_format.ParseDict({
        'tensorflow_serving': {
            'tags': ['1.15.0']},
        'kubernetes': k8s_config_dict or {},
        'model_name': self._model_name,
    }, self._serving_spec)
    serving_binary = serving_bins.parse_serving_binaries(self._serving_spec)[0]

    return kubernetes_runner.KubernetesRunner(
        model_path=path_utils.serving_model_path(self._model.uri),
        serving_binary=serving_binary,
        serving_spec=self._serving_spec)
Example #13
    def testEstimatorModelPath(self, is_old_artifact):
        # Create folders based on Estimator based Trainer output model directory,
        # after Executor performs cleaning.
        output_uri = os.path.join(self.get_temp_dir(), 'model_dir')
        eval_model_path = path_utils.eval_model_dir(output_uri,
                                                    is_old_artifact)
        eval_model = os.path.join(eval_model_path, 'saved_model.pb')
        io_utils.write_string_file(eval_model, 'testing')
        serving_model_path = path_utils.serving_model_dir(
            output_uri, is_old_artifact)
        serving_model = os.path.join(serving_model_path, 'saved_model.pb')
        io_utils.write_string_file(serving_model, 'testing')

        # Test retrieving model folder.
        self.assertEqual(
            eval_model_path,
            path_utils.eval_model_path(output_uri, is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.serving_model_path(output_uri, is_old_artifact))

        self.assertEqual(
            eval_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TFMA_EVAL,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TF_KERAS,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TF_GENERIC,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TF_ESTIMATOR,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri, path_constants.TF_JS,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TF_LITE,
                                             is_old_artifact))
Example #14
  def _GetFnArgs(self, input_dict: Dict[Text, List[types.Artifact]],
                 output_dict: Dict[Text, List[types.Artifact]],
                 exec_properties: Dict[Text, Any]) -> fn_args_utils.FnArgs:
    # Load and deserialize custom config from execution properties.
    # Note that in the component interface the default serialization of custom
    # config is 'null' instead of '{}'. Therefore we need to default the
    # json_utils.loads to 'null' then populate it with an empty dict when
    # needed.
    custom_config = json_utils.loads(
        exec_properties.get(constants.CUSTOM_CONFIG_KEY, 'null')) or {}
    if not isinstance(custom_config, dict):
      raise ValueError('custom_config in execution properties needs to be a '
                       'dict. Got %s instead.' % type(custom_config))

    # TODO(ruoyu): Make this a dict of tag -> uri instead of list.
    if input_dict.get(constants.BASE_MODEL_KEY):
      base_model = path_utils.serving_model_path(
          artifact_utils.get_single_uri(input_dict[constants.BASE_MODEL_KEY]))
    else:
      base_model = None

    if input_dict.get(constants.HYPERPARAMETERS_KEY):
      hyperparameters_file = io_utils.get_only_uri_in_dir(
          artifact_utils.get_single_uri(
              input_dict[constants.HYPERPARAMETERS_KEY]))
      hyperparameters_config = json.loads(
          file_io.read_file_to_string(hyperparameters_file))
    else:
      hyperparameters_config = None

    output_path = artifact_utils.get_single_uri(
        output_dict[constants.MODEL_KEY])
    serving_model_dir = path_utils.serving_model_dir(output_path)
    eval_model_dir = path_utils.eval_model_dir(output_path)

    model_run_dir = artifact_utils.get_single_uri(
        output_dict[constants.MODEL_RUN_KEY])

    # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
    result = fn_args_utils.get_common_fn_args(input_dict, exec_properties)
    result.transform_output = result.transform_graph_path
    result.serving_model_dir = serving_model_dir
    result.eval_model_dir = eval_model_dir
    result.model_run_dir = model_run_dir
    result.schema_file = result.schema_path
    result.base_model = base_model
    result.hyperparameters = hyperparameters_config
    result.custom_config = custom_config
    return result
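
The comment above is the key detail: an unset custom_config is serialized as the JSON literal 'null', so the code deserializes it and then falls back to an empty dict. A small sketch of that behavior using the standard json module in place of tfx's json_utils:

import json

for raw in ('null', '{}', '{"learning_rate": 0.01}'):
  custom_config = json.loads(raw) or {}  # 'null' decodes to None, hence the `or {}`
  assert isinstance(custom_config, dict)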
Example #15
def ensemble_selection(
    problem_statement: Parameter[str],
    examples: InputArtifact[standard_artifacts.Examples],
    evaluation_split_name: Parameter[str],
    ensemble_size: Parameter[int],
    metric: Parameter[str],
    goal: Parameter[str],
    model: OutputArtifact[standard_artifacts.Model],
    input_model0: InputArtifact[standard_artifacts.Model] = None,
    input_model1: InputArtifact[standard_artifacts.Model] = None,
    input_model2: InputArtifact[standard_artifacts.Model] = None,
    input_model3: InputArtifact[standard_artifacts.Model] = None,
    input_model4: InputArtifact[standard_artifacts.Model] = None,
    input_model5: InputArtifact[standard_artifacts.Model] = None,
    input_model6: InputArtifact[standard_artifacts.Model] = None,
    input_model7: InputArtifact[standard_artifacts.Model] = None,
    input_model8: InputArtifact[standard_artifacts.Model] = None,
    input_model9: InputArtifact[standard_artifacts.Model] = None,
) -> None:  # pytype: disable=invalid-annotation,wrong-arg-types
    """Runs the SimpleML trainer as a separate component."""

    problem_statement = text_format.Parse(problem_statement,
                                          ps_pb2.ProblemStatement())
    input_models = [
        input_model0, input_model1, input_model2, input_model3, input_model4,
        input_model5, input_model6, input_model7, input_model8, input_model9
    ]
    saved_model_paths = {
        str(i): path_utils.serving_model_path(model.uri)
        for i, model in enumerate(input_models) if model
    }
    logging.info('Saved model paths: %s', saved_model_paths)

    label_key = _label_key(problem_statement)

    es = es_lib.EnsembleSelection(problem_statement=problem_statement,
                                  saved_model_paths=saved_model_paths,
                                  ensemble_size=ensemble_size,
                                  metric=tf.keras.metrics.deserialize(
                                      json.loads(metric)),
                                  goal=goal)

    es.fit(*_data_from_examples(
        examples_path=os.path.join(examples.uri, evaluation_split_name),
        label_key=label_key))
    logging.info('Selected ensemble weights: %s', es.weights)
    es.save(export_path=os.path.join(path_utils.serving_model_dir(model.uri),
                                     'export', 'serving'))
Example #16
    def testEstimatorModelPath(self):
        # Create folders based on Estimator based Trainer output model directory,
        # after Executor performs cleaning.
        output_uri = os.path.join(self.get_temp_dir(), 'model_dir')
        eval_model_path = path_utils.eval_model_dir(output_uri)
        eval_model = os.path.join(eval_model_path, 'saved_model.pb')
        io_utils.write_string_file(eval_model, 'testing')
        serving_model_path = path_utils.serving_model_dir(output_uri)
        serving_model = os.path.join(serving_model_path, 'saved_model.pb')
        io_utils.write_string_file(serving_model, 'testing')

        # Test retrieving model folder.
        self.assertEqual(eval_model_path,
                         path_utils.eval_model_path(output_uri))
        self.assertEqual(serving_model_path,
                         path_utils.serving_model_path(output_uri))
Example #17
    def _GetFnArgs(self, input_dict: Dict[Text, List[types.Artifact]],
                   output_dict: Dict[Text, List[types.Artifact]],
                   exec_properties: Dict[Text, Any]) -> fn_args_utils.FnArgs:
        # TODO(ruoyu): Make this a dict of tag -> uri instead of list.
        if input_dict.get(standard_component_specs.BASE_MODEL_KEY):
            base_model_artifact = artifact_utils.get_single_instance(
                input_dict[standard_component_specs.BASE_MODEL_KEY])
            base_model = path_utils.serving_model_path(
                base_model_artifact.uri,
                path_utils.is_old_model_artifact(base_model_artifact))
        else:
            base_model = None

        if input_dict.get(standard_component_specs.HYPERPARAMETERS_KEY):
            hyperparameters_file = io_utils.get_only_uri_in_dir(
                artifact_utils.get_single_uri(
                    input_dict[standard_component_specs.HYPERPARAMETERS_KEY]))
            hyperparameters_config = json.loads(
                file_io.read_file_to_string(hyperparameters_file))
        else:
            hyperparameters_config = None

        output_path = artifact_utils.get_single_uri(
            output_dict[standard_component_specs.MODEL_KEY])
        serving_model_dir = path_utils.serving_model_dir(output_path)
        eval_model_dir = path_utils.eval_model_dir(output_path)

        model_run_dir = artifact_utils.get_single_uri(
            output_dict[standard_component_specs.MODEL_RUN_KEY])

        # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
        result = fn_args_utils.get_common_fn_args(input_dict, exec_properties)
        if result.custom_config and not isinstance(result.custom_config, dict):
            raise ValueError(
                'custom_config in execution properties needs to be a '
                'dict. Got %s instead.' % type(result.custom_config))
        result.transform_output = result.transform_graph_path
        result.serving_model_dir = serving_model_dir
        result.eval_model_dir = eval_model_dir
        result.model_run_dir = model_run_dir
        result.schema_file = result.schema_path
        result.base_model = base_model
        result.hyperparameters = hyperparameters_config
        return result
Example #18
def run_fn(fn_args: executor.TrainerFnArgs):
    """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
    schema = io_utils.parse_pbtxt_file(fn_args.schema_file,
                                       schema_pb2.Schema())

    training_spec = _trainer_fn(fn_args, schema)

    # Train the model
    absl.logging.info('Training model.')
    tf.estimator.train_and_evaluate(training_spec['estimator'],
                                    training_spec['train_spec'],
                                    training_spec['eval_spec'])
    absl.logging.info('Training complete.  Model written to %s',
                      fn_args.serving_model_dir)

    # Export an eval savedmodel for TFMA
    # NOTE: When trained in distributed training cluster, eval_savedmodel must be
    # exported only by the chief worker (check TF_CONFIG).
    absl.logging.info('Exporting eval_savedmodel for TFMA.')
    eval_export_dir = path_utils.eval_model_dir(fn_args.model_run_dir)
    tfma.export.export_eval_savedmodel(
        estimator=training_spec['estimator'],
        export_dir_base=eval_export_dir,
        eval_input_receiver_fn=training_spec['eval_input_receiver_fn'])

    absl.logging.info('Exported eval_savedmodel to %s.', eval_export_dir)

    # TODO(b/160795287): Deprecate estimator based executor.
    # Copy serving and eval model from model_run to model artifact directory.
    serving_source = path_utils.serving_model_path(fn_args.model_run_dir)
    io_utils.copy_dir(serving_source, fn_args.serving_model_dir)
    absl.logging.info('Serving model copied to: %s.',
                      fn_args.serving_model_dir)

    eval_source = path_utils.eval_model_path(fn_args.model_run_dir)
    io_utils.copy_dir(eval_source, fn_args.eval_model_dir)
    absl.logging.info('Eval model copied to: %s.', fn_args.eval_model_dir)
Example #19
  def Do(self, input_dict,
         output_dict,
         exec_properties):
    """Push model to target if blessed.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - model_export: exported model from trainer.
        - model_blessing: model blessing path from model_validator.
      output_dict: Output dict from key to a list of artifacts, including:
        - model_push: A list of 'ModelPushPath' artifact of size one. It will
          include the model in this push execution if the model was pushed.
      exec_properties: A dict of execution properties, including:
        - push_destination: JSON string of pusher_pb2.PushDestination instance,
          providing instruction of destination to push model.

    Returns:
      None
    """
    self._log_startup(input_dict, output_dict, exec_properties)
    model_export = types.get_single_instance(input_dict['model_export'])
    model_export_uri = model_export.uri
    model_blessing_uri = types.get_single_uri(input_dict['model_blessing'])
    model_push = types.get_single_instance(output_dict['model_push'])
    model_push_uri = model_push.uri
    # TODO(jyzhao): should this be in driver or executor.
    if not tf.gfile.Exists(os.path.join(model_blessing_uri, 'BLESSED')):
      model_push.set_int_custom_property('pushed', 0)
      tf.logging.info('Model on %s was not blessed', model_blessing_uri)
      return
    tf.logging.info('Model pushing.')
    # Copy the model we are pushing into model_push_uri.
    model_path = path_utils.serving_model_path(model_export_uri)
    # Note: we do not have a logical model version right now. This
    # model_version is a timestamp mapped to trainer's exporter.
    model_version = os.path.basename(model_path)
    tf.logging.info('Model version is %s', model_version)
    io_utils.copy_dir(model_path, os.path.join(model_push_uri, model_version))
    tf.logging.info('Model written to %s.', model_push_uri)

    # Copy to a fixed outside path, which can be watched by the model server.
    #
    # If the model has already been successfully copied to the outside path,
    # stop copying. This is because the model validator might bless the same
    # model twice (check the mv driver) with different blessing outputs; we
    # still want Pusher to handle the mv output again to keep metadata
    # tracking, but there is no need to copy to the outside path again.
    # TODO(jyzhao): support rpc push and verification.
    push_destination = pusher_pb2.PushDestination()
    json_format.Parse(exec_properties['push_destination'], push_destination)
    serving_path = os.path.join(push_destination.filesystem.base_directory,
                                model_version)
    if tf.gfile.Exists(serving_path):
      tf.logging.info(
          'Destination directory %s already exists, skipping current push.',
          serving_path)
    else:
      # tf.serving won't load partial model, it will retry until fully copied.
      io_utils.copy_dir(model_path, serving_path)
      tf.logging.info('Model written to serving path %s.', serving_path)

    model_push.set_int_custom_property('pushed', 1)
    model_push.set_string_custom_property('pushed_model', model_export_uri)
    model_push.set_int_custom_property('pushed_model_id', model_export.id)
    tf.logging.info('Model pushed to %s.', serving_path)

    if exec_properties.get('custom_config'):
      cmle_serving_args = exec_properties.get('custom_config',
                                              {}).get('cmle_serving_args')
      if cmle_serving_args is not None:
        return cmle_runner.deploy_model_for_serving(serving_path, model_version,
                                                    cmle_serving_args,
                                                    exec_properties['log_root'])
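
The push above is deliberately idempotent: if the versioned serving directory already exists, the copy is skipped so a re-run (for example, after the same model is blessed twice) never disturbs a model the server may already be loading. A minimal sketch of that check, using shutil in place of io_utils.copy_dir (paths are illustrative):

import os
import shutil


def push_once(model_path: str, serving_base_dir: str, model_version: str) -> str:
  """Copy the model into <serving_base_dir>/<model_version> unless it already exists."""
  serving_path = os.path.join(serving_base_dir, model_version)
  if os.path.exists(serving_path):
    # Already pushed; skip so a partially loaded model is never overwritten.
    return serving_path
  shutil.copytree(model_path, serving_path)
  return serving_path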
Example #20
  def test_model_path(self):
    # Test retrieving model folder.
    self.assertEqual(self._eval_model_path,
                     path_utils.eval_model_path(self._output_uri))
    self.assertEqual(self._serving_model_path,
                     path_utils.serving_model_path(self._output_uri))