Example #1
    def _GetFnArgs(self, input_dict: Dict[str, List[types.Artifact]],
                   output_dict: Dict[str, List[types.Artifact]],
                   exec_properties: Dict[str, Any]) -> fn_args_utils.FnArgs:
        if input_dict.get(standard_component_specs.HYPERPARAMETERS_KEY):
            hyperparameters_file = io_utils.get_only_uri_in_dir(
                artifact_utils.get_single_uri(
                    input_dict[standard_component_specs.HYPERPARAMETERS_KEY]))
            hyperparameters_config = json.loads(
                file_io.read_file_to_string(hyperparameters_file))
        else:
            hyperparameters_config = None

        output_path = artifact_utils.get_single_uri(
            output_dict[standard_component_specs.MODEL_KEY])
        serving_model_dir = path_utils.serving_model_dir(output_path)
        eval_model_dir = path_utils.eval_model_dir(output_path)

        model_run_dir = artifact_utils.get_single_uri(
            output_dict[standard_component_specs.MODEL_RUN_KEY])

        # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
        result = fn_args_utils.get_common_fn_args(input_dict, exec_properties)
        if result.custom_config and not isinstance(result.custom_config, dict):
            raise ValueError(
                'custom_config in execution properties needs to be a '
                'dict. Got %s instead.' % type(result.custom_config))
        result.transform_output = result.transform_graph_path
        result.serving_model_dir = serving_model_dir
        result.eval_model_dir = eval_model_dir
        result.model_run_dir = model_run_dir
        result.schema_file = result.schema_path
        result.hyperparameters = hyperparameters_config
        return result
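
Example #1 derives the serving and eval model directories from the Model artifact's URI using path_utils. A minimal sketch, not taken from any of the projects above, of calling the same helpers directly; it assumes tfx.utils.path_utils is importable, and the artifact URI below is hypothetical:

# Minimal usage sketch of the path helpers used in Example #1 (hypothetical URI).
from tfx.utils import path_utils

model_uri = '/tmp/pipeline_root/Trainer/model/1'       # hypothetical Model artifact URI
serving_dir = path_utils.serving_model_dir(model_uri)  # where the serving SavedModel is written
eval_dir = path_utils.eval_model_dir(model_uri)        # where the TFMA eval SavedModel is written
print(serving_dir, eval_dir)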
Example #2
 def _verify_model_exports(self):
     self.assertTrue(
         tf.io.gfile.exists(
             path_utils.eval_model_dir(self._model_exports.uri)))
     self.assertTrue(
         tf.io.gfile.exists(
             path_utils.serving_model_dir(self._model_exports.uri)))
Example #3
def _trainer_fn(trainer_fn_args, schema):
    """Build the estimator using the high level API.

    Args:
      trainer_fn_args: Holds args used to train the model as name/value pairs.
      schema: Holds the schema of the training examples.

    Returns:
      A dict of the following:
        - estimator: The estimator that will be used for training and eval.
        - train_spec: Spec for training.
        - eval_spec: Spec for eval.
        - eval_input_receiver_fn: Input function for eval.
    """

    train_batch_size = 20
    eval_batch_size = 10

    train_input_fn = lambda: _input_fn(  # pylint: disable=g-long-lambda
        trainer_fn_args.train_files,
        trainer_fn_args.data_accessor,
        schema,
        batch_size=train_batch_size)

    eval_input_fn = lambda: _input_fn(  # pylint: disable=g-long-lambda
        trainer_fn_args.eval_files,
        trainer_fn_args.data_accessor,
        schema,
        batch_size=eval_batch_size)

    train_spec = tf.estimator.TrainSpec(train_input_fn,
                                        max_steps=trainer_fn_args.train_steps)

    serving_receiver_fn = lambda: _serving_input_receiver_fn(schema)

    exporter = tf.estimator.FinalExporter('iris', serving_receiver_fn)
    eval_spec = tf.estimator.EvalSpec(eval_input_fn,
                                      steps=trainer_fn_args.eval_steps,
                                      exporters=[exporter],
                                      name='iris-eval')

    run_config = tf.estimator.RunConfig(save_checkpoints_steps=999,
                                        keep_checkpoint_max=1)

    export_dir = path_utils.serving_model_dir(trainer_fn_args.model_run_dir)
    run_config = run_config.replace(model_dir=export_dir)

    estimator = tf.keras.estimator.model_to_estimator(
        keras_model=_keras_model_builder(), config=run_config)

    # Create an input receiver for TFMA processing
    eval_receiver_fn = lambda: _eval_input_receiver_fn(schema)

    return {
        'estimator': estimator,
        'train_spec': train_spec,
        'eval_spec': eval_spec,
        'eval_input_receiver_fn': eval_receiver_fn
    }
Example #4
    def testKerasModelPath(self):
        # Create folders based on Keras based Trainer output model directory.
        output_uri = os.path.join(self.get_temp_dir(), 'model_dir')
        serving_model_path = path_utils.serving_model_dir(output_uri)
        serving_model = os.path.join(serving_model_path, 'saved_model.pb')
        io_utils.write_string_file(serving_model, 'testing')

        # Test retrieving model folder.
        self.assertEqual(serving_model_path,
                         path_utils.eval_model_path(output_uri))
        self.assertEqual(serving_model_path,
                         path_utils.serving_model_path(output_uri))
Example #5
    def testEstimatorModelPath(self, is_old_artifact):
        # Create folders based on Estimator based Trainer output model directory,
        # after Executor performs cleaning.
        output_uri = os.path.join(self.get_temp_dir(), 'model_dir')
        eval_model_path = path_utils.eval_model_dir(output_uri,
                                                    is_old_artifact)
        eval_model = os.path.join(eval_model_path, 'saved_model.pb')
        io_utils.write_string_file(eval_model, 'testing')
        serving_model_path = path_utils.serving_model_dir(
            output_uri, is_old_artifact)
        serving_model = os.path.join(serving_model_path, 'saved_model.pb')
        io_utils.write_string_file(serving_model, 'testing')

        # Test retrieving model folder.
        self.assertEqual(
            eval_model_path,
            path_utils.eval_model_path(output_uri, is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.serving_model_path(output_uri, is_old_artifact))

        self.assertEqual(
            eval_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TFMA_EVAL,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TF_KERAS,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TF_GENERIC,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TF_ESTIMATOR,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri, path_constants.TF_JS,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TF_LITE,
                                             is_old_artifact))
Example #6
File: executor.py  Project: RominYue/tfx
  def _GetFnArgs(self, input_dict: Dict[Text, List[types.Artifact]],
                 output_dict: Dict[Text, List[types.Artifact]],
                 exec_properties: Dict[Text, Any]) -> fn_args_utils.FnArgs:
    # Load and deserialize custom config from execution properties.
    # Note that in the component interface the default serialization of custom
    # config is 'null' instead of '{}'. Therefore we need to default the
    # json_utils.loads to 'null' then populate it with an empty dict when
    # needed.
    custom_config = json_utils.loads(
        exec_properties.get(constants.CUSTOM_CONFIG_KEY, 'null')) or {}
    if not isinstance(custom_config, dict):
      raise ValueError('custom_config in execution properties needs to be a '
                       'dict. Got %s instead.' % type(custom_config))

    # TODO(ruoyu): Make this a dict of tag -> uri instead of list.
    if input_dict.get(constants.BASE_MODEL_KEY):
      base_model = path_utils.serving_model_path(
          artifact_utils.get_single_uri(input_dict[constants.BASE_MODEL_KEY]))
    else:
      base_model = None

    if input_dict.get(constants.HYPERPARAMETERS_KEY):
      hyperparameters_file = io_utils.get_only_uri_in_dir(
          artifact_utils.get_single_uri(
              input_dict[constants.HYPERPARAMETERS_KEY]))
      hyperparameters_config = json.loads(
          file_io.read_file_to_string(hyperparameters_file))
    else:
      hyperparameters_config = None

    output_path = artifact_utils.get_single_uri(
        output_dict[constants.MODEL_KEY])
    serving_model_dir = path_utils.serving_model_dir(output_path)
    eval_model_dir = path_utils.eval_model_dir(output_path)

    model_run_dir = artifact_utils.get_single_uri(
        output_dict[constants.MODEL_RUN_KEY])

    # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
    result = fn_args_utils.get_common_fn_args(input_dict, exec_properties)
    result.transform_output = result.transform_graph_path
    result.serving_model_dir = serving_model_dir
    result.eval_model_dir = eval_model_dir
    result.model_run_dir = model_run_dir
    result.schema_file = result.schema_path
    result.base_model = base_model
    result.hyperparameters = hyperparameters_config
    result.custom_config = custom_config
    return result
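
The comment at the top of this example explains that an unset custom_config arrives as the JSON string 'null' rather than '{}'. A small sketch of that fallback, assuming json_utils.loads decodes plain JSON values the same way the standard json module does:

# Sketch of the 'null' default and the "or {}" fallback used above.
import json

custom_config = json.loads('null') or {}          # None falls back to an empty dict
assert custom_config == {}

custom_config = json.loads('{"lr": 0.01}') or {}  # a real config passes through unchanged
assert custom_config == {'lr': 0.01}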
Example #7
  def _assertNumberOfTrainerOutputIsOne(self, pipeline_name):
    """Make sure the number of trainer executions and output models."""
    # There must be only one execution of Trainer.
    trainer_output_base_dir = os.path.join(
        self._pipeline_root(pipeline_name), 'Trainer', 'model')
    trainer_outputs = fileio.listdir(trainer_output_base_dir)
    self.assertEqual(1, len(trainer_outputs))

    # There must be only one saved model each for serving and eval.
    model_uri = os.path.join(trainer_output_base_dir, trainer_outputs[0])
    eval_model_dir = path_utils.eval_model_dir(model_uri)
    serving_model_dir = path_utils.serving_model_dir(model_uri)
    self.assertEqual(1, fileio.listdir(eval_model_dir).count('saved_model.pb'))
    self.assertEqual(1,
                     fileio.listdir(serving_model_dir).count('saved_model.pb'))
Example #8
def ensemble_selection(
    problem_statement: Parameter[str],
    examples: InputArtifact[standard_artifacts.Examples],
    evaluation_split_name: Parameter[str],
    ensemble_size: Parameter[int],
    metric: Parameter[str],
    goal: Parameter[str],
    model: OutputArtifact[standard_artifacts.Model],
    input_model0: InputArtifact[standard_artifacts.Model] = None,
    input_model1: InputArtifact[standard_artifacts.Model] = None,
    input_model2: InputArtifact[standard_artifacts.Model] = None,
    input_model3: InputArtifact[standard_artifacts.Model] = None,
    input_model4: InputArtifact[standard_artifacts.Model] = None,
    input_model5: InputArtifact[standard_artifacts.Model] = None,
    input_model6: InputArtifact[standard_artifacts.Model] = None,
    input_model7: InputArtifact[standard_artifacts.Model] = None,
    input_model8: InputArtifact[standard_artifacts.Model] = None,
    input_model9: InputArtifact[standard_artifacts.Model] = None,
) -> None:  # pytype: disable=invalid-annotation,wrong-arg-types
    """Runs the SimpleML trainer as a separate component."""

    problem_statement = text_format.Parse(problem_statement,
                                          ps_pb2.ProblemStatement())
    input_models = [
        input_model0, input_model1, input_model2, input_model3, input_model4,
        input_model5, input_model6, input_model7, input_model8, input_model9
    ]
    saved_model_paths = {
        str(i): path_utils.serving_model_path(model.uri)
        for i, model in enumerate(input_models) if model
    }
    logging.info('Saved model paths: %s', saved_model_paths)

    label_key = _label_key(problem_statement)

    es = es_lib.EnsembleSelection(problem_statement=problem_statement,
                                  saved_model_paths=saved_model_paths,
                                  ensemble_size=ensemble_size,
                                  metric=tf.keras.metrics.deserialize(
                                      json.loads(metric)),
                                  goal=goal)

    es.fit(*_data_from_examples(examples_path=os.path.join(
        examples.uri, evaluation_split_name),
                                label_key=label_key))
    logging.info('Selected ensemble weights: %s', es.weights)
    es.save(export_path=os.path.join(path_utils.serving_model_dir(model.uri),
                                     'export', 'serving'))
Example #9
    def testEstimatorModelPath(self):
        # Create folders based on Estimator based Trainer output model directory,
        # after Executor performs cleaning.
        output_uri = os.path.join(self.get_temp_dir(), 'model_dir')
        eval_model_path = path_utils.eval_model_dir(output_uri)
        eval_model = os.path.join(eval_model_path, 'saved_model.pb')
        io_utils.write_string_file(eval_model, 'testing')
        serving_model_path = path_utils.serving_model_dir(output_uri)
        serving_model = os.path.join(serving_model_path, 'saved_model.pb')
        io_utils.write_string_file(serving_model, 'testing')

        # Test retrieving model folder.
        self.assertEqual(eval_model_path,
                         path_utils.eval_model_path(output_uri))
        self.assertEqual(serving_model_path,
                         path_utils.serving_model_path(output_uri))
Example #10
    def _assertNumberOfTrainerOutputIsOne(self, pipeline_name):
        """Make sure the number of trainer executions and output models."""
        # There must be only one execution of Trainer.
        trainer_output_base_dir = os.path.join(
            self._pipeline_root(pipeline_name), 'Trainer', 'model')
        trainer_outputs = tf.io.gfile.listdir(trainer_output_base_dir)
        self.assertEqual(1, len(trainer_outputs))

        # There must be only one saved model each for serving and eval.
        model_uri = os.path.join(trainer_output_base_dir, trainer_outputs[0])
        self.assertEqual(
            1, len(tf.io.gfile.listdir(path_utils.eval_model_dir(model_uri))))
        self.assertEqual(
            1,
            len(
                tf.io.gfile.listdir(
                    os.path.join(path_utils.serving_model_dir(model_uri),
                                 'export', 'chicago-taxi'))))
Example #11
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """Uses a user-supplied tf.estimator to train a TensorFlow model locally.

    The Trainer Executor invokes a training_fn callback function provided by
    the user via the module_file parameter.  With the tf.estimator returned by
    this function, the Trainer Executor then builds a TensorFlow model using the
    user-provided tf.estimator.

    Args:
      input_dict: Input dict from input key to a list of ML-Metadata Artifacts.
        - examples: Examples used for training, must include 'train' and 'eval'
          splits.
        - transform_output: Optional input transform graph.
        - schema: Schema of the data.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: Exported model.
      exec_properties: A dict of execution properties.
        - train_args: JSON string of trainer_pb2.TrainArgs instance, providing
          args for training.
        - eval_args: JSON string of trainer_pb2.EvalArgs instance, providing
          args for eval.
        - module_file: Python module file containing UDF model definition.
        - warm_starting: Whether or not we need to do warm starting.
        - warm_start_from: Optional. If warm_starting is True, this is the
          directory to find previous model to warm start on.

    Returns:
      None

    Raises:
      ValueError: When neither or both of 'module_file' and 'trainer_fn'
        are present in 'exec_properties'.
    """
    self._log_startup(input_dict, output_dict, exec_properties)

    # TODO(zhitaoli): Deprecate this in a future version.
    if exec_properties.get('custom_config', None):
      cmle_args = exec_properties.get('custom_config',
                                      {}).get('cmle_training_args')
      if cmle_args:
        executor_class_path = '.'.join([Executor.__module__, Executor.__name__])
        absl.logging.warn(
            'Passing \'cmle_training_args\' to trainer directly is deprecated, '
            'please use extension executor at '
            'tfx.extensions.google_cloud_ai_platform.trainer.executor instead')

        return runner.start_cmle_training(input_dict, output_dict,
                                          exec_properties, executor_class_path,
                                          cmle_args)

    trainer_fn = self._GetTrainerFn(exec_properties)

    # Set up training parameters
    train_files = [
        _all_files_pattern(
            artifact_utils.get_split_uri(input_dict['examples'], 'train'))
    ]
    transform_output = artifact_utils.get_single_uri(
        input_dict['transform_output']) if input_dict.get(
            'transform_output', None) else None
    eval_files = [
        _all_files_pattern(
            artifact_utils.get_split_uri(input_dict['examples'], 'eval'))
    ]
    schema_file = io_utils.get_only_uri_in_dir(
        artifact_utils.get_single_uri(input_dict['schema']))

    train_args = trainer_pb2.TrainArgs()
    eval_args = trainer_pb2.EvalArgs()
    json_format.Parse(exec_properties['train_args'], train_args)
    json_format.Parse(exec_properties['eval_args'], eval_args)

    # https://github.com/tensorflow/tfx/issues/45: Replace num_steps=0 with
    # num_steps=None.  Conversion of the proto to python will set the default
    # value of an int as 0 so modify the value here.  Tensorflow will raise an
    # error if num_steps <= 0.
    train_steps = train_args.num_steps or None
    eval_steps = eval_args.num_steps or None

    output_path = artifact_utils.get_single_uri(output_dict['output'])
    serving_model_dir = path_utils.serving_model_dir(output_path)
    eval_model_dir = path_utils.eval_model_dir(output_path)

    # Assemble warm start path if needed.
    warm_start_from = None
    if exec_properties.get('warm_starting') and exec_properties.get(
        'warm_start_from'):
      previous_model_dir = os.path.join(exec_properties['warm_start_from'],
                                        path_utils.SERVING_MODEL_DIR)
      if previous_model_dir and tf.io.gfile.exists(
          os.path.join(previous_model_dir, self._CHECKPOINT_FILE_NAME)):
        warm_start_from = previous_model_dir

    # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
    hparams = _HParamWrapper(
        # A list of uris for train files.
        train_files=train_files,
        # An optional single uri for transform graph produced by TFT. Will be
        # None if not specified.
        transform_output=transform_output,
        # A single uri for the output directory of the serving model.
        serving_model_dir=serving_model_dir,
        # A list of uris for eval files.
        eval_files=eval_files,
        # A single uri for schema file.
        schema_file=schema_file,
        # Number of train steps.
        train_steps=train_steps,
        # Number of eval steps.
        eval_steps=eval_steps,
        # A single uri for the model directory to warm start from.
        warm_start_from=warm_start_from)

    schema = io_utils.parse_pbtxt_file(schema_file, schema_pb2.Schema())

    training_spec = trainer_fn(hparams, schema)

    # Train the model
    absl.logging.info('Training model.')
    tf.estimator.train_and_evaluate(training_spec['estimator'],
                                    training_spec['train_spec'],
                                    training_spec['eval_spec'])
    absl.logging.info('Training complete.  Model written to %s',
                      serving_model_dir)

    # Export an eval savedmodel for TFMA
    absl.logging.info('Exporting eval_savedmodel for TFMA.')
    tfma.export.export_eval_savedmodel(
        estimator=training_spec['estimator'],
        export_dir_base=eval_model_dir,
        eval_input_receiver_fn=training_spec['eval_input_receiver_fn'])

    absl.logging.info('Exported eval_savedmodel to %s.', eval_model_dir)
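
This executor, like Examples #12, #17, and #19, normalizes num_steps with "train_args.num_steps or None" because proto int fields decode to 0 when unset and tf.estimator rejects num_steps <= 0 (see the comment referencing tensorflow/tfx issue #45). A tiny sketch of that normalization:

# Sketch of the num_steps normalization: an unset proto field decodes to 0,
# which is mapped to None so training runs until the input is exhausted.
num_steps = 0
train_steps = num_steps or None
assert train_steps is None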
Example #12
  def _GetFnArgs(self, input_dict: Dict[Text, List[types.Artifact]],
                 output_dict: Dict[Text, List[types.Artifact]],
                 exec_properties: Dict[Text, Any]) -> TrainerFnArgs:
    custom_config = exec_properties.get('custom_config') or {}
    if not isinstance(custom_config, dict):
      raise ValueError('Expect custom_config to be a dict but got %s instead' %
                       type(custom_config))

    # Set up training parameters
    train_files = [
        _all_files_pattern(
            artifact_utils.get_split_uri(input_dict[EXAMPLES_KEY], 'train'))
    ]
    transform_output = artifact_utils.get_single_uri(
        input_dict[TRANSFORM_GRAPH_KEY]) if input_dict.get(
            TRANSFORM_GRAPH_KEY, None) else None
    eval_files = [
        _all_files_pattern(
            artifact_utils.get_split_uri(input_dict[EXAMPLES_KEY], 'eval'))
    ]
    schema_file = io_utils.get_only_uri_in_dir(
        artifact_utils.get_single_uri(input_dict[SCHEMA_KEY]))
    # TODO(ruoyu): Make this a dict of tag -> uri instead of list.
    base_model = path_utils.serving_model_path(
        artifact_utils.get_single_uri(input_dict[BASE_MODEL_KEY])
    ) if input_dict.get(BASE_MODEL_KEY) else None
    if input_dict.get(HYPERPARAMETERS_KEY):
      hyperparameters_file = io_utils.get_only_uri_in_dir(
          artifact_utils.get_single_uri(input_dict[HYPERPARAMETERS_KEY]))
      hyperparameters_config = json.loads(
          file_io.read_file_to_string(hyperparameters_file))
    else:
      hyperparameters_config = None

    train_args = trainer_pb2.TrainArgs()
    eval_args = trainer_pb2.EvalArgs()
    json_format.Parse(exec_properties['train_args'], train_args)
    json_format.Parse(exec_properties['eval_args'], eval_args)

    # https://github.com/tensorflow/tfx/issues/45: Replace num_steps=0 with
    # num_steps=None.  Conversion of the proto to python will set the default
    # value of an int as 0 so modify the value here.  Tensorflow will raise an
    # error if num_steps <= 0.
    train_steps = train_args.num_steps or None
    eval_steps = eval_args.num_steps or None

    output_path = artifact_utils.get_single_uri(output_dict[OUTPUT_MODEL_KEY])
    serving_model_dir = path_utils.serving_model_dir(output_path)
    eval_model_dir = path_utils.eval_model_dir(output_path)

    # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
    return TrainerFnArgs(
        # A list of uris for train files.
        train_files=train_files,
        # An optional single uri for transform graph produced by TFT. Will be
        # None if not specified.
        transform_output=transform_output,
        # A single uri for the output directory of the serving model.
        serving_model_dir=serving_model_dir,
        # A single uri for the output directory of the eval model.
        # Note that this is estimator only, Keras doesn't require it for TFMA.
        eval_model_dir=eval_model_dir,
        # A list of uris for eval files.
        eval_files=eval_files,
        # A single uri for schema file.
        schema_file=schema_file,
        # Number of train steps.
        train_steps=train_steps,
        # Number of eval steps.
        eval_steps=eval_steps,
        # Base model that will be used for this training job.
        base_model=base_model,
        # An optional kerastuner.HyperParameters config.
        hyperparameters=hyperparameters_config,
        # Additional parameters to pass to trainer function.
        **custom_config)
Example #13
def trainer_fn(trainer_fn_args, schema):
    """Build the estimator using the high level API.

    Args:
      trainer_fn_args: Holds args used to train the model as name/value pairs.
      schema: Holds the schema of the training examples.

    Returns:
      A dict of the following:
        - estimator: The estimator that will be used for training and eval.
        - train_spec: Spec for training.
        - eval_spec: Spec for eval.
        - eval_input_receiver_fn: Input function for eval.
    """
    if trainer_fn_args.hyperparameters:
        hp = trainer_fn_args.hyperparameters
        first_dnn_layer_size = hp.get('first_dnn_layer_size')
        num_dnn_layers = hp.get('num_dnn_layers')
        dnn_decay_factor = hp.get('dnn_decay_factor')
    else:
        # Number of nodes in the first layer of the DNN
        first_dnn_layer_size = 100
        num_dnn_layers = 4
        dnn_decay_factor = 0.7

    train_batch_size = 40
    eval_batch_size = 40

    # TODO(b/162532757): use _tfxio_input_fn exclusively once tfx-bsl post-0.22 is
    # released.
    use_tfxio_input_fn = trainer_fn_args.get('use_tfxio_input_fn', False)
    input_fn = _tfxio_input_fn if use_tfxio_input_fn else _input_fn
    tf_transform_output = tft.TFTransformOutput(
        trainer_fn_args.transform_output)

    train_input_fn = lambda: input_fn(  # pylint: disable=g-long-lambda
        trainer_fn_args.train_files,
        trainer_fn_args.data_accessor,
        tf_transform_output,
        batch_size=train_batch_size)

    eval_input_fn = lambda: input_fn(  # pylint: disable=g-long-lambda
        trainer_fn_args.eval_files,
        trainer_fn_args.data_accessor,
        tf_transform_output,
        batch_size=eval_batch_size)

    train_spec = tf.estimator.TrainSpec(  # pylint: disable=g-long-lambda
        train_input_fn,
        max_steps=trainer_fn_args.train_steps)

    serving_receiver_fn = lambda: _example_serving_receiver_fn(  # pylint: disable=g-long-lambda
        tf_transform_output, schema)

    exporter = tf.estimator.FinalExporter('chicago-taxi', serving_receiver_fn)
    eval_spec = tf.estimator.EvalSpec(eval_input_fn,
                                      steps=trainer_fn_args.eval_steps,
                                      exporters=[exporter],
                                      name='chicago-taxi-eval')

    run_config = tf.estimator.RunConfig(
        save_checkpoints_steps=999,
        # keep_checkpoint_max must be larger than the number of worker replica
        # nodes when training is distributed, in order to avoid a race condition.
        keep_checkpoint_max=5)

    export_dir = path_utils.serving_model_dir(trainer_fn_args.model_run_dir)
    run_config = run_config.replace(model_dir=export_dir)
    warm_start_from = trainer_fn_args.base_model

    estimator = _build_estimator(
        # Construct layer sizes with exponential decay
        hidden_units=[
            max(2, int(first_dnn_layer_size * dnn_decay_factor**i))
            for i in range(num_dnn_layers)
        ],
        config=run_config,
        warm_start_from=warm_start_from)

    # Create an input receiver for TFMA processing
    receiver_fn = lambda: _eval_input_receiver_fn(  # pylint: disable=g-long-lambda
        tf_transform_output, schema)

    return {
        'estimator': estimator,
        'train_spec': train_spec,
        'eval_spec': eval_spec,
        'eval_input_receiver_fn': receiver_fn
    }
Example #14
  def testAIPlatformTrainerPipeline(self):
    """Trainer-only test pipeline on AI Platform Training."""
    pipeline_name = 'kubeflow-aip-trainer-test-{}'.format(self._random_id())
    pipeline = self._create_pipeline(
        pipeline_name,
        [
            self.schema_importer,
            self.transformed_examples_importer,
            self.transform_graph_importer,
            Trainer(
                custom_executor_spec=executor_spec.ExecutorClassSpec(
                    ai_platform_trainer_executor.Executor),
                module_file=self._trainer_module,
                transformed_examples=self.transformed_examples_importer
                .outputs['result'],
                schema=self.schema_importer.outputs['result'],
                transform_graph=self.transform_graph_importer.outputs['result'],
                train_args=trainer_pb2.TrainArgs(num_steps=10),
                eval_args=trainer_pb2.EvalArgs(num_steps=5),
                custom_config={
                    # Test that distributed training behaves properly.
                    ai_platform_trainer_executor.TRAINING_ARGS_KEY: {
                        'project':
                            self._gcp_project_id,
                        'region':
                            self._gcp_region,
                        'jobDir':
                            os.path.join(
                                self._pipeline_root(pipeline_name), 'tmp'),
                        'masterConfig': {
                            'imageUri': self._container_image,
                        },
                        'scaleTier':
                            'CUSTOM',
                        'masterType':
                            'large_model',
                        'parameterServerType':
                            'standard',
                        'parameterServerCount':
                            1,
                        'workerType':
                            'standard',
                        'workerCount':
                            2,
                    }
                })
        ])
    self._compile_and_run_pipeline(pipeline)

    # There must be only one execution of Trainer.
    trainer_output_base_dir = os.path.join(
        self._pipeline_root(pipeline_name), 'Trainer', 'model')
    trainer_outputs = tf.io.gfile.listdir(trainer_output_base_dir)
    self.assertEqual(1, len(trainer_outputs))

    # There must be only one saved model each for serving and eval.
    model_uri = os.path.join(trainer_output_base_dir, trainer_outputs[0])
    self.assertEqual(
        1, len(tf.io.gfile.listdir(path_utils.eval_model_dir(model_uri))))
    self.assertEqual(
        1,
        len(
            tf.io.gfile.listdir(
                os.path.join(
                    path_utils.serving_model_dir(model_uri), 'export',
                    'chicago-taxi'))))
Example #15
  def _GetFnArgs(self, input_dict: Dict[Text, List[types.Artifact]],
                 output_dict: Dict[Text, List[types.Artifact]],
                 exec_properties: Dict[Text, Any]) -> TrainerFnArgs:
    fn_args = fn_args_utils.get_common_fn_args(input_dict, exec_properties)

    # Load and deserialize custom config from execution properties.
    # Note that in the component interface the default serialization of custom
    # config is 'null' instead of '{}'. Therefore we need to default the
    # json_utils.loads to 'null' then populate it with an empty dict when
    # needed.
    custom_config = json_utils.loads(
        exec_properties.get(constants.CUSTOM_CONFIG_KEY, 'null')) or {}
    if not isinstance(custom_config, dict):
      raise ValueError('custom_config in execution properties needs to be a '
                       'dict. Got %s instead.' % type(custom_config))

    # TODO(ruoyu): Make this a dict of tag -> uri instead of list.
    if input_dict.get(constants.BASE_MODEL_KEY):
      base_model = path_utils.serving_model_path(
          artifact_utils.get_single_uri(input_dict[constants.BASE_MODEL_KEY]))
    else:
      base_model = None

    if input_dict.get(constants.HYPERPARAMETERS_KEY):
      hyperparameters_file = io_utils.get_only_uri_in_dir(
          artifact_utils.get_single_uri(
              input_dict[constants.HYPERPARAMETERS_KEY]))
      hyperparameters_config = json.loads(
          file_io.read_file_to_string(hyperparameters_file))
    else:
      hyperparameters_config = None

    output_path = artifact_utils.get_single_uri(
        output_dict[constants.MODEL_KEY])
    serving_model_dir = path_utils.serving_model_dir(output_path)
    eval_model_dir = path_utils.eval_model_dir(output_path)

    model_run_dir = artifact_utils.get_single_uri(
        output_dict[constants.MODEL_RUN_KEY])

    # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
    return TrainerFnArgs(
        # A list of uris for train files.
        train_files=fn_args.train_files,
        # An optional single uri for transform graph produced by TFT. Will be
        # None if not specified.
        transform_output=fn_args.transform_graph_path,
        # A single uri for the output directory of the serving model.
        serving_model_dir=serving_model_dir,
        # A single uri for the output directory of the eval model.
        # Note that this is estimator only, Keras doesn't require it for TFMA.
        eval_model_dir=eval_model_dir,
        # A list of uris for eval files.
        eval_files=fn_args.eval_files,
        # A single uri for the output directory of model training related files.
        model_run_dir=model_run_dir,
        # A single uri for schema file.
        schema_file=fn_args.schema_path,
        # Number of train steps.
        train_steps=fn_args.train_steps,
        # Number of eval steps.
        eval_steps=fn_args.eval_steps,
        # Base model that will be used for this training job.
        base_model=base_model,
        # An optional kerastuner.HyperParameters config.
        hyperparameters=hyperparameters_config,
        # Additional parameters to pass to trainer function.
        **custom_config)
Example #16
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """Uses a user-supplied tf.estimator to train a TensorFlow model locally.

    The Trainer Executor invokes a training_fn callback function provided by
    the user via the module_file parameter.  With the tf.estimator returned by
    this function, the Trainer Executor then builds a TensorFlow model using the
    user-provided tf.estimator.

    Args:
      input_dict: Input dict from input key to a list of ML-Metadata Artifacts.
        - examples: Examples used for training, must include 'train' and 'eval'
          splits.
        - transform_output: Optional input transform graph.
        - schema: Schema of the data.
      output_dict: Output dict from output key to a list of Artifacts.
        - model: Exported model.
        - model_run: Model training related outputs (e.g., Tensorboard logs)
      exec_properties: A dict of execution properties.
        - train_args: JSON string of trainer_pb2.TrainArgs instance, providing
          args for training.
        - eval_args: JSON string of trainer_pb2.EvalArgs instance, providing
          args for eval.
        - module_file: Python module file containing UDF model definition.
        - warm_starting: Whether or not we need to do warm starting.
        - warm_start_from: Optional. If warm_starting is True, this is the
          directory to find previous model to warm start on.
        - custom_config: Optional. JSON-serialized dict of additional parameters
          to pass to trainer function.

    Returns:
      None

    Raises:
      ValueError: When neither or both of 'module_file' and 'trainer_fn'
        are present in 'exec_properties'.
    """
    self._log_startup(input_dict, output_dict, exec_properties)

    fn_args = self._GetFnArgs(input_dict, output_dict, exec_properties)
    trainer_fn = udf_utils.get_fn(exec_properties, 'trainer_fn')

    schema = io_utils.parse_pbtxt_file(fn_args.schema_file, schema_pb2.Schema())

    # TODO(b/160795287): Deprecate estimator based executor.
    # Provide user with a modified fn_args, with model_run given as
    # the working directory. Executor will then copy user models to
    # model artifact directory.
    serving_dest = fn_args.serving_model_dir
    eval_dest = fn_args.eval_model_dir

    working_dir = fn_args.model_run_dir
    fn_args.serving_model_dir = path_utils.serving_model_dir(working_dir)
    fn_args.eval_model_dir = path_utils.eval_model_dir(working_dir)

    training_spec = trainer_fn(fn_args, schema)

    # Train the model
    absl.logging.info('Training model.')
    tf.estimator.train_and_evaluate(training_spec['estimator'],
                                    training_spec['train_spec'],
                                    training_spec['eval_spec'])

    absl.logging.info(
        'Training complete. Model written to %s. ModelRun written to %s',
        fn_args.serving_model_dir, fn_args.model_run_dir)

    # Export an eval SavedModel for TFMA. When training is distributed, it must
    # only be written by the chief worker, as is done for the serving SavedModel.
    if _is_chief():
      absl.logging.info('Exporting eval_savedmodel for TFMA.')
      tfma.export.export_eval_savedmodel(
          estimator=training_spec['estimator'],
          export_dir_base=fn_args.eval_model_dir,
          eval_input_receiver_fn=training_spec['eval_input_receiver_fn'])

      absl.logging.info('Exported eval_savedmodel to %s.',
                        fn_args.eval_model_dir)

      # TODO(b/160795287): Deprecate estimator based executor.
      # Copy serving and eval model from model_run to model artifact directory.
      serving_source = path_utils.serving_model_path(fn_args.model_run_dir)
      io_utils.copy_dir(serving_source, serving_dest)
      absl.logging.info('Serving model copied to: %s.', serving_dest)

      eval_source = path_utils.eval_model_path(fn_args.model_run_dir)
      io_utils.copy_dir(eval_source, eval_dest)
      absl.logging.info('Eval model copied to: %s.', eval_dest)

    else:
      absl.logging.info(
          'Model export is skipped because this is not the chief worker.')
Example #17
File: executor.py  Project: zw39125432/tfx
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Uses a user-supplied tf.estimator to train a TensorFlow model locally.

        The Trainer Executor invokes a training_fn callback function provided by
        the user via the module_file parameter.  With the tf.estimator returned by
        this function, the Trainer Executor then builds a TensorFlow model using the
        user-provided tf.estimator.

        Args:
          input_dict: Input dict from input key to a list of ML-Metadata Artifacts.
            - examples: Examples used for training, must include 'train' and 'eval'
              splits.
            - transform_output: Optional input transform graph.
            - schema: Schema of the data.
          output_dict: Output dict from output key to a list of Artifacts.
            - output: Exported model.
          exec_properties: A dict of execution properties.
            - train_args: JSON string of trainer_pb2.TrainArgs instance, providing
              args for training.
            - eval_args: JSON string of trainer_pb2.EvalArgs instance, providing
              args for eval.
            - module_file: Python module file containing UDF model definition.
            - warm_starting: Whether or not we need to do warm starting.
            - warm_start_from: Optional. If warm_starting is True, this is the
              directory to find previous model to warm start on.

        Returns:
          None

        Raises:
          ValueError: When neither or both of 'module_file' and 'trainer_fn'
            are present in 'exec_properties'.
        """
        self._log_startup(input_dict, output_dict, exec_properties)

        custom_config = exec_properties.get('custom_config') or {}
        if not isinstance(custom_config, dict):
            raise ValueError(
                'Expect custom_config to be a dict but got %s instead' %
                type(custom_config))

        trainer_fn = self._GetTrainerFn(exec_properties)

        # Set up training parameters
        train_files = [
            _all_files_pattern(
                artifact_utils.get_split_uri(input_dict['examples'], 'train'))
        ]
        transform_output = artifact_utils.get_single_uri(
            input_dict['transform_output']) if input_dict.get(
                'transform_output', None) else None
        eval_files = [
            _all_files_pattern(
                artifact_utils.get_split_uri(input_dict['examples'], 'eval'))
        ]
        schema_file = io_utils.get_only_uri_in_dir(
            artifact_utils.get_single_uri(input_dict['schema']))
        # TODO(ruoyu): Make this a dict of tag -> uri instead of list.
        base_model = path_utils.serving_model_path(
            artifact_utils.get_single_uri(input_dict['base_model'])
        ) if input_dict.get('base_model') else None
        if input_dict.get('hyperparameters'):
            hyperparameters_file = io_utils.get_only_uri_in_dir(
                artifact_utils.get_single_uri(input_dict['hyperparameters']))
            hyperparameters_config = json.loads(
                file_io.read_file_to_string(hyperparameters_file))
        else:
            hyperparameters_config = None

        train_args = trainer_pb2.TrainArgs()
        eval_args = trainer_pb2.EvalArgs()
        json_format.Parse(exec_properties['train_args'], train_args)
        json_format.Parse(exec_properties['eval_args'], eval_args)

        # https://github.com/tensorflow/tfx/issues/45: Replace num_steps=0 with
        # num_steps=None.  Conversion of the proto to python will set the default
        # value of an int as 0 so modify the value here.  Tensorflow will raise an
        # error if num_steps <= 0.
        train_steps = train_args.num_steps or None
        eval_steps = eval_args.num_steps or None

        output_path = artifact_utils.get_single_uri(output_dict['output'])
        serving_model_dir = path_utils.serving_model_dir(output_path)
        eval_model_dir = path_utils.eval_model_dir(output_path)

        # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
        train_fn_args = TrainerFnArgs(
            # A list of uris for train files.
            train_files=train_files,
            # An optional single uri for transform graph produced by TFT. Will be
            # None if not specified.
            transform_output=transform_output,
            # A single uri for the output directory of the serving model.
            serving_model_dir=serving_model_dir,
            # A list of uris for eval files.
            eval_files=eval_files,
            # A single uri for schema file.
            schema_file=schema_file,
            # Number of train steps.
            train_steps=train_steps,
            # Number of eval steps.
            eval_steps=eval_steps,
            # Base model that will be used for this training job.
            base_model=base_model,
            # An optional kerastuner.HyperParameters config.
            hyperparameters=hyperparameters_config,
            # Additional parameters to pass to trainer function.
            **custom_config)

        schema = io_utils.parse_pbtxt_file(schema_file, schema_pb2.Schema())

        training_spec = trainer_fn(train_fn_args, schema)

        # Train the model
        absl.logging.info('Training model.')
        tf.estimator.train_and_evaluate(training_spec['estimator'],
                                        training_spec['train_spec'],
                                        training_spec['eval_spec'])
        absl.logging.info('Training complete.  Model written to %s',
                          serving_model_dir)

        # Export an eval savedmodel for TFMA
        absl.logging.info('Exporting eval_savedmodel for TFMA.')
        tfma.export.export_eval_savedmodel(
            estimator=training_spec['estimator'],
            export_dir_base=eval_model_dir,
            eval_input_receiver_fn=training_spec['eval_input_receiver_fn'])

        absl.logging.info('Exported eval_savedmodel to %s.', eval_model_dir)
Example #18
    def Do(self, input_dict: Dict[str, List[Artifact]],
           output_dict: Dict[str, List[Artifact]],
           exec_properties: Dict[str, Any]) -> None:
        """Recommends a tuner config.

        Args:
          input_dict: Input dict from input key to a list of artifacts, including:
            - meta_train_features_N: MetaFeatures for Nth train dataset.
            - hparams_train_N: HParams for Nth train dataset. The maximum value of
              `N` is _MAX_INPUTS.
          output_dict: Output dict from key to a list of artifacts.
          exec_properties: A dict of execution properties.

        Raises:
          NotImplementedError: If `algorithm` is not a supported algorithm.
        """

        algorithm = exec_properties['algorithm']
        metafeatures_list = []
        # This should be agnostic to meta-feature type.
        for ix in range(MAX_INPUTS):
            metafeature_key = f'meta_train_features_{ix}'
            if metafeature_key in input_dict:
                metafeature_uri = os.path.join(
                    artifact_utils.get_single_uri(input_dict[metafeature_key]),
                    artifacts.MetaFeatures.DEFAULT_FILE_NAME)
                logging.info('Found %s at %s.', metafeature_key,
                             metafeature_uri)
                metafeatures = json.loads(
                    io_utils.read_string_file(metafeature_uri))
                metafeatures_list.append(metafeatures['metafeature'])

        all_hparams = []
        for ix in range(MAX_INPUTS):
            hparam_key = f'hparams_train_{ix}'
            if hparam_key in input_dict:
                hyperparameters_file = io_utils.get_only_uri_in_dir(
                    artifact_utils.get_single_uri(input_dict[hparam_key]))
                logging.info('Found %s at %s.', hparam_key,
                             hyperparameters_file)
                hparams_json = json.loads(
                    io_utils.read_string_file(hyperparameters_file))
                all_hparams.append(hparams_json['values'])

        if algorithm == MAJORITY_VOTING:
            discrete_search_space = self._create_search_space_using_voting(
                all_hparams)
            hparams_config_list = [discrete_search_space.get_config()]
        elif algorithm == NEAREST_NEIGHBOR:
            # Build nearest_neighbor model
            output_path = artifact_utils.get_single_uri(
                output_dict[OUTPUT_MODEL])
            serving_model_dir = path_utils.serving_model_dir(output_path)
            model = self._create_knn_model_from_metafeatures(metafeatures_list)
            # TODO(nikhilmehta): Consider adding signature here.
            model.save(serving_model_dir)

            # Collect all Candidate HParams
            hparams_list = self._convert_to_kerastuner_hyperparameters(
                all_hparams)
            hparams_config_list = [
                hparam.get_config() for hparam in hparams_list
            ]
        else:
            raise NotImplementedError(
                f'The algorithm "{algorithm}" is not supported.')

        meta_hparams_path = os.path.join(
            artifact_utils.get_single_uri(output_dict[OUTPUT_HYPERPARAMS]),
            _DEFAULT_FILE_NAME)
        io_utils.write_string_file(meta_hparams_path,
                                   json.dumps(hparams_config_list))
        logging.info('Meta HParams saved at %s', meta_hparams_path)
Example #19
File: executor.py  Project: rohithreddy/tfx
    def Do(self, input_dict, output_dict, exec_properties):
        """Runs trainer job the given input.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - transformed_examples: Transformed example.
        - transform_output: Input transform graph.
        - schema: Schema of the data.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: Exported model.
      exec_properties: A dict of execution properties.
        - train_args: JSON string of trainer_pb2.TrainArgs instance, providing
          args for training.
        - eval_args: JSON string of trainer_pb2.EvalArgs instance, providing
          args for eval.
        - module_file: Python module file containing UDF model definition.
        - warm_starting: Whether or not we need to do warm starting.
        - warm_start_from: Optional. If warm_starting is True, this is the
          directory to find previous model to warm start on.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        # TODO(khaas): Move this to tfx/extensions.
        if exec_properties.get('custom_config', None):
            cmle_args = exec_properties.get('custom_config',
                                            {}).get('cmle_training_args')
            if cmle_args:
                return cmle_runner.start_cmle_training(input_dict, output_dict,
                                                       exec_properties,
                                                       cmle_args)

        trainer_fn = io_utils.import_func(exec_properties['module_file'],
                                          'trainer_fn')

        # Set up training parameters
        train_files = [
            _all_files_pattern(
                types.get_split_uri(input_dict['transformed_examples'],
                                    'train'))
        ]
        transform_output = types.get_single_uri(input_dict['transform_output'])
        eval_files = _all_files_pattern(
            types.get_split_uri(input_dict['transformed_examples'], 'eval'))
        schema_file = io_utils.get_only_uri_in_dir(
            types.get_single_uri(input_dict['schema']))

        train_args = trainer_pb2.TrainArgs()
        eval_args = trainer_pb2.EvalArgs()
        json_format.Parse(exec_properties['train_args'], train_args)
        json_format.Parse(exec_properties['eval_args'], eval_args)

        # https://github.com/tensorflow/tfx/issues/45: Replace num_steps=0 with
        # num_steps=None.  Conversion of the proto to python will set the default
        # value of an int as 0 so modify the value here.  Tensorflow will raise an
        # error if num_steps <= 0.
        train_steps = train_args.num_steps or None
        eval_steps = eval_args.num_steps or None

        output_path = types.get_single_uri(output_dict['output'])
        serving_model_dir = path_utils.serving_model_dir(output_path)
        eval_model_dir = path_utils.eval_model_dir(output_path)

        # Assemble warm start path if needed.
        warm_start_from = None
        if exec_properties.get('warm_starting') and exec_properties.get(
                'warm_start_from'):
            previous_model_dir = os.path.join(
                exec_properties['warm_start_from'],
                path_utils.SERVING_MODEL_DIR)
            if previous_model_dir and tf.gfile.Exists(
                    os.path.join(previous_model_dir,
                                 self._CHECKPOINT_FILE_NAME)):
                warm_start_from = previous_model_dir

        # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
        hparams = tf.contrib.training.HParams(
            train_files=train_files,
            transform_output=transform_output,
            output_dir=output_path,
            serving_model_dir=serving_model_dir,
            eval_files=eval_files,
            schema_file=schema_file,
            train_steps=train_steps,
            eval_steps=eval_steps,
            warm_start_from=warm_start_from)

        schema = io_utils.parse_pbtxt_file(schema_file, schema_pb2.Schema())

        training_spec = trainer_fn(hparams, schema)

        # Train the model
        tf.logging.info('Training model.')
        tf.estimator.train_and_evaluate(training_spec['estimator'],
                                        training_spec['train_spec'],
                                        training_spec['eval_spec'])
        tf.logging.info('Training complete.  Model written to %s',
                        serving_model_dir)

        # Export an eval savedmodel for TFMA
        tf.logging.info('Exporting eval_savedmodel for TFMA.')
        tfma.export.export_eval_savedmodel(
            estimator=training_spec['estimator'],
            export_dir_base=eval_model_dir,
            eval_input_receiver_fn=training_spec['eval_input_receiver_fn'])

        tf.logging.info('Exported eval_savedmodel to %s.', eval_model_dir)