Example 1
  def testGetFnFailure(self):
    exec_properties = {
        'module_file': 'path/to/module_file.py',
        'test_fn': 'path.to.test_fn',
    }

    with self.assertRaises(ValueError):
      udf_utils.get_fn(exec_properties, 'test_fn')
Example 2
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        if exec_properties.get(_TUNE_ARGS_KEY):
            raise ValueError(
                "TuneArgs is not supported for default Tuner's Executor.")

        tuner_fn = udf_utils.get_fn(exec_properties, 'tuner_fn')
        fn_args = fn_args_utils.get_common_fn_args(input_dict, exec_properties,
                                                   self._get_tmp_dir())

        tuner_fn_result = tuner_fn(fn_args)
        tuner = tuner_fn_result.tuner
        fit_kwargs = tuner_fn_result.fit_kwargs

        # TODO(b/156966497): set logger for printing.
        tuner.search_space_summary()
        absl.logging.info('Start tuning...')
        tuner.search(**fit_kwargs)
        tuner.results_summary()
        best_hparams_config = tuner.get_best_hyperparameters()[0].get_config()
        absl.logging.info('Best hyperParameters: %s' % best_hparams_config)
        best_hparams_path = os.path.join(
            artifact_utils.get_single_uri(
                output_dict[_BEST_HYPERPARAMETERS_KEY]), _DEFAULT_FILE_NAME)
        io_utils.write_string_file(best_hparams_path,
                                   json.dumps(best_hparams_config))
        absl.logging.info('Best Hyperparameters are written to %s.' %
                          best_hparams_path)
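The tuner_fn resolved by udf_utils.get_fn above must return a result exposing .tuner and .fit_kwargs, which the executor forwards to tuner.search(**fit_kwargs). A minimal sketch of such a user module, assuming the TunerFnResult container from tfx.components.tuner.component and a KerasTuner RandomSearch tuner; the model, search space, and data below are placeholders, not taken from these examples:

# Hypothetical user module; model, search space, and data are illustrative only.
import kerastuner
import numpy as np
import tensorflow as tf
from tfx.components.tuner.component import TunerFnResult  # assumed import path


def tuner_fn(fn_args) -> TunerFnResult:
  """Builds the tuner and fit kwargs consumed by the Tuner executor above."""

  def _build_model(hp):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(hp.Int('units', 16, 128, step=16),
                              activation='relu'),
        tf.keras.layers.Dense(1),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

  tuner = kerastuner.RandomSearch(
      _build_model,
      objective='val_loss',
      max_trials=5,
      directory=fn_args.working_dir,
      project_name='get_fn_example')

  # Placeholder data; a real tuner_fn would build datasets from fn_args.train_files.
  x = np.random.rand(32, 4).astype(np.float32)
  y = np.random.rand(32, 1).astype(np.float32)
  return TunerFnResult(
      tuner=tuner,
      fit_kwargs={'x': x, 'y': y, 'validation_split': 0.25, 'epochs': 1})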
Example 3
    def Do(self, input_dict: Dict[str, List[types.Artifact]],
           output_dict: Dict[str, List[types.Artifact]],
           exec_properties: Dict[str, Any]) -> None:

        if tfx_tuner.get_tune_args(exec_properties):
            raise ValueError(
                "TuneArgs is not supported by this Tuner's Executor.")

        metalearning_algorithm = None
        if 'metalearning_algorithm' in exec_properties:
            metalearning_algorithm = exec_properties.get(
                'metalearning_algorithm')

        warmup_trials = 0
        warmup_trial_data = None
        if metalearning_algorithm:
            warmup_tuner, warmup_trials = self.warmup(input_dict,
                                                      exec_properties,
                                                      metalearning_algorithm)
            warmup_trial_data = extract_tuner_trial_progress(warmup_tuner)
        else:
            logging.info('MetaLearning Algorithm not provided.')

        # Create new fn_args for final tuning stage.
        fn_args = fn_args_utils.get_common_fn_args(
            input_dict, exec_properties, working_dir=self._get_tmp_dir())
        tuner_fn = udf_utils.get_fn(exec_properties, 'tuner_fn')
        tuner_fn_result = tuner_fn(fn_args)
        tuner_fn_result.tuner.oracle.max_trials = max(
            (tuner_fn_result.tuner.oracle.max_trials - warmup_trials), 1)
        tuner = self.search(tuner_fn_result)
        tuner_trial_data = extract_tuner_trial_progress(tuner)

        if warmup_trial_data:
            cumulative_tuner_trial_data, best_tuner_ix = merge_trial_data(
                warmup_trial_data, tuner_trial_data)
            cumulative_tuner_trial_data[
                'warmup_trial_data'] = warmup_trial_data[BEST_CUMULATIVE_SCORE]
            cumulative_tuner_trial_data['tuner_trial_data'] = tuner_trial_data[
                BEST_CUMULATIVE_SCORE]

            if isinstance(tuner.oracle.objective, kerastuner.Objective):
                cumulative_tuner_trial_data[
                    'objective'] = tuner.oracle.objective.name
            else:
                cumulative_tuner_trial_data[
                    'objective'] = 'objective not understood'

            tuner_trial_data = cumulative_tuner_trial_data
            best_tuner = warmup_tuner if best_tuner_ix == 0 else tuner
        else:
            best_tuner = tuner
        tfx_tuner.write_best_hyperparameters(best_tuner, output_dict)
        tuner_plot_path = os.path.join(
            artifact_utils.get_single_uri(output_dict['trial_summary_plot']),
            'tuner_plot_data.txt')
        io_utils.write_string_file(tuner_plot_path,
                                   json.dumps(tuner_trial_data))
        logging.info('Tuner plot data written at: %s', tuner_plot_path)
Example 4
 def Do(self, input_dict: Dict[Text, List[types.Artifact]],
        output_dict: Dict[Text, List[types.Artifact]],
        exec_properties: Dict[Text, Any]) -> None:
     self._log_startup(input_dict, output_dict, exec_properties)
     create_decoder_func = udf_utils.get_fn(exec_properties,
                                            _CREATE_DECODER_FUNC_KEY)
     tf_graph_record_decoder.save_decoder(
         create_decoder_func(),
         value_utils.GetSoleValue(output_dict, _DATA_VIEW_KEY).uri)
Example 5
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """Uses a user-supplied run_fn to train a TensorFlow model locally.

    The Trainer Executor invokes a run_fn callback function provided by the
    user via the module_file parameter. In this function, the user defines
    the model and trains it, then saves the model and training related files
    (e.g., Tensorboard logs) to the provided locations.

    Args:
      input_dict: Input dict from input key to a list of ML-Metadata Artifacts.
        - examples: Examples used for training, must include 'train' and 'eval'
          if custom splits are not specified in train_args and eval_args.
        - transform_graph: Optional input transform graph.
        - transform_output: Optional input transform graph, deprecated.
        - schema: Schema of the data.
      output_dict: Output dict from output key to a list of Artifacts.
        - model: Exported model.
        - model_run: Model training related outputs (e.g., Tensorboard logs)
      exec_properties: A dict of execution properties.
        - train_args: JSON string of trainer_pb2.TrainArgs instance, providing
          args for training.
        - eval_args: JSON string of trainer_pb2.EvalArgs instance, providing
          args for eval.
        - module_file: Python module file containing UDF model definition.
        - warm_starting: Whether or not we need to do warm starting.
        - warm_start_from: Optional. If warm_starting is True, this is the
          directory to find previous model to warm start on.
        - custom_config: Optional. JSON-serialized dict of additional parameters
          to pass to trainer function.

    Returns:
      None

    Raises:
      ValueError: When neither or both of 'module_file' and 'run_fn'
        are present in 'exec_properties'.
      RuntimeError: If run_fn failed to generate model in desired location.
    """
    self._log_startup(input_dict, output_dict, exec_properties)

    fn_args = self._GetFnArgs(input_dict, output_dict, exec_properties)
    run_fn = udf_utils.get_fn(exec_properties, 'run_fn')

    # Train the model
    absl.logging.info('Training model.')
    run_fn(fn_args)

    # Note: If trained with multi-node distribution workers, it is the user
    # module's responsibility to export the model only once.
    if not fileio.exists(fn_args.serving_model_dir):
      raise RuntimeError('run_fn failed to generate model.')

    absl.logging.info(
        'Training complete. Model written to %s. ModelRun written to %s',
        fn_args.serving_model_dir, fn_args.model_run_dir)
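For reference, the execution properties this docstring describes might be assembled roughly as follows; the paths and JSON payloads are placeholders, and the exact serialization of the proto-valued fields depends on the pipeline frontend:

# Illustrative values only; not taken from the examples above.
exec_properties = {
    'train_args': '{"num_steps": 1000}',          # JSON string of trainer_pb2.TrainArgs
    'eval_args': '{"num_steps": 100}',            # JSON string of trainer_pb2.EvalArgs
    'module_file': '/path/to/trainer_module.py',  # module defining run_fn
    'custom_config': '{"learning_rate": 0.001}',  # optional extra parameters
}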
Example 6
 def Do(self, input_dict: Dict[Text, List[types.Artifact]],
        output_dict: Dict[Text, List[types.Artifact]],
        exec_properties: Dict[Text, Any]) -> None:
     del input_dict
     if _MODULE_FILE_KEY in exec_properties:
         create_decoder_func = import_utils.import_func_from_source(
             exec_properties.get(_MODULE_FILE_KEY),
             exec_properties.get(_CREATE_DECODER_FUNC_KEY))
     else:
         create_decoder_func = udf_utils.get_fn(exec_properties,
                                                _CREATE_DECODER_FUNC_KEY)
     tf_graph_record_decoder.save_decoder(
         create_decoder_func(),
         value_utils.GetSoleValue(output_dict, _DATA_VIEW_KEY).uri)
Example 7
def _run_transform(args, beam_pipeline_args):
    """Construct and run transform executor."""
    absl.logging.set_verbosity(absl.logging.INFO)

    def make_beam_pipeline():
        return beam.Pipeline(argv=beam_pipeline_args)

    preprocessing_fn = udf_utils.get_fn(
        {
            standard_component_specs.PREPROCESSING_FN_KEY:
            args.preprocessing_fn_path
        }, standard_component_specs.PREPROCESSING_FN_KEY)

    inputs = {
        labels.ANALYZE_DATA_PATHS_LABEL:
        args.analyze_examples,
        labels.ANALYZE_PATHS_FILE_FORMATS_LABEL:
        [labels.FORMAT_TFRECORD] * len(args.analyze_examples),
        labels.TRANSFORM_DATA_PATHS_LABEL:
        [args.analyze_examples + args.transform_only_examples],
        labels.TRANSFORM_PATHS_FILE_FORMATS_LABEL: [labels.FORMAT_TFRECORD] *
        (len(args.analyze_examples) + len(args.transform_only_examples)),
        labels.SCHEMA_PATH_LABEL:
        args.input_schema_path,
        labels.PREPROCESSING_FN:
        preprocessing_fn,
        labels.EXAMPLES_DATA_FORMAT_LABEL:
        example_gen_pb2.PayloadFormat.Value(args.example_data_format),
        labels.DISABLE_STATISTICS_LABEL:
        args.disable_statistics,
        labels.MAKE_BEAM_PIPELINE_FN:
        make_beam_pipeline,
    }
    outputs = {
        labels.TRANSFORM_METADATA_OUTPUT_PATH_LABEL:
        args.transform_fn,
        labels.TRANSFORM_MATERIALIZE_OUTPUT_PATHS_LABEL:
        (args.transformed_examples),
        labels.PER_SET_STATS_OUTPUT_PATHS_LABEL: (args.per_set_stats_outputs),
        labels.TEMP_OUTPUT_LABEL:
        args.tmp_location,
    }

    executor.TransformProcessor().Transform(inputs, outputs, args.status_file)
Example 8
  def testGetFnFromModule(self, mock_import_func):
    exec_properties = {'test_fn': 'path.to.test_fn'}
    udf_utils.get_fn(exec_properties, 'test_fn')
    mock_import_func.assert_called_once_with('path.to', 'test_fn')
Example 9
  def testGetFnFromSource(self, mock_import_func):
    exec_properties = {'module_file': 'path/to/module_file.py'}
    udf_utils.get_fn(exec_properties, 'test_fn')
    mock_import_func.assert_called_once_with('path/to/module_file.py',
                                             'test_fn')
Example 10
  def testGetFnFailure(self):
    with self.assertRaises(ValueError):
      udf_utils.get_fn({}, 'test_fn')
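Read together, Examples 1 and 8-10 pin down the contract these tests expect from udf_utils.get_fn: load the named function from 'module_file' when a source path is given, otherwise import it from a dotted module path, and raise ValueError when neither or both are supplied. A behavioural sketch of that contract (not the TFX implementation) could look like:

# Behavioural sketch only; the real udf_utils.get_fn lives in TFX and may differ.
import importlib
from typing import Any, Callable, Dict

from tfx.utils import import_utils  # import_func_from_source also appears in Example 6


def get_fn_sketch(exec_properties: Dict[str, Any],
                  fn_key: str) -> Callable[..., Any]:
  has_module_file = bool(exec_properties.get('module_file'))
  has_fn_path = bool(exec_properties.get(fn_key))
  if has_module_file == has_fn_path:
    # Neither or both supplied is ambiguous (Examples 1 and 10).
    raise ValueError(
        'Supply exactly one of "module_file" or "%s".' % fn_key)
  if has_module_file:
    # Resolve the function named by fn_key from the source file (Example 9).
    return import_utils.import_func_from_source(
        exec_properties['module_file'], fn_key)
  # Resolve a dotted path such as 'path.to.test_fn' (Example 8).
  module_path, fn_name = exec_properties[fn_key].rsplit('.', 1)
  return getattr(importlib.import_module(module_path), fn_name)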
Example 11
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """Uses a user-supplied tf.estimator to train a TensorFlow model locally.

    The Trainer Executor invokes a trainer_fn callback function provided by
    the user via the module_file parameter. With the tf.estimator returned by
    this function, the Trainer Executor then builds and trains a TensorFlow
    model.

    Args:
      input_dict: Input dict from input key to a list of ML-Metadata Artifacts.
        - examples: Examples used for training, must include 'train' and 'eval'
          splits.
        - transform_output: Optional input transform graph.
        - schema: Schema of the data.
      output_dict: Output dict from output key to a list of Artifacts.
        - model: Exported model.
        - model_run: Model training related outputs (e.g., Tensorboard logs)
      exec_properties: A dict of execution properties.
        - train_args: JSON string of trainer_pb2.TrainArgs instance, providing
          args for training.
        - eval_args: JSON string of trainer_pb2.EvalArgs instance, providing
          args for eval.
        - module_file: Python module file containing UDF model definition.
        - warm_starting: Whether or not we need to do warm starting.
        - warm_start_from: Optional. If warm_starting is True, this is the
          directory to find previous model to warm start on.
        - custom_config: Optional. JSON-serialized dict of additional parameters
          to pass to trainer function.

    Returns:
      None

    Raises:
      ValueError: When neither or both of 'module_file' and 'trainer_fn'
        are present in 'exec_properties'.
    """
    self._log_startup(input_dict, output_dict, exec_properties)

    fn_args = self._GetFnArgs(input_dict, output_dict, exec_properties)
    trainer_fn = udf_utils.get_fn(exec_properties, 'trainer_fn')

    schema = io_utils.parse_pbtxt_file(fn_args.schema_file, schema_pb2.Schema())

    # TODO(b/160795287): Deprecate estimator based executor.
    # Provide user with a modified fn_args, with model_run given as
    # the working directory. Executor will then copy user models to
    # model artifact directory.
    serving_dest = fn_args.serving_model_dir
    eval_dest = fn_args.eval_model_dir

    working_dir = fn_args.model_run_dir
    fn_args.serving_model_dir = path_utils.serving_model_dir(working_dir)
    fn_args.eval_model_dir = path_utils.eval_model_dir(working_dir)

    training_spec = trainer_fn(fn_args, schema)

    # Train the model
    absl.logging.info('Training model.')
    tf.estimator.train_and_evaluate(training_spec['estimator'],
                                    training_spec['train_spec'],
                                    training_spec['eval_spec'])

    absl.logging.info(
        'Training complete. Model written to %s. ModelRun written to %s',
        fn_args.serving_model_dir, fn_args.model_run_dir)

    # Export an eval savedmodel for TFMA. If distributed training, it must only
    # be written by the chief worker, as would be done for serving savedmodel.
    if _is_chief():
      absl.logging.info('Exporting eval_savedmodel for TFMA.')
      tfma.export.export_eval_savedmodel(
          estimator=training_spec['estimator'],
          export_dir_base=fn_args.eval_model_dir,
          eval_input_receiver_fn=training_spec['eval_input_receiver_fn'])

      absl.logging.info('Exported eval_savedmodel to %s.',
                        fn_args.eval_model_dir)

      # TODO(b/160795287): Deprecate estimator based executor.
      # Copy serving and eval model from model_run to model artifact directory.
      serving_source = path_utils.serving_model_path(fn_args.model_run_dir)
      io_utils.copy_dir(serving_source, serving_dest)
      absl.logging.info('Serving model copied to: %s.', serving_dest)

      eval_source = path_utils.eval_model_path(fn_args.model_run_dir)
      io_utils.copy_dir(eval_source, eval_dest)
      absl.logging.info('Eval model copied to: %s.', eval_dest)

    else:
      absl.logging.info(
          'Model export is skipped because this is not the chief worker.')
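The training_spec consumed above implies the shape of the user's trainer_fn return value: a dict holding 'estimator', 'train_spec', 'eval_spec', and 'eval_input_receiver_fn'. A bare-bones sketch under that assumption, with placeholder input functions and features (none of this is taken from the listing):

# Hypothetical trainer_fn skeleton; the model, input functions, and eval
# receiver are placeholders.
import tensorflow as tf
import tensorflow_model_analysis as tfma


def trainer_fn(fn_args, schema):
  """Returns the dict of specs read by the Estimator-based executor above."""

  def _input_fn():
    features = {'x': tf.constant([[1.0], [2.0], [3.0]])}
    labels = tf.constant([[1.0], [2.0], [3.0]])
    return tf.data.Dataset.from_tensor_slices((features, labels)).batch(2)

  estimator = tf.estimator.LinearRegressor(
      feature_columns=[tf.feature_column.numeric_column('x')])

  def _eval_input_receiver_fn():
    # A real implementation would derive features from the schema or the
    # transform graph instead of hard-coding them.
    serialized = tf.compat.v1.placeholder(dtype=tf.string, shape=[None])
    features = {'x': tf.zeros([tf.shape(serialized)[0], 1])}
    return tfma.export.EvalInputReceiver(
        features=features,
        labels=tf.zeros([tf.shape(serialized)[0], 1]),
        receiver_tensors={'examples': serialized})

  return {
      'estimator': estimator,
      'train_spec': tf.estimator.TrainSpec(input_fn=_input_fn, max_steps=10),
      'eval_spec': tf.estimator.EvalSpec(input_fn=_input_fn),
      'eval_input_receiver_fn': _eval_input_receiver_fn,
  }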
Example 12
    def warmup(self, input_dict: Dict[str, List[types.Artifact]],
               exec_properties: Dict[str, Any], algorithm: str):

        # Perform warmup tuning if WARMUP_HYPERPARAMETERS given.
        hparams_warmup_config_list = None
        if input_dict.get(WARMUP_HYPERPARAMETERS):
            hyperparameters_file = io_utils.get_only_uri_in_dir(
                artifact_utils.get_single_uri(
                    input_dict[WARMUP_HYPERPARAMETERS]))
            hparams_warmup_config_list = json.loads(
                io_utils.read_string_file(hyperparameters_file))

        fn_args = fn_args_utils.get_common_fn_args(
            input_dict,
            exec_properties,
            working_dir=self._get_tmp_dir() + 'warmup')

        # TODO(nikhilmehta): Currently all algorithms need warmup_hyperparameters.
        # This may not be needed for other algorithms that can predict hyperparams.
        if not hparams_warmup_config_list:
            raise ValueError('Expected warmup_hyperparameters')

        logging.info('Algorithm: %s', algorithm)
        warmup_trials = 0
        if algorithm == 'majority_voting':
            warmup_trials = DEFAULT_WARMUP_TRIALS
            fn_args.custom_config[
                WARMUP_HYPERPARAMETERS] = hparams_warmup_config_list[0]
        elif algorithm == 'nearest_neighbor':
            warmup_trials = DEFAULT_WARMUP_TRIALS

            if input_dict.get('metamodel'):
                metamodel_path = io_utils.get_only_uri_in_dir(
                    artifact_utils.get_single_uri(input_dict['metamodel']))
                logging.info('Meta model path: %s', metamodel_path)
                metamodel = _load_keras_model(metamodel_path)
            else:
                raise ValueError(
                    f'Tuner for metalearning_algorithm={algorithm} expects metamodel.'
                )

            if input_dict.get('metafeature'):
                metafeature_path = io_utils.get_only_uri_in_dir(
                    artifact_utils.get_single_uri(input_dict['metafeature']))
                logging.info('Metafeature: %s', metafeature_path)
                metafeature = json.loads(
                    io_utils.read_string_file(metafeature_path))
                metafeature = metafeature['metafeature']
            else:
                raise ValueError(
                    f'Tuner for metalearning_algorithm={algorithm} expects metafeature.'
                )

            metafeature = np.array(metafeature, dtype=np.float32)
            metafeature = np.expand_dims(metafeature, axis=0)
            logits = metamodel(metafeature).numpy()[0]
            nearest_configs = [
                hparams_warmup_config_list[ix]
                for ix in np.argsort(logits)[-DEFAULT_K:]
            ]
            nearest_hparam_config = _merge_hparam_configs(nearest_configs)
            fn_args.custom_config[
                WARMUP_HYPERPARAMETERS] = nearest_hparam_config
        else:
            raise NotImplementedError(
                f'Tuning for metalearning_algorithm={algorithm} is not implemented.'
            )

        # kerastuner doesn't support grid search, setting max_trials large enough.
        # Track issue: https://github.com/keras-team/keras-tuner/issues/340
        fn_args.custom_config['max_trials'] = warmup_trials
        tuner_fn = udf_utils.get_fn(exec_properties, 'tuner_fn')
        warmtuner_fn_result = tuner_fn(fn_args)
        warmup_tuner = self.search(warmtuner_fn_result)

        return warmup_tuner, warmup_trials
Example 13
def _get_tuner_fn(exec_properties: Dict[str, Any]) -> Callable[..., Any]:
    """Returns tuner_fn from execution properties."""
    return udf_utils.get_fn(exec_properties, 'tuner_fn')