Example 1
def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
    train_files = fn_args.train_files
    eval_files = fn_args.eval_files

    tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)

    hparams = _get_hyperparameters()

    tuner = kerastuner.Hyperband(hypermodel=_build_keras_model,
                                 hyperparameters=hparams,
                                 objective=kerastuner.Objective(
                                     'binary_accuracy', 'max'),
                                 factor=3,
                                 max_epochs=2,
                                 directory=fn_args.working_dir,
                                 project_name='ftfx:simple_e2e')

    train_dataset = _input_fn(train_files, tf_transform_output)
    eval_dataset = _input_fn(eval_files, tf_transform_output)

    return TunerFnResult(tuner=tuner,
                         fit_kwargs={
                             'x': train_dataset,
                             'validation_data': eval_dataset,
                             'steps_per_epoch': fn_args.train_steps,
                             'validation_steps': fn_args.eval_steps
                         })
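Each of these modules also defines a _get_hyperparameters() helper (not shown) that builds the search space handed to the tuner. A minimal sketch of such a helper is given below; the parameter names and ranges are illustrative only, mirroring the inline space defined in Example 5, not the original modules.

import kerastuner  # legacy KerasTuner import name, as used throughout these examples


def _get_hyperparameters() -> kerastuner.HyperParameters:
    """Sketch of a search-space helper; the real modules define their own spaces."""
    hp = kerastuner.HyperParameters()
    # Illustrative hyperparameters only.
    hp.Choice('learning_rate', [1e-1, 1e-3], default=1e-3)
    hp.Int('num_layers', 1, 5, default=2)
    return hp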
Example 2
def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
    """Build the tuner using the CloudTuner API.

  Args:
    fn_args: Holds args as name/value pairs. See
      https://www.tensorflow.org/tfx/api_docs/python/tfx/components/trainer/fn_args_utils/FnArgs.
      - transform_graph_path: optional transform graph produced by TFT.
      - custom_config: An optional dictionary passed to the component. In this
        example, it contains the dict ai_platform_tuning_args.
      - working_dir: working dir for tuning.
      - train_files: List of file paths containing training tf.Example data.
      - eval_files: List of file paths containing eval tf.Example data.
      - train_steps: number of train steps.
      - eval_steps: number of eval steps.

  Returns:
    A namedtuple containing the following:
      - tuner: A BaseTuner that will be used for tuning.
      - fit_kwargs: Args to pass to the tuner's run_trial function for fitting
                    the model, e.g., the training and validation datasets.
                    Required args depend on the tuner's implementation.
  """
    transform_graph = tft.TFTransformOutput(fn_args.transform_graph_path)

    # CloudTuner is a subclass of kerastuner.Tuner which inherits from
    # BaseTuner.
    tuner = CloudTuner(
        _build_keras_model,
        # The project/region configurations for the Cloud Vizier service and its
        # trial executions. Note: this example uses the same configuration as the
        # CAIP Training service for distributed tuning flock management, so that
        # all of the pipeline's jobs and resources can be viewed in the same
        # project. It can also be configured separately.
        project_id=fn_args.custom_config['ai_platform_tuning_args']['project'],
        region=fn_args.custom_config['ai_platform_tuning_args']['region'],
        objective=kerastuner.Objective('val_sparse_categorical_accuracy',
                                       'max'),
        hyperparameters=_get_hyperparameters(),
        max_trials=8,  # Optional.
        directory=fn_args.working_dir)

    train_dataset = _input_fn(fn_args.train_files,
                              fn_args.data_accessor,
                              transform_graph,
                              batch_size=_TRAIN_BATCH_SIZE)

    eval_dataset = _input_fn(fn_args.eval_files,
                             fn_args.data_accessor,
                             transform_graph,
                             batch_size=_EVAL_BATCH_SIZE)

    return TunerFnResult(tuner=tuner,
                         fit_kwargs={
                             'x': train_dataset,
                             'validation_data': eval_dataset,
                             'steps_per_epoch': fn_args.train_steps,
                             'validation_steps': fn_args.eval_steps
                         })
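Several of these examples (2, 3, 4, 6 and 10) also rely on an _input_fn helper that reads transformed tf.Example data via fn_args.data_accessor; it is not reproduced here. A minimal sketch of the common TFX pattern for such a helper follows; _LABEL_KEY is assumed to be a module-level constant, and the exact options may differ from the original code.

from tfx_bsl.public import tfxio


def _input_fn(file_pattern, data_accessor, tf_transform_output, batch_size):
    """Sketch: builds a tf.data.Dataset of transformed features for tuning."""
    return data_accessor.tf_dataset_factory(
        file_pattern,
        tfxio.TensorFlowDatasetOptions(
            batch_size=batch_size,
            label_key=_LABEL_KEY),  # assumed module-level label constant
        tf_transform_output.transformed_metadata.schema).repeat()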
Example 3
def tuner_fn(fn_args: TrainerFnArgs) -> TunerFnResult:
    """Build the tuner using the KerasTuner API.
  Args:
    fn_args: Holds args as name/value pairs.
      - working_dir: working dir for tuning.
      - train_files: List of file paths containing training tf.Example data.
      - eval_files: List of file paths containing eval tf.Example data.
      - train_steps: number of train steps.
      - eval_steps: number of eval steps.
      - schema_path: optional schema of the input data.
      - transform_graph_path: optional transform graph produced by TFT.
  Returns:
    A namedtuple containing the following:
      - tuner: A BaseTuner that will be used for tuning.
      - fit_kwargs: Args to pass to the tuner's run_trial function for fitting
                    the model, e.g., the training and validation datasets.
                    Required args depend on the tuner's implementation.
  """
    transform_graph = tft.TFTransformOutput(fn_args.transform_graph_path)

    # Construct a build_keras_model_fn that only takes the tuner's hyperparameters
    # as input, with the transform graph bound via functools.partial.
    build_keras_model_fn = functools.partial(
        _build_keras_model, tf_transform_output=transform_graph)

    # BayesianOptimization is a subclass of kerastuner.Tuner which inherits from BaseTuner.
    tuner = kerastuner.BayesianOptimization(
        build_keras_model_fn,
        max_trials=10,
        hyperparameters=_get_hyperparameters(),
        # New entries would allow n_units hyperparameters to be constructed
        # conditionally on the selected n_layers:
        #       allow_new_entries=True,
        #       tune_new_entries=True,
        objective=kerastuner.Objective('val_sparse_categorical_accuracy',
                                       'max'),
        directory=fn_args.working_dir,
        project_name='covertype_tuning')

    train_dataset = _input_fn(fn_args.train_files,
                              fn_args.data_accessor,
                              transform_graph,
                              batch_size=TRAIN_BATCH_SIZE)

    eval_dataset = _input_fn(fn_args.eval_files,
                             fn_args.data_accessor,
                             transform_graph,
                             batch_size=EVAL_BATCH_SIZE)

    return TunerFnResult(tuner=tuner,
                         fit_kwargs={
                             'x': train_dataset,
                             'validation_data': eval_dataset,
                             'steps_per_epoch': fn_args.train_steps,
                             'validation_steps': fn_args.eval_steps
                         })
Example 4
def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
    """Build the tuner using the KerasTuner API.

  Args:
    fn_args: Holds args as name/value pairs.
      - working_dir: working dir for tuning.
      - train_files: List of file paths containing training tf.Example data.
      - eval_files: List of file paths containing eval tf.Example data.
      - train_steps: number of train steps.
      - eval_steps: number of eval steps.
      - schema_path: optional schema of the input data.
      - transform_graph_path: optional transform graph produced by TFT.

  Returns:
    A namedtuple containing the following:
      - tuner: A BaseTuner that will be used for tuning.
      - fit_kwargs: Args to pass to the tuner's run_trial function for fitting
                    the model, e.g., the training and validation datasets.
                    Required args depend on the tuner's implementation.
  """
    # RandomSearch is a subclass of kerastuner.Tuner which inherits from
    # BaseTuner.
    tuner = kerastuner.RandomSearch(_build_keras_model,
                                    max_trials=6,
                                    hyperparameters=_get_hyperparameters(),
                                    allow_new_entries=False,
                                    objective=kerastuner.Objective(
                                        'val_sparse_categorical_accuracy',
                                        'max'),
                                    directory=fn_args.working_dir,
                                    project_name='penguin_tuning')

    transform_graph = tft.TFTransformOutput(fn_args.transform_graph_path)

    train_dataset = _input_fn(fn_args.train_files,
                              fn_args.data_accessor,
                              transform_graph,
                              batch_size=_TRAIN_BATCH_SIZE)

    eval_dataset = _input_fn(fn_args.eval_files,
                             fn_args.data_accessor,
                             transform_graph,
                             batch_size=_EVAL_BATCH_SIZE)

    return TunerFnResult(tuner=tuner,
                         fit_kwargs={
                             'x': train_dataset,
                             'validation_data': eval_dataset,
                             'steps_per_epoch': fn_args.train_steps,
                             'validation_steps': fn_args.eval_steps
                         })
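_build_keras_model, referenced by every example, is expected to accept a kerastuner.HyperParameters object (optionally plus a transform graph, as in Examples 3 and 6) and return a compiled tf.keras.Model. A hypothetical sketch is shown below; the feature names, layer widths, and output size are placeholders that match the illustrative search space sketched after Example 1, not the original models.

import tensorflow as tf
import kerastuner


def _build_keras_model(hparams: kerastuner.HyperParameters) -> tf.keras.Model:
    """Sketch of a tunable model builder; the architecture is illustrative."""
    # Hypothetical numeric features; the real modules define their own keys.
    feature_keys = ['feature_a', 'feature_b', 'feature_c']
    inputs = [tf.keras.layers.Input(shape=(1,), name=key) for key in feature_keys]
    x = tf.keras.layers.concatenate(inputs)
    for _ in range(int(hparams.get('num_layers'))):
        x = tf.keras.layers.Dense(8, activation='relu')(x)
    outputs = tf.keras.layers.Dense(3, activation='softmax')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(hparams.get('learning_rate')),
        loss='sparse_categorical_crossentropy',
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    return model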
Example 5
def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
  """Build the tuner using the KerasTuner API.

  Args:
    fn_args: Holds args as name/value pairs.
      - working_dir: working dir for tuning.
      - train_files: List of file paths containing training tf.Example data.
      - eval_files: List of file paths containing eval tf.Example data.
      - train_steps: number of train steps.
      - eval_steps: number of eval steps.
      - schema_path: optional schema of the input data.
      - transform_graph_path: optional transform graph produced by TFT.

  Returns:
    A namedtuple containing the following:
      - tuner: A BaseTuner that will be used for tuning.
      - fit_kwargs: Args to pass to the tuner's run_trial function for fitting
                    the model, e.g., the training and validation datasets.
                    Required args depend on the tuner's implementation.
  """
  hp = kerastuner.HyperParameters()
  # Defines search space.
  hp.Choice('learning_rate', [1e-1, 1e-3])
  hp.Int('num_layers', 1, 5)

  # RandomSearch is a subclass of kerastuner.Tuner, which inherits from BaseTuner.
  tuner = kerastuner.RandomSearch(
      _build_keras_model,
      max_trials=5,
      hyperparameters=hp,
      allow_new_entries=False,
      objective='val_sparse_categorical_accuracy',
      directory=fn_args.working_dir,
      project_name='test')

  schema = schema_pb2.Schema()
  io_utils.parse_pbtxt_file(fn_args.schema_path, schema)
  train_dataset = _input_fn(fn_args.train_files, schema)
  eval_dataset = _input_fn(fn_args.eval_files, schema)

  return TunerFnResult(
      tuner=tuner,
      fit_kwargs={
          'x': train_dataset,
          'validation_data': eval_dataset,
          'steps_per_epoch': fn_args.train_steps,
          'validation_steps': fn_args.eval_steps
      })
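Whichever tuner is returned, the tuner_fn itself is picked up by the TFX Tuner component, which imports it from a user module file. A rough sketch of that pipeline wiring is below, assuming standard upstream components (transform and schema_gen handles) and a hypothetical module_file path; the exact arguments depend on whether the module reads raw or transformed examples.

from tfx.components import Tuner
from tfx.proto import trainer_pb2

# Hypothetical wiring; `transform` and `schema_gen` are assumed upstream components.
tuner = Tuner(
    module_file='path/to/tuner_module.py',
    examples=transform.outputs['transformed_examples'],
    transform_graph=transform.outputs['transform_graph'],
    schema=schema_gen.outputs['schema'],
    train_args=trainer_pb2.TrainArgs(num_steps=20),
    eval_args=trainer_pb2.EvalArgs(num_steps=5))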
Example 6
def tuner_fn(fn_args: TrainerFnArgs) -> TunerFnResult:
  """Build the tuner using CloudTuner (KerasTuner instance).
  Args:
    fn_args: Holds args used to train and tune the model as name/value pairs. See 
      https://www.tensorflow.org/tfx/api_docs/python/tfx/components/trainer/fn_args_utils/FnArgs.
  Returns:
    A namedtuple containing the following:
      - tuner: A BaseTuner that will be used for tuning.
      - fit_kwargs: Args to pass to the tuner's run_trial function for fitting
                    the model, e.g., the training and validation datasets.
                    Required args depend on the tuner's implementation.
  """
  transform_graph = tft.TFTransformOutput(fn_args.transform_graph_path)
  
  # Construct a build_keras_model_fn that only takes the tuner's hyperparameters
  # as input, with the transform graph bound via functools.partial.
  build_keras_model_fn = functools.partial(
      _build_keras_model, tf_transform_output=transform_graph)  

  # CloudTuner is a subclass of kerastuner.Tuner which inherits from BaseTuner.   
  tuner = CloudTuner(
      build_keras_model_fn,
      project_id=fn_args.custom_config['ai_platform_training_args']['project'],
      region=fn_args.custom_config['ai_platform_training_args']['region'],      
      max_trials=50,
      hyperparameters=_get_hyperparameters(),
      objective=kerastuner.Objective('val_sparse_categorical_accuracy', 'max'),
      directory=fn_args.working_dir)
  
  train_dataset = _input_fn(
      fn_args.train_files,
      fn_args.data_accessor,
      transform_graph,
      batch_size=TRAIN_BATCH_SIZE)

  eval_dataset = _input_fn(
      fn_args.eval_files,
      fn_args.data_accessor,
      transform_graph,
      batch_size=EVAL_BATCH_SIZE)

  return TunerFnResult(
      tuner=tuner,
      fit_kwargs={
          'x': train_dataset,
          'validation_data': eval_dataset,
          'steps_per_epoch': fn_args.train_steps,
          'validation_steps': fn_args.eval_steps
      })
Example 7
def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
  """Build the tuner using the CloudTuner API.

  Args:
    fn_args: Holds args as name/value pairs.
      - working_dir: working dir for tuning.
      - train_files: List of file paths containing training tf.Example data.
      - eval_files: List of file paths containing eval tf.Example data.
      - train_steps: number of train steps.
      - eval_steps: number of eval steps.
      - schema_path: optional schema of the input data.
      - transform_graph_path: optional transform graph produced by TFT.

  Returns:
    A namedtuple containing the following:
      - tuner: A BaseTuner that will be used for tuning.
      - fit_kwargs: Args to pass to the tuner's run_trial function for fitting
                    the model, e.g., the training and validation datasets.
                    Required args depend on the tuner's implementation.
  """
  # CloudTuner is a subclass of kerastuner.Tuner which inherits from
  # BaseTuner.
  tuner = CloudTuner(
      _build_keras_model,
      project_id=_PROJECT_ID,
      region=_REGION,
      objective=kerastuner.Objective('val_sparse_categorical_accuracy', 'max'),
      hyperparameters=_get_hyperparameters(),
      max_trials=8,  # Optional.
      directory=fn_args.working_dir
      )

  transform_graph = tft.TFTransformOutput(fn_args.transform_graph_path)
  train_dataset = _input_fn(fn_args.train_files, transform_graph)
  eval_dataset = _input_fn(fn_args.eval_files, transform_graph)
  return TunerFnResult(
      tuner=tuner,
      fit_kwargs={
          'x': train_dataset,
          'validation_data': eval_dataset,
          'steps_per_epoch': fn_args.train_steps,
          'validation_steps': fn_args.eval_steps
      })
Example 8
def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
    """Build the tuner using the CloudTuner API.

  Args:
    fn_args: Holds args as name/value pairs. See
      https://www.tensorflow.org/tfx/api_docs/python/tfx/components/trainer/fn_args_utils/FnArgs.
      - transform_graph_path: optional transform graph produced by TFT.
      - custom_config: An optional dictionary passed to the component. In this
        example, it contains the dict ai_platform_tuning_args.
      - working_dir: working dir for tuning.
      - train_files: List of file paths containing training tf.Example data.
      - eval_files: List of file paths containing eval tf.Example data.
      - train_steps: number of train steps.
      - eval_steps: number of eval steps.

  Returns:
    A namedtuple containing the following:
      - tuner: A BaseTuner that will be used for tuning.
      - fit_kwargs: Args to pass to the tuner's run_trial function for fitting
                    the model, e.g., the training and validation datasets.
                    Required args depend on the tuner's implementation. For
                    DistributingCloudTuner, datasets are generated in the remote
                    jobs rather than serialized and deserialized here.
  """

    # The study_id should be the same across multiple tuner workers that start
    # at approximately the same time.
    study_id = 'DistributingCloudTuner_study_{}'.format(
        datetime.datetime.now().strftime('%Y%m%d%H'))

    if _CLOUD_FIT_IMAGE == 'gcr.io/my-project-id/cloud_fit':
        raise ValueError('Build your own cloud_fit image; '
                         'the default placeholder image is still in use!')

    tuner = cloud_tuner.DistributingCloudTuner(
        _build_keras_model,
        # The project/region configurations for the Cloud Vizier service and its
        # trial executions. Note: this example uses the same configuration as the
        # CAIP Training service for distributed tuning flock management, so that
        # all of the pipeline's jobs and resources can be viewed in the same
        # project. It can also be configured separately.
        project_id=fn_args.custom_config['ai_platform_tuning_args']['project'],
        region=fn_args.custom_config['ai_platform_tuning_args']['region'],
        objective=kerastuner.Objective('val_sparse_categorical_accuracy',
                                       'max'),
        hyperparameters=_get_hyperparameters(),
        max_trials=5,  # Optional.
        directory=os.path.join(
            fn_args.custom_config['remote_trials_working_dir'], study_id),
        study_id=study_id,
        container_uri=_CLOUD_FIT_IMAGE,
        # Optional `MachineConfig` that represents the configuration for the
        # general workers in a distribution cluster. For more options, see:
        # https://github.com/tensorflow/cloud/blob/master/src/python/tensorflow_cloud/core/machine_config.py
        replica_config=machine_config.COMMON_MACHINE_CONFIGS['K80_1X'],
        # Optional total number of workers in a distribution cluster including a
        # chief worker.
        replica_count=2)

    return TunerFnResult(tuner=tuner,
                         fit_kwargs={
                             'steps_per_epoch': fn_args.train_steps,
                             'validation_steps': fn_args.eval_steps,
                             'train_files': fn_args.train_files,
                             'eval_files': fn_args.eval_files,
                             'transform_graph_path':
                             fn_args.transform_graph_path,
                             'label_key': _LABEL_KEY,
                             'train_batch_size': _TRAIN_BATCH_SIZE,
                             'eval_batch_size': _EVAL_BATCH_SIZE,
                         })
Example 9
def tuner_fn(fn_args: fn_args_utils.FnArgs) -> TunerFnResult:
    """Build the tuner using the KerasTuner API.

  Args:
    fn_args: Holds args as name/value pairs.
      - working_dir: working dir for tuning.
      - train_files: List of file paths containing training tf.Example data.
      - eval_files: List of file paths containing eval tf.Example data.
      - train_steps: number of train steps.
      - eval_steps: number of eval steps.
      - schema_path: optional schema of the input data.
      - transform_graph_path: optional transform graph produced by TFT.
      - custom_config: A dict with a single 'problem_statement' entry containing
        a text-format serialized ProblemStatement proto which defines the task.

  Returns:
    A namedtuple containing the following:
      - tuner: A BaseTuner that will be used for tuning.
      - fit_kwargs: Args to pass to the tuner's run_trial function for fitting
                    the model, e.g., the training and validation datasets.
                    Required args depend on the tuner's implementation.
  """

    problem_statement = text_format.Parse(
        fn_args.custom_config['problem_statement'], ps_pb2.ProblemStatement())
    autodata_adapter = kma.KerasModelAdapter(
        problem_statement=problem_statement,
        transform_graph_dir=fn_args.transform_graph_path)

    build_keras_model_fn = functools.partial(_build_keras_model,
                                             autodata_adapter=autodata_adapter)
    if 'warmup_hyperparameters' in fn_args.custom_config:
        hyperparameters = hp_module.HyperParameters.from_config(
            fn_args.custom_config['warmup_hyperparameters'])
    else:
        hyperparameters = _get_hyperparameters()

    tuner_cls = get_tuner_cls_with_callbacks(kerastuner.RandomSearch)
    tuner = tuner_cls(build_keras_model_fn,
                      max_trials=fn_args.custom_config.get('max_trials', 10),
                      hyperparameters=hyperparameters,
                      allow_new_entries=False,
                      objective=autodata_adapter.tuner_objective,
                      directory=fn_args.working_dir,
                      project_name=f'{problem_statement.tasks[0].name}_tuning')

    # TODO(nikhilmehta): Make batch-size tunable hyperparameter.
    train_dataset = autodata_adapter.get_dataset(
        file_pattern=fn_args.train_files,
        batch_size=128,
        num_epochs=None,
        shuffle=True)

    eval_dataset = autodata_adapter.get_dataset(
        file_pattern=fn_args.eval_files,
        batch_size=128,
        num_epochs=1,
        shuffle=False)

    return TunerFnResult(tuner=tuner,
                         fit_kwargs={
                             'x': train_dataset,
                             'validation_data': eval_dataset,
                             'steps_per_epoch': fn_args.train_steps,
                         })
Example 10
def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
    """Build the tuner using CloudTuner (KerasTuner instance).
    Args:
      fn_args: Holds args used to train and tune the model as name/value pairs. See
        https://www.tensorflow.org/tfx/api_docs/python/tfx/components/trainer/fn_args_utils/FnArgs.
    Returns:
      A namedtuple containing the following:
        - tuner: A BaseTuner that will be used for tuning.
        - fit_kwargs: Args to pass to the tuner's run_trial function for fitting
                      the model, e.g., the training and validation datasets.
                      Required args depend on the tuner's implementation.
    """
    custom_config_dict = _get_custom_config_dict(fn_args)

    max_trials = custom_config_dict.get('max_trials', MAX_TRIALS)

    transform_graph = tft.TFTransformOutput(fn_args.transform_graph_path)

    # Construct a build_keras_model_fn that only takes the tuner's hyperparameters
    # as input, with the transform graph bound via functools.partial.
    build_keras_model_fn = functools.partial(
        _build_keras_model, tf_transform_output=transform_graph)

    # CloudTuner is a subclass of kerastuner.Tuner which inherits from BaseTuner.
    #is_local_run = "custom_config" not in fn_args.custom_config
    is_local_run = custom_config_dict.get("is_local_run", True)
    absl.logging.info('is_local_run : %s' % is_local_run)
    if is_local_run:
        tuner = kerastuner.RandomSearch(build_keras_model_fn,
                                        max_trials=max_trials,
                                        hyperparameters=_get_hyperparameters(),
                                        allow_new_entries=False,
                                        objective=kerastuner.Objective(
                                            'val_binary_accuracy', 'max'),
                                        directory=fn_args.working_dir,
                                        project_name='titanic_tuning')
    else:
        tuner = CloudTuner(
            build_keras_model_fn,
            project_id=fn_args.custom_config['ai_platform_training_args']
            ['project'],
            region=fn_args.custom_config['ai_platform_training_args']
            ['region'],
            max_trials=max_trials,
            hyperparameters=_get_hyperparameters(),
            objective=kerastuner.Objective('val_binary_accuracy', 'max'),
            # objective=kerastuner.Objective('auc', 'min'),
            directory=fn_args.working_dir)

    train_dataset = _input_fn(fn_args.train_files,
                              fn_args.data_accessor,
                              transform_graph,
                              batch_size=TRAIN_BATCH_SIZE)

    eval_dataset = _input_fn(fn_args.eval_files,
                             fn_args.data_accessor,
                             transform_graph,
                             batch_size=EVAL_BATCH_SIZE)

    return TunerFnResult(tuner=tuner,
                         fit_kwargs={
                             'x': train_dataset,
                             'validation_data': eval_dataset,
                             'steps_per_epoch': fn_args.train_steps,
                             'validation_steps': fn_args.eval_steps
                         })
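Example 10 also calls a _get_custom_config_dict helper that is not shown. Its exact behavior is unknown; given how it is used above, a plausible minimal version would simply normalize fn_args.custom_config into a dict, for example:

def _get_custom_config_dict(fn_args):
    """Guess at the missing helper: treat an unset custom_config as empty."""
    return dict(fn_args.custom_config) if fn_args.custom_config else {}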