Example #1
    def test_metalearner_nearest_neighbor(self):

        meta_train_data = {}
        metadata_indices = [1, 2, 3]
        for ix, dataset_id in enumerate(metadata_indices):
            hparams = standard_artifacts.HyperParameters()
            hparams.uri = os.path.join(self._input_data_dir,
                                       f'Tuner.train_mockdata_{dataset_id}',
                                       'best_hyperparameters')
            meta_train_data[f'hparams_train_{ix}'] = [hparams]

            metafeature = artifacts.MetaFeatures()
            metafeature.uri = os.path.join(
                self._input_data_dir,
                f'MetaFeatureGen.train_mockdata_{dataset_id}', 'metafeatures')
            meta_train_data[f'meta_train_features_{ix}'] = [metafeature]

        input_dict = {
            **meta_train_data,
        }
        output_dict = {
            executor.OUTPUT_HYPERPARAMS: [self._hparams_out],
            executor.OUTPUT_MODEL: [self._model_out],
        }

        exec_properties = self._exec_properties.copy()
        exec_properties['algorithm'] = executor.NEAREST_NEIGHBOR
        ex = executor.MetaLearnerExecutor()
        ex.Do(input_dict, output_dict, exec_properties)

        self._verify_hparams_outputs(executor.NEAREST_NEIGHBOR)
        self._verify_model_export()
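
A note on the entry point: both metalearner tests drive the executor through the standard TFX `Do()` contract. The skeleton below is a minimal sketch of that contract, not the executor above; the `base_executor` import path varies across TFX versions.

from typing import Any, Dict, List, Text

from tfx import types
from tfx.dsl.components.base import base_executor  # older TFX: tfx.components.base


class SketchExecutor(base_executor.BaseExecutor):
    """Illustrative skeleton of the Do() contract exercised by the test above."""

    def Do(self,
           input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        # Inputs and outputs arrive as lists of artifacts keyed by name;
        # payloads are read from and written to each artifact's .uri.
        algorithm = exec_properties['algorithm']
        del algorithm  # A real executor would dispatch on this value.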
Example #2
    def test_metalearner_majority_voting(self):

        meta_train_data = {}
        metadata_indices = [1, 2, 3]
        for ix, dataset_id in enumerate(metadata_indices):
            hparams = standard_artifacts.HyperParameters()
            hparams.uri = os.path.join(self._input_data_dir,
                                       f'Tuner.train_mockdata_{dataset_id}',
                                       'best_hyperparameters')
            meta_train_data[f'hparams_train_{ix}'] = [hparams]

        input_dict = {
            **meta_train_data,
        }
        output_dict = {
            executor.OUTPUT_HYPERPARAMS: [self._hparams_out],
            executor.OUTPUT_MODEL: [self._model_out],
        }

        exec_properties = self._exec_properties.copy()
        exec_properties['algorithm'] = executor.MAJORITY_VOTING
        ex = executor.MetaLearnerExecutor()
        ex.Do(input_dict, output_dict, exec_properties)

        self._verify_hparams_outputs(executor.MAJORITY_VOTING)
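
MAJORITY_VOTING picks, per hyperparameter, the most common value across the candidate hparam sets. The executor's actual implementation is not reproduced here; the standalone sketch below only illustrates the idea.

import collections
from typing import Any, Dict, List


def majority_vote(hparam_dicts: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Illustrative only: per-key majority vote over candidate hparam dicts."""
    voted = {}
    for key in hparam_dicts[0]:
        counts = collections.Counter(d[key] for d in hparam_dicts)
        voted[key] = counts.most_common(1)[0][0]
    return voted


# Three candidates vote on each value independently.
majority_vote([
    {'num_layers': 2, 'lr': 0.01},
    {'num_layers': 2, 'lr': 0.001},
    {'num_layers': 3, 'lr': 0.001},
])  # -> {'num_layers': 2, 'lr': 0.001}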
Example #3
  def __init__(self,
               examples: types.Channel = None,
               schema: Optional[types.Channel] = None,
               transform_graph: Optional[types.Channel] = None,
               module_file: Optional[Text] = None,
               tuner_fn: Optional[Text] = None,
               train_args: trainer_pb2.TrainArgs = None,
               eval_args: trainer_pb2.EvalArgs = None,
               tune_args: Optional[tuner_pb2.TuneArgs] = None,
               best_hyperparameters: Optional[types.Channel] = None,
               instance_name: Optional[Text] = None):
    """Construct a Tuner component.

    Args:
      examples: A Channel of type `standard_artifacts.Examples`, serving as the
        source of examples that are used in tuning (required).
      schema: An optional Channel of type `standard_artifacts.Schema`, serving
        as the schema of training and eval data. This is used when raw examples
        are provided.
      transform_graph: An optional Channel of type
        `standard_artifacts.TransformGraph`, serving as the input transform
        graph if present. This is used when transformed examples are provided.
      module_file: A path to a Python module file containing the UDF tuner
        definition. The module_file must implement a function named `tuner_fn`
        at its top level. The function must have the following signature:
            def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
        Exactly one of 'module_file' or 'tuner_fn' must be supplied.
      tuner_fn: A Python path to the UDF model definition function. See
        'module_file' for the required signature of the UDF. Exactly one of
        'module_file' or 'tuner_fn' must be supplied.
      train_args: A trainer_pb2.TrainArgs instance, containing args used for
        training. Currently only num_steps is available.
      eval_args: A trainer_pb2.EvalArgs instance, containing args used for eval.
        Currently only num_steps is available.
      tune_args: A tuner_pb2.TuneArgs instance, containing args used for tuning.
        Currently only num_parallel_trials is available.
      best_hyperparameters: Optional Channel of type
        `standard_artifacts.HyperParameters` holding the best hparams found.
      instance_name: Optional unique instance name. Necessary if multiple Tuner
        components are declared in the same pipeline.
    """
    if bool(module_file) == bool(tuner_fn):
      raise ValueError(
          "Exactly one of 'module_file' or 'tuner_fn' must be supplied")

    best_hyperparameters = best_hyperparameters or types.Channel(
        type=standard_artifacts.HyperParameters,
        artifacts=[standard_artifacts.HyperParameters()])
    spec = TunerSpec(
        examples=examples,
        schema=schema,
        transform_graph=transform_graph,
        module_file=module_file,
        tuner_fn=tuner_fn,
        train_args=train_args,
        eval_args=eval_args,
        tune_args=tune_args,
        best_hyperparameters=best_hyperparameters)
    super(Tuner, self).__init__(spec=spec, instance_name=instance_name)
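
For context, typical pipeline wiring for this component looks like the sketch below; the upstream handles (`example_gen`, `schema_gen`) and the module path are assumptions, not part of the component's code.

# Hypothetical pipeline wiring; upstream components are assumed to exist.
tuner = Tuner(
    examples=example_gen.outputs['examples'],
    schema=schema_gen.outputs['schema'],
    module_file='path/to/tuner_module.py',  # must define tuner_fn at top level
    train_args=trainer_pb2.TrainArgs(num_steps=100),
    eval_args=trainer_pb2.EvalArgs(num_steps=50))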
Example #4
  def setUp(self):
    super(ComponentTest, self).setUp()

    self.examples = channel_utils.as_channel([standard_artifacts.Examples()])
    self.transform_output = channel_utils.as_channel(
        [standard_artifacts.TransformGraph()])
    self.schema = channel_utils.as_channel([standard_artifacts.Schema()])
    self.hyperparameters = channel_utils.as_channel(
        [standard_artifacts.HyperParameters()])
    self.train_args = trainer_pb2.TrainArgs(num_steps=100)
    self.eval_args = trainer_pb2.EvalArgs(num_steps=50)
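
Fixtures like these are typically consumed by a construction test along the following lines; this is a sketch, and the exact assertions of the original suite are not shown.

  def testConstruct(self):
    tuner = component.Tuner(
        examples=self.examples,
        schema=self.schema,
        module_file='/path/to/module/file',
        train_args=self.train_args,
        eval_args=self.eval_args)
    self.assertEqual(standard_artifacts.HyperParameters.TYPE_NAME,
                     tuner.outputs['best_hyperparameters'].type_name)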
Example #5
    def setUp(self):
        super().setUp()

        self.examples = channel_utils.as_channel(
            [standard_artifacts.Examples()])
        self.transform_graph = channel_utils.as_channel(
            [standard_artifacts.TransformGraph()])
        self.schema = channel_utils.as_channel([standard_artifacts.Schema()])
        self.hyperparameters = channel_utils.as_channel(
            [standard_artifacts.HyperParameters()])
        self.train_args = trainer_pb2.TrainArgs(splits=['train'],
                                                num_steps=100)
        self.eval_args = trainer_pb2.EvalArgs(splits=['eval'], num_steps=50)
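
This variant differs from Example #4 only in pinning explicit `splits` on TrainArgs and EvalArgs; as the docstring in Example #9 notes, leaving `splits` empty defaults to training on the `train` split and evaluating on the `eval` split.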
Example #6
  def setUp(self):
    super(ComponentTest, self).setUp()

    num_train = 5
    self.meta_train_data = {}
    for ix in range(num_train):
      self.meta_train_data[f'hparams_train_{ix}'] = channel_utils.as_channel(
          [standard_artifacts.HyperParameters()])
      self.meta_train_data[
          f'meta_train_features_{ix}'] = channel_utils.as_channel(
              [artifacts.MetaFeatures()])

    self.custom_config = {'some': 'thing', 'some other': 1, 'thing': 2}
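
A construction test would then splat these channels into the component under test. The component name and keyword contract below are hypothetical, inferred from the fixture keys (`hparams_train_{ix}`, `meta_train_features_{ix}`).

  def testConstruct(self):
    # Hypothetical component name and signature, inferred from the fixtures.
    metalearner = component.MetaLearner(
        algorithm='majority_voting',
        custom_config=self.custom_config,
        **self.meta_train_data)
    self.assertIsNotNone(metalearner)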
Example #7
    def __init__(self,
                 examples: types.Channel = None,
                 schema: types.Channel = None,
                 module_file: Optional[Text] = None,
                 tuner_fn: Optional[Text] = None,
                 model: Optional[types.Channel] = None,
                 best_hyperparameters: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Construct a Tuner component.

    Args:
      examples: A Channel of type `standard_artifacts.Examples`, serving as the
        source of examples that are used in tuning (required). Transformed
        examples are not yet supported.
      schema: A Channel of type `standard_artifacts.Schema`, serving as the
        schema of training and eval data.
      module_file: A path to a Python module file containing the UDF KerasTuner
        definition. Exactly one of 'module_file' or 'tuner_fn' must be supplied.
        The module_file must implement a function named `tuner_fn` at its top
        level. The function takes a working dir path, a train data path, an
        eval data path, and a tensorflow_metadata.proto.v0.schema_pb2.Schema,
        and returns a namedtuple TunerFnResult which contains:
        - 'tuner': A KerasTuner that will be used for tuning.
        - 'train_dataset': A tf.data.Dataset of training data.
        - 'eval_dataset': A tf.data.Dataset of eval data.
      tuner_fn: A Python path to the UDF model definition function. See
        'module_file' for the required signature of the UDF. Exactly one of
        'module_file' or 'tuner_fn' must be supplied.
      model: Optional Channel of type `standard_artifacts.Model` holding the
        best model found.
      best_hyperparameters: Optional Channel of type
        `standard_artifacts.HyperParameters` holding the best hparams found.
      instance_name: Optional unique instance name. Necessary if multiple Tuner
        components are declared in the same pipeline.
    """
        if bool(module_file) == bool(tuner_fn):
            raise ValueError(
                "Exactly one of 'module_file' or 'tuner_fn' must be supplied")

        model = model or types.Channel(type=standard_artifacts.Model,
                                       artifacts=[standard_artifacts.Model()])
        best_hyperparameters = best_hyperparameters or types.Channel(
            type=standard_artifacts.HyperParameters,
            artifacts=[standard_artifacts.HyperParameters()])
        spec = TunerSpec(examples=examples,
                         schema=schema,
                         module_file=module_file,
                         tuner_fn=tuner_fn,
                         model_export_path=model,
                         best_hyperparameters=best_hyperparameters)
        super(Tuner, self).__init__(spec=spec, instance_name=instance_name)
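
A module_file satisfying the contract documented above might define tuner_fn as in the sketch below; `_build_model` and `_make_dataset` are hypothetical helpers, and the KerasTuner configuration is illustrative only.

import kerastuner


def tuner_fn(working_dir, train_data_path, eval_data_path, schema):
    """Sketch of the documented contract; the helpers used are hypothetical."""
    tuner = kerastuner.RandomSearch(
        _build_model,  # hypothetical: maps KerasTuner hparams to a Keras model
        objective='val_accuracy',
        max_trials=10,
        directory=working_dir,
        project_name='tuner')
    return TunerFnResult(
        tuner=tuner,
        train_dataset=_make_dataset(train_data_path, schema),  # hypothetical
        eval_dataset=_make_dataset(eval_data_path, schema))  # hypothetical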
Example #8
  def testDoWithHyperParameters(self):
    hp_artifact = standard_artifacts.HyperParameters()
    hp_artifact.uri = os.path.join(self._output_data_dir, 'hyperparameters/')

    # TODO(jyzhao): use real kerastuner.HyperParameters instead of dict.
    hyperparameters = {}
    hyperparameters['first_dnn_layer_size'] = 100
    hyperparameters['num_dnn_layers'] = 4
    hyperparameters['dnn_decay_factor'] = 0.7
    io_utils.write_string_file(
        os.path.join(hp_artifact.uri, 'hyperparameters.txt'),
        json.dumps(hyperparameters))

    self._input_dict[executor.HYPERPARAMETERS_KEY] = [hp_artifact]

    self._exec_properties['module_file'] = self._module_file
    self._do(self._trainer_executor)
    self._verify_model_exports()
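
On the consuming side, a trainer module reads the same file back. A minimal sketch, assuming the 'hyperparameters.txt' file name used above and the `hp_artifact` from that test:

import json
import os

import tensorflow as tf

# Read back the JSON written by the test above.
hp_path = os.path.join(hp_artifact.uri, 'hyperparameters.txt')
with tf.io.gfile.GFile(hp_path, 'r') as f:
  hparams = json.loads(f.read())
assert hparams['num_dnn_layers'] == 4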
Example #9
    def __init__(self,
                 examples: types.Channel = None,
                 schema: Optional[types.Channel] = None,
                 transform_graph: Optional[types.Channel] = None,
                 module_file: Optional[str] = None,
                 tuner_fn: Optional[str] = None,
                 train_args: trainer_pb2.TrainArgs = None,
                 eval_args: trainer_pb2.EvalArgs = None,
                 tune_args: Optional[tuner_pb2.TuneArgs] = None,
                 custom_config: Optional[Dict[str, Any]] = None,
                 metalearning_algorithm: Optional[str] = None,
                 warmup_hyperparameters: Optional[types.Channel] = None,
                 metamodel: Optional[types.Channel] = None,
                 metafeature: Optional[types.Channel] = None,
                 best_hyperparameters: Optional[types.Channel] = None,
                 instance_name: Optional[str] = None):
        """Constructs custom Tuner component that stores trial learning curve.

    Adapted from the following code:
    https://github.com/tensorflow/tfx/blob/master/tfx/components/tuner/component.py

    Args:
      examples: A Channel of type `standard_artifacts.Examples`, serving as the
        source of examples that are used in tuning (required).
      schema: An optional Channel of type `standard_artifacts.Schema`, serving
        as the schema of training and eval data. This is used when raw examples
        are provided.
      transform_graph: An optional Channel of type
        `standard_artifacts.TransformGraph`, serving as the input transform
        graph if present. This is used when transformed examples are provided.
      module_file: A path to a Python module file containing the UDF tuner
        definition. The module_file must implement a function named `tuner_fn`
        at its top level. The function must have the following signature:
            def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
        Exactly one of 'module_file' or 'tuner_fn' must be supplied.
      tuner_fn: A Python path to the UDF model definition function. See
        'module_file' for the required signature of the UDF. Exactly one of
        'module_file' or 'tuner_fn' must be supplied.
      train_args: A trainer_pb2.TrainArgs instance, containing args used for
        training. Currently only splits and num_steps are available. Default
        behavior (when splits is empty) is train on `train` split.
      eval_args: A trainer_pb2.EvalArgs instance, containing args used for eval.
        Currently only splits and num_steps are available. Default behavior
        (when splits is empty) is evaluate on `eval` split.
      tune_args: A tuner_pb2.TuneArgs instance, containing args used for tuning.
        Currently only num_parallel_trials is available.
      custom_config: A dict which contains additional training job parameters
        that will be passed to the user module.
      metalearning_algorithm: Optional str naming the metalearning algorithm
        to use.
      warmup_hyperparameters: Optional Channel of type
        `artifacts.KCandidateHyperParameters` holding a list of recommended
        search spaces for warm-starting the tuner (generally the output of a
        metalearning component or subpipeline).
      metamodel: Optional Channel of type `standard_artifacts.Model` holding
        the trained metamodel.
      metafeature: Optional Channel of `artifacts.MetaFeatures` for the dataset
        to be tuned. This is used as input to the metamodel to predict the
        search space.
      best_hyperparameters: Optional Channel of type
        `standard_artifacts.HyperParameters` holding the best hparams found.
      instance_name: Optional unique instance name. Necessary if multiple Tuner
        components are declared in the same pipeline.
    """

        if bool(module_file) == bool(tuner_fn):
            raise ValueError(
                "Exactly one of 'module_file' or 'tuner_fn' must be supplied")

        best_hyperparameters = best_hyperparameters or types.Channel(
            type=standard_artifacts.HyperParameters,
            artifacts=[standard_artifacts.HyperParameters()])
        trial_summary_plot = types.Channel(type=TunerData,
                                           artifacts=[TunerData()])
        spec = AugmentedTunerSpec(
            examples=examples,
            schema=schema,
            transform_graph=transform_graph,
            module_file=module_file,
            tuner_fn=tuner_fn,
            train_args=train_args,
            eval_args=eval_args,
            tune_args=tune_args,
            metalearning_algorithm=metalearning_algorithm,
            warmup_hyperparameters=warmup_hyperparameters,
            metamodel=metamodel,
            metafeature=metafeature,
            best_hyperparameters=best_hyperparameters,
            trial_summary_plot=trial_summary_plot,
            custom_config=json_utils.dumps(custom_config),
        )
        super(AugmentedTuner, self).__init__(spec=spec,
                                             instance_name=instance_name)
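
A sketch of wiring the AugmentedTuner behind a metalearning subpipeline; the upstream handles (`transform`, `schema_gen`, `metalearner`) and their output keys are assumptions.

# Hypothetical wiring; upstream components and output keys are assumptions.
tuner = AugmentedTuner(
    examples=transform.outputs['transformed_examples'],
    schema=schema_gen.outputs['schema'],
    transform_graph=transform.outputs['transform_graph'],
    tuner_fn='my_project.tuner_module.tuner_fn',
    train_args=trainer_pb2.TrainArgs(splits=['train'], num_steps=100),
    eval_args=trainer_pb2.EvalArgs(splits=['eval'], num_steps=50),
    metalearning_algorithm='nearest_neighbor',
    warmup_hyperparameters=metalearner.outputs['warmup_hyperparameters'])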