Example #1
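Builds a `tf.estimator.TrainSpec`: it attaches a `LogParametersCountHook` and a `CountersHook`, then forwards the `train` and `data` sections of the configuration to the model's `input_fn`.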
  def _build_train_spec(self):
    train_hooks = [
        hooks.LogParametersCountHook(),
        hooks.CountersHook(
            every_n_steps=self._estimator.config.save_summary_steps,
            output_dir=self._estimator.model_dir)]

    train_spec = tf.estimator.TrainSpec(
        input_fn=self._model.input_fn(
            tf.estimator.ModeKeys.TRAIN,
            self._config["train"]["batch_size"],
            self._config["data"],
            self._config["data"]["train_features_file"],
            labels_file=self._config["data"]["train_labels_file"],
            batch_type=self._config["train"].get("batch_type", "examples"),
            batch_multiplier=self._num_devices,
            bucket_width=self._config["train"].get("bucket_width", 5),
            single_pass=self._config["train"].get("single_pass", False),
            num_threads=self._config["train"].get("num_threads"),
            sample_buffer_size=self._config["train"].get("sample_buffer_size", 500000),
            prefetch_buffer_size=self._config["train"].get("prefetch_buffer_size"),
            maximum_features_length=self._config["train"].get("maximum_features_length"),
            maximum_labels_length=self._config["train"].get("maximum_labels_length")),
        max_steps=self._config["train"].get("train_steps"),
        hooks=train_hooks)
    return train_spec
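For reference, here is a minimal sketch of the configuration dictionary this method reads; the file paths and numeric values are illustrative assumptions, not project defaults.

# Hypothetical minimal config for _build_train_spec (illustrative values only).
config = {
    "data": {
        "train_features_file": "data/train.src",  # assumed path
        "train_labels_file": "data/train.tgt",    # assumed path
    },
    "train": {
        "batch_size": 64,
        "batch_type": "examples",
        "bucket_width": 5,
        "sample_buffer_size": 500000,
        "train_steps": 100000,
    },
}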
Example #2
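A `model_fn` implementation that branches on the estimator mode: in TRAIN it shards features and labels across devices through a `dispatcher`, reduces the per-shard losses, and builds the train op; in EVAL it computes the loss, metrics, and optional prediction hooks; in PREDICT it returns the predictions together with a default serving signature.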
    def _model_fn(features, labels, params, mode, config):
      """model_fn implementation."""
      if mode == tf.estimator.ModeKeys.TRAIN:
        counters = self._register_word_counters(features, labels)
        counters_hook = hooks.CountersHook(
            every_n_steps=config.save_summary_steps,
            output_dir=config.model_dir,
            counters=counters)

        features_shards = dispatcher.shard(features)
        labels_shards = dispatcher.shard(labels)

        with tf.variable_scope(self.name, initializer=self._initializer(params)):
          losses_shards = dispatcher(
              _loss_op, features_shards, labels_shards, params, mode, config)

        loss = _extract_loss(losses_shards)
        train_op = optimize(loss, params, mixed_precision=(self.dtype == tf.float16))
        return tf.estimator.EstimatorSpec(
            mode,
            loss=loss,
            train_op=train_op,
            training_hooks=[counters_hook])
      elif mode == tf.estimator.ModeKeys.EVAL:
        with tf.variable_scope(self.name):
          logits, predictions = self._build(features, labels, params, mode, config=config)
          loss = self._compute_loss(features, labels, logits, params, mode)

        loss = _extract_loss(loss)
        eval_metric_ops = self._compute_metrics(features, labels, predictions)
        evaluation_hooks = []
        if predictions is not None and eval_prediction_hooks_fn is not None:
          evaluation_hooks.extend(eval_prediction_hooks_fn(predictions))
        return tf.estimator.EstimatorSpec(
            mode,
            loss=loss,
            eval_metric_ops=eval_metric_ops,
            evaluation_hooks=evaluation_hooks)
      elif mode == tf.estimator.ModeKeys.PREDICT:
        with tf.variable_scope(self.name):
          _, predictions = self._build(features, labels, params, mode, config=config)

        export_outputs = {}
        export_outputs[tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = (
            tf.estimator.export.PredictOutput(predictions))

        return tf.estimator.EstimatorSpec(
            mode,
            predictions=predictions,
            export_outputs=export_outputs)
      else:
        raise RuntimeError("Invalid mode")
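For context, a function with this five-argument signature follows the TF 1.x Estimator `model_fn` contract. A minimal sketch of how such a function is typically wired up (the `model_dir` and `params` values here are illustrative assumptions):

# Sketch: the Estimator calls model_fn once per mode (TRAIN/EVAL/PREDICT).
estimator = tf.estimator.Estimator(
    model_fn=_model_fn,             # the closure defined above
    model_dir="runs/my_model",      # illustrative directory
    params={"learning_rate": 1.0})  # forwarded to model_fn as `params`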
Example #3
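A variant of Example #1 that additionally warns when `sample_buffer_size` is not set, because the default shuffle buffer of one million examples can silently drop part of a larger dataset when `eval_delay` is shorter than one training epoch.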
    def _build_train_spec(self):
        train_hooks = [
            hooks.LogParametersCountHook(),
            hooks.CountersHook(
                every_n_steps=self._estimator.config.save_summary_steps,
                output_dir=self._estimator.model_dir)
        ]

        default_sample_buffer_size = 1000000
        if "sample_buffer_size" not in self._config["train"]:
            tf.logging.warn(
                "You did not set sample_buffer_size. By default, the "
                "training dataset is shuffled in chunks of %d examples. "
                "If your dataset is larger than this value and eval_delay "
                "is shorter than the training time of one epoch, a section "
                "of the dataset will be discarded. Consider setting "
                "sample_buffer_size to the size of your dataset." %
                default_sample_buffer_size)

        train_spec = tf.estimator.TrainSpec(
            input_fn=self._model.input_fn(
                tf.estimator.ModeKeys.TRAIN,
                self._config["train"]["batch_size"],
                self._config["data"],
                self._config["data"]["train_features_file"],
                labels_file=self._config["data"]["train_labels_file"],
                batch_type=self._config["train"].get("batch_type", "examples"),
                batch_multiplier=self._num_devices,
                bucket_width=self._config["train"].get("bucket_width", 5),
                single_pass=self._config["train"].get("single_pass", False),
                num_threads=self._config["train"].get("num_threads"),
                sample_buffer_size=self._config["train"].get(
                    "sample_buffer_size", default_sample_buffer_size),
                maximum_features_length=self._config["train"].get(
                    "maximum_features_length"),
                maximum_labels_length=self._config["train"].get(
                    "maximum_labels_length")),
            max_steps=self._config["train"].get("train_steps"),
            hooks=train_hooks)
        return train_spec
Example #4
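A standalone module-level `train` function that assembles both `TrainSpec` and `EvalSpec`, optionally saving evaluation predictions so external evaluators can score them, and then runs `tf.estimator.train_and_evaluate`.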
def train(estimator, model, config):
    """Runs training.

  Args:
    estimator: A `tf.estimator.Estimator`.
    model: A `opennmt.models.Model`.
    config: The configuration.
  """
    batch_size = config["train"]["batch_size"]
    prefetch_buffer_size = config["train"].get("prefetch_buffer_size",
                                               batch_size * 1000)
    num_parallel_process_calls = config["train"].get(
        "num_parallel_process_calls", multiprocessing.cpu_count())

    train_hooks = [
        hooks.LogParametersCountHook(),
        hooks.CountersHook(every_n_steps=estimator.config.save_summary_steps,
                           output_dir=estimator.model_dir)
    ]

    eval_hooks = []
    if config["train"].get("save_eval_predictions", False):
        save_path = os.path.join(estimator.model_dir, "eval")
        if not os.path.isdir(save_path):
            os.makedirs(save_path)
        eval_hooks.append(
            hooks.SaveEvaluationPredictionHook(
                model,
                os.path.join(save_path, "predictions.txt"),
                post_evaluation_fn=external_evaluation_fn(
                    config["train"].get("external_evaluators"),
                    config["data"]["eval_labels_file"],
                    output_dir=estimator.model_dir)))
    elif config["train"].get("external_evaluators") is not None:
        tf.logging.warning(
            "External evaluators only work when save_eval_predictions is enabled."
        )

    train_spec = tf.estimator.TrainSpec(
        input_fn=model.input_fn(
            tf.estimator.ModeKeys.TRAIN,
            batch_size,
            prefetch_buffer_size,
            num_parallel_process_calls,
            config["data"],
            config["data"]["train_features_file"],
            labels_file=config["data"]["train_labels_file"],
            num_buckets=config["train"].get("num_buckets", 5),
            sample_buffer_size=config["train"].get("sample_buffer_size", 1000000),
            maximum_features_length=config["train"].get("maximum_features_length"),
            maximum_labels_length=config["train"].get("maximum_labels_length")),
        max_steps=config["train"].get("train_steps"),
        hooks=train_hooks)

    eval_spec = tf.estimator.EvalSpec(
        input_fn=model.input_fn(
            tf.estimator.ModeKeys.EVAL,
            batch_size,
            prefetch_buffer_size,
            num_parallel_process_calls,
            config["data"],
            config["data"]["eval_features_file"],
            labels_file=config["data"]["eval_labels_file"]),
        steps=None,
        hooks=eval_hooks,
        exporters=tf.estimator.LatestExporter(
            "latest", model.serving_input_fn(config["data"])),
        throttle_secs=config["train"].get("eval_delay", 18000))

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #5
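A later variant of Example #4: evaluation options move into a dedicated `eval` section of the configuration, defaults for the eval batch and buffer sizes are derived from the training settings, and the `sample_buffer_size` warning from Example #3 is included.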
def train(estimator, model, config):
    """Runs training.

  Args:
    estimator: A `tf.estimator.Estimator`.
    model: A `opennmt.models.Model`.
    config: The configuration.
  """
    if "eval" not in config:
        config["eval"] = {}

    train_hooks = [
        hooks.LogParametersCountHook(),
        hooks.CountersHook(every_n_steps=estimator.config.save_summary_steps,
                           output_dir=estimator.model_dir)
    ]

    eval_hooks = []
    if (config["eval"].get("save_eval_predictions", False)
            or config["eval"].get("external_evaluators") is not None):
        save_path = os.path.join(estimator.model_dir, "eval")
        if not os.path.isdir(save_path):
            os.makedirs(save_path)
        eval_hooks.append(
            hooks.SaveEvaluationPredictionHook(
                model,
                os.path.join(save_path, "predictions.txt"),
                post_evaluation_fn=external_evaluation_fn(
                    config["eval"].get("external_evaluators"),
                    config["data"]["eval_labels_file"],
                    output_dir=estimator.model_dir)))

    default_sample_buffer_size = 1000000
    if "sample_buffer_size" not in config["train"]:
        tf.logging.warn(
            "You did not set sample_buffer_size. By default, the "
            "training dataset is shuffled in chunks of %d examples. "
            "If your dataset is larger than this value and eval_delay "
            "is shorter than the training time of one epoch, a section "
            "of the dataset will be discarded. Consider setting "
            "sample_buffer_size to the size of your dataset." %
            default_sample_buffer_size)

    train_batch_size = config["train"]["batch_size"]
    train_batch_type = config["train"].get("batch_type", "examples")
    train_prefetch_buffer_size = config["train"].get(
        "prefetch_buffer_size",
        train_batch_size * (1000 if train_batch_type == "examples" else 50))
    train_num_parallel_process_calls = config["train"].get(
        "num_parallel_process_calls", multiprocessing.cpu_count())
    train_spec = tf.estimator.TrainSpec(
        input_fn=model.input_fn(
            tf.estimator.ModeKeys.TRAIN,
            train_batch_size,
            train_prefetch_buffer_size,
            train_num_parallel_process_calls,
            config["data"],
            config["data"]["train_features_file"],
            labels_file=config["data"]["train_labels_file"],
            batch_type=train_batch_type,
            bucket_width=config["train"].get("bucket_width", 5),
            sample_buffer_size=config["train"].get(
                "sample_buffer_size", default_sample_buffer_size),
            maximum_features_length=config["train"].get("maximum_features_length"),
            maximum_labels_length=config["train"].get("maximum_labels_length")),
        max_steps=config["train"].get("train_steps"),
        hooks=train_hooks)

    eval_batch_size = config["eval"].get(
        "batch_size",
        train_batch_size if train_batch_type == "examples" else 30)
    eval_prefetch_buffer_size = config["eval"].get("prefetch_buffer_size",
                                                   eval_batch_size * 10)
    eval_num_parallel_process_calls = config["eval"].get(
        "num_parallel_process_calls", train_num_parallel_process_calls)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=model.input_fn(
            tf.estimator.ModeKeys.EVAL,
            eval_batch_size,
            eval_prefetch_buffer_size,
            eval_num_parallel_process_calls,
            config["data"],
            config["data"]["eval_features_file"],
            labels_file=config["data"]["eval_labels_file"]),
        steps=None,
        hooks=eval_hooks,
        exporters=tf.estimator.LatestExporter(
            "latest", model.serving_input_fn(config["data"])),
        throttle_secs=config["eval"].get("eval_delay", 18000))

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #6
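A `model_fn` variant of Example #2 that only attaches the `CountersHook` when a run `config` is available, supports freezing parts of the network via a `freeze` entry in `params` (passing the remaining variables to `optimize_loss` as `var_list`), initializes any extra optimizer variables through a hook, and forwards an example `index` feature so predictions can be reordered.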
        def _model_fn(features, labels, params, mode, config):
            """model_fn implementation."""
            if mode == tf.estimator.ModeKeys.TRAIN:
                counters = self._register_word_counters(features, labels)
                training_hooks = []
                if config is not None:
                    training_hooks.append(
                        hooks.CountersHook(
                            every_n_steps=config.save_summary_steps,
                            output_dir=config.model_dir,
                            counters=counters))

                features_shards = dispatcher.shard(features)
                labels_shards = dispatcher.shard(labels)

                with tf.variable_scope(self.name,
                                       initializer=self._initializer(params)):
                    losses_shards = dispatcher(_loss_op, features_shards,
                                               labels_shards, params, mode,
                                               config)

                loss = _extract_loss(losses_shards)

                freeze_params = params.get("freeze")
                if freeze_params is not None:
                    var_list = self._get_variables(freeze_params)

                    train_op, extra_variables = optimize_loss(
                        loss,
                        params,
                        mixed_precision=(self.dtype == tf.float16),
                        var_list=var_list)
                else:
                    train_op, extra_variables = optimize_loss(
                        loss,
                        params,
                        mixed_precision=(self.dtype == tf.float16))

                if extra_variables:
                    training_hooks.append(
                        hooks.VariablesInitializerHook(extra_variables))
                return tf.estimator.EstimatorSpec(
                    mode,
                    loss=loss,
                    train_op=train_op,
                    training_hooks=training_hooks)
            elif mode == tf.estimator.ModeKeys.EVAL:
                with tf.variable_scope(self.name):
                    logits, predictions = self._build(features,
                                                      labels,
                                                      params,
                                                      mode,
                                                      config=config)
                    loss = self._compute_loss(features, labels, logits, params,
                                              mode)

                loss = _extract_loss(loss)
                eval_metric_ops = self._compute_metrics(
                    features, labels, predictions)  # pylint: disable=assignment-from-none
                evaluation_hooks = []
                if predictions is not None and eval_prediction_hooks_fn is not None:
                    evaluation_hooks.extend(
                        eval_prediction_hooks_fn(predictions))
                return tf.estimator.EstimatorSpec(
                    mode,
                    loss=loss,
                    eval_metric_ops=eval_metric_ops,
                    evaluation_hooks=evaluation_hooks)
            elif mode == tf.estimator.ModeKeys.PREDICT:
                with tf.variable_scope(self.name):
                    _, predictions = self._build(features,
                                                 labels,
                                                 params,
                                                 mode,
                                                 config=config)

                # Forward example index for reordering predictions.
                if "index" in features:
                    predictions["index"] = features["index"]

                export_outputs = {
                    tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                        tf.estimator.export.PredictOutput(predictions)
                }

                return tf.estimator.EstimatorSpec(
                    mode,
                    predictions=predictions,
                    export_outputs=export_outputs)
            else:
                raise RuntimeError("Invalid mode")
Example #7
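Essentially the same `model_fn` as Example #6 in two-space-indented form, with work-in-progress TODO notes about making the frozen variable list architecture-dependent.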
    def _model_fn(features, labels, params, mode, config):
      """model_fn implementation."""
      if mode == tf.estimator.ModeKeys.TRAIN:
        counters = self._register_word_counters(features, labels)
        training_hooks = []
        if config is not None:
          training_hooks.append(hooks.CountersHook(
              every_n_steps=config.save_summary_steps,
              output_dir=config.model_dir,
              counters=counters))

        features_shards = dispatcher.shard(features)
        labels_shards = dispatcher.shard(labels)

        with tf.variable_scope(self.name, initializer=self._initializer(params)):
          losses_shards = dispatcher(
              _loss_op, features_shards, labels_shards, params, mode, config)

        loss = _extract_loss(losses_shards)

        # TODO: make the frozen variable list architecture-dependent: LSTM and
        # Transformer models use different variable lists, so before selecting
        # variables, check that they exist in the checkpoint.
        freeze_params = params.get("freeze")
        if freeze_params is not None:
          tf.logging.info(
              "Optimizing selected network components: %s", freeze_params)
          var_list = self._get_variables(freeze_params)
          train_op, extra_variables = optimize_loss(
              loss, params, mixed_precision=(self.dtype == tf.float16),
              var_list=var_list)
        else:
          train_op, extra_variables = optimize_loss(
              loss, params, mixed_precision=(self.dtype == tf.float16))

        # TODO: simpler approach: when "freeze" is unset pass var_list=None,
        # otherwise pass the selected variables, removing the branch above.

        if extra_variables:
          training_hooks.append(hooks.VariablesInitializerHook(extra_variables))
        return tf.estimator.EstimatorSpec(
            mode,
            loss=loss,
            train_op=train_op,
            training_hooks=training_hooks)
      elif mode == tf.estimator.ModeKeys.EVAL:
        with tf.variable_scope(self.name):
          logits, predictions = self._build(features, labels, params, mode, config=config)
          loss = self._compute_loss(features, labels, logits, params, mode)

        loss = _extract_loss(loss)
        eval_metric_ops = self._compute_metrics(features, labels, predictions)  # pylint: disable=assignment-from-none
        evaluation_hooks = []
        if predictions is not None and eval_prediction_hooks_fn is not None:
          evaluation_hooks.extend(eval_prediction_hooks_fn(predictions))
        return tf.estimator.EstimatorSpec(
            mode,
            loss=loss,
            eval_metric_ops=eval_metric_ops,
            evaluation_hooks=evaluation_hooks)
      elif mode == tf.estimator.ModeKeys.PREDICT:
        with tf.variable_scope(self.name):
          _, predictions = self._build(features, labels, params, mode, config=config)

        # Forward example index for reordering predictions.
        if "index" in features:
          predictions["index"] = features["index"]

        export_outputs = {}
        export_outputs[tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = (
            tf.estimator.export.PredictOutput(predictions))

        return tf.estimator.EstimatorSpec(
            mode,
            predictions=predictions,
            export_outputs=export_outputs)
      else:
        raise RuntimeError("Invalid mode")