Example #1
def model_fn(features, labels, mode, params):
  """Defines how to train, evaluate and predict from the transformer model."""
  with tf.variable_scope("model"):
    inputs, targets = features, labels

    # Create model and get output logits.
    model = transformer.Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)

    output = model(inputs, targets)

    # When in prediction mode, the labels/targets are None, and the model
    # output is the prediction.
    if mode == tf.estimator.ModeKeys.PREDICT:
      return tf.estimator.EstimatorSpec(
          tf.estimator.ModeKeys.PREDICT,
          predictions=output)

    logits = output

    # Calculate model loss.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, targets, params.label_smoothing, params.vocab_size)
    loss = tf.reduce_sum(xentropy * weights) / tf.reduce_sum(weights)

    if mode == tf.estimator.ModeKeys.EVAL:
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, predictions={"predictions": logits},
          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
    else:
      train_op = get_train_op(loss, params)
      return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
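
This model_fn is meant to be handed to a tf.estimator.Estimator. Below is a minimal sketch of that wiring (TF 1.x, with tensorflow imported as tf as in all the examples); the helper name, params object, input_fn, and step count are illustrative placeholders, not part of the original code:

def run_training(params, train_input_fn, model_dir):
  # The Estimator calls model_fn(features, labels, mode, params) once per mode.
  estimator = tf.estimator.Estimator(
      model_fn=model_fn, model_dir=model_dir, params=params)
  # train_input_fn must yield (features, labels) batches of token id tensors.
  estimator.train(input_fn=train_input_fn, max_steps=100000)
  return estimator
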
def compute_bow_loss(latent_sample, targets, params, train):
    """Computes a bag-of-words prediction loss from the latent sample.

    Args:
      latent_sample: float tensor of shape [batch_size, hidden_size]
      targets: int tensor of shape [batch_size, length]
      params: hyperparameter dictionary
      train: bool, whether the model is being trained
    """
    with tf.variable_scope("bow_decoder"):
        # feed forward
        bow_ffn_layer = ffn_layer.FeedFowardNetwork(
            params["latent_size"],
            params["filter_size"],
            params["relu_dropout"],
            train,
            params["allow_ffn_pad"],
            output_size=params["vocab_size"],
            activation=tf.nn.relu)
        expd_lv = tf.expand_dims(latent_sample,
                                 axis=1)  # get [batch_size, 1, hidden_size]
        bow_logits = bow_ffn_layer(
            expd_lv, padding=None)  # get [batch_size, 1, vocab_size]
        length = tf.shape(targets)[1]
        tile_bow_logits = tf.tile(
            bow_logits, [1, length, 1])  # get [batch_size, length, vocab_size]

        # compute loss
        xentropy, weights = metrics.padded_cross_entropy_loss(
            tile_bow_logits, targets, params["label_smoothing"],
            params["vocab_size"])

        # sum the bag-of-words cross entropy over all target positions and
        # over the batch (no per-token or per-sentence averaging here)
        bow_predict_loss = tf.reduce_sum(xentropy)

        return bow_predict_loss
Example #4
def tf_loss_fn(logits, targets, label_smoothing, vocab_size):
    """Computes the padded, label-smoothed cross entropy loss and its components."""
    xentropy, weights = tf_metrics.padded_cross_entropy_loss(
        logits, targets, label_smoothing, vocab_size)
    # Compute the weighted mean of the cross entropy losses
    loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

    return loss, xentropy, weights
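
Every example on this page leans on the padded_cross_entropy_loss helper from the reference Transformer code. The following is a simplified sketch of what it computes, assuming logits and labels already have matching lengths (the real helper first pads them to a common length and also subtracts a label-smoothing normalizing constant):

def padded_cross_entropy_loss_sketch(logits, labels, smoothing, vocab_size):
    # Label-smoothed targets: the true token gets `confidence`, every other
    # vocabulary entry shares the remaining probability mass.
    confidence = 1.0 - smoothing
    low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1)
    soft_targets = tf.one_hot(labels, depth=vocab_size,
                              on_value=confidence, off_value=low_confidence)
    xentropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=soft_targets)
    # Padding tokens (id 0) are masked out so they contribute nothing.
    weights = tf.to_float(tf.not_equal(labels, 0))
    return xentropy * weights, weights
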
def create_tower_network(model, params, features, labels):
  """Builds one tower: runs the model and returns its logits and weighted loss."""
  print("features print: ", features)
  print("labels print: ", labels)
  with tf.variable_scope('forward_and_backward', reuse=False):
    logits = model(features, labels)
    logits.set_shape(labels.shape.as_list() + logits.shape.as_list()[2:])
    xentropy, weights = metrics.padded_cross_entropy_loss(logits, labels, params["label_smoothing"], params["vocab_size"])
    loss = tf.reduce_sum(xentropy)/tf.reduce_sum(weights)
    return logits, loss
Example #6
def model_fn(features, labels, mode, params):
    """Defines how to train, evaluate and predict from the transformer model."""
    with tf.variable_scope("model"):
        inputs, targets = features, labels

        # Create model and get output logits.
        model = transformer.Transformer(params,
                                        mode == tf.estimator.ModeKeys.TRAIN)

        output = model(inputs, targets)

        # When in prediction mode, the labels/targets are None, and the model
        # output is the prediction.
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(tf.estimator.ModeKeys.PREDICT,
                                              predictions=output)

        logits = output

        # Calculate model loss.
        # xentropy contains the cross entropy loss of every nonpadding token in the
        # targets.
        xentropy, weights = metrics.padded_cross_entropy_loss(
            logits, targets, params.label_smoothing, params.vocab_size)
        # Compute the weighted mean of the cross entropy losses
        loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

        # Save loss as named tensor that will be logged with the logging hook.
        tf.identity(loss, "cross_entropy")

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=loss,
                predictions={"predictions": logits},
                eval_metric_ops=metrics.get_eval_metrics(
                    logits, labels, params))
        else:
            train_op = get_train_op(loss, params)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)
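
The tf.identity(loss, "cross_entropy") call above only gives the loss tensor a stable, scope-qualified name. A hook along these lines is what actually prints it during training, passed to estimator.train as in the earlier Estimator sketch; the tag, tensor name, and interval are assumptions based on the variable_scope("model") used above:

logging_hook = tf.train.LoggingTensorHook(
    tensors={"cross_entropy_loss": "model/cross_entropy"}, every_n_iter=100)
estimator.train(input_fn=train_input_fn, hooks=[logging_hook])
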
Example #7
def _tower_fn(model, features, labels, params):
    logits = model(features, labels)

    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, labels, params["label_smoothing"], params["vocab_size"])
    tower_loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)
    with tf.variable_scope("get_train_op"):
        learning_rate = get_learning_rate(
            learning_rate=params["learning_rate"],
            hidden_size=params["hidden_size"],
            learning_rate_warmup_steps=params["learning_rate_warmup_steps"])
        optimizer = tf.contrib.opt.LazyAdamOptimizer(
            learning_rate,
            beta1=params["optimizer_adam_beta1"],
            beta2=params["optimizer_adam_beta2"],
            epsilon=params["optimizer_adam_epsilon"])
        model_params = tf.trainable_variables()
        #    tower_grad = optimizer.compute_gradients(tower_loss, model_params, colocate_gradients_with_ops=True)

        tower_grad = tf.gradients(tower_loss, model_params)
        return tower_loss, zip(tower_grad, model_params), logits
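
_tower_fn returns one set of (gradient, variable) pairs per GPU tower; before they are applied, the per-tower gradients are normally averaged. A hypothetical sketch of that step (the helper name and the way towers are collected are assumptions, not part of the original code):

def average_tower_gradients(tower_grads_and_vars):
    # tower_grads_and_vars: one list of (grad, var) pairs per tower, with all
    # towers sharing the same variable order.
    averaged = []
    for pairs in zip(*tower_grads_and_vars):
        grads = [g for g, _ in pairs if g is not None]
        grad = tf.reduce_mean(tf.stack(grads, axis=0), axis=0)
        averaged.append((grad, pairs[0][1]))
    return averaged

The averaged pairs would then be passed to optimizer.apply_gradients() to build the train op.
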
def model_fn(features, labels, mode, params):
    """Defines how to train, evaluate and predict from the transformer model."""
    with tf.variable_scope("model"):
        inputs, targets = features, labels

        # Create model and get output logits.
        model = transformer.Transformer(params,
                                        mode == tf.estimator.ModeKeys.TRAIN)

        logits = model(inputs, targets)

        # When in prediction mode, the labels/targets are None, and the model
        # output is the prediction.
        if mode == tf.estimator.ModeKeys.PREDICT:
            if params["use_tpu"]:
                raise NotImplementedError(
                    "Prediction is not yet supported on TPUs.")
            return tf.estimator.EstimatorSpec(
                tf.estimator.ModeKeys.PREDICT,
                predictions=logits,
                export_outputs={
                    "translate": tf.estimator.export.PredictOutput(logits)
                })

        # Explicitly set the shape of the logits for XLA (TPU). This is needed
        # because the logits are passed back to the host VM CPU for metric
        # evaluation, and the shape of [?, ?, vocab_size] is too vague. However
        # it is known from Transformer that the first two dimensions of logits
        # are the dimensions of targets. Note that the ambiguous shape of logits is
        # not a problem when computing xentropy, because padded_cross_entropy_loss
        # resolves the shape on the TPU.
        logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

        # Calculate model loss.
        # xentropy contains the cross entropy loss of every nonpadding token in the
        # targets.
        xentropy, weights = metrics.padded_cross_entropy_loss(
            logits, targets, params["label_smoothing"], params["vocab_size"])
        loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

        # Save loss as named tensor that will be logged with the logging hook.
        tf.identity(loss, "cross_entropy")

        if mode == tf.estimator.ModeKeys.EVAL:
            if params["use_tpu"]:
                # host call functions should only have tensors as arguments.
                # This lambda pre-populates params so that metric_fn is
                # TPUEstimator compliant.
                metric_fn = lambda logits, labels: (metrics.get_eval_metrics(
                    logits, labels, params=params))
                eval_metrics = (metric_fn, [logits, labels])
                return tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    predictions={"predictions": logits},
                    eval_metrics=eval_metrics)
            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=loss,
                predictions={"predictions": logits},
                eval_metric_ops=metrics.get_eval_metrics(
                    logits, labels, params))
        else:
            train_op, metric_dict = get_train_op_and_metrics(loss, params)

            # Epochs can be quite long. This gives some intermediate information
            # in TensorBoard.
            metric_dict["minibatch_loss"] = loss
            if params["use_tpu"]:
                return tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    train_op=train_op,
                    host_call=tpu_util.construct_scalar_host_call(
                        metric_dict=metric_dict,
                        model_dir=params["model_dir"],
                        prefix="training/"))
            ####domyoung 2019.10.1####
            #record_scalars(metric_dict)
            for key, value in metric_dict.items():
                tf.summary.scalar(name=key, tensor=value)
                tf.logging.info(key)
            summary_hook = tf.train.SummarySaverHook(
                save_steps=20,
                output_dir=params["model_dir"],
                summary_op=tf.summary.merge_all())
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op,
                                              training_hooks=[summary_hook])
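
The get_learning_rate helper called in _tower_fn above (and, in the reference code, inside get_train_op_and_metrics) implements the Transformer warmup-then-inverse-square-root schedule. It is sketched here for context; the forks shown on this page may differ in detail:

def get_learning_rate(learning_rate, hidden_size, learning_rate_warmup_steps):
    """Scale by rsqrt(hidden_size), ramp up linearly over the warmup steps,
    then decay with the inverse square root of the global step."""
    with tf.name_scope("learning_rate"):
        warmup_steps = tf.to_float(learning_rate_warmup_steps)
        step = tf.to_float(tf.train.get_or_create_global_step())
        learning_rate *= (hidden_size ** -0.5)
        learning_rate *= tf.minimum(1.0, step / warmup_steps)
        learning_rate *= tf.rsqrt(tf.maximum(step, warmup_steps))
        return learning_rate
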
Example #9
def model_fn(features, labels, mode, params):
  """Defines how to train, evaluate and predict from the transformer model."""
  with tf.variable_scope("model"):
    inputs, targets = features, labels

    # Create model and get output logits.
    model = transformer.Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)

    logits = model(inputs, targets)

    # When in prediction mode, the labels/targets are None, and the model
    # output is the prediction.
    if mode == tf.estimator.ModeKeys.PREDICT:
      if params["use_tpu"]:
        raise NotImplementedError("Prediction is not yet supported on TPUs.")
      return tf.estimator.EstimatorSpec(
          tf.estimator.ModeKeys.PREDICT,
          predictions=logits,
          export_outputs={
              "translate": tf.estimator.export.PredictOutput(logits)
          })

    # Explicitly set the shape of the logits for XLA (TPU). This is needed
    # because the logits are passed back to the host VM CPU for metric
    # evaluation, and the shape of [?, ?, vocab_size] is too vague. However
    # it is known from Transformer that the first two dimensions of logits
    # are the dimensions of targets. Note that the ambiguous shape of logits is
    # not a problem when computing xentropy, because padded_cross_entropy_loss
    # resolves the shape on the TPU.
    logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

    # Calculate model loss.
    # xentropy contains the cross entropy loss of every nonpadding token in the
    # targets.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, targets, params["label_smoothing"], params["vocab_size"])
    loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

    # Save loss as named tensor that will be logged with the logging hook.
    tf.identity(loss, "cross_entropy")

    if mode == tf.estimator.ModeKeys.EVAL:
      if params["use_tpu"]:
        # host call functions should only have tensors as arguments.
        # This lambda pre-populates params so that metric_fn is
        # TPUEstimator compliant.
        metric_fn = lambda logits, labels: (
            metrics.get_eval_metrics(logits, labels, params=params))
        eval_metrics = (metric_fn, [logits, labels])
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, predictions={"predictions": logits},
            eval_metrics=eval_metrics)
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, predictions={"predictions": logits},
          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
    else:
      train_op, metric_dict = get_train_op_and_metrics(loss, params)

      # Epochs can be quite long. This gives some intermediate information
      # in TensorBoard.
      metric_dict["minibatch_loss"] = loss
      if params["use_tpu"]:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            host_call=tpu_util.construct_scalar_host_call(
                metric_dict=metric_dict, model_dir=params["model_dir"],
                prefix="training/")
        )
      record_scalars(metric_dict)
      return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
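
record_scalars is not shown in this snippet; it simply emits one scalar summary per entry of metric_dict, essentially the same loop that Example #7 above writes inline (whether it goes through tf.summary or tf.contrib.summary varies by version):

def record_scalars(metric_dict):
  for key, value in metric_dict.items():
    tf.summary.scalar(name=key, tensor=value)
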
def model_fn(features, labels, mode, params):
    """Defines how to train, evaluate and predict from the transformer model."""
    with tf.variable_scope("model"):
        inputs, targets = features, labels

        # Create model and get output logits.
        train = (mode == tf.estimator.ModeKeys.TRAIN)
        #model = transformer.Transformer(params, train)
        #model = transformer2.Transformer(params, train)
        model = transformer3.Transformer(params, train)

        logits, latent_sample, prior_mu, prior_logvar, recog_mu, recog_logvar = model(
            inputs, targets)
        # debug
        #print('latent_sample.shape', tf.shape(latent_sample))
        #print('latent_sample.shape', latent_sample.shape[-1].value)
        #exit()

        # When in prediction mode, the labels/targets are None, and the model
        # output is the prediction.
        if mode == tf.estimator.ModeKeys.PREDICT:
            if params["use_tpu"]:
                raise NotImplementedError(
                    "Prediction is not yet supported on TPUs.")
            return tf.estimator.EstimatorSpec(
                tf.estimator.ModeKeys.PREDICT,
                predictions=logits,
                export_outputs={
                    "translate": tf.estimator.export.PredictOutput(logits)
                })

        # Explicitly set the shape of the logits for XLA (TPU). This is needed
        # because the logits are passed back to the host VM CPU for metric
        # evaluation, and the shape of [?, ?, vocab_size] is too vague. However
        # it is known from Transformer that the first two dimensions of logits
        # are the dimensions of targets. Note that the ambiguous shape of logits is
        # not a problem when computing xentropy, because padded_cross_entropy_loss
        # resolves the shape on the TPU.
        logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

        # Calculate model loss.
        # xentropy contains the cross entropy loss of every nonpadding token in the
        # targets.
        xentropy, weights = metrics.padded_cross_entropy_loss(
            logits, targets, params["label_smoothing"], params["vocab_size"])
        # size:
        #   xentropy: [batch_size, max(length_logits, length_labels)]
        #   weights:  [batch_size, max(length_logits, length_labels)], 0 or 1

        #loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

        real_batch_size = tf.to_float(tf.shape(logits)[0])  # get batch_size

        if params["word_avg"]:
            predict_loss_avg_in_sentence = tf.reduce_sum(
                xentropy, axis=1) / tf.reduce_sum(weights, axis=1)
            predict_loss = tf.reduce_sum(
                predict_loss_avg_in_sentence) / real_batch_size
            # 1. first average over words within each sentence;
            # 2. then average over samples within the batch.
        else:
            predict_loss = tf.reduce_sum(xentropy) / real_batch_size

        if train:  # train mode
            # if gaussian_kld_v2 is used, the 'logvar' arguments are interpreted as standard deviations.
            if params["use_std"]:
                kl_loss = gaussian_kld_v2(recog_mu, recog_logvar, prior_mu,
                                          prior_logvar)
            else:
                kl_loss = gaussian_kld(recog_mu, recog_logvar, prior_mu,
                                       prior_logvar)
            kl_loss = tf.reduce_sum(kl_loss) / real_batch_size
            tf.identity(kl_loss, "kl_loss")
            # annealing
            if params["kl_weight"] == 'sigmoid':
                scaled_x = (tf.to_float(tf.train.get_or_create_global_step()) /
                            params["full_kl_steps"] -
                            0.5) * 20.0  # sigmoid weight
                kl_loss_weight = 1.0 / (1 + tf.exp(-scaled_x))
            elif params["kl_weight"] == 'linear':
                kl_loss_weight = tf.minimum(
                    (tf.to_float(tf.train.get_or_create_global_step()) /
                     params["full_kl_steps"]), 1.0)  # linear weight
            else:
                kl_loss_weight = 1.0

            weighted_kl_loss = kl_loss * kl_loss_weight
            tf.identity(weighted_kl_loss, "weighted_kl_loss")
            tf.identity(kl_loss_weight, "kl_loss_weight")
            if params["use_bow"]:
                bow_loss = compute_bow_loss(latent_sample, targets, params,
                                            train)
                loss = predict_loss + weighted_kl_loss + bow_loss
                tf.identity(bow_loss, "bow_loss")
                #TENSORS_TO_LOG["bow_loss"] = "model/bow_loss"
            else:
                loss = predict_loss + weighted_kl_loss
        else:  # eval and infer modes
            loss = predict_loss

        # Save loss as named tensor that will be logged with the logging hook.
        tf.identity(predict_loss, "predict_loss")
        tf.identity(loss, "cross_entropy")  # total loss

        if mode == tf.estimator.ModeKeys.EVAL:
            if params["use_tpu"]:
                # host call functions should only have tensors as arguments.
                # functools.partial() pre-populates params so that metric_fn is
                # TPUEstimator compliant.
                metric_fn = functools.partial(metrics.get_eval_metrics,
                                              params=params)
                eval_metrics = (metric_fn, [logits, labels])
                return tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    predictions={"predictions": logits},
                    eval_metrics=eval_metrics)
            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=loss,
                predictions={"predictions": logits},
                eval_metric_ops=metrics.get_eval_metrics(
                    logits, labels, params))
        else:
            train_op, metric_dict = get_train_op_and_metrics(loss, params)

            # Epochs can be quite long. This gives some intermediate information
            # in TensorBoard.
            #metric_dict["minibatch_loss"] = loss
            metric_dict["predict_loss"] = predict_loss
            metric_dict["kl_loss"] = kl_loss
            if params["use_bow"]:
                metric_dict["bow_loss"] = bow_loss
            if params["kl_weight"]:
                metric_dict["weighted_kl_loss"] = weighted_kl_loss
                metric_dict["kl_loss_weight"] = kl_loss_weight

            if params["use_tpu"]:
                return tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    train_op=train_op,
                    host_call=tpu_util.construct_scalar_host_call(
                        metric_dict=metric_dict,
                        model_dir=params["model_dir"],
                        prefix="training/"))
            record_scalars(metric_dict)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)
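
gaussian_kld and gaussian_kld_v2 are not included in this example. Below is a hedged sketch of the usual form of gaussian_kld: the KL divergence between the recognition and prior diagonal Gaussians, computed per example and summed over latent dimensions so the caller can average it over the batch. The exact parameterization used by this fork is an assumption:

def gaussian_kld(recog_mu, recog_logvar, prior_mu, prior_logvar):
    # KL( N(recog_mu, exp(recog_logvar)) || N(prior_mu, exp(prior_logvar)) ),
    # summed over the latent dimensions for each example in the batch.
    kld = -0.5 * tf.reduce_sum(
        1.0 + (recog_logvar - prior_logvar)
        - tf.square(recog_mu - prior_mu) / tf.exp(prior_logvar)
        - tf.exp(recog_logvar) / tf.exp(prior_logvar),
        axis=1)
    return kld
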
Example #11
def model_fn(features, labels, mode, params):
  """Defines how to train, evaluate and predict from the transformer model."""
  #tf.set_random_seed(1367)
  with tf.variable_scope("model"):
    inputs, targets = features, labels
    concrete_loss = tf.constant(0)
    total_loss = tf.constant(0)
    concrete_reg = tf.constant(0)
    sparsity_rate = tf.constant(0)
    gate_values = tf.constant(0)
    # =================== For concrete gates ==================================
    print("**** concrete heads has this : {} ****".format(params["concrete_heads"]))
    if params["concrete_coef"] != 0:
        tf.get_default_graph().clear_collection("CONCRETE")
        tf.get_default_graph().clear_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    # =========================================================================

    # Create model and get output logits.
    model = transformer.Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)

    logits = model(inputs, targets)
    #print('logits')
    #print(len(logits))

    # When in prediction mode, the labels/targets are None, and the model
    # output is the prediction.
    if mode == tf.estimator.ModeKeys.PREDICT:
      if params["use_tpu"]:
        raise NotImplementedError("Prediction is not yet supported on TPUs.")
      print ("Logits", logits)
      #print (logits["attn_weights"], tf.transpose(tf.stack(logits["attn_weights"]).get_shape(), perm=[1,0,2,3,4]))
      return tf.estimator.EstimatorSpec(
          tf.estimator.ModeKeys.PREDICT,
          predictions={"outputs": logits["outputs"], "scores": logits["scores"]})
          #export_outputs={
          #    "translate": tf.estimator.export.PredictOutput(logits["outputs"])
          #})

    # Explicitly set the shape of the logits for XLA (TPU). This is needed
    # because the logits are passed back to the host VM CPU for metric
    # evaluation, and the shape of [?, ?, vocab_size] is too vague. However
    # it is known from Transformer that the first two dimensions of logits
    # are the dimensions of targets. Note that the ambiguous shape of logits is
    # not a problem when computing xentropy, because padded_cross_entropy_loss
    # resolves the shape on the TPU.
    logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

    # Calculate model loss.
    # xentropy contains the cross entropy loss of every nonpadding token in the
    # targets.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, targets, params["label_smoothing"], params["vocab_size"])
    loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

    # Save loss as named tensor that will be logged with the logging hook.
    tf.identity(loss, "cross_entropy")

    # ============ Loss for concrete gates =================
    if params["concrete_coef"] != 0:
        concrete_coef = params["concrete_coef"]
        sparsity_rate = tf.reduce_mean(tf.get_collection("CONCRETE"))
        concrete_reg = tf.reduce_mean(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        concrete_loss = concrete_coef * tf.reduce_mean(concrete_reg)
        
        total_loss = loss + concrete_loss

        gate_values = tf.get_collection("GATEVALUES")
        
        tf.identity(concrete_loss, "concrete_loss")
        tf.identity(total_loss, "total_loss")
        tf.identity(concrete_reg, "concrete_reg")
        tf.identity(sparsity_rate, "sparsity_rate")
        tf.identity(gate_values, "gate_values")
        loss = total_loss
    else:
        tf.identity(concrete_loss, "concrete_loss")
        tf.identity(total_loss, "total_loss")
        tf.identity(concrete_reg, "concrete_reg")
        tf.identity(sparsity_rate, "sparsity_rate")
        tf.identity(gate_values, "gate_values")
    # =======================================================
    if mode == tf.estimator.ModeKeys.EVAL:
      if params["use_tpu"]:
        # host call functions should only have tensors as arguments.
        # This lambda pre-populates params so that metric_fn is
        # TPUEstimator compliant.
        metric_fn = lambda logits, labels: (
            metrics.get_eval_metrics(logits, labels, params=params))
        eval_metrics = (metric_fn, [logits, labels])
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, predictions={"predictions": logits},
            eval_metrics=eval_metrics)
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, predictions={"predictions": logits},
          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
    else:
      train_op, metric_dict = get_train_op_and_metrics(loss, params)

      # Epochs can be quite long. This gives some intermediate information
      # in TensorBoard.
      metric_dict["minibatch_loss"] = loss
      if params["use_tpu"]:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            host_call=tpu_util.construct_scalar_host_call(
                metric_dict=metric_dict, model_dir=params["model_dir"],
                prefix="training/")
        )
      record_scalars(metric_dict)
      return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
Example #12
def model_fn(features, mode, params):
    # Merge the user-supplied params into a copy of the default Transformer_params dict.
    _params = Transformer_params.copy()
    for k in params:
        v = params[k]
        _params[k] = v
    params = _params

    if mode == tf.estimator.ModeKeys.PREDICT: features['answer'] = None

    # define transformer
    transformer = Transformer(params, (mode == tf.estimator.ModeKeys.TRAIN))
    logits = transformer(features['question'], features['answer'])

    if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:

        # Compute the loss between the network output logits and the ground-truth answer.
        xentropy, weights = metrics.padded_cross_entropy_loss(
            logits, features['answer'], params["label_smoothing"],
            params["vocab_size"])
        loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

        # Minimize the loss.
        learning_rate = get_learning_rate(params['learning_rate'],
                                          params['hidden_size'],
                                          params['learning_rate_warmup_steps'])
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate,
            beta1=params['optimizer_adam_beta1'],
            beta2=params['optimizer_adam_beta2'],
            epsilon=params['optimizer_adam_epsilon'])
        train_op = optimizer.minimize(loss,
                                      global_step=tf.train.get_global_step())

        # Log the logitmax and answer values every 100 steps.
        logging_hook = tf.train.LoggingTensorHook(
            {
                "logitmax": tf.argmax(logits[0], -1),
                "answer": features['answer'][0]
            },
            every_n_iter=100)

        # Compute and report various metrics (accuracy, BLEU score, ...).
        eval_metric_ops = metrics.get_eval_metrics(logits, features['answer'],
                                                   params)
        tensors_to_log = {}
        for k in eval_metric_ops:
            tensors_to_log[k.split('/')[-1]] = eval_metric_ops[k][1].name
            tf.summary.scalar(k.split('/')[-1], eval_metric_ops[k][1])

        tensors_to_log['learning_rate'] = learning_rate  # add to the metrics collected above
        tf.summary.scalar('learning_rate', learning_rate)

        train_hooks = hooks_helper.get_train_hooks(
            ['LoggingTensorHook'],
            model_dir=params['model_dir'],
            tensors_to_log=tensors_to_log,
            batch_size=params['batch_size'],
            use_tpu=params["use_tpu"])
        # train
        if mode == tf.estimator.ModeKeys.TRAIN:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op,
                                              predictions=logits,
                                              training_hooks=[logging_hook] +
                                              train_hooks,
                                              eval_metric_ops=eval_metric_ops)
        # evaluate
        elif mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              predictions=logits,
                                              eval_metric_ops=eval_metric_ops)
    # predict
    else:
        # Save summaries during prediction as well.
        summary_hook = tf.train.SummarySaverHook(
            save_secs=1000,
            output_dir='./output/ckpt/pred',
            scaffold=tf.train.Scaffold(summary_op=tf.summary.merge_all()))

        return tf.estimator.EstimatorSpec(mode,
                                          predictions=logits,
                                          prediction_hooks=[summary_hook])