def model_fn(features, labels, mode, params):
  """Defines how to train, evaluate and predict from the transformer model."""
  with tf.variable_scope("model"):
    inputs, targets = features, labels

    # Create model and get output logits.
    model = transformer.Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)
    output = model(inputs, targets)

    # When in prediction mode, the labels/targets is None. The model output
    # is the prediction
    if mode == tf.estimator.ModeKeys.PREDICT:
      return tf.estimator.EstimatorSpec(
          tf.estimator.ModeKeys.PREDICT, predictions=output)

    logits = output

    # Calculate model loss.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, targets, params.label_smoothing, params.vocab_size)
    loss = tf.reduce_sum(xentropy * weights) / tf.reduce_sum(weights)

    if mode == tf.estimator.ModeKeys.EVAL:
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, predictions={"predictions": logits},
          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
    else:
      train_op = get_train_op(loss, params)
      return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
def compute_bow_loss(latent_sample, targets, params, train):
  """Computes the bag-of-words auxiliary loss from the latent sample.

  Args:
    latent_sample: size [batch_size, hidden_size]
    targets: size [batch_size, length]
  """
  with tf.variable_scope("bow_decoder"):
    # Feed-forward bag-of-words decoder.
    bow_ffn_layer = ffn_layer.FeedFowardNetwork(
        params["latent_size"], params["filter_size"], params["relu_dropout"],
        train, params["allow_ffn_pad"], output_size=params["vocab_size"],
        activation=tf.nn.relu)
    expd_lv = tf.expand_dims(latent_sample, axis=1)  # [batch_size, 1, hidden_size]
    bow_logits = bow_ffn_layer(expd_lv, padding=None)  # [batch_size, 1, vocab_size]
    length = tf.shape(targets)[1]
    tile_bow_logits = tf.tile(
        bow_logits, [1, length, 1])  # [batch_size, length, vocab_size]

    # Compute loss.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        tile_bow_logits, targets, params["label_smoothing"],
        params["vocab_size"])
    # Note: this sums the per-token loss over all positions and the whole
    # batch; no per-sentence or per-batch averaging is applied here.
    bow_predict_loss = tf.reduce_sum(xentropy)
    return bow_predict_loss
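# A hedged usage sketch for compute_bow_loss (not from the original source).
# The shapes and hyperparameter values below are illustrative assumptions
# that merely match the keys the function reads.
def _bow_loss_usage_example():
  params = {
      "latent_size": 64, "filter_size": 256, "relu_dropout": 0.1,
      "allow_ffn_pad": True, "vocab_size": 1000, "label_smoothing": 0.1,
  }
  latent_sample = tf.random_normal([8, 64])  # [batch_size, hidden_size]
  targets = tf.random_uniform([8, 12], maxval=1000, dtype=tf.int32)
  return compute_bow_loss(latent_sample, targets, params, train=True)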
def tf_loss_fn(logits, targets, label_smoothing, vocab_size):
  xentropy, weights = tf_metrics.padded_cross_entropy_loss(
      logits, targets, label_smoothing, vocab_size)
  # Compute the weighted mean of the cross entropy losses.
  loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)
  return loss, xentropy, weights
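# A hedged sketch of the contract tf_loss_fn assumes from
# padded_cross_entropy_loss (the real implementation lives in the metrics
# module): per-token label-smoothed cross entropy plus 0/1 weights that mask
# padding, so reduce_sum(xentropy) / reduce_sum(weights) averages over real
# tokens only. This simplified version omits the normalizing constant the
# reference implementation subtracts, and assumes padding id 0.
def padded_cross_entropy_loss_sketch(logits, labels, smoothing, vocab_size):
  confidence = 1.0 - smoothing
  low_confidence = smoothing / float(vocab_size - 1)
  soft_targets = tf.one_hot(labels, depth=vocab_size,
                            on_value=confidence, off_value=low_confidence)
  xentropy = tf.nn.softmax_cross_entropy_with_logits_v2(
      labels=soft_targets, logits=logits)
  weights = tf.to_float(tf.not_equal(labels, 0))  # padding token id assumed 0
  return xentropy * weights, weights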
def create_tower_network(model, params, features, labels):
  print("features print: ", features)
  print("labels print: ", labels)
  with tf.variable_scope('forward_and_backward', reuse=False):
    logits = model(features, labels)
    logits.set_shape(labels.shape.as_list() + logits.shape.as_list()[2:])
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, labels, params["label_smoothing"], params["vocab_size"])
    loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)
    return logits, loss
def model_fn(features, labels, mode, params):
  """Defines how to train, evaluate and predict from the transformer model."""
  with tf.variable_scope("model"):
    inputs, targets = features, labels

    # Create model and get output logits.
    model = transformer.Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)
    output = model(inputs, targets)

    # When in prediction mode, the labels/targets is None. The model output
    # is the prediction
    if mode == tf.estimator.ModeKeys.PREDICT:
      return tf.estimator.EstimatorSpec(
          tf.estimator.ModeKeys.PREDICT, predictions=output)

    logits = output

    # Calculate model loss.
    # xentropy contains the cross entropy loss of every nonpadding token in the
    # targets.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, targets, params.label_smoothing, params.vocab_size)
    # Compute the weighted mean of the cross entropy losses.
    loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

    # Save loss as named tensor that will be logged with the logging hook.
    tf.identity(loss, "cross_entropy")

    if mode == tf.estimator.ModeKeys.EVAL:
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, predictions={"predictions": logits},
          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
    else:
      train_op = get_train_op(loss, params)
      return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
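# A hedged sketch of the get_learning_rate helper that the tower function
# below and other snippets call but none define. It is assumed to follow the
# usual Transformer "Noam" schedule: scale by hidden_size ** -0.5, linear
# warmup, then inverse-square-root decay. The name carries a _sketch suffix
# to mark it as an assumption rather than the repo's actual implementation.
def get_learning_rate_sketch(learning_rate, hidden_size,
                             learning_rate_warmup_steps):
  with tf.name_scope("learning_rate"):
    warmup_steps = tf.to_float(learning_rate_warmup_steps)
    step = tf.to_float(tf.train.get_or_create_global_step())
    lr = learning_rate * (hidden_size ** -0.5)
    lr *= tf.minimum(1.0, step / warmup_steps)      # linear warmup
    lr *= tf.rsqrt(tf.maximum(step, warmup_steps))  # inverse sqrt decay
    return lr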
def _tower_fn(model, features, labels, params):
  logits = model(features, labels)
  xentropy, weights = metrics.padded_cross_entropy_loss(
      logits, labels, params["label_smoothing"], params["vocab_size"])
  tower_loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

  with tf.variable_scope("get_train_op"):
    learning_rate = get_learning_rate(
        learning_rate=params["learning_rate"],
        hidden_size=params["hidden_size"],
        learning_rate_warmup_steps=params["learning_rate_warmup_steps"])
    optimizer = tf.contrib.opt.LazyAdamOptimizer(
        learning_rate,
        beta1=params["optimizer_adam_beta1"],
        beta2=params["optimizer_adam_beta2"],
        epsilon=params["optimizer_adam_epsilon"])
    model_params = tf.trainable_variables()
    # tower_grad = optimizer.compute_gradients(
    #     tower_loss, model_params, colocate_gradients_with_ops=True)
    tower_grad = tf.gradients(tower_loss, model_params)
    return tower_loss, zip(tower_grad, model_params), logits
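# A hedged companion sketch (assumed driver code, not from the repo):
# _tower_fn returns per-tower (gradient, variable) pairs, and a multi-GPU
# training loop would typically average the gradients across towers before
# applying them with the optimizer constructed above.
def average_tower_gradients(tower_grads_and_vars):
  """tower_grads_and_vars: one [(grad, var), ...] list per tower."""
  averaged = []
  for pairs in zip(*tower_grads_and_vars):  # same variable across towers
    grads = [g for g, _ in pairs if g is not None]
    mean_grad = tf.reduce_mean(tf.stack(grads), axis=0) if grads else None
    averaged.append((mean_grad, pairs[0][1]))
  return averaged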
def model_fn(features, labels, mode, params):
  """Defines how to train, evaluate and predict from the transformer model."""
  with tf.variable_scope("model"):
    inputs, targets = features, labels

    # Create model and get output logits.
    model = transformer.Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)
    logits = model(inputs, targets)

    # When in prediction mode, the labels/targets is None. The model output
    # is the prediction
    if mode == tf.estimator.ModeKeys.PREDICT:
      if params["use_tpu"]:
        raise NotImplementedError("Prediction is not yet supported on TPUs.")
      return tf.estimator.EstimatorSpec(
          tf.estimator.ModeKeys.PREDICT,
          predictions=logits,
          export_outputs={
              "translate": tf.estimator.export.PredictOutput(logits)
          })

    # Explicitly set the shape of the logits for XLA (TPU). This is needed
    # because the logits are passed back to the host VM CPU for metric
    # evaluation, and the shape of [?, ?, vocab_size] is too vague. However
    # it is known from Transformer that the first two dimensions of logits
    # are the dimensions of targets. Note that the ambiguous shape of logits is
    # not a problem when computing xentropy, because padded_cross_entropy_loss
    # resolves the shape on the TPU.
    logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

    # Calculate model loss.
    # xentropy contains the cross entropy loss of every nonpadding token in the
    # targets.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, targets, params["label_smoothing"], params["vocab_size"])
    loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

    # Save loss as named tensor that will be logged with the logging hook.
    tf.identity(loss, "cross_entropy")

    if mode == tf.estimator.ModeKeys.EVAL:
      if params["use_tpu"]:
        # host call functions should only have tensors as arguments.
        # This lambda pre-populates params so that metric_fn is
        # TPUEstimator compliant.
        metric_fn = lambda logits, labels: (
            metrics.get_eval_metrics(logits, labels, params=params))
        eval_metrics = (metric_fn, [logits, labels])
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, predictions={"predictions": logits},
            eval_metrics=eval_metrics)
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, predictions={"predictions": logits},
          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
    else:
      train_op, metric_dict = get_train_op_and_metrics(loss, params)

      # Epochs can be quite long. This gives some intermediate information
      # in TensorBoard.
      metric_dict["minibatch_loss"] = loss
      if params["use_tpu"]:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            host_call=tpu_util.construct_scalar_host_call(
                metric_dict=metric_dict, model_dir=params["model_dir"],
                prefix="training/"))

      #### domyoung 2019.10.1 ####
      # record_scalars(metric_dict)
      for key, value in metric_dict.items():
        tf.summary.scalar(name=key, tensor=value)
        tf.logging.info(key)
      summary_hook = tf.train.SummarySaverHook(
          save_steps=20,
          output_dir=params["model_dir"],
          summary_op=tf.summary.merge_all())
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, train_op=train_op,
          training_hooks=[summary_hook])
def model_fn(features, labels, mode, params):
  """Defines how to train, evaluate and predict from the transformer model."""
  with tf.variable_scope("model"):
    inputs, targets = features, labels

    # Create model and get output logits.
    model = transformer.Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)
    logits = model(inputs, targets)

    # When in prediction mode, the labels/targets is None. The model output
    # is the prediction
    if mode == tf.estimator.ModeKeys.PREDICT:
      if params["use_tpu"]:
        raise NotImplementedError("Prediction is not yet supported on TPUs.")
      return tf.estimator.EstimatorSpec(
          tf.estimator.ModeKeys.PREDICT,
          predictions=logits,
          export_outputs={
              "translate": tf.estimator.export.PredictOutput(logits)
          })

    # Explicitly set the shape of the logits for XLA (TPU). This is needed
    # because the logits are passed back to the host VM CPU for metric
    # evaluation, and the shape of [?, ?, vocab_size] is too vague. However
    # it is known from Transformer that the first two dimensions of logits
    # are the dimensions of targets. Note that the ambiguous shape of logits is
    # not a problem when computing xentropy, because padded_cross_entropy_loss
    # resolves the shape on the TPU.
    logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

    # Calculate model loss.
    # xentropy contains the cross entropy loss of every nonpadding token in the
    # targets.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, targets, params["label_smoothing"], params["vocab_size"])
    loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

    # Save loss as named tensor that will be logged with the logging hook.
    tf.identity(loss, "cross_entropy")

    if mode == tf.estimator.ModeKeys.EVAL:
      if params["use_tpu"]:
        # host call functions should only have tensors as arguments.
        # This lambda pre-populates params so that metric_fn is
        # TPUEstimator compliant.
        metric_fn = lambda logits, labels: (
            metrics.get_eval_metrics(logits, labels, params=params))
        eval_metrics = (metric_fn, [logits, labels])
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, predictions={"predictions": logits},
            eval_metrics=eval_metrics)
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, predictions={"predictions": logits},
          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
    else:
      train_op, metric_dict = get_train_op_and_metrics(loss, params)

      # Epochs can be quite long. This gives some intermediate information
      # in TensorBoard.
      metric_dict["minibatch_loss"] = loss
      if params["use_tpu"]:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            host_call=tpu_util.construct_scalar_host_call(
                metric_dict=metric_dict, model_dir=params["model_dir"],
                prefix="training/"))
      record_scalars(metric_dict)
      return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
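# record_scalars is called by several model_fn variants here but defined in
# none of them. A minimal sketch consistent with how metric_dict is used
# (one scalar summary per entry) would be the following; this is an assumed
# reconstruction, not the repo's actual helper.
def record_scalars(metric_dict):
  for key, value in metric_dict.items():
    tf.summary.scalar(name=key, tensor=value)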
def model_fn(features, labels, mode, params):
  """Defines how to train, evaluate and predict from the transformer model."""
  with tf.variable_scope("model"):
    inputs, targets = features, labels

    # Create model and get output logits.
    train = (mode == tf.estimator.ModeKeys.TRAIN)
    #model = transformer.Transformer(params, train)
    #model = transformer2.Transformer(params, train)
    model = transformer3.Transformer(params, train)
    logits, latent_sample, prior_mu, prior_logvar, recog_mu, recog_logvar = model(
        inputs, targets)

    # debug
    #print('latent_sample.shape', tf.shape(latent_sample))
    #print('latent_sample.shape', latent_sample.shape[-1].value)
    #exit()

    # When in prediction mode, labels/targets are None. The model output
    # is the prediction.
    if mode == tf.estimator.ModeKeys.PREDICT:
      if params["use_tpu"]:
        raise NotImplementedError("Prediction is not yet supported on TPUs.")
      return tf.estimator.EstimatorSpec(
          tf.estimator.ModeKeys.PREDICT,
          predictions=logits,
          export_outputs={
              "translate": tf.estimator.export.PredictOutput(logits)
          })

    # Explicitly set the shape of the logits for XLA (TPU). This is needed
    # because the logits are passed back to the host VM CPU for metric
    # evaluation, and the shape of [?, ?, vocab_size] is too vague. However
    # it is known from Transformer that the first two dimensions of logits
    # are the dimensions of targets. Note that the ambiguous shape of logits is
    # not a problem when computing xentropy, because padded_cross_entropy_loss
    # resolves the shape on the TPU.
    logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

    # Calculate model loss.
    # xentropy contains the cross entropy loss of every nonpadding token in the
    # targets. Sizes:
    #   xentropy: [batch_size, max(length_logits, length_labels)]
    #   weights: [batch_size, max(length_logits, length_labels)], 0 or 1
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, targets, params["label_smoothing"], params["vocab_size"])
    #loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)
    real_batch_size = tf.to_float(tf.shape(logits)[0])  # get batch_size
    if params["word_avg"]:
      # 1. First average within each sentence over words;
      # 2. then average over the batch by sample.
      predict_loss_avg_in_sentence = tf.reduce_sum(
          xentropy, axis=1) / tf.reduce_sum(weights, axis=1)
      predict_loss = tf.reduce_sum(
          predict_loss_avg_in_sentence) / real_batch_size
    else:
      predict_loss = tf.reduce_sum(xentropy) / real_batch_size

    if train:  # train mode
      # If use_std is set, gaussian_kld_v2 interprets the 'logvar' arguments
      # as standard deviations instead of log-variances.
      if params["use_std"]:
        kl_loss = gaussian_kld_v2(recog_mu, recog_logvar, prior_mu,
                                  prior_logvar)
      else:
        kl_loss = gaussian_kld(recog_mu, recog_logvar, prior_mu, prior_logvar)
      kl_loss = tf.reduce_sum(kl_loss) / real_batch_size
      tf.identity(kl_loss, "kl_loss")

      # annealing
      if params["kl_weight"] == 'sigmoid':
        scaled_x = (tf.to_float(tf.train.get_or_create_global_step()) /
                    params["full_kl_steps"] - 0.5) * 20.0
        kl_loss_weight = 1.0 / (1 + tf.exp(-scaled_x))  # sigmoid weight
      elif params["kl_weight"] == 'linear':
        kl_loss_weight = tf.minimum(
            tf.to_float(tf.train.get_or_create_global_step()) /
            params["full_kl_steps"], 1.0)  # linear weight
      else:
        kl_loss_weight = 1.0
      weighted_kl_loss = kl_loss * kl_loss_weight
      tf.identity(weighted_kl_loss, "weighted_kl_loss")
      tf.identity(kl_loss_weight, "kl_loss_weight")

      if params["use_bow"]:
        bow_loss = compute_bow_loss(latent_sample, targets, params, train)
        loss = predict_loss + weighted_kl_loss + bow_loss  # total loss
        tf.identity(bow_loss, "bow_loss")
        #TENSORS_TO_LOG["bow_loss"] = "model/bow_loss"
      else:
        loss = predict_loss + weighted_kl_loss
    else:  # eval and infer modes
      loss = predict_loss

    # Save losses as named tensors that will be logged with the logging hook.
    tf.identity(predict_loss, "predict_loss")
    tf.identity(loss, "cross_entropy")  # total loss

    if mode == tf.estimator.ModeKeys.EVAL:
      if params["use_tpu"]:
        # host call functions should only have tensors as arguments.
        # functools.partial() pre-populates params so that metric_fn is
        # TPUEstimator compliant.
        metric_fn = functools.partial(metrics.get_eval_metrics, params=params)
        eval_metrics = (metric_fn, [logits, labels])
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, predictions={"predictions": logits},
            eval_metrics=eval_metrics)
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, predictions={"predictions": logits},
          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
    else:
      train_op, metric_dict = get_train_op_and_metrics(loss, params)

      # Epochs can be quite long. This gives some intermediate information
      # in TensorBoard.
      #metric_dict["minibatch_loss"] = loss
      metric_dict["predict_loss"] = predict_loss
      metric_dict["kl_loss"] = kl_loss
      if params["use_bow"]:
        metric_dict["bow_loss"] = bow_loss
      if params["kl_weight"]:
        metric_dict["weighted_kl_loss"] = weighted_kl_loss
        metric_dict["kl_loss_weight"] = kl_loss_weight
      if params["use_tpu"]:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            host_call=tpu_util.construct_scalar_host_call(
                metric_dict=metric_dict, model_dir=params["model_dir"],
                prefix="training/"))
      record_scalars(metric_dict)
      return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
def model_fn(features, labels, mode, params):
  """Defines how to train, evaluate and predict from the transformer model."""
  #tf.set_random_seed(1367)
  with tf.variable_scope("model"):
    inputs, targets = features, labels
    concrete_loss = tf.constant(0)
    total_loss = tf.constant(0)
    concrete_reg = tf.constant(0)
    sparsity_rate = tf.constant(0)
    gate_values = tf.constant(0)

    # =================== For concrete gates ==================================
    print("**** concrete heads has this : {} ****".format(
        params["concrete_heads"]))
    if not params["concrete_coef"] == 0:
      tf.get_default_graph().clear_collection("CONCRETE")
      tf.get_default_graph().clear_collection(
          tf.GraphKeys.REGULARIZATION_LOSSES)
    # =========================================================================

    # Create model and get output logits.
    model = transformer.Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)
    logits = model(inputs, targets)
    #print('logits')
    #print(len(logits))

    # When in prediction mode, the labels/targets is None. The model output
    # is the prediction
    if mode == tf.estimator.ModeKeys.PREDICT:
      if params["use_tpu"]:
        raise NotImplementedError("Prediction is not yet supported on TPUs.")
      print("Logits", logits)
      #print(logits["attn_weights"],
      #      tf.transpose(tf.stack(logits["attn_weights"]).get_shape(),
      #                   perm=[1, 0, 2, 3, 4]))
      return tf.estimator.EstimatorSpec(
          tf.estimator.ModeKeys.PREDICT,
          predictions={"outputs": logits["outputs"],
                       "scores": logits["scores"]})
      #export_outputs={
      #    "translate": tf.estimator.export.PredictOutput(logits["outputs"])
      #})

    # Explicitly set the shape of the logits for XLA (TPU). This is needed
    # because the logits are passed back to the host VM CPU for metric
    # evaluation, and the shape of [?, ?, vocab_size] is too vague. However
    # it is known from Transformer that the first two dimensions of logits
    # are the dimensions of targets. Note that the ambiguous shape of logits is
    # not a problem when computing xentropy, because padded_cross_entropy_loss
    # resolves the shape on the TPU.
    logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

    # Calculate model loss.
    # xentropy contains the cross entropy loss of every nonpadding token in the
    # targets.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, targets, params["label_smoothing"], params["vocab_size"])
    loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

    # Save loss as named tensor that will be logged with the logging hook.
    tf.identity(loss, "cross_entropy")

    # ============ Loss for concrete gates =================
    if not params["concrete_coef"] == 0:
      concrete_coef = params["concrete_coef"]
      sparsity_rate = tf.reduce_mean(tf.get_collection("CONCRETE"))
      concrete_reg = tf.reduce_mean(
          tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
      concrete_loss = concrete_coef * tf.reduce_mean(concrete_reg)
      total_loss = loss + concrete_loss
      gate_values = tf.get_collection("GATEVALUES")
      tf.identity(concrete_loss, "concrete_loss")
      tf.identity(total_loss, "total_loss")
      tf.identity(concrete_reg, "concrete_reg")
      tf.identity(sparsity_rate, "sparsity_rate")
      tf.identity(gate_values, "gate_values")
      loss = total_loss
    else:
      tf.identity(concrete_loss, "concrete_loss")
      tf.identity(total_loss, "total_loss")
      tf.identity(concrete_reg, "concrete_reg")
      tf.identity(sparsity_rate, "sparsity_rate")
      tf.identity(gate_values, "gate_values")
    # =======================================================

    if mode == tf.estimator.ModeKeys.EVAL:
      if params["use_tpu"]:
        # host call functions should only have tensors as arguments.
        # This lambda pre-populates params so that metric_fn is
        # TPUEstimator compliant.
        metric_fn = lambda logits, labels: (
            metrics.get_eval_metrics(logits, labels, params=params))
        eval_metrics = (metric_fn, [logits, labels])
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, predictions={"predictions": logits},
            eval_metrics=eval_metrics)
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, predictions={"predictions": logits},
          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
    else:
      train_op, metric_dict = get_train_op_and_metrics(loss, params)

      # Epochs can be quite long. This gives some intermediate information
      # in TensorBoard.
      metric_dict["minibatch_loss"] = loss
      if params["use_tpu"]:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            host_call=tpu_util.construct_scalar_host_call(
                metric_dict=metric_dict, model_dir=params["model_dir"],
                prefix="training/"))
      record_scalars(metric_dict)
      return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
def model_fn(features, mode, params):
  # Extend the default parameter dict with user-supplied key/value pairs.
  _params = Transformer_params.copy()
  for k in params:
    v = params[k]
    _params[k] = v
  params = _params

  if mode == tf.estimator.ModeKeys.PREDICT:
    features['answer'] = None

  # Define the transformer.
  transformer = Transformer(params, (mode == tf.estimator.ModeKeys.TRAIN))
  logits = transformer(features['question'], features['answer'])

  if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
    # Compute the loss between the network output logits and the ground-truth
    # answer.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, features['answer'], params["label_smoothing"],
        params["vocab_size"])
    loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

    # Minimize the loss.
    learning_rate = get_learning_rate(params['learning_rate'],
                                      params['hidden_size'],
                                      params['learning_rate_warmup_steps'])
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate,
        beta1=params['optimizer_adam_beta1'],
        beta2=params['optimizer_adam_beta2'],
        epsilon=params['optimizer_adam_epsilon'])
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Every 100 steps, log the argmax of the logits and the answer.
    logging_hook = tf.train.LoggingTensorHook(
        {
            "logitmax": tf.argmax(logits[0], -1),
            "answer": features['answer'][0]
        },
        every_n_iter=100)

    # Compute and report several metrics (accuracy, BLEU score, ...).
    eval_metric_ops = metrics.get_eval_metrics(logits, features['answer'],
                                               params)
    tensors_to_log = {}
    for k in eval_metric_ops:
      tensors_to_log[k.split('/')[-1]] = eval_metric_ops[k][1].name
      tf.summary.scalar(k.split('/')[-1], eval_metric_ops[k][1])
    # Add the learning rate to the logged tensors rather than replacing the
    # metric entries collected above.
    tensors_to_log['learning_rate'] = learning_rate
    tf.summary.scalar('learning_rate', learning_rate)
    train_hooks = hooks_helper.get_train_hooks(
        ['LoggingTensorHook'],
        model_dir=params['model_dir'],
        tensors_to_log=tensors_to_log,
        batch_size=params['batch_size'],
        use_tpu=params["use_tpu"])

    # Train.
    if mode == tf.estimator.ModeKeys.TRAIN:
      return tf.estimator.EstimatorSpec(
          mode, loss=loss, train_op=train_op, predictions=logits,
          training_hooks=[logging_hook] + train_hooks,
          eval_metric_ops=eval_metric_ops)
    # Evaluate.
    elif mode == tf.estimator.ModeKeys.EVAL:
      return tf.estimator.EstimatorSpec(
          mode, loss=loss, predictions=logits,
          eval_metric_ops=eval_metric_ops)
  # Predict.
  else:
    # Save summaries during prediction as well.
    summary_hook = tf.train.SummarySaverHook(
        save_secs=1000,
        output_dir='./output/ckpt/pred',
        scaffold=tf.train.Scaffold(summary_op=tf.summary.merge_all()))
    return tf.estimator.EstimatorSpec(
        mode, predictions=logits, prediction_hooks=[summary_hook])
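# A hedged end-to-end sketch (not from the original source) showing how a
# model_fn with the standard (features, labels, mode, params) signature above
# plugs into the TF 1.x Estimator API. `train_input_fn`, `eval_input_fn`, and
# `my_params` are placeholder names for illustration only.
estimator = tf.estimator.Estimator(
    model_fn=model_fn, model_dir="./output/ckpt", params=my_params)
estimator.train(input_fn=train_input_fn, max_steps=100000)
print(estimator.evaluate(input_fn=eval_input_fn))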