def _evaluate_distributed(self, dataset):
    # Depending on the TF/Keras version, the compiled target tensors are
    # exposed as `targets` or as the private `_targets`, so check which one
    # this model has.
    if hasattr(self.model, "targets"):
        model_targets = self.model.targets
    else:
        model_targets = self.model._targets

    return evaluate_string_metrics(sess=K.get_session(),
                                   string_metrics=self.metrics_names,
                                   dataset=dataset,
                                   inputs=self.model.inputs + model_targets,
                                   targets=model_targets,
                                   outputs=self.model.outputs,
                                   loss=self.model.total_loss)
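# A hedged invocation sketch (illustrative, not part of this class): given a
# compiled Keras model wrapped by this class and a `TFDataset` whose elements
# are `(features, labels)` tuples, the distributed evaluation path above
# might be driven like this. `keras_model` and `val_dataset` are hypothetical
# names; the returned values follow `self.metrics_names`.
#
#     keras_model = KerasModel(compiled_tf_keras_model)
#     metrics = keras_model._evaluate_distributed(val_dataset)
#     # `metrics` is ordered like self.metrics_names, e.g. ["loss", "acc"].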
def evaluate(self, input_fn, eval_methods, steps=None, checkpoint_path=None):
    """Evaluates the model given evaluation data `input_fn`.

    :param input_fn: A function that constructs the input data for
        evaluation. The function should construct and return one of
        the following:
        * A `TFDataset` object, each element of which is a tuple
          `(features, labels)`.
        * A `tf.data.Dataset` object: Outputs of the `Dataset` object
          must be a tuple `(features, labels)` with the same
          constraints as below.
        * A tuple `(features, labels)`: Where `features` is a
          `tf.Tensor` or a dictionary of string feature name to
          `Tensor` and `labels` is a `Tensor` or a dictionary of
          string label name to `Tensor`. Both `features` and `labels`
          are consumed by `model_fn`. They should satisfy the
          expectation of `model_fn` from inputs.
    :param eval_methods: A list of strings specifying the evaluation
        metrics to be used for this model.
    :param steps: Number of steps for which to evaluate the model.
    :param checkpoint_path: Path of a specific checkpoint to evaluate.
        If `None`, the latest checkpoint in `model_dir` is used. If
        there are no checkpoints in `model_dir`, evaluation runs with
        newly initialized `Variables` instead of ones restored from a
        checkpoint.

    :return: A dict containing the evaluation metrics specified in
        `model_fn`, keyed by name.
    """
    if not all(isinstance(metric, six.string_types)
               for metric in eval_methods):
        raise ValueError("All metrics should be string types")

    from tensorflow_estimator.python.estimator.canned import prediction_keys
    import tensorflow as tf

    with tf.Graph().as_default():
        result = self.estimator._call_input_fn(input_fn,
                                               tf.estimator.ModeKeys.EVAL)
        # A TFDataset takes the distributed evaluation path; anything else
        # falls through to the underlying tf.estimator.Estimator below.
        if isinstance(result, TFDataset):
            spec = self._call_model_fn(result.feature_tensors,
                                       result.label_tensors,
                                       tf.estimator.ModeKeys.EVAL,
                                       self.config)
            latest_checkpoint = self.estimator.latest_checkpoint()

            if latest_checkpoint:
                checkpoint_path = latest_checkpoint

            with tf.Session() as sess:
                if checkpoint_path:
                    saver = tf.train.Saver()
                    saver.restore(sess, checkpoint_path)
                else:
                    sess.run(tf.global_variables_initializer())

                if isinstance(spec.predictions, dict):
                    if "mae" in eval_methods:
                        key = prediction_keys.PredictionKeys.PREDICTIONS
                        msg = "{} is required for evaluating mae,".format(key) + \
                              " please add it in your model_fn predictions"
                        assert key in spec.predictions, msg
                        outputs = [
                            spec.predictions[
                                prediction_keys.PredictionKeys.PREDICTIONS]
                        ]
                    else:
                        key = prediction_keys.PredictionKeys.LOGITS
                        msg = "{} is required for evaluating,".format(key) + \
                              " please add it in your model_fn predictions"
                        assert key in spec.predictions, msg
                        outputs = [
                            spec.predictions[
                                prediction_keys.PredictionKeys.LOGITS]
                        ]
                else:
                    outputs = nest.flatten(spec.predictions)
                    if len(outputs) > 1:
                        raise Exception("Evaluating on more than one output "
                                        "is not supported now")

                all_inputs = result._original_tensors
                if isinstance(all_inputs, tuple) and len(all_inputs) == 2:
                    targets = nest.flatten(all_inputs[1])
                else:
                    targets = None
                return evaluate_string_metrics(sess=sess,
                                               string_metrics=eval_methods,
                                               dataset=result,
                                               inputs=nest.flatten(all_inputs),
                                               targets=targets,
                                               outputs=outputs,
                                               loss=spec.loss)

    return self.estimator.evaluate(input_fn, steps,
                                   checkpoint_path=checkpoint_path)
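# A hedged usage sketch (not part of this class): assuming `estimator` is an
# instance of the class above and its `model_fn` exposes
# `PredictionKeys.LOGITS` in `spec.predictions`, evaluating with a
# `tf.data.Dataset` input_fn might look like this. The feature name "x" and
# the "accuracy" metric are illustrative assumptions, not part of the source.
#
#     import tensorflow as tf
#
#     def eval_input_fn():
#         # A small in-memory dataset of (features, labels) tuples.
#         features = {"x": tf.constant([[1.0], [2.0], [3.0]])}
#         labels = tf.constant([0, 1, 1])
#         return tf.data.Dataset.from_tensor_slices(
#             (features, labels)).batch(2)
#
#     metrics = estimator.evaluate(eval_input_fn, ["accuracy"])
#     print(metrics)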