def test_update_metric_by_small_chunks(self):
    labels = np.random.randint(0, 2, 1234)
    preds = np.random.random(1234)
    auc = tf.keras.metrics.AUC()
    auc.update_state(labels, preds)
    auc_value_0 = auc.result()
    auc.reset_states()
    EvaluationMetrics._update_metric_by_small_chunk(auc, labels, preds)
    auc_value_1 = auc.result()
    self.assertEqual(auc_value_0, auc_value_1)
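# The chunked-update helper exercised by the test above is not shown in this
# excerpt. A minimal sketch of what it might look like, assuming it simply
# slices the labels/predictions into fixed-size pieces and streams each piece
# through `metric.update_state` (the chunk size and exact signature are
# assumptions, not the library's actual implementation):
def _update_metric_by_small_chunk(metric, labels, predictions, chunk_size=256):
    """Update a Keras metric in small chunks to limit peak memory usage."""
    for start in range(0, len(labels), chunk_size):
        end = start + chunk_size
        metric.update_state(labels[start:end], predictions[start:end])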
def _evaluate(self, dataset):
    if dataset is None:
        logger.info("No validation dataset is configured")
        return
    eval_metrics = EvaluationMetrics(self.eval_metrics_fn())
    for features, labels in dataset:
        outputs = self.model_inst.call(features)
        if not isinstance(outputs, dict):
            outputs = {MetricsDictKey.MODEL_OUTPUT: outputs}
        eval_metrics.update_evaluation_metrics(outputs, labels)
    metrics = eval_metrics.get_evaluation_summary()
    logger.info("Evaluation metrics : {}".format(metrics))
    return metrics
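# `self.eval_metrics_fn()` is expected to return the same `metrics_dict`
# structure that `EvaluationMetrics` consumes (see the docstring below). A
# hypothetical single-output definition might look like this; the metric
# names here are illustrative only:
import tensorflow as tf

def eval_metrics_fn():
    return {
        "acc": tf.keras.metrics.Accuracy(),
        "auc": tf.keras.metrics.AUC(),
    }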
class EvaluationJob(object):
    """Representation of an evaluation job"""

    def __init__(self, metrics_dict, model_version, total_tasks=-1):
        """
        Args:
            metrics_dict: A python dictionary. If the model has only one
                output, `metrics_dict` is a dictionary of
                `{metric_name: metric}`, e.g.
                `{"acc": tf.keras.metrics.Accuracy()}`.
                If the model has multiple outputs, `metrics_dict` is a
                dictionary of `{output_name: {metric_name: metric}}`, e.g.
                `{
                    "output_a": {"acc": tf.keras.metrics.Accuracy()},
                    "output_b": {"auc": tf.keras.metrics.AUC()},
                }`.
                Note that for a model with multiple outputs, each metric
                only uses one output.
            model_version: The version of the model to be evaluated.
            total_tasks: The number of evaluation tasks.
        """
        self.model_version = model_version
        self._total_tasks = total_tasks
        self._completed_tasks = 0
        self.evaluation_metrics = EvaluationMetrics(metrics_dict)

    def complete_task(self):
        self._completed_tasks += 1

    def finished(self):
        return self._completed_tasks >= self._total_tasks

    def report_evaluation_metrics(self, model_outputs_pb, labels):
        labels = pb_to_ndarray(labels)
        model_outputs = {}
        for name, tensor_pb in model_outputs_pb.items():
            model_outputs[name] = pb_to_ndarray(tensor_pb)
        self.evaluation_metrics.update_evaluation_metrics(
            model_outputs, labels
        )
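# A hypothetical end-to-end use of EvaluationJob: one job is created per model
# version, each evaluation task reports its outputs and labels as tensor
# protobufs, and the job tracks completion. `ndarray_to_pb` is assumed here as
# the serialization counterpart of `pb_to_ndarray`, and keying the outputs by
# `MetricsDictKey.MODEL_OUTPUT` mirrors the single-output path in `_evaluate`;
# both are assumptions for illustration, not confirmed by this excerpt.
import numpy as np

job = EvaluationJob(
    metrics_dict={"auc": tf.keras.metrics.AUC()},
    model_version=10,
    total_tasks=2,
)
for _ in range(2):
    labels = np.random.randint(0, 2, 64)
    outputs = np.random.random(64)
    job.report_evaluation_metrics(
        {MetricsDictKey.MODEL_OUTPUT: ndarray_to_pb(outputs)},
        ndarray_to_pb(labels),
    )
    job.complete_task()
assert job.finished()
summary = job.evaluation_metrics.get_evaluation_summary()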