def test_update_metric_by_small_chunks(self):
    labels = np.random.randint(0, 2, 1234)
    preds = np.random.random(1234)

    # Baseline: feed all samples to the metric in a single update.
    auc = tf.keras.metrics.AUC()
    auc.update_state(labels, preds)
    auc_value_0 = auc.result()

    # Feeding the same samples in small chunks must yield the same result.
    auc.reset_states()
    EvaluationMetrics._update_metric_by_small_chunk(auc, labels, preds)
    auc_value_1 = auc.result()
    self.assertEqual(auc_value_0, auc_value_1)
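The helper under test isn't shown in this listing. A minimal sketch of what `_update_metric_by_small_chunk` could look like, assuming it simply splits the inputs into fixed-size chunks (the `chunk_size` parameter and the use of `np.array_split` are illustrative assumptions, not from the source):

import numpy as np

def _update_metric_by_small_chunk(metric, labels, preds, chunk_size=256):
    # Sketch: feed the metric chunk by chunk instead of all at once, which
    # bounds the peak memory of a single update_state call. Streaming
    # metrics such as tf.keras.metrics.AUC accumulate state across calls,
    # so the final result matches a single full update.
    num_chunks = int(np.ceil(len(labels) / chunk_size))
    for chunk_labels, chunk_preds in zip(
        np.array_split(labels, num_chunks), np.array_split(preds, num_chunks)
    ):
        metric.update_state(chunk_labels, chunk_preds)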
Example 2
def _evaluate(self, dataset):
    if dataset is None:
        logger.info("No validation dataset is configured")
        return
    eval_metrics = EvaluationMetrics(self.eval_metrics_fn())
    for features, labels in dataset:
        outputs = self.model_inst.call(features)
        # Metrics are keyed by output name; wrap a single-output model's
        # result in a dict under the default output key.
        if not isinstance(outputs, dict):
            outputs = {MetricsDictKey.MODEL_OUTPUT: outputs}
        eval_metrics.update_evaluation_metrics(outputs, labels)
    metrics = eval_metrics.get_evaluation_summary()
    logger.info("Evaluation metrics: {}".format(metrics))
    return metrics
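For context, `eval_metrics_fn` is expected to return the `metrics_dict` structure documented in the next example. A hypothetical single-output version (the metric choices here are illustrative, not from the source):

import tensorflow as tf

def eval_metrics_fn():
    # Hypothetical metrics function for a single-output model: a flat
    # {metric_name: metric} dictionary, as described in the docstring below.
    return {
        "acc": tf.keras.metrics.Accuracy(),
        "auc": tf.keras.metrics.AUC(),
    }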
Example 4
class EvaluationJob(object):
    """Representation of an evaluation job"""

    def __init__(self, metrics_dict, model_version, total_tasks=-1):
        """
        Args:
            metrics_dict: A Python dictionary. If the model has only one
                output, `metrics_dict` maps metric names to metrics,
                e.g. `{"acc": tf.keras.metrics.Accuracy()}`.
                If the model has multiple outputs, `metrics_dict` is a
                nested dictionary of `{output_name: {metric_name: metric}}`,
                e.g. `{
                    "output_a": {"acc": tf.keras.metrics.Accuracy()},
                    "output_b": {"auc": tf.keras.metrics.AUC()},
                }`. Note that for a model with multiple outputs, each
                metric only uses one output.
            model_version: The version of the model to be evaluated.
            total_tasks: The number of evaluation tasks.
        """

        self.model_version = model_version
        self._total_tasks = total_tasks
        self._completed_tasks = 0
        self.evaluation_metrics = EvaluationMetrics(metrics_dict)

    def complete_task(self):
        self._completed_tasks += 1

    def finished(self):
        # Note: with the default total_tasks=-1 this is immediately true.
        return self._completed_tasks >= self._total_tasks

    def report_evaluation_metrics(self, model_outputs_pb, labels):
        # Deserialize the protobuf tensors into numpy arrays before
        # updating the metrics.
        labels = pb_to_ndarray(labels)
        model_outputs = {}
        for name, tensor_pb in model_outputs_pb.items():
            model_outputs[name] = pb_to_ndarray(tensor_pb)
        self.evaluation_metrics.update_evaluation_metrics(
            model_outputs, labels
        )
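
A hypothetical driver showing how `EvaluationJob` fits together; this is a sketch, not from the source, and it bypasses the protobuf layer by updating the wrapped `EvaluationMetrics` directly with numpy arrays (which is what `report_evaluation_metrics` does after deserializing):

import numpy as np
import tensorflow as tf

metrics_dict = {"auc": tf.keras.metrics.AUC()}
job = EvaluationJob(metrics_dict, model_version=10, total_tasks=2)

for _ in range(2):  # one update per evaluation task
    labels = np.random.randint(0, 2, 64)
    preds = np.random.random(64)
    # Single-output models are keyed by the default output name.
    outputs = {MetricsDictKey.MODEL_OUTPUT: preds}
    job.evaluation_metrics.update_evaluation_metrics(outputs, labels)
    job.complete_task()

assert job.finished()
print(job.evaluation_metrics.get_evaluation_summary())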