Example #1
    def testEvaluationJob(self):
        """Exercise _EvaluationJob task bookkeeping, the job-rotation
        (ok_to_new_job) checks, and metric reporting/averaging.

        NOTE(review): a later method in this class reuses this exact name,
        so this definition is shadowed and never collected by the test
        runner — consider renaming one of the two.
        """
        version = 1
        num_tasks = 5
        chkp_version = 2
        job = _EvaluationJob(version, num_tasks)

        # Fresh job: nothing completed, not finished, no new job allowed.
        self.assertEqual(0, job._completed_tasks)
        self.assertFalse(job.finished())
        self.assertFalse(self.ok_to_new_job(job, chkp_version))

        # Complete every task except the last one.
        for _ in range(num_tasks - 1):
            job.complete_task()
        self.assertEqual(num_tasks - 1, job._completed_tasks)
        self.assertFalse(job.finished())
        self.assertFalse(self.ok_to_new_job(job, chkp_version))

        # The final task pushes the job to finished.
        job.complete_task()
        self.assertEqual(num_tasks, job._completed_tasks)
        self.assertTrue(job.finished())
        self.assertTrue(self.ok_to_new_job(job, chkp_version))

        # A new job is allowed only once a strictly newer checkpoint exists.
        self.assertFalse(self.ok_to_new_job(job, job.model_version))
        self.assertTrue(self.ok_to_new_job(job, job.model_version + 1))

        # No metrics have been reported yet.
        self.assertFalse(job._evaluation_metrics)

        # Metrics reported against a mismatched model version are rejected
        # and leave the stored metrics untouched.
        first_metrics = {
            "mse": ndarray_to_tensor(np.array([100, 200], dtype=np.float32))
        }
        self.assertFalse(
            job.report_evaluation_metrics(
                job.model_version + 1, first_metrics
            )
        )
        self.assertFalse(job._evaluation_metrics)

        # Metrics for the matching version are accepted.
        self.assertTrue(
            job.report_evaluation_metrics(job.model_version, first_metrics)
        )

        # A second report is folded into the summary: the element-wise
        # mean of [100, 200] and [300, 400] is [200, 300].
        second_metrics = {
            "mse": ndarray_to_tensor(np.array([300, 400], dtype=np.float32))
        }
        job.report_evaluation_metrics(job.model_version, second_metrics)
        expected_mse = np.array([200, 300], dtype=np.float32)
        self.assertTrue(
            np.array_equal(
                expected_mse, job.get_evaluation_summary().get("mse")
            )
        )
    def testEvaluationJob(self):
        """Exercise _EvaluationJob built with an eval-metrics function:
        task bookkeeping, the job-rotation (ok_to_new_job) checks, and
        metric aggregation over reported (model output, label) batches.
        """
        version = 1
        num_tasks = 5
        chkp_version = 2
        job = _EvaluationJob(_eval_metrics_fn(), version, num_tasks)

        # Fresh job: nothing completed, not finished, no new job allowed.
        self.assertEqual(0, job._completed_tasks)
        self.assertFalse(job.finished())
        self.assertFalse(self.ok_to_new_job(job, chkp_version))

        # Complete every task except the last one.
        for _ in range(num_tasks - 1):
            job.complete_task()
        self.assertEqual(num_tasks - 1, job._completed_tasks)
        self.assertFalse(job.finished())
        self.assertFalse(self.ok_to_new_job(job, chkp_version))

        # The final task pushes the job to finished.
        job.complete_task()
        self.assertEqual(num_tasks, job._completed_tasks)
        self.assertTrue(job.finished())
        self.assertTrue(self.ok_to_new_job(job, chkp_version))

        # A new job is allowed only once a strictly newer checkpoint exists.
        self.assertFalse(self.ok_to_new_job(job, job.model_version))
        self.assertTrue(self.ok_to_new_job(job, job.model_version + 1))

        # Report two batches of (model output, label) pairs.
        first_outputs = [
            Tensor(
                np.array([[1], [6], [3]], np.float32),
                name=MetricsDictKey.MODEL_OUTPUT,
            ).to_tensor_pb()
        ]
        first_labels = Tensor(
            np.array([[1], [0], [3]], np.float32)
        ).to_tensor_pb()
        job.report_evaluation_metrics(first_outputs, first_labels)

        second_outputs = [
            Tensor(
                np.array([[4], [5], [6], [7], [8]], np.float32),
                name=MetricsDictKey.MODEL_OUTPUT,
            ).to_tensor_pb()
        ]
        second_labels = Tensor(
            np.array([[7], [8], [9], [10], [11]], np.float32)
        ).to_tensor_pb()
        job.report_evaluation_metrics(second_outputs, second_labels)

        # Across both batches, 2 of the 8 outputs equal their labels
        # (accuracy 0.25) and the squared errors sum to 81 over 8
        # samples (mse 10.125).
        expected_acc = 0.25
        summary = job.get_evaluation_summary()
        self.assertAlmostEqual(expected_acc, summary.get("acc").numpy())
        self.assertAlmostEqual(expected_acc, summary.get("acc_fn").numpy())
        self.assertAlmostEqual(10.125, summary.get("mse").numpy())