def testEvaluationJob(self):
    model_version = 1
    total_tasks = 5
    latest_chkp_version = 2
    job = _EvaluationJob(model_version, total_tasks)
    self.assertEqual(0, job._completed_tasks)
    self.assertFalse(job.finished())
    self.assertFalse(self.ok_to_new_job(job, latest_chkp_version))

    # Finish four of the five tasks
    for _ in range(4):
        job.complete_task()
    self.assertEqual(4, job._completed_tasks)
    self.assertFalse(job.finished())
    self.assertFalse(self.ok_to_new_job(job, latest_chkp_version))

    # The final task finishes the job
    job.complete_task()
    self.assertEqual(5, job._completed_tasks)
    self.assertTrue(job.finished())
    self.assertTrue(self.ok_to_new_job(job, latest_chkp_version))

    # A new job is only OK when a newer model checkpoint exists
    latest_chkp_version = job.model_version
    self.assertFalse(self.ok_to_new_job(job, latest_chkp_version))
    latest_chkp_version = job.model_version + 1
    self.assertTrue(self.ok_to_new_job(job, latest_chkp_version))

    # No metrics have been reported yet
    self.assertFalse(job._evaluation_metrics)

    # Reporting with a mismatched model version is rejected
    evaluation_version = job.model_version + 1
    evaluation_metrics = {
        "mse": ndarray_to_tensor(np.array([100, 200], dtype=np.float32))
    }
    self.assertFalse(
        job.report_evaluation_metrics(
            evaluation_version, evaluation_metrics
        )
    )
    self.assertFalse(job._evaluation_metrics)

    # Reporting with the matching model version is accepted
    evaluation_version = job.model_version
    self.assertTrue(
        job.report_evaluation_metrics(
            evaluation_version, evaluation_metrics
        )
    )

    # A second report; the summary averages the two reports elementwise
    evaluation_metrics = {
        "mse": ndarray_to_tensor(np.array([300, 400], dtype=np.float32))
    }
    job.report_evaluation_metrics(evaluation_version, evaluation_metrics)
    self.assertTrue(
        np.array_equal(
            np.array([200, 300], dtype=np.float32),
            job.get_evaluation_summary().get("mse"),
        )
    )
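# The [200, 300] summary asserted above relies on the job averaging each
# reported metric tensor elementwise across reports: the mean of
# [100, 200] and [300, 400] is [200, 300]. Below is a minimal sketch of
# that accumulation, using a hypothetical _MetricAccumulator helper that
# is not part of the module under test, assuming a simple running mean.
import numpy as np

class _MetricAccumulator:
    """Running elementwise mean of reported metric tensors."""

    def __init__(self):
        self._sum = None
        self._count = 0

    def update(self, values):
        # Accumulate the elementwise sum and the number of reports
        self._sum = values if self._sum is None else self._sum + values
        self._count += 1

    def result(self):
        return self._sum / self._count

acc = _MetricAccumulator()
acc.update(np.array([100, 200], dtype=np.float32))
acc.update(np.array([300, 400], dtype=np.float32))
assert np.array_equal(
    acc.result(), np.array([200, 300], dtype=np.float32)
)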
def testEvaluationJobWithEvalMetricsFn(self):
    # Renamed from testEvaluationJob so it does not shadow the test above;
    # this variant passes an evaluation-metrics function to the job and
    # reports raw (model output, label) pairs instead of computed metrics.
    model_version = 1
    total_tasks = 5
    latest_chkp_version = 2
    job = _EvaluationJob(_eval_metrics_fn(), model_version, total_tasks)
    self.assertEqual(0, job._completed_tasks)
    self.assertFalse(job.finished())
    self.assertFalse(self.ok_to_new_job(job, latest_chkp_version))

    # Finish four of the five tasks
    for _ in range(4):
        job.complete_task()
    self.assertEqual(4, job._completed_tasks)
    self.assertFalse(job.finished())
    self.assertFalse(self.ok_to_new_job(job, latest_chkp_version))

    # The final task finishes the job
    job.complete_task()
    self.assertEqual(5, job._completed_tasks)
    self.assertTrue(job.finished())
    self.assertTrue(self.ok_to_new_job(job, latest_chkp_version))

    # A new job is only OK when a newer model checkpoint exists
    latest_chkp_version = job.model_version
    self.assertFalse(self.ok_to_new_job(job, latest_chkp_version))
    latest_chkp_version = job.model_version + 1
    self.assertTrue(self.ok_to_new_job(job, latest_chkp_version))

    # Report two batches of (model output, label) pairs
    model_outputs = [
        Tensor(
            np.array([[1], [6], [3]], np.float32),
            name=MetricsDictKey.MODEL_OUTPUT,
        ).to_tensor_pb()
    ]
    labels = Tensor(np.array([[1], [0], [3]], np.float32)).to_tensor_pb()
    job.report_evaluation_metrics(model_outputs, labels)
    job.report_evaluation_metrics(
        [
            Tensor(
                np.array([[4], [5], [6], [7], [8]], np.float32),
                name=MetricsDictKey.MODEL_OUTPUT,
            ).to_tensor_pb()
        ],
        Tensor(
            np.array([[7], [8], [9], [10], [11]], np.float32)
        ).to_tensor_pb(),
    )

    # 2 of the 8 outputs match their labels, so accuracy is 2/8 = 0.25;
    # the summed squared error is 81 over 8 samples, so MSE is 10.125
    expected_acc = 0.25
    evaluation_metrics = job.get_evaluation_summary()
    self.assertAlmostEqual(
        expected_acc, evaluation_metrics.get("acc").numpy()
    )
    self.assertAlmostEqual(
        expected_acc, evaluation_metrics.get("acc_fn").numpy()
    )
    self.assertAlmostEqual(10.125, evaluation_metrics.get("mse").numpy())
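# _eval_metrics_fn is defined outside this section. The sketch below is a
# hypothetical reconstruction, not the actual definition, chosen so it is
# consistent with the assertions above (accuracy 2/8 = 0.25, MSE
# 81/8 = 10.125) under the assumption that the job feeds each
# (labels, outputs) batch to stateful tf.keras metrics and averages the
# per-sample results of plain functions such as "acc_fn".
import tensorflow as tf

def _eval_metrics_fn():
    return {
        # Stateful Keras metrics, updated once per reported batch
        "acc": tf.keras.metrics.Accuracy(),
        "mse": tf.keras.metrics.MeanSquaredError(),
        # Plain function returning per-sample 0/1 correctness; the job
        # would average these values across all reported samples
        "acc_fn": lambda labels, outputs: tf.cast(
            tf.equal(
                tf.cast(outputs, tf.int32), tf.cast(labels, tf.int32)
            ),
            tf.float32,
        ),
    }

# Quick check against the first reported batch: outputs [1, 6, 3] vs
# labels [1, 0, 3] match in 2 of 3 positions.
metrics = _eval_metrics_fn()
metrics["acc"].update_state([[1], [0], [3]], [[1], [6], [3]])
assert abs(metrics["acc"].result().numpy() - 2 / 3) < 1e-6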