def test_score_matrix(self):
    """Exercise ScoreMatrix and ScoreMatrixM2M lookup/rendering APIs."""
    t, t1, t2, m1, m2 = self.prep_models_and_tests()
    sm = t.judge(m1)
    # Integer indexing is not supported; only test/model keys are.
    self.assertRaises(TypeError, sm.__getitem__, 0)
    # get_group accepts (test, model) pairs in either order,
    # by object or by name.
    self.assertEqual(str(sm.get_group((t1, m1))), "Pass")
    self.assertEqual(str(sm.get_group((m1, t1))), "Pass")
    self.assertEqual(str(sm.get_group((m1.name, t1.name))), "Pass")
    self.assertEqual(str(sm.get_group((t1.name, m1.name))), "Pass")
    self.assertRaises(TypeError, sm.get_group, (0, 0))
    self.assertRaises(KeyError, sm.get_by_name, "This name does not exist")
    # Attribute access proxies to the underlying pandas DataFrame.
    self.assertIsInstance(sm.__getattr__("score"), DataFrame)
    self.assertIsInstance(sm.norm_scores, DataFrame)
    self.assertIsInstance(sm.T, ScoreMatrix)
    self.assertIsInstance(sm.to_html(True, True, True), str)
    self.assertIsInstance(sm.to_html(), str)
    self.assertTrue(type(sm) is ScoreMatrix)
    # Cell lookup works test-first, name-first, and model-first.
    self.assertTrue(sm[t1][m1].score)
    self.assertTrue(sm["test1"][m1].score)
    self.assertTrue(sm[m1]["test1"].score)
    self.assertFalse(sm[t2][m1].score)
    self.assertEqual(sm[(m1, t1)].score, True)
    self.assertEqual(sm[(m1, t2)].score, False)
    sm = t.judge([m1, m2])
    # stature ranks models by score on a given test (1 = best).
    self.assertEqual(sm.stature(t1, m1), 1)
    self.assertEqual(sm.stature(t1, m2), 2)
    display(sm)

    ######### m2m #################
    t1.observation = [2, 3]
    smm2m = ScoreMatrixM2M(test=t1, models=[m1],
                           scores=[[Score(1), Score(1)],
                                   [Score(1), Score(1)]])
    self.assertIsInstance(smm2m.__getattr__("score"), DataFrame)
    self.assertIsInstance(smm2m.__getattr__("norm_scores"), DataFrame)
    self.assertIsInstance(smm2m.__getattr__("related_data"), DataFrame)
    self.assertIsInstance(smm2m.norm_scores, DataFrame)
    # NOTE(review): this assertion was duplicated verbatim in the
    # original; one copy removed.
    self.assertRaises(KeyError, smm2m.get_by_name, "Not Exist")
    self.assertRaises(TypeError, smm2m.get_group, [0])
    self.assertIsInstance(smm2m.get_group([m1.name, t1.name]), Score)
    self.assertEqual(smm2m.get_group([m1.name, t1.name]).score, 1)
    self.assertIsInstance(smm2m.get_group([m1, t1]), Score)
    self.assertEqual(smm2m.get_group([m1, t1]).score, 1)
def judge(self, models, skip_incapable=False, stop_on_error=True,
          deep_error=False):
    """Generate a score matrix for the provided model(s).

    Operates as follows:
    1. Check if models have been specified as a list/tuple/set.
       If not, raise exception.
    2. Create a list of predictions. If a test observation is provided,
       add it to predictions.
    3. Checks if all models have all the required capabilities. If a
       model does not, then a CapabilityError is raised.
    4. Calls generate_prediction to generate predictions for each model,
       and these are appended to the predictions list.
    5. Generate a 2D list as a placeholder for all the scores.
    6. Calls score_prediction to generate scores for each comparison.
    7. Checks that the score is of score_type, raising an
       InvalidScoreError.
    8. Equips the score with metadata:
       a) Reference(s) to the model(s), in attribute model1 (and model2).
       b) A reference to the test, in attribute test.
       c) A reference to the predictions, in attributes prediction1 and
          prediction2.
    9. Returns the score as a Pandas DataFrame.

    If stop_on_error is true (default), exceptions propagate upward. If
    false, an ErrorScore is generated containing the exception.

    If deep_error is true (not default), the traceback will contain the
    actual code execution error, instead of the content of an ErrorScore.
    """
    # 1. Models must be passed as a collection, never a bare Model.
    if not isinstance(models, (list, tuple, set)):
        raise TypeError(("Models must be specified as a list, tuple or "
                         "set. For single model tests, use 'Test' class."))
    else:
        models = list(models)
    # 2.
    predictions = []
    # If observation exists, store it as first element in predictions[]
    if self.observation:
        predictions.append(self.observation)
    for model in models:
        if not isinstance(model, Model):
            # Fixed message: original concatenation produced
            # "non-Model.Invalid" with no separating space.
            raise TypeError(("TestM2M's judge method received a "
                             "non-Model. Invalid model name: '%s'"
                             % model))
        else:
            try:
                # 3.
                self.check_capabilities(model,
                                        skip_incapable=skip_incapable)
                # 4.
                prediction = self.generate_prediction(model)
                self.check_prediction(prediction)
                predictions.append(prediction)
            except CapabilityError as e:
                raise CapabilityError(
                    model, e.capability,
                    ("TestM2M's judge method resulted in"
                     " error for '%s'. Error: '%s'"
                     % (model, str(e)))) from e
            except Exception as e:
                # Fixed message: original concatenation produced
                # "errorfor" with no separating space. `from e`
                # preserves the causal traceback.
                raise Exception(
                    ("TestM2M's judge method resulted in error "
                     "for '%s'. Error: '%s'" % (model, str(e)))) from e
    # 5. 2D list for scores; num(rows) = num(cols) = num(predictions)
    scores = [[NoneScore for x in range(len(predictions))]
              for y in range(len(predictions))]
    for i in range(len(predictions)):
        for j in range(len(predictions)):
            # When an observation is present it occupies row/column 0,
            # so model indices are shifted down by one.
            if not self.observation:
                model1 = models[i]
                model2 = models[j]
            elif i == 0 and j == 0:
                model1 = None
                model2 = None
            elif i == 0:
                model1 = models[j - 1]
                model2 = None
            elif j == 0:
                model1 = models[i - 1]
                model2 = None
            else:
                model1 = models[i - 1]
                model2 = models[j - 1]
            # 6.-8. handled inside _judge.
            scores[i][j] = self._judge(predictions[i], predictions[j],
                                       model1, model2)
            if isinstance(scores[i][j], ErrorScore) and stop_on_error:
                raise scores[i][j].score  # An exception.
    # 9.
    from sciunit.scores.collections_m2m import ScoreMatrixM2M
    sm = ScoreMatrixM2M(self, models, scores=scores)
    return sm
def judge(self, models: List[Model], skip_incapable: bool = False,
          stop_on_error: bool = True, deep_error: bool = False,
          only_lower_triangle: bool = False) -> "ScoreMatrixM2M":
    """Generate a score matrix for the provided model(s).

    `only_lower_triangle`: Compute only one triangle of this square
    ScoreMatrix and mirror the values across the diagonal; diagonal
    entries (self-comparisons) are filled with a perfect score via
    `self.ace()`. If False, compute every cell.

    Operates as follows:
    1. Check if models have been specified as a list/tuple/set.
       If not, raise exception.
    2. Create a list of predictions. If a test observation is provided,
       add it to predictions.
    3. Checks if all models have all the required capabilities. If a
       model does not, then a CapabilityError is raised.
    4. Calls generate_prediction to generate predictions for each model,
       and these are appended to the predictions list.
    5. Generate a 2D list as a placeholder for all the scores.
    6. Calls score_prediction to generate scores for each comparison.
    7. Checks that the score is of score_type, raising an
       InvalidScoreError.
    8. Equips the score with metadata:
       a) Reference(s) to the model(s), in attribute model1 (and model2).
       b) A reference to the test, in attribute test.
       c) A reference to the predictions, in attributes prediction1 and
          prediction2.
    9. Returns the score as a Pandas DataFrame.

    If stop_on_error is true (default), exceptions propagate upward. If
    false, an ErrorScore is generated containing the exception.

    If deep_error is true (not default), the traceback will contain the
    actual code execution error, instead of the content of an ErrorScore.

    Args:
        models (List[Model]): A list of sciunit model instances.
        skip_incapable (bool, optional): Skip the incapable tests.
            Defaults to False.
        stop_on_error (bool, optional): Whether to stop on an error.
            Defaults to True.
        deep_error (bool, optional): If True, the traceback contains the
            actual execution error instead of an ErrorScore's content.
            Defaults to False.
        only_lower_triangle (bool, optional): Mirror scores across the
            diagonal instead of computing every cell. Defaults to False.

    Raises:
        TypeError: The `model` is not a sciunit model.
        Exception: TestM2M's judge method resulted in error.
        CapabilityError: Encounter capability error when checking the
            capabilities.

    Returns:
        ScoreMatrixM2M: The created ScoreMatrixM2M instance.
    """
    # 1. Models must be passed as a collection, never a bare Model.
    if not isinstance(models, (list, tuple, set)):
        raise TypeError(("Models must be specified as a list, tuple or "
                         "set. For single model tests, use 'Test' class."))
    else:
        models = list(models)
    # 2.
    predictions = []
    # If observation exists, store it as first element in predictions[]
    if self.observation:
        predictions.append(self.observation)
    for model in models:
        if not isinstance(model, Model):
            # Fixed message: original concatenation produced
            # "non-Model.Invalid" with no separating space.
            raise TypeError(("TestM2M's judge method received a "
                             "non-Model. Invalid model name: '%s'"
                             % model))
        else:
            try:
                # 3.
                self.check_capabilities(model,
                                        skip_incapable=skip_incapable)
                # 4.
                prediction = self.generate_prediction(model)
                self.check_prediction(prediction)
                predictions.append(prediction)
            except CapabilityError as e:
                raise CapabilityError(
                    model, e.capability,
                    ("TestM2M's judge method resulted in"
                     " error for '%s'. Error: '%s'"
                     % (model, str(e)))) from e
            except Exception as e:
                # Fixed message: original concatenation produced
                # "errorfor" with no separating space. `from e`
                # preserves the causal traceback.
                raise Exception(
                    ("TestM2M's judge method resulted in error "
                     "for '%s'. Error: '%s'" % (model, str(e)))) from e
    # 5. 2D list for scores; num(rows) = num(cols) = num(predictions)
    scores = [[NoneScore for x in range(len(predictions))]
              for y in range(len(predictions))]
    for i in range(len(predictions)):
        for j in range(len(predictions)):
            # When an observation is present it occupies row/column 0,
            # so model indices are shifted down by one.
            if not self.observation:
                model1 = models[i]
                model2 = models[j]
            elif i == 0 and j == 0:
                model1 = None
                model2 = None
            elif i == 0:
                model1 = models[j - 1]
                model2 = None
            elif j == 0:
                model1 = models[i - 1]
                model2 = None
            else:
                model1 = models[i - 1]
                model2 = models[j - 1]
            if i == j and only_lower_triangle:
                # Perfect score for self-comparison
                scores[i][j] = self.ace()
            elif i > j and only_lower_triangle:
                # Mirror the symmetric entry computed earlier in this
                # loop (the [j][i] cell falls into the else branch
                # before [i][j] is reached).
                scores[i][j] = scores[j][i]
            else:
                # 6.-8. handled inside _judge.
                scores[i][j] = self._judge(predictions[i],
                                           predictions[j],
                                           model1, model2)
                if isinstance(scores[i][j], ErrorScore) \
                        and stop_on_error:
                    raise scores[i][j].score  # An exception.
    # 9.
    from sciunit.scores.collections_m2m import ScoreMatrixM2M
    sm = ScoreMatrixM2M(self, models, scores=scores)
    return sm