    def test_measures(self):
        sheet_num_universe = EvaluationSheet(
            self.scores, relevant=self.rel, universe=self.num_universe
        )
        sheet_universe = EvaluationSheet(
            self.scores, relevant=self.rel, universe=self.universe
        )
        sheet_no_universe = EvaluationSheet(self.scores, relevant=self.rel)

        # Measures that don't require universe
        for sheet in (sheet_num_universe, sheet_universe, sheet_no_universe):
            assert_array_equal(
                sheet.precision(),
                np.array([0, 0.5, 2 / 3, 0.5, 3 / 5, 2 / 3, 4 / 7]),
            )
            assert_array_equal(
                sheet.recall(),
                np.array([0, 0.25, 0.5, 0.5, 0.75, 1, 1]),
            )

        # Measures that do require universe
        for sheet in (sheet_num_universe, sheet_universe):
            # XXX The following ones look wrong?!
            expected = np.array([1 / 16, 1 / 16, 1 / 16, 1 / 8, 1 / 8, 1 / 8, 3 / 16])
            assert_array_equal(sheet.fallout(), expected)
            expected = np.array([4 / 19, 3 / 18, 2 / 17, 2 / 16, 1 / 15, 0, 0])
            assert_array_equal(sheet.miss(), expected)
            expected = np.array([0.75, 0.8, 17 / 20, 0.8, 17 / 20, 0.9, 17 / 20])
            assert_array_equal(sheet.accuracy(), expected)
            assert_array_equal(sheet.generality(), 0.2)

        assert_raises(UndefinedError, sheet_no_universe.fallout)
        assert_raises(UndefinedError, sheet_no_universe.miss)
        assert_raises(UndefinedError, sheet_no_universe.accuracy)
        assert_raises(UndefinedError, sheet_no_universe.generality)

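    def test_measures_from_counts_sketch(self):
        # Illustrative cross-check, not part of the original suite: the
        # "look wrong?!" expectations above follow directly from the
        # per-cutoff confusion-matrix counts that test_init (below) shows
        # EvaluationSheet storing, assuming its columns are [tp, fp, fn, tn]:
        #   fallout    = fp / (fp + tn)
        #   miss       = fn / (fn + tn)
        #   accuracy   = (tp + tn) / (tp + fp + fn + tn)
        #   generality = (tp + fn) / (tp + fp + fn + tn)
        tp = np.array([0, 1, 2, 2, 3, 4, 4])
        fp = np.array([1, 1, 1, 2, 2, 2, 3])
        fn = np.array([4, 3, 2, 2, 1, 0, 0])
        tn = np.array([15, 15, 15, 14, 14, 14, 13])
        total = tp + fp + fn + tn

        sheet = EvaluationSheet(self.scores, relevant=self.rel, universe=self.universe)
        np.testing.assert_allclose(sheet.fallout(), fp / (fp + tn))
        np.testing.assert_allclose(sheet.miss(), fn / (fn + tn))
        np.testing.assert_allclose(sheet.accuracy(), (tp + tn) / total)
        np.testing.assert_allclose(sheet.generality(), (tp + fn) / total)
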
    def test_f_score(self):
        sheet = EvaluationSheet(self.scores, relevant=self.rel)
        expected = np.array([0, 2 / 6, 4 / 7, 4 / 8, 6 / 9, 8 / 10, 8 / 11])
        assert_array_equal(sheet.f_score(), expected)

        # $F_\beta = \frac{(\beta^2 + 1) |rel \cap ret|}{\beta^2 |rel| + |ret|}$
        expected = np.array([
            0,
            1.25 * 1 / (0.25 * 4 + 2),
            1.25 * 2 / (0.25 * 4 + 3),
            1.25 * 2 / (0.25 * 4 + 4),
            1.25 * 3 / (0.25 * 4 + 5),
            1.25 * 4 / (0.25 * 4 + 6),
            1.25 * 4 / (0.25 * 4 + 7),
        ])
        assert_array_equal(sheet.f_score(0.5), expected)

        expected = np.array([
            0,
            5 * 1 / (4 * 4 + 2),
            5 * 2 / (4 * 4 + 3),
            5 * 2 / (4 * 4 + 4),
            5 * 3 / (4 * 4 + 5),
            5 * 4 / (4 * 4 + 6),
            5 * 4 / (4 * 4 + 7),
        ])
        assert_array_equal(sheet.f_score(2), expected)

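    def test_f_score_precision_recall_form_sketch(self):
        # Illustrative cross-check, not part of the original suite: the count
        # form used above, F_beta = ((beta^2 + 1) |rel & ret|) / (beta^2 |rel| + |ret|),
        # is algebraically the same as the precision/recall form
        # F_beta = (beta^2 + 1) P R / (beta^2 P + R), so both should agree
        # (up to floating-point rounding) at every cutoff.
        sheet = EvaluationSheet(self.scores, relevant=self.rel)
        p, r = sheet.precision(), sheet.recall()
        for beta in (0.5, 2):
            b2 = beta ** 2
            with np.errstate(invalid="ignore", divide="ignore"):
                # Where P = R = 0 the ratio is 0/0; the count form gives 0 there.
                from_pr = np.where(b2 * p + r > 0, (b2 + 1) * p * r / (b2 * p + r), 0)
            np.testing.assert_allclose(sheet.f_score(beta), from_pr)
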
    def test_init(self):
        sheet = EvaluationSheet(self.scores, relevant=self.rel)
        expected = np.array([
            [0, 1, 2, 2, 3, 4, 4],
            [1, 1, 1, 2, 2, 2, 3],
            [4, 3, 2, 2, 1, 0, 0],
            [-1, -1, -1, -1, -1, -1, -1],
        ]).T
        assert_array_equal(sheet.data, expected)

        sheet = EvaluationSheet(self.scores, relevant=self.rel, universe=self.universe)
        expected = np.array([
            [0, 1, 2, 2, 3, 4, 4],
            [1, 1, 1, 2, 2, 2, 3],
            [4, 3, 2, 2, 1, 0, 0],
            [15, 15, 15, 14, 14, 14, 13],
        ]).T
        assert_array_equal(sheet.data, expected)

        sheet = EvaluationSheet(self.scores, relevant=self.rel, universe=self.num_universe)
        # Same expected applies as above
        assert_array_equal(sheet.data, expected)

        data = np.array([[1, 0, 0, 1], [1, 1, 0, 0]])
        sheet = EvaluationSheet(data)
        assert_array_equal(sheet.data, data)

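    def test_init_column_identities_sketch(self):
        # Illustrative cross-check, not part of the original suite, assuming
        # the four columns asserted above are [tp, fp, fn, tn] per cutoff:
        # tp + fn always equals the number of relevant items (4 here), and
        # with a universe given the four counts partition it (every row of
        # the expected matrix above sums to 20).
        sheet = EvaluationSheet(self.scores, relevant=self.rel, universe=self.universe)
        tp, fp, fn, tn = sheet.data.T
        assert_array_equal(tp + fn, 4)
        assert_array_equal(tp + fp + fn + tn, 20)
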
    def test_to_file_from_file(self):
        data = np.array([[1, 0, 0, 1], [1, 1, 0, 0]])
        sheet = EvaluationSheet(data)

        with temp_file() as fname:
            sheet.to_file(fname)
            newsheet = EvaluationSheet.from_file(fname)
            assert_array_equal(sheet.data, newsheet.data)

    def test_measures_with_empty_rel_and_ret(self):
        sheet1 = EvaluationSheet(Scoresheet(), [], [])
        sheet2 = EvaluationSheet(Scoresheet(), [], 10)
        sheet3 = EvaluationSheet(Scoresheet(), [])

        for sheet in (sheet1, sheet2, sheet3):
            assert_raises(UndefinedError, sheet.precision)
            assert_raises(UndefinedError, sheet.recall)
            assert_raises(UndefinedError, sheet.f_score)
            assert_raises(UndefinedError, sheet.fallout)
            assert_raises(UndefinedError, sheet.miss)
            assert_raises(UndefinedError, sheet.accuracy)
            assert_raises(UndefinedError, sheet.generality)

def test_CacheEvaluationListener():
    listener = CacheEvaluationListener()
    scores = BaseScoresheet({1: 10, 2: 5})
    ev = EvaluationSheet(scores, {1})

    smokesignal.emit('evaluation_finished', ev, 'd', 'p')
    ev2 = EvaluationSheet.from_file(listener.fname)
    assert_array_equal(ev.data, ev2.data)

    smokesignal.clear_all()
    os.unlink(listener.fname)