def setup(self):
    """Prepare a toy POS-tagged corpus and the metric values it should yield.

    'zero' appears only in the reference (never predicted) and 'unseen'
    only in the test output (never correct), so both degenerate metric
    cases are covered.
    """
    self.reference = 'DET NN VB DET JJ NN NN IN DET NN zero'.split()
    self.test = 'DET VB VB DET NN NN NN IN DET NN unseen'.split()
    self.performance = evaluate(self.reference, self.test)
    self.labels = set(self.reference) | set(self.test)
    # Expected per-label scores, hand-computed for the corpus above.
    labels = ('DET', 'IN', 'JJ', 'NN', 'VB', 'zero', 'unseen')
    self.expected_recall = list(zip(labels, (1, 1, 0, 0.75, 1, 0, 1)))
    self.expected_precision = list(zip(labels, (1, 1, 1, 0.75, 0.5, 1, 0)))
    self.expected_f_measure = list(zip(labels, (1, 1, 0, 0.75, 2 / 3, 0, 0)))
def test_f_measure_beta(self):
    """The reported f-score must match the F-beta formula for each label.

    Runs with beta = 0.5 (precision-weighted) and beta = 2
    (recall-weighted), collecting every mismatch before asserting so a
    single bad label does not hide the others.
    """
    failures = []
    for beta in (0.5, 2):
        for (label, precision), (_, recall) in zip(self.expected_precision,
                                                   self.expected_recall):
            scores = evaluate(self.reference, self.test, beta=beta)
            actual = scores['f-{0}'.format(label)]
            if precision == recall == 0:
                # F is defined as 0 when both precision and recall are 0.
                wanted = 0
            else:
                wanted = ((1 + beta ** 2) * precision * recall
                          / (beta ** 2 * precision + recall))
            try:
                assert_almost_equal(actual, wanted)
            except AssertionError:
                failures.append((label, beta, actual, wanted))
    assert not failures
def evaluate(reference, test, beta=1):
    """Compute score for SemEval and various performance metrics.

    Args:
        reference: An ordered list of correct class labels.
        test: A corresponding ordered list of class labels to evaluate.
        beta: A float parameter for F-measure (default = 1).

    Returns:
        A dictionary with an entry for each metric.  An additional entry
        is made under the key 'semeval f_measure' — the mean of the
        positive and negative F-scores, which is the performance metric
        used by SemEval-2013.
    """
    scores = classify.evaluate(reference, test, beta)
    # SemEval-2013 averages the F-scores of the two sentiment classes.
    scores['semeval f_measure'] = (scores['f-positive']
                                   + scores['f-negative']) / 2
    return scores
def test_f_measure_beta(self):
    """Check the reported f-score against the F-beta formula per label."""
    # NOTE(review): this appears to be an exact logical duplicate of the
    # other test_f_measure_beta definition in this file; if both live in
    # the same class, this later one silently shadows the earlier —
    # confirm and remove one of them.
    failed = []
    for beta in (0.5, 2):
        for x, y in zip(self.expected_precision, self.expected_recall):
            label, precision, recall = x[0], x[1], y[1]
            # Recompute the metrics with the beta value under test.
            performance = evaluate(self.reference, self.test, beta=beta)
            result = performance['f-{0}'.format(label)]
            if precision == 0 and recall == 0:
                # F is defined as 0 when both precision and recall are 0.
                expected = 0
            else:
                expected = (((1 + beta ** 2) * precision * recall) /
                            (((beta ** 2) * precision) + recall))
            try:
                assert_almost_equal(result, expected)
            except AssertionError:
                # Collect all mismatches so one failure doesn't mask the rest.
                failed.append((label, beta, result, expected))
    assert not failed
def test_f_measure_zero(self):
    """Regression test for gh-19: disjoint label sets must not crash.

    Only checks that evaluate() completes without raising; the label 1
    is never predicted and 3 is never correct, exercising the
    zero-precision/zero-recall paths.
    """
    evaluate([1, 2], [2, 3])