Example #1
0
    def testEvaluation_4(self):
        """All-'O' predictions against non-empty gold spans must score 0.0."""
        gold = [['B-TAR', 'I-TAR', 'O', 'B-HYP'],
                ['B-TAR', 'O', 'O', 'B-HYP']]
        pred = [['O'] * 4, ['O'] * 4]
        score = evaluate(gold, pred)
        self.assertEqual(score, 0.0)
Example #2
0
    def testEvaluation_8(self):
        """Partially-overlapping predictions; expected score is built from
        f1_score(3/6, 3/5) — presumably precision 3/6 and recall 3/5, TODO
        confirm the argument order against f1_score's definition.

        NOTE(review): the second golden sentence contains '0' (digit zero)
        where every other list uses 'O' (letter) — looks like a typo, but
        the expected counts may have been computed with it, so it is left
        untouched; verify against evaluate().
        NOTE(review): the second predicted sentence has 5 tags while its
        golden counterpart has 6 — possibly a deliberate robustness case;
        verify.
        """
        golden_list = [['B-TAR', 'B-TAR', 'I-TAR', 'B-HYP', 'O', 'O'],
                       ['B-TAR', 'O', 'O', 'B-HYP', '0', 'I-HYP']]
        predict_list = [['B-TAR', 'B-TAR', 'I-TAR', 'O', 'B-TAR', 'I-TAR'],
                        ['I-TAR', 'O', 'B-HYP', 'B-HYP', 'B-TAR']]

        f1 = evaluate(golden_list, predict_list)
        self.assertEqual(f1, f1_score(3 / 6, 3 / 5))
Example #3
0
    def testEvaluation_6(self):
        """Only one span matches: expected score is f1_score(1/5, 1/1)."""
        gold = [['B-TAR', 'I-TAR', 'B-TAR', 'B-HYP'],
                ['B-TAR', 'O', 'O', 'B-HYP']]
        pred = [['B-TAR', 'I-TAR', 'I-TAR', 'O'],
                ['I-TAR', 'O', 'O', 'O']]
        score = evaluate(gold, pred)
        self.assertEqual(score, f1_score(1 / 5, 1 / 1))
Example #4
0
    def testEvaluation_5(self):
        """Long, mostly-'O' sentences with partial overlap: expect
        f1_score(2/4, 2/3) — presumably precision 2/4 and recall 2/3."""
        golden_list = [
            # 20 tokens: TAR span at 0, HYP span at 3-4, rest 'O'.
            ['B-TAR', 'O', 'O', 'B-HYP', 'I-HYP'] + ['O'] * 15,
            # 25 tokens: TAR span at 9, HYP span at 14-16, rest 'O'.
            ['O'] * 9 + ['B-TAR'] + ['O'] * 4
            + ['B-HYP', 'I-HYP', 'I-HYP'] + ['O'] * 8,
        ]
        predict_list = [
            # 20 tokens: TAR at 0 matches; HYP shifted to 4.
            ['B-TAR', 'O', 'O', 'O', 'B-HYP'] + ['O'] * 15,
            # 25 tokens: stray I-TAR at 1, TAR at 9 matches, stray I-HYP at 23.
            ['O', 'I-TAR'] + ['O'] * 7 + ['B-TAR']
            + ['O'] * 13 + ['I-HYP', 'O'],
        ]
        f1 = evaluate(golden_list, predict_list)
        self.assertEqual(f1, f1_score(2 / 4, 2 / 3))
Example #5
0
    def testEvaluation_9(self):
        """No entities anywhere on either side: the score must be 1."""
        golden_list = [['O'] * 20, ['O'] * 25]
        predict_list = [['O'] * 20, ['O'] * 25]
        self.assertEqual(evaluate(golden_list, predict_list), 1)
Example #6
0
                                     loss.view(-1).data.tolist()[0])
                pbar.update(1)

        # keep the model with best f1 on development set, if the flag is True
        if _config.use_f1:
            # Inference mode: disables training-only behaviour (e.g. dropout).
            model.eval()
            pred_dev_ins, golden_dev_ins = [], []
            # Decode every dev batch and convert tag indices back to tag
            # strings, truncating each sequence to its true sentence length
            # (the batches are presumably padded — TODO confirm upstream).
            for batch_sentence_len_list, batch_word_index_lists, batch_word_mask, batch_char_index_matrices, batch_char_mask, batch_word_len_lists, batch_tag_index_list in dev:
                pred_batch_tag = model.decode(batch_word_index_lists,
                                              batch_sentence_len_list,
                                              batch_char_index_matrices,
                                              batch_word_len_lists,
                                              batch_char_mask)
                # Predicted tags: index -> tag name, keep only first l tokens.
                pred_dev_ins += [[
                    reversed_tag_dict[t] for t in tag[:l]
                ] for tag, l in zip(pred_batch_tag.data.tolist(),
                                    batch_sentence_len_list.data.tolist())]
                # Gold tags, converted identically for a like-for-like score.
                golden_dev_ins += [[
                    reversed_tag_dict[t] for t in tag[:l]
                ] for tag, l in zip(batch_tag_index_list.data.tolist(),
                                    batch_sentence_len_list.data.tolist())]
#			print(golden_dev_ins)
            # Checkpoint only when dev F1 improves on the best seen so far.
            new_f1 = evaluate(golden_dev_ins, pred_dev_ins)
            if new_f1 > best_f1:
                model_state = model.state_dict()
                torch.save(model_state, _config.model_file)
                best_f1 = new_f1
        # else we just keep the newest model
        else:
            model_state = model.state_dict()
            torch.save(model_state, _config.model_file)
from todo import evaluate

# Gold annotations (earlier variants kept below for reference):
#   [['B-TAR', 'I-TAR','I-TAR', 'I-TAR','O', 'B-HYP']]
#   [['B-TAR', 'I-TAR', 'O', 'B-HYP'], ['B-TAR', 'O', 'O', 'B-HYP']]
list_1 = [['B-TAR', 'I-TAR', 'I-TAR', 'B-HYP'], ['B-TAR', 'O', 'O', 'B-HYP']]

# Predictions to score (earlier variants kept below for reference):
#   [['B-TAR','I-TAR', 'B-HYP','I-HYP','O', 'B-HYP']]
#   [['B-TAR', 'O', 'O', 'O'], ['B-TAR', 'O', 'B-HYP', 'I-HYP']]
list_2 = [['B-TAR', 'B-TAR', 'I-HYP', 'O'], ['I-TAR', 'O', 'O', 'O']]

score = evaluate(list_1, list_2)
print(score)
Example #8
0
    def test_evaluation(self):
        """Exercise evaluate() on a series of hand-built golden/predicted
        BIO tag lists; some cases only print the score, others assert a
        rounded expected value.

        NOTE(review): the printed message reads "shuld" (sic); it is runtime
        output, so the typo is preserved here.
        NOTE(review): expected constants (0.286, 0.5, ...) appear to be
        F1 scores rounded to 3 decimals — TODO confirm evaluate() rounds.
        """
        # Shifted/missing spans across two sentences: expect 0.286.
        golden_list = [['B-TAR', 'I-TAR', 'O', 'B-HYP'],
                       ['B-TAR', 'O', 'O', 'B-HYP']]
        predict_list = [['B-TAR', 'O', 'O', 'O'],
                        ['B-TAR', 'O', 'B-HYP', 'I-HYP']]
        result = todo.evaluate(golden_list, predict_list)
        print("answers shuld be this " + str(result))
        self.assertEqual(result, 0.286)
        #auto generate at the end
        # Perfect predictions — score only printed, not asserted.
        golden_list = [['B-TAR', 'I-TAR', 'O', 'B-HYP'],
                       ['B-TAR', 'I-TAR', 'O', 'B-HYP']]
        predict_list = [['B-TAR', 'I-TAR', 'O', 'B-HYP'],
                        ['B-TAR', 'I-TAR', 'O', 'B-HYP']]
        result = todo.evaluate(golden_list, predict_list)
        print("answers shuld be this " + str(result))

        # One gold span broken in the prediction ('I-TAR' -> 'O') —
        # score only printed, not asserted.
        golden_list = [[
            'B-TAR', 'I-TAR', 'I-TAR', 'B-HYP', 'I-HYP', 'I-HYP', 'O'
        ]]
        predict_list = [[
            'B-TAR', 'I-TAR', 'O', 'B-HYP', 'I-HYP', 'I-HYP', 'O'
        ]]

        result = todo.evaluate(golden_list, predict_list)
        print("answers shuld be this " + str(result))

        # No entities at all — score only printed, not asserted.
        golden_list = [['O', 'O']]
        predict_list = [['O', 'O']]

        result = todo.evaluate(golden_list, predict_list)
        print("answers shuld be this " + str(result))

        #B-hyp with I-HYP
        #SIMPLE CASES
        #2 true positive
        #2 false negative
        #2 false positive

        golden_list = [['B-TAR', 'O', 'B-HYP', 'I-HYP'],
                       ['B-TAR', 'O', 'O', 'B-HYP']]
        predict_list = [['B-TAR', 'O', 'O', 'B-HYP'],
                        ['B-TAR', 'O', 'B-HYP', 'I-HYP']]

        result = todo.evaluate(golden_list, predict_list)
        print("answers shuld be this " + str(result))
        self.assertEqual(result, 0.5)

        #two different way for simple B-HYP prediction mistake
        #2 true positive
        #2 false negative

        golden_list = [['B-TAR', 'O', 'O', 'B-HYP'],
                       ['B-TAR', 'O', 'O', 'B-HYP', 'O']]
        predict_list = [['B-TAR', 'O', 'O', 'B-HYP'],
                        ['B-TAR', 'O', 'O', 'O', 'O']]

        result = todo.evaluate(golden_list, predict_list)
        print("answers shuld be this " + str(result))

        self.assertEqual(result, 0.857)

        #BTAR WITH ITAR in golden and BTAR with ITAR in golden

        golden_list = [['B-TAR', 'I-TAR', 'O', 'B-HYP'],
                       ['B-TAR', 'O', 'O', 'B-HYP'],
                       ['B-TAR', 'I-TAR', 'O', 'B-HYP']]
        predict_list = [['B-TAR', 'O', 'O', 'B-HYP'],
                        ['B-TAR', 'I-TAR', 'O', 'B-HYP'],
                        ['B-TAR', 'I-TAR', 'O', 'B-HYP']]

        result = todo.evaluate(golden_list, predict_list)
        print("answers shuld be this " + str(result))

        self.assertEqual(result, 0.667)
        # NOTE(review): the next five lines form a bare triple-quoted string
        # expression — a no-op at runtime. The text inside is effectively a
        # disabled comment; it is preserved verbatim.
        """

        #when B-Tar is equal at one and not equal in another
        #simple BTAR AND BHYP
        """
        golden_list = [['B-TAR', 'O', 'O', 'B-HYP'],
                       ['B-TAR', 'O', 'O', 'B-HYP']]
        predict_list = [['B-TAR', 'O', 'O', 'B-HYP'],
                        ['B-TAR', 'I-TAR', 'O', 'B-HYP']]

        result = todo.evaluate(golden_list, predict_list)

        print("answers shuld be this " + str(result))
        self.assertEqual(result, 0.75)

        #more exhaustive test needed
        # NOTE(review): this exact case is repeated four times below,
        # presumably a copy-paste artifact ("auto generate" above) — the
        # duplicates add no coverage but are kept to avoid changing code.
        golden_list = [['B-TAR', 'O', 'O', 'B-HYP'],
                       ['B-TAR', 'O', 'O', 'B-HYP']]
        predict_list = [['B-TAR', 'O', 'O', 'O'], ['B-TAR', 'O', 'B-HYP', 'O']]
        result = todo.evaluate(golden_list, predict_list)

        print("answers shuld be this " + str(result))
        self.assertEqual(result, 0.571)

        golden_list = [['B-TAR', 'O', 'O', 'B-HYP'],
                       ['B-TAR', 'O', 'O', 'B-HYP']]
        predict_list = [['B-TAR', 'O', 'O', 'O'], ['B-TAR', 'O', 'B-HYP', 'O']]
        result = todo.evaluate(golden_list, predict_list)

        print("answers shuld be this " + str(result))
        self.assertEqual(result, 0.571)

        golden_list = [['B-TAR', 'O', 'O', 'B-HYP'],
                       ['B-TAR', 'O', 'O', 'B-HYP']]
        predict_list = [['B-TAR', 'O', 'O', 'O'], ['B-TAR', 'O', 'B-HYP', 'O']]
        result = todo.evaluate(golden_list, predict_list)

        print("answers shuld be this " + str(result))
        self.assertEqual(result, 0.571)

        golden_list = [['B-TAR', 'O', 'O', 'B-HYP'],
                       ['B-TAR', 'O', 'O', 'B-HYP']]
        predict_list = [['B-TAR', 'O', 'O', 'O'], ['B-TAR', 'O', 'B-HYP', 'O']]
        result = todo.evaluate(golden_list, predict_list)

        print("answers shuld be this " + str(result))
        self.assertEqual(result, 0.571)

        # Multi-token gold spans vs. fragmented predictions: expect 0.571.
        golden_list = [['B-TAR', 'I-TAR', 'I-TAR', 'B-HYP'],
                       ['B-TAR', 'O', 'O', 'B-HYP']]
        predict_list = [['B-TAR', 'O', 'B-HYP', 'O'],
                        ['B-TAR', 'O', 'B-HYP', 'O']]
        result = todo.evaluate(golden_list, predict_list)

        print("answers shuld be this " + str(result))
        self.assertEqual(result, 0.571)