예제 #1
0
    def test_decodememm(self):
        """Decode the first training item with a trained MEMM and check the tags.

        The model is trained on ``self.train_tups``; the input sequence is
        element 0 of every entry in ``self.train_tups[0]`` (presumably the
        tokens of (token, label) pairs -- confirm against the fixture).
        """
        tagger = hw5.NamedEntityRecognitionMEMM()
        tagger.train(self.train_tups)

        tokens = [pair[0] for pair in self.train_tups[0]]
        probabilities, backpointers = tagger.generate_probabilities(tokens)

        # Follow the backpointers to recover the predicted tag sequence.
        predicted = hw5.decode(tokens, probabilities, backpointers)

        expected = [
            ('Comparison', 'O'),
            ('in', 'O'),
            ('alkaline', 'B'),
            ('phosphatases', 'I'),
            ('in', 'O'),
            ('5', 'B'),
            ('-', 'I'),
            ('nucleotidase', 'I'),
            ('.', 'O'),
        ]
        self.assertEqual(expected, predicted)
    def test_memmprobspointers(self):
        """Check the shape, value types and backpointers produced by the MEMM.

        The model is trained on ``self.train_tups`` and run on element 0 of
        every entry in ``self.train_tups[0]``. The test verifies that
        ``generate_probabilities`` returns one dict per input position with
        exactly three entries, that the ``'O'`` entries have the expected
        types, and that the backpointers match the known answers.
        """
        memm = hw5.NamedEntityRecognitionMEMM()
        memm.train(self.train_tups)
        test = [w[0] for w in self.train_tups[0]]
        probs, pointers = memm.generate_probabilities(test)

        # Smoke check only: decoding these tables must not raise.
        hw5.decode(test, probs, pointers)

        # One row per input position.
        self.assertEqual(len(test), len(probs))
        self.assertEqual(len(test), len(pointers))

        for row in probs:
            self.assertEqual(3, len(row))
            self.assertIs(type(row), dict)
            # The previous form, type(row['O'] is float), took the type of a
            # bool and was therefore always truthy; assert the value's type.
            self.assertIsInstance(row['O'], float)

        for row in pointers:
            self.assertEqual(3, len(row))
            self.assertIs(type(row), dict)
            # Backpointers are label strings, except in the first row where
            # the expected answers below hold None (no previous position).
            self.assertIsInstance(row['O'], (str, type(None)))

        # ensure that back pointers are correct
        # you should be able to end up with these answers within
        # 100 or fewer iterations of SGD
        point_answers = [
            {'B': None, 'I': None, 'O': None},
            {'B': 'O', 'I': 'O', 'O': 'O'},
            {'B': 'O', 'I': 'O', 'O': 'O'},
            {'B': 'B', 'I': 'B', 'O': 'B'},
            {'B': 'I', 'I': 'I', 'O': 'I'},
            {'B': 'O', 'I': 'O', 'O': 'O'},
            {'B': 'B', 'I': 'B', 'O': 'B'},
            {'B': 'I', 'I': 'I', 'O': 'I'},
            {'B': 'I', 'I': 'I', 'O': 'I'},
        ]
        self.assertEqual(point_answers, pointers)