def test_decodememm(self):
    """Decode the first training sentence and compare against known labels.

    A MEMM tagger is trained on ``self.train_tups``; the tokens of the
    first training sentence are then scored and decoded, and the resulting
    (token, label) pairs must equal the expected B/I/O labelling.
    """
    expected = [
        ('Comparison', 'O'),
        ('in', 'O'),
        ('alkaline', 'B'),
        ('phosphatases', 'I'),
        ('in', 'O'),
        ('5', 'B'),
        ('-', 'I'),
        ('nucleotidase', 'I'),
        ('.', 'O'),
    ]

    tagger = hw5.NamedEntityRecognitionMEMM()
    tagger.train(self.train_tups)

    # Keep only the first element (the token) of every entry in sentence 0.
    first_sentence = self.train_tups[0]
    tokens = [entry[0] for entry in first_sentence]

    # Score the sentence, then follow the back pointers to get the labels.
    scores, back_pointers = tagger.generate_probabilities(tokens)
    decoded = hw5.decode(tokens, scores, back_pointers)

    self.assertEqual(expected, decoded)
def test_memmprobspointers(self):
    """Check the shape, value types and back pointers of the MEMM output.

    A MEMM tagger is trained on ``self.train_tups`` and asked to score the
    tokens of the first training sentence.  The test then verifies that:

    * ``probs`` and ``pointers`` each hold one row per token;
    * every row is a plain ``dict`` with exactly three entries;
    * the ``'O'`` probability is a float and the ``'O'`` back pointer is a
      label string (or ``None``, as in the first expected row);
    * the back pointers equal the known-good table below.
    """
    memm = hw5.NamedEntityRecognitionMEMM()
    memm.train(self.train_tups)
    test = [w[0] for w in self.train_tups[0]]
    probs, pointers = memm.generate_probabilities(test)

    # Smoke check only: the decoded labels themselves are not inspected in
    # this test, but decoding must not raise on these tables.
    hw5.decode(test, probs, pointers)

    # correct shape
    self.assertEqual(len(test), len(probs))
    self.assertEqual(len(test), len(pointers))

    for row in probs:
        self.assertEqual(3, len(row))
        self.assertIs(type(row), dict)
        # The original wrote ``type(row['O'] is float)``, which takes the
        # type of a bool and is therefore always truthy; check the value.
        self.assertIsInstance(row['O'], float)

    for row in pointers:
        self.assertEqual(3, len(row))
        self.assertIs(type(row), dict)
        # Same misplaced-parenthesis fix as above.  The first row of the
        # expected table holds None (no predecessor), so None is accepted
        # alongside label strings.
        self.assertIsInstance(row['O'], (str, type(None)))

    # ensure that back pointers are correct
    # you should be able to end up with these answers within
    # 100 or fewer iterations of SGD
    point_answers = [
        {'B': None, 'I': None, 'O': None},
        {'B': 'O', 'I': 'O', 'O': 'O'},
        {'B': 'O', 'I': 'O', 'O': 'O'},
        {'B': 'B', 'I': 'B', 'O': 'B'},
        {'B': 'I', 'I': 'I', 'O': 'I'},
        {'B': 'O', 'I': 'O', 'O': 'O'},
        {'B': 'B', 'I': 'B', 'O': 'B'},
        {'B': 'I', 'I': 'I', 'O': 'I'},
        {'B': 'I', 'I': 'I', 'O': 'I'},
    ]
    self.assertEqual(point_answers, pointers)