def test(self):
    """Tests the coverage of GeoPar's lexicon.

    For each NLU-MR pair in the training data, for each lexical term
    in the MR, it checks whether there is a word or multiword (up to
    length 3) in the NLU that in the lexicon is associated with that
    lexical term.
    """
    missing_terms = []
    lex = lexicon.read_lexicon('lexicon.txt')
    for words, mr in data.geo880_train():
        printed = False
        # Every Geo880 MR is expected to have the shape answer(Var, Body).
        self.assertEqual(mr.functor_name, 'answer')
        self.assertEqual(len(mr.args), 2)
        self.assertIsInstance(mr.args[0], terms.Variable)
        # Materialize the candidate n-grams once: they are identical for
        # every lexical term of this example, and util.ngrams may return
        # one-shot generators.
        candidates = list(itertools.chain(
            util.ngrams(1, words),
            util.ngrams(2, words),
            util.ngrams(3, words)))
        for lexterm in lexicon.lexical_subterms(mr.args[1]):
            # any() short-circuits as soon as a matching word is found.
            word_found = any(
                term.equivalent(lexterm)
                for word in candidates
                for term in lex.meanings(word))
            if not word_found:
                if not printed:
                    print(str(words))
                    printed = True
                print('WARNING: no word found that means '
                      + lexterm.to_string())
                missing_terms.append(lexterm)
    self.assertEqual(missing_terms, [])
def _test_action_sequence(self, words, actions, target_mr):
    """Tests that the given action sequence is found.

    Tests that given the words and target_mr, the given actions are
    found and allowed by the oracle.

    Args:
        words: the NLU token sequence.
        actions: the expected action sequence, checked step by step.
        target_mr: the target meaning representation.
    """
    lex = augment.AugmentingLexicon(lexicon.read_lexicon('lexicon.txt'),
                                    target_mr)
    beam = oracle.initial_beam(words, target_mr.augment(), lex)
    item = beam.items[0]
    for action in actions:
        # Presumably registers the successor item for this action as a
        # side effect (return value is unused) — TODO confirm.
        item.successor(action, lex)
        beam = beam.next()
        # Keep only the successor produced by the expected action; an
        # empty result means the action was not applied or was rejected.
        beam.items = [s for s in beam.items if s.action == action]
        self.assertTrue(
            beam.items,
            '{} not applied to {}, or rejected'.format(action, item))
        self.assertEqual(len(beam.items), 1)
        item = beam.items[0]
    self.assertTrue(item.finished)
def test_augment1(self):
    """Checks augmented meanings for selected words of a complex MR."""
    lex = lexicon.read_lexicon('lexicon.txt')
    t = terms.from_string(
        'answer(A,longest(A,(river(A),traverse(A,B),state(B),next_to(B,C),most(C,D,(state(C),next_to(C,D),state(D))))))'
    )
    alex = augment.AugmentingLexicon(lex, t)
    # Each word is paired with the exact list of augmented meanings the
    # lexicon is expected to produce for it.
    cases = (
        (('longest', ), ['longest_1(A,B)']),
        (('river', ), ['river_1(A)']),
        (('passes', ), ['traverse_1(A,B)']),
        (('states', ), ['state_1(A)', 'state_2(A)', 'state_3(A)']),
        (('border', ), ['next_to_1(A,B)', 'next_to_2(A,B)']),
        (('state', ), ['state_1(A)', 'state_2(A)', 'state_3(A)']),
        (('borders', ), ['next_to_1(A,B)', 'next_to_2(A,B)']),
        (('most', ), ['most_1(A,B,C)']),
    )
    for word, expected in cases:
        meanings = [m.to_string() for m in alex.meanings(word)]
        self.assertEqual(meanings, expected)
def action_sequence(words, target_mr):
    """Looks for action sequences that lead from words to target_mr.

    Returns the first that it finds.

    Args:
        words: the NLU token sequence.
        target_mr: the target meaning representation.

    Raises:
        ValueError: if the beam search exhausts without producing a
            finished item.
    """
    lex = augment.AugmentingLexicon(lexicon.read_lexicon('lexicon.txt'),
                                    target_mr)
    beam = initial_beam(words, target_mr.augment(), lex)
    while beam.items:
        beam = beam.next()
        finished = [i for i in beam.items if i.finished]
        if finished:
            return finished[0].action_sequence()
    raise ValueError('no action sequence found')
def test_augment2(self):
    """Checks augmented meanings for selected words of a second MR."""
    lex = lexicon.read_lexicon('lexicon.txt')
    t = terms.from_string(
        'answer(A,lowest(B,(state(A),traverse(C,A),const(C,riverid(mississippi)),loc(B,A),place(B))))'
    )
    alex = augment.AugmentingLexicon(lex, t)
    # Each word is paired with the exact list of augmented meanings the
    # lexicon is expected to produce for it.
    cases = (
        (('states', ), ['state_1(A)']),
        (('washed', ), ['traverse_1(A,B)']),
        (('mississippi', ), ['const_1(A,riverid(mississippi))']),
        (('has', ), ['loc_1(A,B)']),
        (('lowest', ), ['lowest_1(A,B)']),
        (('point', ), ['place_1(A)']),
    )
    for word, expected in cases:
        meanings = [m.to_string() for m in alex.meanings(word)]
        self.assertEqual(meanings, expected)
#!/usr/bin/env python3
"""Train a semantic parsing model on the Geo880 data."""
import data
import lexicon
import parser
import pickle
import random

# Seeded at import time so all downstream randomness is reproducible.
random.seed(1336)


def main():
    """Trains a parser on the Geo880 split and pickles the model."""
    lex = lexicon.read_lexicon('lexicon.txt')
    train_oracles, val_examples = data.geo880_train_val()
    model = parser.train(train_oracles, val_examples, lex, max_epochs=20,
                         patience=3)
    with open('model.pickle', 'wb') as f:
        pickle.dump(model, f)


if __name__ == '__main__':
    main()