Example #1
0
    def test(self):
        """Tests the coverage of GeoPar's lexicon.

        For each NLU-MR pair in the training data, for each lexical term in the
        MR, it checks whether there is a word or multiword (up to length 3) in
        the NLU that in the lexicon is associated with that lexical term.
        Collects all uncovered lexical terms and asserts there are none.
        """
        missing_terms = []
        lex = lexicon.read_lexicon('lexicon.txt')
        for words, mr in data.geo880_train():
            printed = False
            # Every training MR is expected to have the shape
            # answer(Variable, Body).
            self.assertEqual(mr.functor_name, 'answer')
            self.assertEqual(len(mr.args), 2)
            self.assertIsInstance(mr.args[0], terms.Variable)
            # The candidate n-grams depend only on the words, so build them
            # once per example instead of once per lexical term.
            candidates = list(itertools.chain(
                util.ngrams(1, words),
                util.ngrams(2, words),
                util.ngrams(3, words)))
            for lexterm in lexicon.lexical_subterms(mr.args[1]):
                # any() short-circuits on the first match, unlike the old
                # nested loops whose inner break did not exit the word loop.
                word_found = any(
                    term.equivalent(lexterm)
                    for word in candidates
                    for term in lex.meanings(word))
                if not word_found:
                    if not printed:
                        print(str(words))
                        printed = True
                    print('WARNING: no word found that means ' +
                          lexterm.to_string())
                    missing_terms.append(lexterm)
        self.assertEqual(missing_terms, [])
Example #2
0
    def _test_action_sequence(self, words, actions, target_mr):
        """Checks that the oracle finds and permits the given action sequence.

        Starting from the initial beam for words and target_mr, applies each
        action in turn, requiring that exactly one beam item carrying that
        action survives each step, and that the final item is finished.
        """
        aug_lex = augment.AugmentingLexicon(
            lexicon.read_lexicon('lexicon.txt'), target_mr)
        beam = oracle.initial_beam(words, target_mr.augment(), aug_lex)
        current = beam.items[0]
        for action in actions:
            current.successor(action, aug_lex)
            beam = beam.next()
            # Keep only successors produced by the expected action.
            beam.items = [candidate for candidate in beam.items
                          if candidate.action == action]
            self.assertTrue(
                beam.items,
                '{} not applied to {}, or rejected'.format(action, current))
            self.assertEqual(len(beam.items), 1)
            current = beam.items[0]
        self.assertTrue(current.finished)
Example #3
0
 def test_augment1(self):
     """Checks augmented lexicon meanings for a 'longest river' query MR."""
     lex = lexicon.read_lexicon('lexicon.txt')
     t = terms.from_string(
         'answer(A,longest(A,(river(A),traverse(A,B),state(B),next_to(B,C),most(C,D,(state(C),next_to(C,D),state(D))))))'
     )
     alex = augment.AugmentingLexicon(lex, t)
     # (word tuple, expected augmented meanings) pairs, checked in order.
     cases = [
         (('longest', ), ['longest_1(A,B)']),
         (('river', ), ['river_1(A)']),
         (('passes', ), ['traverse_1(A,B)']),
         (('states', ), ['state_1(A)', 'state_2(A)', 'state_3(A)']),
         (('border', ), ['next_to_1(A,B)', 'next_to_2(A,B)']),
         (('state', ), ['state_1(A)', 'state_2(A)', 'state_3(A)']),
         (('borders', ), ['next_to_1(A,B)', 'next_to_2(A,B)']),
         (('most', ), ['most_1(A,B,C)']),
     ]
     for word, expected in cases:
         meanings = [m.to_string() for m in alex.meanings(word)]
         self.assertEqual(meanings, expected)
Example #4
0
def action_sequence(words, target_mr):
    """Searches for an action sequence leading from words to target_mr.

    Returns the first one found; raises ValueError when the beam empties
    without reaching a finished item.
    """
    aug_lex = augment.AugmentingLexicon(
        lexicon.read_lexicon('lexicon.txt'), target_mr)
    beam = initial_beam(words, target_mr.augment(), aug_lex)
    while beam.items:
        beam = beam.next()
        # Return as soon as any item in the new beam is finished.
        for candidate in beam.items:
            if candidate.finished:
                return candidate.action_sequence()
    raise ValueError('no action sequence found')
Example #5
0
 def test_augment2(self):
     """Checks augmented lexicon meanings for a 'lowest point' query MR."""
     lex = lexicon.read_lexicon('lexicon.txt')
     t = terms.from_string(
         'answer(A,lowest(B,(state(A),traverse(C,A),const(C,riverid(mississippi)),loc(B,A),place(B))))'
     )
     alex = augment.AugmentingLexicon(lex, t)
     # (word tuple, expected augmented meanings) pairs, checked in order.
     cases = [
         (('states', ), ['state_1(A)']),
         (('washed', ), ['traverse_1(A,B)']),
         (('mississippi', ), ['const_1(A,riverid(mississippi))']),
         (('has', ), ['loc_1(A,B)']),
         (('lowest', ), ['lowest_1(A,B)']),
         (('point', ), ['place_1(A)']),
     ]
     for word, expected in cases:
         meanings = [m.to_string() for m in alex.meanings(word)]
         self.assertEqual(meanings, expected)
Example #6
0
#!/usr/bin/env python3
"""Train a semantic parsing model on the Geo880 data.

Reads the lexicon from lexicon.txt, loads the Geo880 train/validation
split, trains a parser model, and pickles the result to model.pickle.
"""

import data
import lexicon
import parser  # NOTE(review): project-local module; shadows the stdlib parser module
import pickle
import random

random.seed(1336)  # for reproducibility

if __name__ == '__main__':
    lex = lexicon.read_lexicon('lexicon.txt')
    train_oracles, val_examples = data.geo880_train_val()
    # patience=3 presumably means early stopping after 3 epochs without
    # validation improvement, capped at max_epochs — confirm in parser.train.
    model = parser.train(train_oracles,
                         val_examples,
                         lex,
                         max_epochs=20,
                         patience=3)
    # Persist the trained model for later evaluation/use.
    with open('model.pickle', 'wb') as f:
        pickle.dump(model, f)