class LevinTest(unittest.TestCase):
    """Functional test: compress the `vol` lexicon and learn from its examples.

    NOTE(review): a second class with the same name `LevinTest` is defined
    later in this file and shadows this one under unittest discovery —
    consider renaming one of the two so both suites actually run.
    """

    def setUp(self):
        self.lexicon = vol.lexicon.clone()
        self.learner = WordLearner(
            self.lexicon,
            Compressor(self.lexicon.ontology, **EC_kwargs),
            bootstrap=True)

    def test_functional(self):
        # We should get two derived categories (the assertion below checks
        # for exactly 2; the original comment claiming "five" was stale).
        self.learner.compress_lexicon()
        # assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual.
        self.assertEqual(len(self.learner.lexicon._derived_categories), 2)

        # Learn examples via distant supervision, printing the best parse
        # and its evaluation in the scene model for each sentence.
        for sentence, scene, answer in vol.examples:
            sentence = sentence.split()
            # Bug fix: was `self.ontology`, which setUp never defines
            # (AttributeError). Use the learner's ontology, matching the
            # sibling test class below.
            model = Model(scene, self.learner.ontology)
            weighted_results = self.learner.update_with_distant(
                sentence, model, answer)

            final_sem = weighted_results[0][0].label()[0].semantics()

            print(" ".join(sentence), len(weighted_results), final_sem)
            print("\t", model.evaluate(final_sem))
class LevinTest(unittest.TestCase):
    """Functional test: compress the Levin lexicon, then bootstrap on examples."""

    def setUp(self):
        lexicon = levin.lexicon.clone()
        compressor = Compressor(lexicon.ontology, **EC_kwargs)
        self.learner = WordLearner(lexicon, compressor, bootstrap=True)

    def test_functional(self):
        # Token groupings we expect compression to discover, keyed by Levin
        # verb class, plus the contact-PP group. (Currently only referenced
        # by the disabled assertion below.)
        expected_derived = {
            "9.1": {"set", "put"},
            "9.2": {"lay", "hang"},
            "9.4": {"drop", "hoist"},
            "9.5": {"pour", "spill"},
            "9.7": {"spray", "load"},
            "9.8": {"fill", "stuff"},
            "contact": {"on", "onto"},
        }

        self.learner.compress_lexicon()

        # NOTE(review): the check of derived categories against
        # `expected_derived` was disabled in the original:
        # self.assertEquals(set(frozenset(token._token for token in tokens)
        #                       for _, tokens in self.learner.lexicon._derived_categories.values()),
        #                   set(frozenset(xs) for xs in expected_derived.values()))

        # OK, now try to bootstrap with an example.
        for raw_sentence, scene, answer in levin.examples:
            tokens = raw_sentence.split()
            model = Model(scene, self.learner.ontology)
            parses = self.learner.update_with_distant(tokens, model, answer)

            best_sem = parses[0][0].label()[0].semantics()

            print(" ".join(tokens), len(parses), best_sem)
            print("\t", model.evaluate(best_sem))
def eval_model(compress=True, bootstrap=True, **learner_kwargs):
    """Build a WordLearner over the Levin lexicon and run the full evaluation.

    Pipeline: compress the lexicon to obtain derived categories, run initial
    weight updates on the first few examples, then evaluate bootstrap /
    one-shot learning of novel verbs in known and novel frames, printing an
    alternation table after each stage and saving the final table to
    ``args.out_dir`` as CSV and heatmap PNG.

    Args:
        compress: If False, no Compressor is built and the learner runs
            without compression.
        bootstrap: Forwarded to WordLearner and to each bootstrap-evaluation
            call.
        **learner_kwargs: Must include "weight_init" (popped here and used as
            the lexicon's default weight); the rest is forwarded verbatim to
            WordLearner.
    """
    L.info("Building model.")
    pprint(learner_kwargs)

    # "weight_init" is consumed here so the remaining kwargs can be passed
    # straight through to WordLearner.
    default_weight = learner_kwargs.pop("weight_init")
    lexicon = levin.make_lexicon(default_weight=default_weight)
    compressor = Compressor(lexicon.ontology, **EC_kwargs) if compress else None
    learner = WordLearner(lexicon, compressor, bootstrap=bootstrap,
                          **learner_kwargs)

    # Run compression.
    learner.compress_lexicon()

    # NOTE(review): assumes compression assigns the derived-category ids
    # "D0".."D4" in this fixed order — verify against the Compressor's
    # naming scheme.
    try:
        PP_CONTACT_CATEGORY, _ = learner.lexicon._derived_categories["D0"]
        PUT_CATEGORY, _ = learner.lexicon._derived_categories["D1"]
        DROP_CATEGORY, _ = learner.lexicon._derived_categories["D2"]
        POUR_CATEGORY, _ = learner.lexicon._derived_categories["D3"]
        FILL_CATEGORY, _ = learner.lexicon._derived_categories["D4"]

        # sanity check
        _assert(str(PP_CONTACT_CATEGORY.base) == "PP",
                "PP contact derived cat has correct base")
        _assert(str(PUT_CATEGORY.base) == "S",
                "Put verb derived cat has correct base")
        _assert(str(DROP_CATEGORY.base) == "S",
                "Drop verb derived cat has correct base")
        _assert(str(FILL_CATEGORY.base) == "S",
                "Fill verb derived cat has correct base")
        _assert(str(POUR_CATEGORY.base) == "S",
                "Pour verb derived cat has correct base")
    except KeyError:
        # Record the failure but continue with None categories so the rest
        # of the evaluation can still be exercised (best-effort).
        _assert(False, "Derived categories not available", False)
        PP_CONTACT_CATEGORY = None
        PUT_CATEGORY = None
        DROP_CATEGORY = None
        FILL_CATEGORY = None
        POUR_CATEGORY = None

    # Constructions in which different derived verb cats appear
    locative_construction = learner.lexicon.parse_category("S/N/PP")
    # Specialize the PP argument slot to the derived contact-PP category
    # (reaches into the category's private _arg attribute).
    locative_construction._arg = PP_CONTACT_CATEGORY
    constructions = [
        locative_construction,
        learner.lexicon.parse_category("S/N/PP"),
        learner.lexicon.parse_category("S/N"),
    ]

    ###########

    # Run initial weight updates.
    for example in examples[:3]:
        sentence, model, answer = prep_example(learner, example)
        learner.update_with_distant(sentence, model, answer)

    # Ensure that derived categories are present in the highest-scoring entries'
    # yields.
    expected = [("put", PUT_CATEGORY), ("fill", FILL_CATEGORY)]
    for token, expected_top_yield in expected:
        entries = learner.lexicon._entries[token]
        top_entry = max(entries, key=lambda e: e.weight())
        top_yield = get_yield(top_entry.categ())
        _assert(top_yield == expected_top_yield,
                "Top-scoring category for '%s' has yield %s: %s"
                % (token, expected_top_yield, top_yield))

    # Zero-shot predictions with bootstrapping in known frames
    def make_extra(target):
        # Build a post-hoc check asserting the top candidate category's
        # first argument is `target`.
        def extra_check(token, cand_cats, cand_joint):
            top_cat, top_expr = cand_joint.argmax()
            _assert(top_cat.arg() == target,
                    "Top cat for %s should have first arg of type %s: %s"
                    % (token, target, top_cat))
        return extra_check

    eval_bootstrap_example(learner, examples[3], "place", PUT_CATEGORY,
                           bootstrap=bootstrap,
                           extra=make_extra(PP_CONTACT_CATEGORY))
    eval_oneshot_example(learner, examples[3], "place", PUT_CATEGORY,
                         extra=make_extra(PP_CONTACT_CATEGORY))
    print(compute_alternations(learner, constructions))

    eval_bootstrap_example(learner, examples[4], "cover", FILL_CATEGORY,
                           bootstrap=bootstrap,
                           extra=make_extra(learner.lexicon.parse_category("PP")))
    eval_oneshot_example(learner, examples[4], "cover", FILL_CATEGORY,
                         extra=make_extra(learner.lexicon.parse_category("PP")))
    print(compute_alternations(learner, constructions))

    # Learn a novel frame for the fill class.
    # Skip 0-shot asserts -- don't expect to have correct guess for an entirely
    # new frame.
    eval_bootstrap_example(learner, examples[5], "fill", FILL_CATEGORY,
                           bootstrap=bootstrap, asserts=False)
    eval_oneshot_example(learner, examples[5], "fill", FILL_CATEGORY)
    eval_oneshot_example(learner, examples[6], "fill", FILL_CATEGORY)
    print(compute_alternations(learner, constructions))

    # Zero-shot predictions for the newly learned frame.
    eval_bootstrap_example(learner, examples[7], "stuff", FILL_CATEGORY,
                           bootstrap=bootstrap)
    eval_oneshot_example(learner, examples[7], "stuff", FILL_CATEGORY)
    print(compute_alternations(learner, constructions))

    eval_oneshot_example(learner, examples[8], "lower", DROP_CATEGORY)
    eval_oneshot_example(learner, examples[9], "raise", DROP_CATEGORY,
                         extra=make_extra(PP_CONTACT_CATEGORY))
    print(compute_alternations(learner, constructions))

    eval_oneshot_example(learner, examples[10], "drip", POUR_CATEGORY,
                         extra=make_extra(PP_CONTACT_CATEGORY))

    ###########

    # Produce alternation table.
    table = compute_alternations(learner, constructions)
    print(table)
    table.to_csv(args.out_dir / "alternations.csv")

    plt.clf()
    sns.heatmap(table)
    plt.savefig(args.out_dir / "alternations.png")