def test_parse_typechecking_complex():
  """
  An ontology-linked chart parser should (by default) refuse to yield
  sentence-level logical forms that fail typechecking.
  """
  types = TypeSystem(["object", "boolean"])
  functions = [
      types.new_function("unique", (("object", "boolean"), "object"),
                         lambda objs: [x for x, v in objs.items() if v][0]),
      types.new_function("big", ("object", "boolean"),
                         lambda o: o['size'] == "big"),
      types.new_function("box", ("object", "boolean"),
                         lambda o: o["shape"] == "box"),
      types.new_function("and_", ("boolean", "boolean", "boolean"),
                         lambda a, b: a and b),
      types.new_function("apply", (("object", "boolean"), "object", "boolean"),
                         lambda f, o: f(o)),
  ]
  # No constants needed for this fixture.
  ontology = Ontology(types, functions, [])

  lex = Lexicon.fromstring(r"""
  :- S, N

  the => S/N {unique}
  the => N/N {unique}
  big => N/N {\f x.and_(apply(f,x),big(x))}
  box => N {box}
  """, ontology=ontology, include_semantics=True)

  # "the the big box" can only parse by stacking `unique` applications,
  # which does not typecheck -- so no parses should be produced.
  chart_parser = WeightedCCGChartParser(lex, ruleset=ApplicationRuleSet)
  results = chart_parser.parse("the the big box".split())
  eq_(len(results), 0,
      "Should disallow non-typechecking parses for 'the the big box'")
def test_get_derivation_tree():
  # Verify that `get_clean_parse_tree` renders a readable derivation tree
  # for a simple transitive-verb parse.
  lex = Lexicon.fromstring(r"""
  :- S, N

  John => N
  saw => S\N/N
  Mary => N
  """)

  parser = WeightedCCGChartParser(lex, ruleset=DefaultRuleSet)
  top_parse = parser.parse("Mary saw John".split())[0]

  from io import StringIO
  stream = StringIO()
  # Render the cleaned tree into an in-memory buffer rather than stdout.
  get_clean_parse_tree(top_parse).pretty_print(stream=stream)

  # Compare line-by-line, stripping leading/trailing whitespace so that
  # indentation differences in the expected literal do not matter.
  # NOTE(review): internal spacing of the expected tree below was
  # reconstructed -- confirm against actual `pretty_print` output.
  eq_([line.strip() for line in stream.getvalue().strip().split("\n")],
      [line.strip() for line in r"""
        S
 _______|_______
|             (S\N)
|        _______|____
N    ((S\N)/N)       N
|        |           |
Mary    saw         John""".strip().split("\n")])
def make_initial_lexicons(self, ontology, groundtruth=False):
  """
  Build the seed `Lexicon` for this environment.

  Args:
    ontology: `Ontology` used to typecheck the lexical semantics.
    groundtruth: if True, emit the full gold lexicon with one entry per
      attribute concept; otherwise emit only dummy placeholder entries.

  Returns:
    A `Lexicon` parsed from the assembled entry string.
  """
  entries = "\n:- S, N\n"

  if groundtruth:
    entries += ("\nany => S/N {\\x.exist_(x)}\n"
                "object => N {scene}\n")

    # Shape concepts become bare nouns (pre-applied to the scene);
    # every other attribute becomes an adjective-style modifier.
    shape_template = r"%% => N {(\x.filter(x, %%))(scene)}"
    modifier_template = r"%% => N/N {\x.filter(x, %%)}"
    for concept in self.all_attribute_concepts:
      is_shape = self.concept2attribute[concept] == 'shape'
      template = shape_template if is_shape else modifier_template
      entries += template.replace('%%', concept) + '\n'
  else:
    entries += ("\n_dummy_verb => S/N {\\x.exist_(x)}\n"
                "_dummy_adj => N/N {\\x.filter(x, concept_000001)}\n"
                "_dummy_noun => N {scene}\n")

  return Lexicon.fromstring(entries, ontology, include_semantics=True)
def test_parse_oblique_raised():
  # Smoke test: parse a verb whose category takes a type-raised PP argument.
  lex = Lexicon.fromstring(r"""
  :- S, NP, PP

  place => S/NP/(PP/NP)/NP
  it => NP
  on => PP/NP
  the_table => NP
  """)

  chart_parser = WeightedCCGChartParser(lex, DefaultRuleSet)
  results = chart_parser.parse("place it on the_table".split())
  printCCGDerivation(results[0])
def test_parse_oblique():
  """
  Test parsing a verb with an oblique PP -- this shouldn't require type
  raising?
  """
  lex = Lexicon.fromstring(r"""
  :- S, NP, PP

  place => S/PP/NP
  it => NP
  on => PP/NP
  the_table => NP
  """)

  chart_parser = WeightedCCGChartParser(lex, ApplicationRuleSet)
  results = chart_parser.parse("place it on the_table".split())
  printCCGDerivation(results[0])
def _make_lexicon_with_derived_category():
  """
  Fixture: build a lexicon, then induce a derived category covering the
  `foo` and `bar` entries.

  Returns a tuple `(old_lex, lex, involved_tokens, derived_categ)` where
  `old_lex` is a clone taken before the derived category was added.
  """
  lex = Lexicon.fromstring(r"""
  :- S, NP

  the => S/NP {\x.unique(x)}
  foo => NP {\x.foo(x)}
  bar => NP {\x.bar(x)}
  baz => NP {\x.baz(x)}
  """, include_semantics=True)

  # Snapshot the lexicon before mutation so tests can diff against it.
  old_lex = lex.clone()

  involved_tokens = [lex._entries[word][0] for word in ("foo", "bar")]
  derived_categ = lex.add_derived_category(involved_tokens)

  return old_lex, lex, involved_tokens, derived_categ
def test_parse_typechecking():
  """
  An ontology-linked chart parser should (by default) refuse to yield
  sentence-level logical forms that fail typechecking.
  """
  types = TypeSystem(["agent", "action", "object"])
  functions = [
      types.new_function("see", ("agent", "agent", "action"),
                         lambda a, b: ("see", a, b)),
      types.new_function("request", ("agent", "object", "action"),
                         lambda a, b: ("request", a, b)),
  ]
  constants = [
      types.new_constant("john", "agent"),
      types.new_constant("mary", "agent"),
      types.new_constant("help", "object"),
  ]
  ontology = Ontology(types, functions, constants)

  # `saw` is deliberately ambiguous between `see` (typechecks on two
  # agents) and `request` (requires an object argument, so it should not).
  lex = Lexicon.fromstring(r"""
  :- S, N

  John => N {john}
  saw => S\N/N {see}
  saw => S\N/N {request}
  requested => S\N/N {request}
  Mary => N {mary}
  """, ontology=ontology, include_semantics=True)

  chart_parser = WeightedCCGChartParser(lex, ruleset=ApplicationRuleSet)
  parse_lfs = [str(parse.label()[0].semantics())
               for parse in chart_parser.parse("Mary saw John".split())]

  from pprint import pprint
  pprint(parse_lfs)

  ok_(r"see(john,mary)" in parse_lfs,
      "Parses of 'Mary saw John' should include typechecking see(john,mary)")
  ok_(r"request(john,mary)" not in parse_lfs,
      "Parses of 'Mary saw John' should not include non-typechecking request(john,mary)")
def _make_mock_lexicon():
  """
  Build a small ontology-backed lexicon fixture exercising forward/backward
  modifiers, a two-place function, and entries that intentionally do not
  typecheck.
  """
  types = TypeSystem(["obj", "boolean"])
  functions = [
      types.new_function("unique", (("obj", "boolean"), "obj"),
                         lambda x: x[0]),
      types.new_function("twoplace", ("boolean", ("obj", "boolean"), "obj"),
                         lambda a, b: b[0]),
      types.new_function("dog", ("obj", "boolean"), lambda x: x["dog"]),
      types.new_function("not_", ("boolean", "boolean"), lambda a: not a),
      types.new_function("enlarge", ("obj", "obj"), lambda x: x),
  ]
  constants = [types.new_constant("true", "boolean")]
  ontology = Ontology(types, functions, constants)

  lex = Lexicon.fromstring(r"""
  :- S, N

  the => N/N {\x.unique(x)}
  thee => N\N {\x.unique(x)}
  twoplace => N/N {\x.twoplace(true,x)}
  twoplacee => N\N {\x.twoplace(true,x)}
  abc => N/N {\a.not_(a)}
  def => N/N {\b.not_(b)}
  qrs => N/N {\a.enlarge(a)}
  tuv => N/N {\b.enlarge(b)}
  twoarg => N/N/N {\a b.twoplace(a,b)}
  doggish => N/N {\x.dog(x)}
  dog => N {dog}

  # NB, won't typecheck
  cat => N {unique}
  """, ontology=ontology, include_semantics=True)

  # TODO hack: this needs to be integrated into lexicon construction..
  # For entries whose bound variable `x` has no declared type, infer the
  # type from the ontology and write it back onto the lambda's variable so
  # that a subsequent typecheck succeeds.
  for w in ["the", "twoplace", "thee", "twoplacee"]:
    e = lex._entries[w][0]
    sem = e.semantics()
    tx = lex.ontology.infer_type(sem, "x")
    sem.variable.type = tx
    lex.ontology.typecheck(sem)

  return lex
def test_parse_with_derived_root_category():
  """
  Ensure that we can parse with a derived category whose base is the root
  category.
  """
  lex = Lexicon.fromstring(r"""
  :- S, N

  the => S/N {\x.unique(x)}
  foo => N {\x.foo(x)}
  """, include_semantics=True)

  # Derive a new category from the lone `the` entry (rooted at S/N) and
  # propagate it through the lexicon.
  derived_categ = lex.add_derived_category([lex._entries["the"][0]])
  lex.propagate_derived_category(derived_categ)
  derived_categ_obj, _ = lex._derived_categories[derived_categ]

  parses = WeightedCCGChartParser(lex).parse("the foo".split())
  observed_roots = {str(parse.label()[0].categ()) for parse in parses}
  eq_(observed_roots, {"S", str(derived_categ_obj)})
lexicon = Lexicon.fromstring(r""" :- S, N the => S/N {\x.unique(x)} the => N/N {\x.unique(x)} the => S/N {\x.x} the => N/N {\x.x} object => N {scene} objects => N {scene} metallic => N/N {\x.filter(material,x,metal)} shiny => N/N {\x.filter(material,x,metal)} big => N/N {\x.filter(size,x,large)} purple => N/N {\x.filter(color,x,purple)} material => N {material} shape => N {shape} color => N {color} size => N {size} same => N/N/N {\a o.same(a,o)} as => N/N {\x.x} with => S\N/N {\p x.filter_(x,p)} of => N\N/N {\o a.query(a,o)} that => N\N/S {\p x.filter_(x,p)} how_many => S/S/N {\x p.count(x,p)} what_number_of => S/S/N {\x p.count(x,p)} what => S/S {\x.x} is => S/N {\x.x} is => S/S {\x.x} are => S/N {\x.x} are => S/S {\x.x} # TODO this is wrong -- actually a very complicated and interesting operator.. # Need to rule out the particular object given in the complement # in "what number of other objects are the same size as the purple shiny object" other => N/N {\x.x} """, ontology, include_semantics=True)
initial_puddleworld_lex = Lexicon.fromstring(r""" :- S:N reach => S/N {\x.move(x)} reach => S/N {\x.move(unique(x))} below => S/N {\x.move(unique(\y.relate(y,x,down)))} above => S/N {\x.move(unique(\y.relate(y,x,up)))} , => S\S/S {\a b.a} , => S\S/S {\a b.b} of => N\N/N {\x d y.relate(x,y,d)} of => N\N/N {\x d y.relate(unique(x),d,y)} to => N\N/N {\x y.x} one => S/N/N {\d x.move(unique(\y.relate(y,x,d)))} one => S/N/N {\d x.move(unique(\y.relate_n(y,x,d,1)))} right => N/N {\f x.and_(apply(f, x),in_half(x,right))} most => N\N/N {\x d.max_in_dir(x, d)} the => N/N {\x.unique(x)} left => N {left} below => N {down} above => N {up} right => N {right} horse => N {\x.horse(x)} rock => N {\x.rock(x)} rock => N {unique(\x.rock(x))} cell => N {\x.true} spade => N {\x.spade(x)} spade => N {unique(\x.spade(x))} heart => N {\x.heart(x)} heart => N {unique(\x.heart(x))} circle => N {\x.circle(x)} # triangle => N {\x.triangle(x)} """, ec_ontology, include_semantics=True)
    # Shape constants for the three object kinds, plus a boolean literal.
    # NOTE(review): this fragment continues a `constants = [` list opened
    # above this view.
    types.new_constant("sphere", "shape"),
    types.new_constant("cube", "shape"),
    types.new_constant("cylinder", "shape"),
    types.new_constant("true", "boolean"),
]

ontology = Ontology(types, functions, constants)

#######
# Lexicon: defines an initial set of word -> (syntax, meaning) mappings.
# Weights are initialized uniformly by default.

initial_lex = Lexicon.fromstring(r"""
  :- S, N

  any => S/N {\x.object_exists(x)}
  _dummy_noun => N {\x.true}
  """, ontology, include_semantics=True)

#######
# VQA Dataset: defines the dataset.

class VQADataset(object):
  """
  A dummy dataset containing tuples of (scene, question, answer).

  Each scene contains only one object with one of the three shapes
  ('sphere', 'cube' and 'cylinder'). There are three types of questions:
  "any sphere", "any cube", "any cylinder". The answer is True if the shape
  of interest in the question matches the shape of the visual object.
  """
# Shape constants for the three object kinds in this toy domain.
constants = [
    types.new_constant("sphere", "shape"),
    types.new_constant("cube", "shape"),
    types.new_constant("cylinder", "shape"),
]

ontology = Ontology(types, functions, constants)

#######
# Lexicon: defines an initial set of word -> (syntax, meaning) mappings.
# Weights are initialized uniformly by default.

lex = Lexicon.fromstring(r"""
  :- N

  the => N/N {\x.unique(x)}
  ball => N {\x.has_shape(x,sphere)}
  """, ontology, include_semantics=True)

#######
# Execute on a scene.

# A toy scene: each object is (shape, size, material).
# NOTE(review): assumes `Object` is defined/imported earlier in this file.
scene = {
    "objects": [
        Object("sphere", "big", "rubber"),
        Object("cube", "small", "metal"),
        Object("cylinder", "small", "rubber"),
    ]
}