Ejemplo n.º 1
0
def test_parse_typechecking_complex():
  """
  Chart parser linked to an ontology should (by default) not produce
  sentence-level LFs which fail typechecks.
  """
  type_sys = TypeSystem(["object", "boolean"])
  # An <object, boolean> predicate type, shared by several functions below.
  obj_pred_t = ("object", "boolean")
  fns = [
    type_sys.new_function("unique", (obj_pred_t, "object"),
                          lambda objs: [o for o, keep in objs.items() if keep][0]),
    type_sys.new_function("big", obj_pred_t, lambda obj: obj['size'] == "big"),
    type_sys.new_function("box", obj_pred_t, lambda obj: obj["shape"] == "box"),
    type_sys.new_function("and_", ("boolean", "boolean", "boolean"),
                          lambda p, q: p and q),
    type_sys.new_function("apply", (obj_pred_t, "object", "boolean"),
                          lambda fn, obj: fn(obj)),
  ]
  # No constants needed for this test.
  ontology = Ontology(type_sys, fns, [])

  lex = Lexicon.fromstring(r"""
  :- S, N

  the => S/N {unique}
  the => N/N {unique}
  big => N/N {\f x.and_(apply(f,x),big(x))}
  box => N {box}
  """, ontology=ontology, include_semantics=True)

  parser = WeightedCCGChartParser(lex, ruleset=ApplicationRuleSet)

  # Every candidate derivation of this sentence fails the typecheck, so the
  # parser should return nothing at all.
  parses = parser.parse("the the big box".split())
  eq_(len(parses), 0, "Should disallow non-typechecking parses for 'the the big box'")
Ejemplo n.º 2
0
def test_get_derivation_tree():
  """Pretty-printed derivation tree should match the expected rendering."""
  lex = Lexicon.fromstring(r"""
  :- S, N

  John => N
  saw => S\N/N
  Mary => N
  """)

  parser = WeightedCCGChartParser(lex, ruleset=DefaultRuleSet)
  best_parse = parser.parse("Mary saw John".split())[0]

  # Render the cleaned parse tree into an in-memory buffer.
  from io import StringIO
  buf = StringIO()
  get_clean_parse_tree(best_parse).pretty_print(stream=buf)

  expected = r"""
         S
  _______|_______
 |             (S\N)
 |        _______|____
 N   ((S\N)/N)        N
 |       |            |
Mary    saw          John""".strip().split("\n")
  rendered = buf.getvalue().strip().split("\n")
  # Compare line-by-line, ignoring trailing/leading whitespace per line.
  eq_([ln.strip() for ln in rendered], [ln.strip() for ln in expected])
Ejemplo n.º 3
0
    def make_initial_lexicons(self, ontology, groundtruth=False):
        """Build an initial Lexicon, either gold-standard or dummy entries."""
        lex_str = r"""
        :- S, N
        """

        if not groundtruth:
            # Placeholder entries: learning will replace these with real words.
            lex_str += r"""
            _dummy_verb => S/N {\x.exist_(x)}
            _dummy_adj => N/N {\x.filter(x, concept_000001)}
            _dummy_noun => N {scene}
            """
        else:
            lex_str += r"""
            any => S/N {\x.exist_(x)}
            object => N {scene}
            """

            # Shape concepts act as bare nouns (already applied to the scene);
            # all other attribute concepts act as noun modifiers.
            shape_template = r"%% => N {(\x.filter(x, %%))(scene)}"
            modifier_template = r"%% => N/N {\x.filter(x, %%)}"
            for concept in self.all_attribute_concepts:
                template = (shape_template
                            if self.concept2attribute[concept] == 'shape'
                            else modifier_template)
                lex_str += template.replace('%%', concept) + '\n'

        return Lexicon.fromstring(lex_str, ontology, include_semantics=True)
Ejemplo n.º 4
0
def test_parse_oblique_raised():
  """Smoke-test parsing a verb with a type-raised oblique PP argument."""
  lex = Lexicon.fromstring(r"""
  :- S, NP, PP

  place => S/NP/(PP/NP)/NP
  it => NP
  on => PP/NP
  the_table => NP
  """)

  chart_parser = WeightedCCGChartParser(lex, DefaultRuleSet)
  derivations = chart_parser.parse("place it on the_table".split())
  # At least one derivation should exist; print it for inspection.
  printCCGDerivation(derivations[0])
Ejemplo n.º 5
0
def test_parse_oblique():
  """
  Test parsing a verb with an oblique PP -- this shouldn't require type raising?
  """
  lex = Lexicon.fromstring(r"""
  :- S, PP, NP

  place => S/PP/NP
  it => NP
  on => PP/NP
  the_table => NP
  """)

  chart_parser = WeightedCCGChartParser(lex, ApplicationRuleSet)
  derivations = chart_parser.parse("place it on the_table".split())
  # Application rules alone should suffice; print the first derivation.
  printCCGDerivation(derivations[0])
Ejemplo n.º 6
0
def _make_lexicon_with_derived_category():
  """Fixture: a lexicon plus a derived category over its `foo`/`bar` entries."""
  lex = Lexicon.fromstring(r"""
  :- S, NP

  the => S/NP {\x.unique(x)}

  foo => NP {\x.foo(x)}
  bar => NP {\x.bar(x)}
  baz => NP {\x.baz(x)}
  """, include_semantics=True)
  # Snapshot the lexicon before mutation so tests can compare against it.
  snapshot = lex.clone()

  # Induce a derived category involving `foo` and `bar`.
  tokens = [lex._entries[word][0] for word in ("foo", "bar")]
  categ = lex.add_derived_category(tokens)

  return snapshot, lex, tokens, categ
Ejemplo n.º 7
0
def test_parse_typechecking():
    """
    Chart parser linked to an ontology should (by default) not produce
    sentence-level LFs which fail typechecks.
    """
    types = TypeSystem(["agent", "action", "object"])
    functions = [
        # `see` takes two agents; `request` takes an agent and an object.
        # Only the `see` reading can typecheck for "Mary saw John" below.
        types.new_function("see", ("agent", "agent", "action"), lambda a, b:
                           ("see", a, b)),
        types.new_function("request", ("agent", "object", "action"),
                           lambda a, b: ("request", a, b)),
    ]
    constants = [
        types.new_constant("john", "agent"),
        types.new_constant("mary", "agent"),
        types.new_constant("help", "object")
    ]
    ontology = Ontology(types, functions, constants)

    # `saw` is deliberately ambiguous between `see` and `request`; the parser
    # must discard the reading whose logical form fails typechecking.
    lex = Lexicon.fromstring(r"""
  :- S, N

  John => N {john}
  saw => S\N/N {see}
  saw => S\N/N {request}
  requested => S\N/N {request}
  Mary => N {mary}
  """,
                             ontology=ontology,
                             include_semantics=True)

    parser = WeightedCCGChartParser(lex, ruleset=ApplicationRuleSet)

    parses = parser.parse("Mary saw John".split())
    parse_lfs = [str(parse.label()[0].semantics()) for parse in parses]

    ok_(
        r"see(john,mary)" in parse_lfs,
        "Parses of 'Mary saw John' should include typechecking see(john,mary)")
    ok_(
        r"request(john,mary)" not in parse_lfs,
        "Parses of 'Mary saw John' should not include non-typechecking request(john,mary)"
    )
Ejemplo n.º 8
0
def _make_mock_lexicon():
    """Fixture: a typed ontology plus a lexicon exercising many categories."""
    types = TypeSystem(["obj", "boolean"])
    # An <obj, boolean> predicate type, reused across several functions.
    obj_pred_t = ("obj", "boolean")
    functions = [
        types.new_function("unique", (obj_pred_t, "obj"), lambda s: s[0]),
        types.new_function("twoplace", ("boolean", obj_pred_t, "obj"),
                           lambda flag, s: s[0]),
        types.new_function("dog", obj_pred_t, lambda o: o["dog"]),
        types.new_function("not_", ("boolean", "boolean"), lambda p: not p),
        types.new_function("enlarge", ("obj", "obj"), lambda o: o),
    ]
    ontology = Ontology(types, functions,
                        [types.new_constant("true", "boolean")])

    lex = Lexicon.fromstring(r"""
    :- S, N

    the => N/N {\x.unique(x)}
    thee => N\N {\x.unique(x)}
    twoplace => N/N {\x.twoplace(true,x)}
    twoplacee => N\N {\x.twoplace(true,x)}
    abc => N/N {\a.not_(a)}
    def => N/N {\b.not_(b)}
    qrs => N/N {\a.enlarge(a)}
    tuv => N/N {\b.enlarge(b)}
    twoarg => N/N/N {\a b.twoplace(a,b)}
    doggish => N/N {\x.dog(x)}
    dog => N {dog}

    # NB, won't typecheck
    cat => N {unique}
    """, ontology=ontology, include_semantics=True)

    # TODO hack: this needs to be integrated into lexicon construction..
    # Manually infer and install the bound-variable type, then re-typecheck.
    for word in ("the", "twoplace", "thee", "twoplacee"):
        entry_sem = lex._entries[word][0].semantics()
        entry_sem.variable.type = lex.ontology.infer_type(entry_sem, "x")
        lex.ontology.typecheck(entry_sem)

    return lex
Ejemplo n.º 9
0
def test_parse_with_derived_root_category():
  """
  Ensure that we can parse with a derived category whose base is the root
  category.
  """
  lex = Lexicon.fromstring(r"""
      :- S, N
      the => S/N {\x.unique(x)}
      foo => N {\x.foo(x)}
      """, include_semantics=True)

  # Derive a category from the single `the` entry and push it through the
  # lexicon so it becomes available during parsing.
  categ_name = lex.add_derived_category([lex._entries["the"][0]])
  lex.propagate_derived_category(categ_name)
  categ_obj = lex._derived_categories[categ_name][0]

  parses = WeightedCCGChartParser(lex).parse("the foo".split())
  observed_categs = {str(parse.label()[0].categ()) for parse in parses}
  # Both the base root category and its derived variant should appear.
  eq_(observed_categs, {"S", str(categ_obj)})
Ejemplo n.º 10
0
# CLEVR-style question lexicon: maps words to CCG categories with semantic
# forms over `ontology` (filter/query/count/same/unique operators).
# Several function words (`as`, `what`, `is`, `are`) get identity semantics.
lexicon = Lexicon.fromstring(r"""
  :- S, N

  the => S/N {\x.unique(x)}
  the => N/N {\x.unique(x)}
  the => S/N {\x.x}
  the => N/N {\x.x}

  object => N {scene}
  objects => N {scene}

  metallic => N/N {\x.filter(material,x,metal)}
  shiny => N/N {\x.filter(material,x,metal)}

  big => N/N {\x.filter(size,x,large)}

  purple => N/N {\x.filter(color,x,purple)}

  material => N {material}
  shape => N {shape}
  color => N {color}
  size => N {size}

  same => N/N/N {\a o.same(a,o)}
  as => N/N {\x.x}

  with => S\N/N {\p x.filter_(x,p)}
  of => N\N/N {\o a.query(a,o)}
  that => N\N/S {\p x.filter_(x,p)}

  how_many => S/S/N {\x p.count(x,p)}
  what_number_of => S/S/N {\x p.count(x,p)}

  what => S/S {\x.x}
  is => S/N {\x.x}
  is => S/S {\x.x}
  are => S/N {\x.x}
  are => S/S {\x.x}

  # TODO this is wrong -- actually a very complicated and interesting operator..
  # Need to rule out the particular object given in the complement
  # in "what number of other objects are the same size as the purple shiny object"
  other => N/N {\x.x}
""",
                             ontology,
                             include_semantics=True)
Ejemplo n.º 11
0
# Initial Puddleworld navigation lexicon over `ec_ontology`: instructions
# parse to `move(...)` commands built from spatial relations (relate,
# in_half, max_in_dir) over entity predicates. Note the primitive category
# declaration `S:N` (colon syntax) rather than the `S, N` form used elsewhere.
initial_puddleworld_lex = Lexicon.fromstring(r"""
  :- S:N

  reach => S/N {\x.move(x)}
  reach => S/N {\x.move(unique(x))}
  below => S/N {\x.move(unique(\y.relate(y,x,down)))}
  above => S/N {\x.move(unique(\y.relate(y,x,up)))}

  , => S\S/S {\a b.a}
  , => S\S/S {\a b.b}

  of => N\N/N {\x d y.relate(x,y,d)}
  of => N\N/N {\x d y.relate(unique(x),d,y)}
  to => N\N/N {\x y.x}

  one => S/N/N {\d x.move(unique(\y.relate(y,x,d)))}
  one => S/N/N {\d x.move(unique(\y.relate_n(y,x,d,1)))}
  right => N/N {\f x.and_(apply(f, x),in_half(x,right))}

  most => N\N/N {\x d.max_in_dir(x, d)}

  the => N/N {\x.unique(x)}

  left => N {left}
  below => N {down}
  above => N {up}
  right => N {right}
  horse => N {\x.horse(x)}
  rock => N {\x.rock(x)}
  rock => N {unique(\x.rock(x))}
  cell => N {\x.true}
  spade => N {\x.spade(x)}
  spade => N {unique(\x.spade(x))}
  heart => N {\x.heart(x)}
  heart => N {unique(\x.heart(x))}
  circle => N {\x.circle(x)}
  # triangle => N {\x.triangle(x)}
""", ec_ontology, include_semantics=True)
Ejemplo n.º 12
0
    types.new_constant("sphere", "shape"),
    types.new_constant("cube", "shape"),
    types.new_constant("cylinder", "shape"),
    types.new_constant("true", "boolean"),
]

# Bundle the type system, functions, and constants (defined above) into a
# single ontology for the lexicon below.
ontology = Ontology(types, functions, constants)

#######
# Lexicon: defines an initial set of word -> (syntax, meaning) mappings.
# Weights are initialized uniformly by default.

# Seed lexicon: `any` asks whether a described object exists; the dummy noun
# is a trivially-true placeholder the learner is expected to replace.
initial_lex = Lexicon.fromstring(r"""
  :- S, N

  any => S/N {\x.object_exists(x)}
  _dummy_noun => N {\x.true}
""",
                                 ontology,
                                 include_semantics=True)

#######
# VQA Dataset: defines the dataset.


class VQADataset(object):
    """
    A dummy dataset containing tuples of (scene, question, answer).

    Each scene contains a single object with one of three shapes ('sphere',
    'cube', or 'cylinder'). There are three types of questions: "any sphere",
    "any cube", and "any cylinder". The answer is True iff the shape named
    in the question matches the shape of the visual object.
    """
Ejemplo n.º 13
0
# The three shape constants the toy domain knows about; `types` and
# `functions` are defined earlier in the file.
constants = [
    types.new_constant("sphere", "shape"),
    types.new_constant("cube", "shape"),
    types.new_constant("cylinder", "shape"),
]

ontology = Ontology(types, functions, constants)

#######
# Lexicon: defines an initial set of word -> (syntax, meaning) mappings.
# Weights are initialized uniformly by default.

# Minimal N-only lexicon: "the ball" denotes the unique sphere in a scene.
lex = Lexicon.fromstring(r"""
  :- N

  the => N/N {\x.unique(x)}
  ball => N {\x.has_shape(x,sphere)}
""",
                         ontology,
                         include_semantics=True)

#######
# Execute on a scene.

# A toy scene of three objects (shape, size, material); only the sphere
# should satisfy "the ball".
scene = {
    "objects": [
        Object("sphere", "big", "rubber"),
        Object("cube", "small", "metal"),
        Object("cylinder", "small", "rubber"),
    ]
}