Beispiel #1
0
  def fromstring(cls, lex_str, ontology=None, include_semantics=False,
                 default_weight=0.001):
    """
    Convert string representation into a lexicon for CCGs.

    The string is processed line by line after comments and surrounding
    whitespace are stripped; blank lines are skipped. Three kinds of line
    are recognised:

    - ``:- S, N, NP, VP`` declares primitive categories. The first element
      of the first such line names the target (start) category; several
      targets may be given separated by colons (``:- S:N, NP, VP``). Any
      further ``:-`` lines only add more primitives.
    - ``Det :: NP/N`` (separator ``::``) declares a category family.
    - Any other separator declares a word entry for the identifier on the
      left-hand side.

    Args:
      lex_str: Lexicon source text.
      ontology: Optional object; when given, its ``typecheck`` method is
        called on each parsed semantic expression.
      include_semantics: When exactly ``True``, every word entry must carry
        a semantics string, which is parsed into an expression. Otherwise
        word entries are created with ``semantics=None``.
      default_weight: Weight used for word entries that specify none.

    Returns:
      An instance of ``cls`` constructed from the collected start
      categories, primitives, families and word entries.

    Raises:
      AssertionError: If ``include_semantics`` is ``True`` and a word entry
        has no semantics.
    """
    ccg_lexicon.CCGVar.reset_id()
    primitives, starts = [], []
    families = {}
    entries = defaultdict(list)
    for line in lex_str.splitlines():
      # Strip comments and leading/trailing whitespace.
      line = COMMENTS_RE.match(line).groups()[0].strip()
      if line == "":
        continue

      if line.startswith(':-'):
        # A line of primitive categories.
        # ie, :- S, N, NP, VP
        declared = [prim.strip() for prim in line[2:].strip().split(',')]

        if not starts:
          # Only the first declaration line names the target categories.
          # Allow multiple targets separated by a colon in the first element:
          # ie, :- S:N,NP,VP
          starts = declared[0].split(":")
          declared = starts + declared[1:]

        # Later `:-` lines merely extend the primitive list. Re-deriving
        # `starts` from primitives[0] on every such line would silently drop
        # all but the first target category.
        primitives = primitives + declared
      else:
        # Either a family definition, or a word definition
        (ident, sep, rhs) = LEX_RE.match(line).groups()
        (catstr, semantics_str, weight) = RHS_RE.match(rhs).groups()
        (cat, var) = ccg_lexicon.augParseCategory(catstr, primitives, families)

        if sep == '::':
          # Family definition
          # ie, Det :: NP/N
          families[ident] = (cat, var)
          # TODO weight?
        else:
          semantics = None
          if include_semantics is True:
            if semantics_str is None:
              raise AssertionError(line + " must contain semantics because include_semantics is set to True")
            else:
              semantics = l.Expression.fromstring(ccg_lexicon.SEMANTICS_RE.match(semantics_str).groups()[0])

              # Assign types.
              if ontology is not None:
                ontology.typecheck(semantics)

          # The matched weight text has its first and last characters dropped
          # before conversion (presumably enclosing delimiters -- confirm
          # against RHS_RE).
          weight = float(weight[1:-1]) if weight is not None else default_weight
          # Wrap the weight as a gradient-tracking parameter.
          weight = Parameter(T.tensor(weight, requires_grad=True))

          # Word definition
          # ie, which => (N\N)/(S/NP)
          entries[ident].append(Token(ident, cat, semantics, weight=weight))
    return cls(starts, primitives, families, entries, has_semantics=include_semantics,
               ontology=ontology)
Beispiel #2
0
 def parse_category(self, cat_str):
     """
     Parse the category string ``cat_str`` using this object's primitives
     and families, returning the first element of the
     ``augParseCategory`` result.
     """
     parsed = ccg_lexicon.augParseCategory(
         cat_str, self._primitives, self._families)
     return parsed[0]
Beispiel #3
0
 def test_case(cat, expected):
     """
     Assert that the semantic arity of the category parsed from ``cat``
     equals ``expected``; the category string is used as the failure message.
     """
     # Parse against the primitives and families of the surrounding `lex`.
     category = augParseCategory(cat, lex._primitives, lex._families)[0]
     arity = get_semantic_arity(category)
     eq_(arity, expected, msg=str(cat))