def fromstring(cls, lex_str, ontology=None, include_semantics=False, default_weight=0.001):
    """
    Convert string representation into a lexicon for CCGs.

    Each line of ``lex_str`` is stripped of comments and surrounding
    whitespace; empty lines are skipped. A remaining line is one of:

    - ``:- S, N, NP, VP`` -- a declaration of primitive categories. The
      first element of the first declaration names the start (target)
      category; several start categories may be given separated by
      colons (``:- S:N, NP, VP``). Later ``:-`` lines only add further
      primitives and leave the start categories untouched.
    - ``Det :: NP/N`` -- a family definition (separator ``::``).
    - a line with any other separator -- a word definition.

    Args:
      lex_str: Lexicon text, processed line by line.
      ontology: Optional object; when not None, ``ontology.typecheck`` is
        called on the parsed semantics of every word definition.
      include_semantics: When exactly ``True``, every word definition
        must carry a semantics string, which is parsed into an
        expression. Otherwise entries get ``semantics=None``.
      default_weight: Weight used for word definitions that do not
        specify one.

    Returns:
      ``cls(starts, primitives, families, entries,
      has_semantics=include_semantics, ontology=ontology)``.

    Raises:
      AssertionError: if ``include_semantics`` is True and a word
        definition has no semantics.
    """
    ccg_lexicon.CCGVar.reset_id()
    primitives, starts = [], []
    families = {}
    entries = defaultdict(list)

    for line in lex_str.splitlines():
        # Strip comments and leading/trailing whitespace.
        line = COMMENTS_RE.match(line).groups()[0].strip()
        if line == "":
            continue

        if line.startswith(':-'):
            # A line of primitive categories.
            # The first one is the target category
            # ie, :- S, N, NP, VP
            declared = [prim.strip() for prim in line[2:].strip().split(',')]

            if not primitives:
                # But allow multiple target categories separated by a colon
                # in the first element:
                # ie, :- S:N,NP,VP
                #
                # Only the very first declaration defines the start
                # categories. Re-deriving them on every `:-` line would
                # re-split the already-split first primitive and silently
                # drop every start category but the first.
                starts = declared[0].split(":")
                declared = starts + declared[1:]

            primitives = primitives + declared
            continue

        # Either a family definition, or a word definition
        (ident, sep, rhs) = LEX_RE.match(line).groups()
        (catstr, semantics_str, weight) = RHS_RE.match(rhs).groups()
        (cat, var) = ccg_lexicon.augParseCategory(catstr, primitives, families)

        if sep == '::':
            # Family definition
            # ie, Det :: NP/N
            families[ident] = (cat, var)
            # TODO weight?
            continue

        semantics = None
        if include_semantics is True:
            if semantics_str is None:
                raise AssertionError(line + " must contain semantics because include_semantics is set to True")

            semantics = l.Expression.fromstring(
                ccg_lexicon.SEMANTICS_RE.match(semantics_str).groups()[0])

            # Assign types.
            if ontology is not None:
                ontology.typecheck(semantics)

        # The matched weight text has its first and last characters dropped
        # before conversion (presumably enclosing delimiters -- confirm
        # against RHS_RE).
        weight = float(weight[1:-1]) if weight is not None else default_weight
        weight = Parameter(T.tensor(weight, requires_grad=True))

        # Word definition
        # ie, which => (N\N)/(S/NP)
        entries[ident].append(Token(ident, cat, semantics, weight=weight))

    return cls(starts, primitives, families, entries,
               has_semantics=include_semantics,
               ontology=ontology)
def parse_category(self, cat_str):
    """
    Parse ``cat_str`` into a category using this lexicon's primitives and
    families.

    Returns only the first element of the result produced by
    ``ccg_lexicon.augParseCategory``; the remaining elements are discarded.
    """
    parsed = ccg_lexicon.augParseCategory(
        cat_str, self._primitives, self._families)
    return parsed[0]
def test_case(cat, expected):
    """
    Check that the semantic arity of the category string ``cat`` equals
    ``expected``.

    The category is parsed against the primitives and families of the
    enclosing ``lex``; on mismatch the category string is used as the
    assertion message.
    """
    category = augParseCategory(cat, lex._primitives, lex._families)[0]
    arity = get_semantic_arity(category)
    eq_(arity, expected, msg=str(cat))