def find_tags(self, tokens, **kwargs):
    """Annotate the given tokens with part-of-speech tags.

    Unless the caller supplied an explicit "map" function, install one that
    converts the tagger's native STTS tags to the requested tagset:
    Penn Treebank when tagset is PENN or unspecified, the universal tagset
    for UNIVERSAL, or a pass-through for STTS.
    """
    tagset = kwargs.get("tagset")
    if tagset in (PENN, None):
        kwargs.setdefault("map", lambda token, tag: stts2penntreebank(token, tag))
    if tagset == UNIVERSAL:
        kwargs.setdefault("map", lambda token, tag: stts2universal(token, tag))
    if tagset is STTS:
        kwargs.setdefault("map", lambda token, tag: (token, tag))
    # The lexicon is stored in Swiss spelling ("ss" rather than "ß"),
    # so tag a Swiss-spelled copy of the input...
    swiss = [token.replace(u"ß", "ss") for token in tokens]
    tagged = _Parser.find_tags(self, swiss, **kwargs)
    # ...then graft the original "ß" words back onto the tag rows.
    return [[word] + tagged[i][1:] for i, word in enumerate(tokens)]
def find_tags(self, tokens, **kwargs):
    """Tag a list of tokens, mapping STTS tags to the requested tagset.

    Defaults to Penn Treebank tags; UNIVERSAL selects the universal
    tagset and STTS keeps the native tags unchanged. A caller-supplied
    "map" function always takes precedence.
    """
    if "map" not in kwargs:
        tagset = kwargs.get("tagset")
        if tagset in (PENN, None):
            kwargs["map"] = lambda token, tag: stts2penntreebank(token, tag)
        elif tagset == UNIVERSAL:
            kwargs["map"] = lambda token, tag: stts2universal(token, tag)
        elif tagset is STTS:
            kwargs["map"] = lambda token, tag: (token, tag)
    # The lexicon uses Swiss spelling: "ss" instead of "ß".
    # Parse a normalized copy, then restore the original "ß" words.
    normalized = [t.replace(u"ß", "ss") for t in tokens]
    tagged = _Parser.find_tags(self, normalized, **kwargs)
    return [[original] + tagged[i][1:] for i, original in enumerate(tokens)]
def find_tokens(self, tokens, **kwargs):
    """Split the input into tokens, delegating to the base parser.

    Installs the German abbreviation list (so e.g. "z.B." does not end a
    sentence) and an empty replacement table, unless the caller already
    provided them.
    """
    defaults = {"abbreviations": ABBREVIATIONS, "replace": {}}
    for option, value in defaults.items():
        kwargs.setdefault(option, value)
    return _Parser.find_tokens(self, tokens, **kwargs)