Example #1
 def find_tags(self, tokens, **kwargs):
     # Choose a tag-mapping function for the requested tagset
     # (default: convert the lexicon's STTS tags to Penn Treebank).
     if kwargs.get("tagset") in (PENN, None):
         kwargs.setdefault("map", lambda token, tag: stts2penntreebank(token, tag))
     if kwargs.get("tagset") == UNIVERSAL:
         kwargs.setdefault("map", lambda token, tag: stts2universal(token, tag))
     if kwargs.get("tagset") == STTS:
         kwargs.setdefault("map", lambda token, tag: (token, tag))
     # The lexicon uses Swiss spelling: "ss" instead of "ß".
     # We restore the "ß" after parsing.
     tokens_ss = [t.replace(u"ß", "ss") for t in tokens]
     tokens_ss = _Parser.find_tags(self, tokens_ss, **kwargs)
     return [[w] + tokens_ss[i][1:] for i, w in enumerate(tokens)]
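The override above only selects a tag-mapping function and works around the lexicon's Swiss spelling; everything else is delegated to _Parser.find_tags(). Below is a minimal, self-contained sketch of those two ideas; the *_demo names and toy tag tables are stand-ins invented here for the real stts2penntreebank / stts2universal converters and the base parser's lexicon lookup, not part of the original module:

 PENN, UNIVERSAL, STTS = "penntreebank", "universal", "stts"

 # Toy stand-ins for the real STTS converters (illustrative values only).
 def stts2penn_demo(token, tag):
     return token, {"ART": "DT", "NN": "NN", "VVFIN": "VBZ"}.get(tag, tag)

 def stts2universal_demo(token, tag):
     return token, {"ART": "DET", "NN": "NOUN", "VVFIN": "VERB"}.get(tag, tag)

 def lexicon_tags_demo(tokens, **kwargs):
     # Stand-in for _Parser.find_tags(): the lexicon only knows "ss" spellings.
     lexicon = {"die": "ART", "Strasse": "NN", "ist": "VVFIN"}
     mapper = kwargs.get("map", lambda token, tag: (token, tag))
     return [list(mapper(t, lexicon.get(t, "NN"))) for t in tokens]

 def find_tags_demo(tokens, **kwargs):
     if kwargs.get("tagset") in (PENN, None):
         kwargs.setdefault("map", stts2penn_demo)
     if kwargs.get("tagset") == UNIVERSAL:
         kwargs.setdefault("map", stts2universal_demo)
     if kwargs.get("tagset") == STTS:
         kwargs.setdefault("map", lambda token, tag: (token, tag))
     # Tag the "ss" spellings, then splice the original "ß" tokens back in.
     tokens_ss = [t.replace(u"ß", "ss") for t in tokens]
     tagged = lexicon_tags_demo(tokens_ss, **kwargs)
     return [[w] + tagged[i][1:] for i, w in enumerate(tokens)]

 print(find_tags_demo([u"die", u"Straße", u"ist"], tagset=UNIVERSAL))
 # [['die', 'DET'], ['Straße', 'NOUN'], ['ist', 'VERB']]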
Example #3
 def find_tokens(self, tokens, **kwargs):
     # Seed the tokenizer with the German abbreviation list and an (empty)
     # replacement map, then delegate to the base parser's tokenizer.
     kwargs.setdefault("abbreviations", ABBREVIATIONS)
     kwargs.setdefault("replace", {})
     return _Parser.find_tokens(self, tokens, **kwargs)
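Here find_tokens() only seeds the tokenizer: the abbreviation list keeps German abbreviations from being read as sentence ends, and "replace" holds string substitutions applied before tokenization. The following toy sketch shows the effect; the splitter and the small abbreviation set are invented here for illustration and stand in for _Parser.find_tokens() and ABBREVIATIONS:

 # Toy sentence splitter: a trailing period ends a sentence unless the
 # word (period included) is in the abbreviation set. Illustrative only.
 ABBREVIATIONS_DEMO = {u"z.B.", u"bzw.", u"usw.", u"Dr."}

 def find_tokens_demo(text, abbreviations=ABBREVIATIONS_DEMO, replace=None):
     for k, v in (replace or {}).items():   # pre-tokenization substitutions
         text = text.replace(k, v)
     sentences, sentence = [], []
     for word in text.split():
         sentence.append(word)
         if word.endswith(".") and word not in abbreviations:
             sentences.append(" ".join(sentence))
             sentence = []
     if sentence:
         sentences.append(" ".join(sentence))
     return sentences

 print(find_tokens_demo(u"Dr. Meier kauft z.B. Brot. Er bezahlt bar."))
 # ['Dr. Meier kauft z.B. Brot.', 'Er bezahlt bar.']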