def token_label(self, token: MonadicToken, _loc: int = None):
    """Label a token with the longest matching entry of ``self.suffixes``.

    The token's form is prefixed with the 5-character marker ``'BOW__'``.
    If the marked string is itself in ``self.suffixes`` it is returned.
    Otherwise the tails of the marked string are tried from longest to
    shortest, starting right after the marker (index 5), and the first
    tail found in ``self.suffixes`` is returned.

    :param token: token whose ``form()`` is labeled
    :param _loc: accepted for interface compatibility; not used here
    :return: the matching entry, or ``'UNKNOWN'`` if nothing matches
    """
    marked = 'BOW__%s' % token.form()
    known = self.suffixes
    if marked in known:
        return marked
    # Index 5 is the first character after the 'BOW__' marker.
    tails = (marked[start:] for start in range(5, len(marked)))
    return next((tail for tail in tails if tail in known), 'UNKNOWN')
def token_label(self, token: MonadicToken, _loc: int = None):
    """Build a ``form[#feature:value...]-:-pos`` label for a token.

    The lower-cased form is replaced by ``self.__UNK`` when the
    ``(form, pos)`` pair is not in ``self.__form_pos_combinations``.
    If the POS tag is a key of ``self.__add_morph``, every feature of
    ``token.feats()`` (``'|'``-separated ``name=value`` entries) whose
    name is listed for that tag is appended as ``'#name:value'``.

    :param token: token providing ``pos()``, ``form()`` and ``feats()``
    :param _loc: accepted for interface compatibility; not used here
    :return: the label string, ending in ``'-:-'`` plus the POS tag
    :raises IndexError: if a selected feature entry contains no ``'='``
    """
    tag = token.pos()
    label = token.form().lower()
    if (label, tag) not in self.__form_pos_combinations:
        label = self.__UNK
    if tag in self.__add_morph:
        for entry in token.feats().split('|'):
            pieces = entry.split('=')
            # Only features registered for this POS tag extend the label.
            if pieces[0] in self.__add_morph[tag]:
                label += '#' + pieces[0] + ':' + pieces[1]
    return label + '-:-' + tag
def token_label(self, token: MonadicToken, _loc: int = None):
    """Map a token to a lexicon entry, a placeholder, or a signature.

    The checks are applied in this order, first hit wins:

    1. form matches ``YEARRE``: return ``'1970'``
    2. form matches ``NUMBERRE``: return ``'000'``
    3. form is in ``self.lexicon``: return it unchanged
    4. ``self.test_mode`` is set and the lower-cased form is in
       ``self.lexicon``: return the lower-cased form
    5. otherwise compute ``unknownword4(form, _loc, self.lexicon)`` and
       return it if it is in ``self.sigs``, else return ``UNK``

    :param token: token whose ``form()`` is labeled
    :param _loc: passed through to ``unknownword4`` as its second
        argument (presumably the token position; verify against caller)
    :return: the label string
    """
    surface = token.form()
    # adapted from discodop
    if YEARRE.match(surface):
        return '1970'
    if NUMBERRE.match(surface):
        return '000'
    if surface in self.lexicon:
        return surface
    if self.test_mode and surface.lower() in self.lexicon:
        return surface.lower()
    signature = unknownword4(surface, _loc, self.lexicon)
    return signature if signature in self.sigs else UNK
def token_label(self, token: MonadicToken, _loc: int = None):
    """Build a ``form-:-pos`` label, masking selected forms as unknown.

    The lower-cased form is replaced by ``self.__UNK`` when the
    ``(form, pos)`` pair IS contained in ``self.__form_pos_combinations``.

    NOTE(review): membership (not absence) triggers the replacement, so
    the collection presumably holds the combinations to be masked.
    Confirm against where it is populated.

    :param token: token providing ``pos()`` and ``form()``
    :param _loc: accepted for interface compatibility; not used here
    :return: the (possibly masked) form joined to the POS tag by ``'-:-'``
    """
    tag = token.pos()
    lowered = token.form().lower()
    masked = (lowered, tag) in self.__form_pos_combinations
    head = self.__UNK if masked else lowered
    return head + '-:-' + tag
def token_label(self, token: MonadicToken, _loc: int = None):
    """Return the lower-cased form, or ``self.__UNK`` for rare forms.

    A form counts as rare when its entry in ``self.__terminal_counts``
    (0 if absent) is below ``self.__threshold``.

    :param token: token whose ``form()`` is labeled
    :param _loc: accepted for interface compatibility; not used here
    :return: the lower-cased form or the unknown-word label
    """
    lowered = token.form().lower()
    occurrences = self.__terminal_counts.get(lowered, 0)
    return self.__UNK if occurrences < self.__threshold else lowered
def token_label(self, token: MonadicToken, _loc: int = None):
    """Use the token's form, exactly as returned by ``form()``, as label.

    :param token: token whose ``form()`` is returned
    :param _loc: accepted for interface compatibility; not used here
    :return: the result of ``token.form()``
    """
    surface = token.form()
    return surface