예제 #1
0
 def token_label(self, token: MonadicToken, _loc: int = None):
     """Label a token with the longest entry of ``self.suffixes`` it ends in.

     The token's form is first prefixed with ``'BOW__'``. If that whole
     prefixed string is listed in ``self.suffixes`` it is returned as is.
     Otherwise progressively shorter tails of the bare form are tried, from
     the complete form down to its last character, and the first tail found
     in ``self.suffixes`` is returned.

     :param token: token whose ``form()`` is inspected
     :param _loc: accepted for interface compatibility; not used here
     :return: the matching entry, or ``'UNKNOWN'`` if nothing matches
     """
     prefixed = 'BOW__%s' % token.form()
     if prefixed in self.suffixes:
         return prefixed
     # Offset 5 is len('BOW__'): the first tail is the complete bare form,
     # and the empty tail is never tried.
     tails = (prefixed[start:] for start in range(5, len(prefixed)))
     return next((tail for tail in tails if tail in self.suffixes), 'UNKNOWN')
예제 #2
0
 def token_label(self, token: MonadicToken, _loc: int = None):
     """Build a ``<form>-:-<pos>`` label, masking unseen form/POS pairs.

     The lower-cased form is kept when the ``(form, pos)`` pair is in the
     stored set of combinations. Otherwise the form is replaced by the
     unknown-word placeholder. If the POS tag has an entry in the
     additional-morphology table, every ``key=value`` feature of the token
     whose key is listed for that tag is appended as ``#key:value``, in the
     order the features appear.

     :param token: token providing ``pos()``, ``form()`` and ``feats()``
     :param _loc: accepted for interface compatibility; not used here
     :return: the label string
     """
     pos = token.pos()
     label = token.form().lower()
     if (label, pos) in self.__form_pos_combinations:
         return label + '-:-' + pos

     label = self.__UNK
     if pos in self.__add_morph:
         # Features are '|'-separated 'key=value' items.
         for entry in token.feats().split('|'):
             parts = entry.split('=')
             if parts[0] in self.__add_morph[pos]:
                 label += '#' + parts[0] + ':' + parts[1]
     return label + '-:-' + pos
예제 #3
0
    def token_label(self, token: MonadicToken, _loc: int = None):
        """Map a token to a lexicon entry, a placeholder or a signature.

        Checks are applied in this order, and the first that applies wins:

        1. a form matching ``YEARRE`` becomes ``'1970'``;
        2. a form matching ``NUMBERRE`` becomes ``'000'``;
        3. a form found in ``self.lexicon`` is returned unchanged;
        4. in test mode, a form whose lower-cased variant is in
           ``self.lexicon`` is returned lower-cased;
        5. otherwise the signature computed by ``unknownword4`` is returned
           if it is in ``self.sigs``, else ``UNK``.

        :param token: token whose ``form()`` is labelled
        :param _loc: passed through to ``unknownword4``
        :return: the label string
        """
        word = token.form()

        # adapted from discodop
        if YEARRE.match(word):
            return '1970'
        if NUMBERRE.match(word):
            return '000'
        if word in self.lexicon:
            return word
        if self.test_mode:
            lowered = word.lower()
            if lowered in self.lexicon:
                return lowered

        signature = unknownword4(word, _loc, self.lexicon)
        return signature if signature in self.sigs else UNK
예제 #4
0
 def token_label(self, token: MonadicToken, _loc: int = None):
     """Build a ``<form>-:-<pos>`` label from the token's form and POS tag.

     The form is lower-cased. If the ``(form, pos)`` pair is in the stored
     set of combinations, the form is replaced by the unknown-word
     placeholder; otherwise the lower-cased form is kept.

     :param token: token providing ``pos()`` and ``form()``
     :param _loc: accepted for interface compatibility; not used here
     :return: the label string
     """
     pos = token.pos()
     lowered = token.form().lower()
     # NOTE(review): pairs that ARE in the set are the ones masked by the
     # placeholder, while unseen pairs keep their form. Confirm against the
     # code that fills the set that this membership test is not inverted.
     listed = (lowered, pos) in self.__form_pos_combinations
     label = self.__UNK if listed else lowered
     return label + '-:-' + pos
예제 #5
0
 def token_label(self, token: MonadicToken, _loc: int = None):
     """Return the lower-cased form, or the placeholder for rare forms.

     A form whose stored count is below the threshold is replaced by the
     unknown-word placeholder. A form with no stored count is treated as
     having count 0.

     :param token: token whose ``form()`` is labelled
     :param _loc: accepted for interface compatibility; not used here
     :return: the lower-cased form or the placeholder
     """
     lowered = token.form().lower()
     occurrences = self.__terminal_counts.get(lowered, 0)
     return self.__UNK if occurrences < self.__threshold else lowered
예제 #6
0
 def token_label(self, token: MonadicToken, _loc: int = None):
     """Use the token's form, unmodified, as its label.

     :param token: token whose ``form()`` is returned
     :param _loc: accepted for interface compatibility; not used here
     :return: whatever ``token.form()`` returns
     """
     surface_form = token.form()
     return surface_form