Beispiel #1
0
def is_oov(word):
    """Tell whether *word* counts as out-of-vocabulary.

    A word qualifies when its normalized form is absent from
    ``known_words``, the raw word is non-empty, and the raw word does
    not begin with a digit.
    """
    # Normalization happens first, before any other check.
    normalized = TextNormalizer.normalize_word(word)
    if normalized in known_words:
        return False
    if len(word) == 0:
        return False
    # Words with a leading digit are never reported as OOV.
    return not word[0].isdigit()
Beispiel #2
0
def is_NE(word):
    """Tell whether *word* passes the "NE" check.

    NOTE(review): "NE" presumably stands for "named entity" -- confirm.

    The raw word must be longer than one character and start with an
    uppercase letter; its normalized form must not be in ``funcwords``
    and must satisfy ``nonstop``.
    """
    normalized = TextNormalizer.normalize_word(word)
    if len(word) <= 1:
        return False
    if normalized in funcwords:
        return False
    # Kept as an `and` expression so a falsy result from nonstop() is
    # returned unchanged.
    return nonstop(normalized) and word[0].isupper()
Beispiel #3
0
def is_FW(word):
    """Tell whether *word* passes the "FW" check.

    NOTE(review): "FW" presumably stands for "foreign word" -- confirm.

    The normalized form must be non-empty and must be rejected by both
    ``is_cyrword`` and ``is_digit``.
    """
    token = TextNormalizer.normalize_word(word)
    if len(token) == 0:
        return False
    # is_cyrword is consulted first; is_digit only runs when it fails.
    if is_cyrword(token):
        return False
    return not is_digit(token)