def find_lemmata(tokens):
    """ Appends a lowercased lemma to every token, in place, and returns the same list.

        Each token is expected to be a [word, part-of-speech] list; after the call
        it is [word, part-of-speech, lemma]. The lemma is chosen from the tag:
        - DT*, PR*, WP*: singularize(word, pos=pos).
        - RB*, IN*: singularize(word, pos=pos), but only when the word ends in an
          apostrophe (straight or curly) or is exactly "du".
        - JJ*: predicative(word).
        - NNS: singularize(word).
        - VB*, MD*: conjugate(word, INFINITIVE), falling back to the word itself
          when that result is falsy.
        Any other tag keeps the word unchanged (apart from lowercasing).
    """
    for entry in tokens:
        surface, tag = entry[0], entry[1]
        if tag.startswith(("DT", "PR", "WP")):
            base = singularize(surface, pos=tag)
        elif tag.startswith(("RB", "IN")):
            # Only apostrophe-final forms and "du" are normalized here
            # (presumably elided/contracted function words -- TODO confirm).
            needs_expansion = surface.endswith(("'", u"’")) or surface == "du"
            base = singularize(surface, pos=tag) if needs_expansion else surface
        elif tag.startswith(("JJ",)):
            base = predicative(surface)
        elif tag == "NNS":
            base = singularize(surface)
        elif tag.startswith(("VB", "MD")):
            # Keep the surface form when no infinitive is returned.
            base = conjugate(surface, INFINITIVE) or surface
        else:
            base = surface
        entry.append(base.lower())
    return tokens
# Example #2
0
def find_lemmata(tokens):
    """ Extends each [word, part-of-speech] token with a third item, the lowercased
        lemma, mutating the tokens in place and returning the given list.

        The part-of-speech tag selects how the lemma is derived; words whose tag
        matches none of the handled classes are simply lowercased.
    """
    def base_form(word, pos):
        # Determiners, pronouns and wh-pronouns (DT*, PR*, WP*).
        if pos.startswith(("DT", "PR", "WP")):
            return singularize(word, pos=pos)
        # Adverbs and prepositions (RB*, IN*): only apostrophe-final words
        # and the literal "du" are passed to singularize().
        if pos.startswith(("RB", "IN")):
            if word.endswith(("'", u"’")) or word == "du":
                return singularize(word, pos=pos)
            return word
        # Adjectives (JJ*).
        if pos.startswith(("JJ",)):
            return predicative(word)
        # Plural nouns.
        if pos == "NNS":
            return singularize(word)
        # Verbs and modals (VB*, MD*); fall back to the word itself when
        # conjugate() returns something falsy.
        if pos.startswith(("VB", "MD")):
            return conjugate(word, INFINITIVE) or word
        return word

    for token in tokens:
        token.append(base_form(token[0], token[1]).lower())
    return tokens