Esempio n. 1
0
def generate_vocabulary(counter, threshold):
    """Build a Vocabulary from words that occur at least ``threshold`` times.

    Args:
        counter: Mapping from word to occurrence count; only its
            ``items()`` view is used (presumably a ``collections.Counter``
            -- confirm against callers).
        threshold: Minimum count, inclusive, a word needs in order to be kept.

    Returns:
        A new ``Vocabulary`` to which every kept word has been passed via
        ``addWord``, in ascending sorted order.
    """
    vocab = Vocabulary()

    # Keep words that have at least `threshold` occurrences. Sorting fixes
    # the insertion order independently of the counter's iteration order.
    words = sorted(word for word, cnt in counter.items() if cnt >= threshold)

    # Add the surviving words to the vocabulary one by one.
    for word in words:
        vocab.addWord(word)

    return vocab