Esempio n. 1
0
def test_add_items():
    """Check that add_item counts a repeated unigram and tracks the total.

    The word starts absent from ``unigrams_dic``; each call to ``add_item``
    must raise its count by one, and the ``"COUNT"`` entry must equal the
    number of items added.
    """
    freqs = NgramFrequencies()
    word = "the"
    assert word not in freqs.unigrams_dic

    # Add the same word twice and verify the per-word count after each add.
    for expected in (1, 2):
        freqs.add_item(word, freqs.unigrams_dic)
        assert freqs.unigrams_dic[word] == expected

    assert freqs.unigrams_dic["COUNT"] == 2
Esempio n. 2
0
def main(filename):
    """Print the most frequent unigrams, bigrams and trigrams of a text file.

    The file is read through ``TextCleaner``; every item it yields is split
    on whitespace and each run of one, two or three consecutive words is
    counted, with the words of an n-gram joined by ``"_"``. N-grams never
    span two sentences.

    Args:
        filename: Path of the text file to analyse, passed to
            ``TextCleaner``.
    """
    # Use the caller's file rather than a hard-coded path.
    tc = TextCleaner(filename)
    # Presumably a list of cleaned sentence strings -- confirm against
    # TextCleaner.read_file.
    list_of_sentences = tc.read_file()

    # Value handed to each NgramFrequencies counter; it also drives the
    # printed "Top N" labels so the two cannot drift apart.
    rank = 10

    unigram = NgramFrequencies(rank)
    bigram = NgramFrequencies(rank)
    trigram = NgramFrequencies(rank)

    for sentence in list_of_sentences:
        words = sentence.split()
        last = len(words) - 1
        for i, word in enumerate(words):
            unigram.add_item(word)
            # A bigram needs one following word, a trigram needs two.
            if i < last:
                bigram.add_item("_".join(words[i:i + 2]))
            if i < last - 1:
                trigram.add_item("_".join(words[i:i + 3]))

    print(f"Top {rank} unigrams:")
    print(unigram.top_n_freqs())
    print(f"Top {rank} bigrams:")
    print(bigram.top_n_freqs())
    print(f"Top {rank} trigrams:")
    print(trigram.top_n_freqs())
Esempio n. 3
0
def test_ngram_frequencies():
    """Exercise add_item, frequency, top_n_counts and top_n_freqs together.

    Ten items are added in total (3 + 6 + 1), and the counter is built with
    an argument of 2, so only the two most common n-grams are expected back
    from the ``top_n_*`` methods.
    """
    nf = NgramFrequencies(2)

    # (n-gram, number of times it is added); add_item must return the
    # running count of that n-gram after each insertion.
    additions = (("He_is", 3), ("I_am", 6), ("They_are", 1))
    for ngram, repeats in additions:
        for running_count in range(1, repeats + 1):
            assert nf.add_item(ngram) == running_count

    # One occurrence out of the ten items added.
    assert nf.frequency("They_are") == 0.1

    expected_counts = [("I_am", 6), ("He_is", 3)]
    expected_freqs = [("I_am", 0.6), ("He_is", 0.3)]
    assert nf.top_n_counts() == expected_counts
    assert nf.top_n_freqs() == expected_freqs
Esempio n. 4
0
def test_add_item():
    """Adding an unseen n-gram stores it in ``dic_ngram`` with a count of 1."""
    freqs = NgramFrequencies(1, "")
    ngram = "ab"
    freqs.add_item(ngram)
    assert freqs.dic_ngram[ngram] == 1