Ejemplo n.º 1
0
def main(file_name):
    """Given the file name, print n-gram frequencies.

    Reads the file through a ``TextCleaner``, pre-processes each of its
    lines, feeds every resulting word list into an ``NgramFrequencies``
    instance, and then calls ``print_output`` once each for the unigram,
    bigram and trigram dictionaries with the result of
    ``top_n_grams(dictionary, 10)``.

    String -> None
    """
    text = TextCleaner()
    ngrams = NgramFrequencies()
    text.read_file(file_name)
    for line in text.lines:
        text.pre_process(line)

    for word_per_list in text.word_list:
        ngrams.fill_in_dic(word_per_list)

    # Keep each label next to its dictionary so the printed heading can
    # never drift from the data it describes (previously every result was
    # printed under the "unigrams" label).
    labelled_dics = (
        ("unigrams", ngrams.unigrams_dic),
        ("bigrams", ngrams.bigrams_dic),
        ("trigrams", ngrams.trigrams_dic),
    )
    for grams_name, grams_dic in labelled_dics:
        grams_top = ngrams.top_n_grams(grams_dic, 10)
        print_output(grams_top, grams_name)
Ejemplo n.º 2
0
def test_fill_in_dic():
    """Check that fill_in_dic populates all three n-gram dictionaries.

    A single four-word list is fed in; the unigram, bigram and trigram
    dictionaries are then compared against the expected per-gram counts
    and their "COUNT" totals.
    """
    words = ["time", "burton's", "corpse", "bride"]

    expected_unigrams = {
        "COUNT": 4,
        "time": 1,
        "burton's": 1,
        "corpse": 1,
        "bride": 1,
    }
    expected_bigrams = {
        "COUNT": 3,
        "time_burton's": 1,
        "burton's_corpse": 1,
        "corpse_bride": 1,
    }
    expected_trigrams = {
        "COUNT": 2,
        "time_burton's_corpse": 1,
        "burton's_corpse_bride": 1,
    }

    counter = NgramFrequencies()
    counter.fill_in_dic(words)

    assert counter.unigrams_dic == expected_unigrams
    assert counter.bigrams_dic == expected_bigrams
    assert counter.trigrams_dic == expected_trigrams