Beispiel #1
0
 def test_build_word_count_from_corpus_two_lines(self):
     """Check the word counts built from the corpus with ``lines_max=2``.

     Every expected word has a count of 1, so all entries are tied. The
     relative order of tied entries returned by ``most_common()`` depends
     on the interpreter's dict ordering, so both sides are sorted before
     the comparison; only the (word, count) pairs themselves are checked.
     """
     expected = [('of', 1), ('ebook', 1), ('carroll', 1), ('alice', 1),
                 ('project', 1), ('gutenberg', 1), ('lewis', 1), ('in', 1),
                 ('wonderland', 1), ('the', 1), ('by', 1)]
     word_count = build_word_count_from_corpus(lines_max=2)
     most_common = word_count.most_common()
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists on recent Python versions.
     self.assertEqual(sorted(most_common), sorted(expected))
Beispiel #2
0
def get_word_data():
    """
    Build the word statistics used for fragment lookups.

    Counts words via ``build_word_count_from_corpus()``, keeps the words whose
    count is at least ``parameters['min_frequency_word_to_fragment']`` and
    passes those words to ``word_list_to_fragment_lookup()``.

    :return: dictionary with the full (unfiltered) word count under
        ``'word_count'`` and the fragment lookup built from the frequent
        words under ``'fragment_lookup'``
    """
    word_count = build_word_count_from_corpus()
    frequency_min = parameters['min_frequency_word_to_fragment']
    # items() is available on both Python 2 and Python 3, whereas
    # iteritems() only exists on Python 2.
    word_count_smaller = {word: count for word, count in word_count.items()
                          if count >= frequency_min}
    # Pass a real list so the helper receives the same type on every
    # Python version (keys() is a view object on Python 3).
    fragment_lookup = word_list_to_fragment_lookup(list(word_count_smaller))
    word_data = {'word_count': word_count, 'fragment_lookup': fragment_lookup}
    return word_data
Beispiel #3
0
def get_word_data():
    """
    Assemble the word count together with its fragment lookup.

    The word count comes from ``build_word_count_from_corpus()``. Only words
    whose count reaches ``parameters['min_frequency_word_to_fragment']`` are
    handed to ``word_list_to_fragment_lookup()``; the returned word count
    itself is not filtered.

    :return: dictionary that has word_count and fragment lookup, stored
        under the keys ``'word_count'`` and ``'fragment_lookup'``
    """
    word_count = build_word_count_from_corpus()
    frequency_min = parameters['min_frequency_word_to_fragment']
    # Use items() rather than the Python-2-only iteritems() so the function
    # also runs on Python 3.
    word_count_smaller = {
        word: count
        for word, count in word_count.items() if count >= frequency_min
    }
    # list(...) yields the dict's keys as a plain list on Python 2 and 3
    # alike, instead of a view object on Python 3.
    fragment_lookup = word_list_to_fragment_lookup(list(word_count_smaller))
    word_data = {'word_count': word_count, 'fragment_lookup': fragment_lookup}
    return word_data
Beispiel #4
0
 def test_build_word_count_from_corpus_two_lines(self):
     """Verify the (word, count) pairs produced with ``lines_max=2``.

     All expected counts are 1, i.e. every entry is tied. How
     ``most_common()`` orders tied entries is determined by the
     interpreter's dict ordering, not by the counts, so the comparison is
     made on sorted copies and does not depend on that order.
     """
     expected = [('of', 1),
                 ('ebook', 1),
                 ('carroll', 1),
                 ('alice', 1),
                 ('project', 1),
                 ('gutenberg', 1),
                 ('lewis', 1),
                 ('in', 1),
                 ('wonderland', 1),
                 ('the', 1),
                 ('by', 1)]
     word_count = build_word_count_from_corpus(lines_max=2)
     most_common = word_count.most_common()
     # Use assertEqual; the assertEquals spelling is a deprecated alias
     # that has been dropped from recent Python versions.
     self.assertEqual(sorted(most_common), sorted(expected))