Example #1
0
def build_word_count_from_corpus(lines_max=100000000000):
    """
    Accumulate word counts over the leading lines of the corpus.

    Lines come from ``read_corpus_line_by_line()`` and each one is passed,
    together with the shared counter, to ``add_word_count_to_counter``.

    :param lines_max: number of corpus lines to process before stopping; the
        very large default is meant to stand for "all lines"
    :return: Counter object that was handed to ``add_word_count_to_counter``
        for every processed line
    """
    tally = Counter()
    for index, text in enumerate(read_corpus_line_by_line()):
        # Indices are zero-based, so reaching lines_max means the quota of
        # lines has already been processed.
        if index >= lines_max:
            break
        add_word_count_to_counter(text, tally)
    return tally
Example #2
0
 def setUp(self):
     """Store the result of ``read_corpus_line_by_line()`` on ``self.stream``.

     NOTE(review): presumably a unittest ``TestCase`` fixture hook; the
     enclosing class is not visible here, so confirm.
     """
     corpus_lines = read_corpus_line_by_line()
     self.stream = corpus_lines