def build_word_count_from_corpus(lines_max=100000000000):
    """
    Count words over the leading lines of the corpus.

    Lines are taken from ``read_corpus_line_by_line()`` and each one is
    passed, together with a single shared ``Counter``, to
    ``add_word_count_to_counter``.

    :param lines_max: maximum lines to consider; ``None`` means no limit.
        The default is a number large enough to act as "all lines". A value
        of zero or less yields an empty counter.
    :return: Counter object with count of all processed words
    """
    word_counter = Counter()
    for line_num, line in enumerate(read_corpus_line_by_line()):
        # line_num is zero-based, so reaching lines_max means exactly
        # lines_max lines have already been counted.
        if lines_max is not None and line_num >= lines_max:
            break
        add_word_count_to_counter(line, word_counter)
    return word_counter
def setUp(self):
    """Store a fresh ``read_corpus_line_by_line()`` result on ``self.stream``.

    NOTE(review): presumably a unittest-style per-test fixture hook; the
    enclosing class is not visible here -- confirm.
    """
    corpus_lines = read_corpus_line_by_line()
    self.stream = corpus_lines