# Corpus-level accumulators.
# NOTE(review): the loop below appends to `corpus_words`, `corpus_bi_grams`
# and `corpus_tri_grams`, not to the lists initialised here -- confirm those
# names are defined elsewhere and that this is not a naming mismatch.
corpus = []
corpus_bigrams = []
corpus_trigrams = []
corpus_fourgrams = []
corpus_fivegrams = []

# Walk every '*.txt' path yielded by recurse_dir for './corpus' and collect
# per-document word / n-gram lists together with their lengths.
for doc_path in recurse_dir(r'./corpus', '*.txt'):
    # Read the raw bytes of the document. The context manager guarantees the
    # handle is closed after each file instead of leaking one per document.
    with open(doc_path, 'rb') as doc_file:
        doc = doc_file.read()

    # Fresh per-document containers handed to Corpus.
    # Presumably they are filled in place while the location vector is
    # generated (their lengths are read right afterwards) -- TODO confirm
    # against the Corpus implementation.
    doc_words = []
    bi_grams = []
    tri_grams = []
    four_grams = []
    five_grams = []
    cor = Corpus(doc, doc_words, bi_grams, tri_grams, four_grams, five_grams)
    cor.generate_location_vector(cor.parse_xml(), [0])

    doc_count += 1

    # Single words: record this document's count and keep its list.
    doc_word_count = len(doc_words)
    doc_word_count_list.append(doc_word_count)
    corpus_words.append(doc_words)

    # Bi-grams.
    doc_bi_gram_count = len(bi_grams)
    doc_bi_gram_count_list.append(doc_bi_gram_count)
    corpus_bi_grams.append(bi_grams)

    # Tri-grams.
    doc_tri_gram_count = len(tri_grams)
    doc_tri_gram_count_list.append(doc_tri_gram_count)
    corpus_tri_grams.append(tri_grams)

    # Four-grams.
    # NOTE(review): only the count is computed in the visible code; it is
    # not appended anywhere here -- check the statements that follow.
    doc_four_gram_count = len(four_grams)