def preprocess_file(file_name):
    """Summarise the zipped listing for ``file_name`` into two count files.

    The first member of the archive at ``server.zip_file_path(file_name)``
    is read line by line. Each line is split on whitespace; token 0 is
    treated as the first word, token 1 as the second word and token 3 as an
    integer count (token 2 is not used here).

    Two outputs are produced through the project helpers:

    * ``append_word_to_file`` receives the summed count for each run of
      consecutive lines that share the same first word (written to
      ``server.count_file_path(file_name)``).
    * ``append_word_to_short_file`` receives the summed count for each run
      of consecutive lines that share the same (first word, second word)
      pair (written to ``server.short_file_path(file_name)``).

    NOTE(review): totals are only merged across *consecutive* lines, so this
    presumably expects the input to be grouped/sorted by first and then
    second word -- confirm against the data source.

    All handles are managed by ``with`` so they are closed even when a line
    is malformed (too few tokens, non-integer count) or the archive is empty.

    :param file_name: identifier passed to the ``server`` path helpers.
    :raises IndexError: if the archive has no members or a line has fewer
        than four tokens.
    :raises ValueError: if token 3 of a line is not an integer.
    """
    with open(server.short_file_path(file_name), 'w') as sf, \
            open(server.count_file_path(file_name), 'w') as output_file, \
            zipfile.ZipFile(server.zip_file_path(file_name), 'r') as z:
        internal_name = z.namelist()[0]

        # Running state for the current first word and current word pair.
        word = ''
        word2 = ''
        count = 0
        count_pair = 0

        with z.open(internal_name) as f:
            for line in f:
                tokens = line.split()
                if tokens[0] != word:
                    # First word changed: flush both the word total and the
                    # pair total. On the very first line this flushes the
                    # initial ('' / 0) state, exactly as before; the helpers
                    # are presumably expected to cope with that.
                    append_word_to_file(output_file, word, count)
                    append_word_to_short_file(sf, word, word2, count_pair)
                    count = 0
                    word = tokens[0]
                    word2 = tokens[1]
                    count_pair = 0
                elif tokens[1] != word2:
                    # Same first word, new second word: flush the pair only.
                    append_word_to_short_file(sf, word, word2, count_pair)
                    word2 = tokens[1]
                    count_pair = 0
                occurrences = int(tokens[3])
                count += occurrences
                count_pair += occurrences

        # Flush whatever was still being accumulated when the input ended.
        append_word_to_file(output_file, word, count)
        append_word_to_short_file(sf, word, word2, count_pair)
def get_counts_from_file(file_name, word):
    """Return the total count recorded for ``word`` in a count file.

    The file at ``server.count_file_path(file_name)`` is scanned line by
    line; each line must split into exactly two whitespace-separated fields,
    a word and an integer count. Counts of every line whose word equals
    ``word`` are summed. Scanning stops at the first non-matching line for
    which ``is_word_less_than_candidate(word, candidate_word)`` is true
    (presumably because the file is sorted and ``word`` can no longer
    appear -- confirm against that helper).

    The file is opened with ``with`` so the handle is released even if a
    malformed line raises.

    :param file_name: identifier passed to ``server.count_file_path``.
    :param word: the word to look up.
    :returns: the summed count, or 0 if ``word`` was not found.
    :raises ValueError: if a line does not have exactly two fields or its
        count field is not an integer.
    """
    count = 0
    with open(server.count_file_path(file_name), 'r') as count_file:
        for line in count_file:
            candidate_word, candidate_count = line.split()
            if candidate_word == word:
                count += int(candidate_count)
            elif is_word_less_than_candidate(word, candidate_word):
                # Past the position where `word` could occur: stop early.
                break
    return count