def preprocess_file(file_name): sf = open(server.short_file_path(file_name), 'w') output_file = open(server.count_file_path(file_name), 'w') z = zipfile.ZipFile(server.zip_file_path(file_name), 'r') internal_name = z.namelist()[0] f = z.open(internal_name) word = '' word2 = '' count = 0 count_pair = 0 for line in f: tokens = line.split() if tokens[0] != word: append_word_to_file(output_file, word, count) append_word_to_short_file(sf, word, word2, count_pair) count = 0 word = tokens[0] word2 = tokens[1] count_pair = 0 elif tokens[1] != word2: append_word_to_short_file(sf, word, word2, count_pair) word2 = tokens[1] count_pair = 0 count += int(tokens[3]) count_pair += int(tokens[3]) append_word_to_file(output_file, word, count) append_word_to_short_file(sf, word, word2, count_pair) f.close() sf.close() z.close() output_file.close()
def word_from_file(file_name, word, index): f = open(server.short_file_path(file_name), 'r') for line in f: tokens = line.split() if tokens[0] == word: times = int(tokens[2]) if times >= index: return tokens[1] else: index -= times elif count_word.is_word_less_than_candidate(word, tokens[0]): break f.close() z.close()