def sort_data(self, filename, chunk_filename_format):
    """Split ``filename`` into chunk files and merge them into one output file.

    The input is split by ``FileSplitter`` using ``self.chunk_memory``, the
    resulting chunk files are merged by ``FileMerger`` into
    ``<filename minus a trailing ".txt">_sorted.txt``, and the chunk files are
    removed afterwards when ``self.cleanup`` is truthy.

    Args:
        filename: Path of the input file.
        chunk_filename_format: Naming pattern handed to ``FileSplitter`` for
            the chunk files it writes.

    Returns:
        None. The result is the merged file written next to the input.

    Note:
        The merge buffer is ``self.chunk_memory`` divided by the chunk count,
        so a chunk count of zero raises ``ZeroDivisionError``.
    """
    total_chunks = mcm.get_total_chunk(filename, self.chunk_memory)

    file_splitter = FileSplitter(filename, chunk_filename_format)
    file_splitter.split(self.chunk_memory)

    file_merger = FileMerger(MergeSortFile(total_chunks))

    # Floor division keeps the per-chunk buffer a whole number; true division
    # would hand the merger a float under Python 3.
    buffer_merge_size = self.chunk_memory // total_chunks

    # Strip only a trailing ".txt". str.replace would also remove ".txt" from
    # directory names or from the middle of the file name.
    extension = ".txt"
    if filename.endswith(extension):
        stem = filename[:-len(extension)]
    else:
        stem = filename
    output_filename = stem + '_sorted.txt'

    file_merger.merge(
        file_splitter.get_chunk_filenames(),
        output_filename,
        buffer_merge_size,
    )

    if self.cleanup:
        # Clean up the files this splitter instance produced, via the same
        # accessor the merge step uses, instead of reaching for a class-level
        # attribute on FileSplitter.
        mcm.cleanup_chunk_file(file_splitter.get_chunk_filenames())
# NOTE(review): the line below is whitespace-collapsed. The header of the
# function that owns the leading loop is not on it, so the original nesting
# can only be inferred from statement order.
#
# Function tail: for every index in range(total_chunk) it opens
# blacklist_temp_format.format(i) in read mode, splits each line on
# whitespace, and asks bisect.bisect for the insertion point of check_data.
# bisect.bisect inserts to the right of equal entries, so a matching entry
# would sit at item_point - 1. The one-element slice
# lines[item_point - 1:item_point] is tested instead of lines[item_point - 1]
# so that item_point == 0 yields an empty slice rather than wrapping around to
# the last entry. On a hit is_blacklisted is set to True and the loop stops;
# the flag is returned after the loop.
# Assumes the lines of every chunk file are already sorted (bisect requires
# it) - presumably done by initialize(); TODO confirm.
# NOTE(review): file.close() appears next to the hit branch. If it sits inside
# the `if`, chunk files without a match are never closed explicitly - confirm
# against the uncollapsed source.
#
# Module level: chunk_memory, chunk_filenames, blacklist,
# blacklist_temp_format and total_chunk are globals; total_chunk is computed
# from mcm.get_total_chunk(blacklist, chunk_memory) outside the __main__ guard.
# Script entry: initialize(blacklist), three demo check_blacklist lookups are
# printed, then mcm.cleanup_chunk_file(chunk_filenames) is called.
for i in range(total_chunk): file = open(blacklist_temp_format.format(i), "r") lines = [line.split() for line in file.readlines()] item_point = bisect.bisect( lines, check_data) # binary search for each chunk file if check_data in lines[item_point - 1:item_point]: is_blacklisted = True file.close() break return is_blacklisted # Global variables chunk_memory = mcm.get_parsed_memory("1k") chunk_filenames = [] blacklist = "blacklist.txt" blacklist_temp_format = 'blacklist_{0}.txt' total_chunk = mcm.get_total_chunk(blacklist, chunk_memory) if __name__ == "__main__": initialize(blacklist) # Demo print(check_blacklist("Siska", "712454")) print(check_blacklist("Melisa", "8565467")) print(check_blacklist("Jokho", "081235")) print() mcm.cleanup_chunk_file(chunk_filenames)
# NOTE(review): whitespace-collapsed fragment. The `def` line of the function
# that contains the leading loop is not present here, so the nesting of the
# statements is inferred, not known.
#
# Lookup loop: one pass per chunk index in range(total_chunk). Each pass opens
# blacklist_temp_format.format(i) for reading, turns every line into a list of
# whitespace-separated fields, and uses bisect.bisect to locate where
# check_data would be inserted. Because bisect.bisect places the insertion
# point after equal entries, an existing match is the element just before it.
# Slicing lines[item_point-1 : item_point] gives an empty list when item_point
# is 0, which avoids accidentally comparing against the last entry. A match
# sets is_blacklisted to True and leaves the loop; the function then returns
# is_blacklisted.
# Assumes each chunk file is stored in sorted order, as bisect requires -
# TODO confirm where the chunks are written.
# NOTE(review): if file.close() belongs to the `if` body, files that do not
# contain check_data are left to be closed by the garbage collector - verify.
#
# Globals defined after the function: chunk_memory (from
# mcm.get_parsed_memory("1k")), chunk_filenames, blacklist,
# blacklist_temp_format, and total_chunk (from mcm.get_total_chunk), all
# evaluated outside the __main__ guard.
# Demo under the __main__ guard: initialize(blacklist), print the result of
# three check_blacklist calls, print an empty line, then hand chunk_filenames
# to mcm.cleanup_chunk_file.
for i in range(total_chunk): file = open(blacklist_temp_format.format(i), "r") lines = [line.split() for line in file.readlines()] item_point = bisect.bisect(lines, check_data) # binary search for each chunk file if check_data in lines[item_point-1 : item_point]: is_blacklisted = True file.close() break return is_blacklisted # Global variables chunk_memory = mcm.get_parsed_memory("1k") chunk_filenames = [] blacklist = "blacklist.txt" blacklist_temp_format = 'blacklist_{0}.txt' total_chunk = mcm.get_total_chunk(blacklist, chunk_memory) if __name__ == "__main__": initialize(blacklist) # Demo print(check_blacklist("Siska", "712454")) print(check_blacklist("Melisa", "8565467")) print(check_blacklist("Jokho", "081235")) print() mcm.cleanup_chunk_file(chunk_filenames)