def merge_sort_stupid(fin: io.BufferedIOBase, fout: io.BufferedIOBase, memory_size: int, left=0, count=None):
    """Recursively merge-sort a slice of ``fin`` into ``fout`` using temp files.

    ``fout`` is rewound to offset 0 first. When ``count`` is not supplied it
    is obtained from ``content_length(fin, preserve_pos=False)``.

    * If ``count`` does not exceed ``memory_size``, ``fin`` is positioned via
      ``go_to_pos(fin, left)``, ``count`` items are read, sorted in memory and
      written to ``fout``.
    * Otherwise the range is split in two (``count // 2`` and the remainder),
      each half is sorted recursively into its own temporary file, and the
      two results are combined with ``heapq.merge`` into ``fout``.

    Args:
        fin: Input stream holding the data to sort.
        fout: Output stream receiving the sorted data.
        memory_size: Threshold below which a range is sorted in memory; also
            passed as ``batch_size`` to the write helper.
            NOTE(review): presumably measured in items, not bytes -- confirm
            against ``read_content`` / ``write_content``.
        left: Start of the range inside ``fin`` (handed to ``go_to_pos``).
        count: Size of the range; ``None`` means "ask ``content_length``".
    """
    fout.seek(0)
    if count is None:
        count = content_length(fin, preserve_pos=False)

    if count > memory_size:
        # Too large for one in-memory sort: sort each half into its own
        # temporary file, then merge the two sorted runs.
        half = count // 2
        with tmp_file() as lower_run, tmp_file() as upper_run:
            merge_sort_stupid(fin, lower_run, memory_size, left, count=half)
            merge_sort_stupid(fin, upper_run, memory_size, left + half, count=count - half)

            # Rewind both runs before reading them back.
            lower_run.seek(0)
            upper_run.seek(0)

            # Each of the two readers gets half of the memory budget.
            merged = heapq.merge(
                read_content(lower_run, batch_size=memory_size // 2),
                read_content(upper_run, batch_size=memory_size // 2),
            )
            write_content(fout, merged, batch_size=memory_size)
    else:
        # Base case: the whole range fits, sort it directly.
        go_to_pos(fin, left)
        in_memory = sorted(read_content(fin, count=count))
        write_content(fout, in_memory, batch_size=memory_size)
def _to_sorted_blocks(fin: io.BufferedIOBase, memory_size):
    """Split ``fin`` into sorted chunks, yielding one closed temp file per chunk.

    Repeatedly calls ``read_content(fin, memory_size)``, sorts what it returns
    and stores it in a fresh ``tmp_file()``. Iteration stops at the first
    empty chunk. Each file is closed *before* being yielded, so consumers
    receive closed file objects.

    NOTE(review): this relies on ``tmp_file()`` objects remaining usable by
    the consumer after ``close()`` -- confirm against ``tmp_file`` and
    ``_merge_blocks``.
    """

    def next_sorted_chunk():
        return sorted(read_content(fin, memory_size))

    # ``sorted`` always returns a list, so an exhausted input shows up as
    # ``[]``, which serves as the stop sentinel.
    for chunk in iter(next_sorted_chunk, []):
        block = tmp_file()
        write_content(block, chunk)
        block.close()
        yield block
def merge_sort_k_blocks_two_passes(fin: io.BufferedIOBase, fout: io.BufferedIOBase, memory_size: int):
    """Sort ``fin`` into ``fout`` via sorted blocks merged in at most two passes.

    The input is first cut into sorted blocks by ``_to_sorted_blocks``.

    * Fewer than 10 blocks: they are merged straight into ``fout``.
    * Otherwise: consecutive groups of ``int(sqrt(number_of_blocks))`` blocks
      are each merged into a larger temporary file (closed once written), and
      those larger files are then merged into ``fout``.

    Args:
        fin: Input stream to sort.
        fout: Output stream receiving the fully merged result.
        memory_size: Budget forwarded to ``_to_sorted_blocks`` and
            ``_merge_blocks``.
    """
    blocks = list(_to_sorted_blocks(fin, memory_size))

    if len(blocks) >= 10:
        group_size = int(sqrt(len(blocks)))
        merged_groups = []
        for start in range(0, len(blocks), group_size):
            group_file = tmp_file()
            # Slicing clamps to the end of the list, so the last group may
            # simply be shorter.
            _merge_blocks(blocks[start:start + group_size], group_file, memory_size)
            group_file.close()
            merged_groups.append(group_file)
        blocks = merged_groups

    _merge_blocks(blocks, fout, memory_size)
def _test_simple(self, values, sort_f, memory_size=None):
    """Run ``sort_f`` on ``values`` through temp files and verify the result.

    ``values`` is written to a temporary input file, which is rewound and
    handed to ``sort_f`` together with a temporary output file. Afterwards
    both files are passed to ``self._check_sorted``.

    Args:
        values: Data written to the input file via ``write_content``.
        sort_f: Callable invoked as
            ``sort_f(input_file, output_file, memory_size=...)``.
        memory_size: Memory budget for ``sort_f``; when ``None`` the value of
            ``self._memory_size`` is used instead.
    """
    with tmp_file() as source, tmp_file() as target:
        write_content(source, values)
        source.seek(0)

        # Fall back to the instance-level default only when no explicit
        # budget was given.
        if memory_size is None:
            budget = self._memory_size
        else:
            budget = memory_size

        sort_f(source, target, memory_size=budget)
        self._check_sorted(source, target)