Exemple #1
0
def merge_sort_stupid(fin: io.BufferedIOBase, fout: io.BufferedIOBase, memory_size: int, left=0, count=None):
    """Recursively merge-sort a slice of ``fin`` and write the result to ``fout``.

    The slice covers ``count`` items starting at item position ``left``.
    When it fits in memory it is sorted directly; otherwise each half is
    sorted into its own temporary file and the two files are merged.

    Args:
        fin: Input stream holding the values to sort.
        fout: Output stream; it is rewound to offset 0 before anything is
            written.
        memory_size: Largest ``count`` that is sorted in a single in-memory
            pass (it is compared directly against ``count``); also forwarded
            as the write batch size.
        left: Position of the first item of the slice, as understood by
            ``go_to_pos``.
        count: Number of items in the slice. ``None`` means "ask
            ``content_length`` about ``fin``".
    """
    fout.seek(0)
    if count is None:
        count = content_length(fin, preserve_pos=False)

    if count > memory_size:
        half = count // 2
        # Each of the two merge inputs gets half of the memory budget.
        merge_batch = memory_size // 2
        with tmp_file() as lower_half, tmp_file() as upper_half:
            merge_sort_stupid(fin, lower_half, memory_size, left, count=half)
            merge_sort_stupid(fin, upper_half, memory_size, left + half, count=count - half)
            # Rewind both sorted halves before streaming them into the merge.
            lower_half.seek(0)
            upper_half.seek(0)
            merged = heapq.merge(
                read_content(lower_half, batch_size=merge_batch),
                read_content(upper_half, batch_size=merge_batch),
            )
            write_content(fout, merged, batch_size=memory_size)
        return

    # Base case: the whole slice fits into memory, so sort it in one go.
    go_to_pos(fin, left)
    write_content(fout, sorted(read_content(fin, count=count)), batch_size=memory_size)
Exemple #2
0
def _to_sorted_blocks(fin: io.BufferedIOBase, memory_size: int):
    """Split ``fin`` into sorted blocks, each stored in its own temp file.

    Repeatedly reads a chunk via ``read_content(fin, memory_size)``, sorts it
    in memory, writes it to a new temporary file and yields that file after
    closing it. Iteration stops at the first empty chunk.

    Args:
        fin: Input stream to consume.
        memory_size: Second argument forwarded to ``read_content``
            (presumably the maximum number of items per chunk; confirm
            against ``read_content``).

    Yields:
        The already-closed temporary file object holding one sorted block.
    """
    while True:
        sorted_values = sorted(read_content(fin, memory_size))
        if not sorted_values:
            break

        f = tmp_file()
        try:
            write_content(f, sorted_values)
        finally:
            # Close even when the write fails so the handle is not leaked.
            # An explicit close() is used instead of ``with`` because the
            # block is handed to the caller after being closed.
            f.close()
        yield f
Exemple #3
0
def merge_sort_k_blocks_two_passes(fin: io.BufferedIOBase, fout: io.BufferedIOBase, memory_size: int):
    """Sort ``fin`` into ``fout`` using sorted blocks and at most two merge passes.

    The input is first split into sorted blocks by ``_to_sorted_blocks``.
    With fewer than 10 blocks they are merged straight into ``fout``.
    Otherwise groups of ``int(sqrt(n))`` consecutive blocks are merged into
    intermediate temporary files, and those are merged into ``fout``.

    Args:
        fin: Input stream holding the values to sort.
        fout: Output stream receiving the fully sorted values.
        memory_size: Memory budget forwarded to ``_to_sorted_blocks`` and
            ``_merge_blocks``.
    """
    tmp_files = list(_to_sorted_blocks(fin, memory_size))
    if len(tmp_files) < 10:
        larger_tmp_files = tmp_files
    else:
        larger_tmp_files = []
        larger_blocks_count = int(sqrt(len(tmp_files)))
        for i in range(0, len(tmp_files), larger_blocks_count):
            larger_f = tmp_file()
            try:
                # Slicing clamps at the end of the list, so the last group
                # may simply be shorter than the others.
                _merge_blocks(tmp_files[i:i + larger_blocks_count], larger_f, memory_size)
            finally:
                # Close even when the merge fails so the handle is not leaked.
                larger_f.close()
            larger_tmp_files.append(larger_f)

    _merge_blocks(larger_tmp_files, fout, memory_size)
 def _test_simple(self, values, sort_f, memory_size=None):
     """Run ``sort_f`` on ``values`` via temporary files and check the output.

     ``values`` is written to a temporary input file, which is rewound and
     passed to ``sort_f`` together with a temporary output file. Checking
     the result is delegated to ``self._check_sorted``.

     Args:
         values: Values to write into the input file.
         sort_f: Sort function called as
             ``sort_f(input_file, output_file, memory_size=...)``.
         memory_size: Memory budget for ``sort_f``; ``None`` falls back to
             ``self._memory_size``.
     """
     with tmp_file() as input_file, tmp_file() as output_file:
         write_content(input_file, values)
         input_file.seek(0)
         if memory_size is None:
             memory_size = self._memory_size
         sort_f(input_file, output_file, memory_size=memory_size)
         self._check_sorted(input_file, output_file)