Esempio n. 1
0
def _run_keep(arrays_dict, buffers, buffers_to_infiles, buffer_to_outfiles):
    """Process every read buffer in index order and gather run statistics.

    A memory monitor runs for the whole duration of the processing and is
    always stopped, and the input files are always closed, even when
    processing a buffer raises.

    Arguments:
    ----------
        arrays_dict: dictionary mapping each output block index to its list of write blocks
        buffers: container of Volume objects (see utils.py) representing the read buffers,
            indexed by the integers 0 .. len(buffers) - 1 (a dict keyed by buffer index or a list).
            Each volume contains the coordinates of the buffer in the original image.
        buffers_to_infiles: maps each read buffer index to the list of input blocks it crosses -> to know which input block to read
        buffer_to_outfiles: maps each read buffer index to the list of output blocks it crosses -> to only search for the write buffers linked to that read buffer

    Returns:
    --------
        A tuple ``([read_time, write_time], ram_pile, swap_pile,
        nb_infile_openings, nb_infile_inside_seeks, voxel_tracker)`` where the
        times and seek counters are the sums of the values returned by
        ``process_buffer`` for each buffer, and the two piles come from the
        monitor.
    """
    from monitor.monitor import Monitor

    cache = dict()
    voxel_tracker = VoxelTracker()
    nb_infile_openings = 0
    nb_infile_inside_seeks = 0
    # len() works for both a dict keyed by buffer index and a plain list.
    nb_buffers = len(buffers)
    read_time = 0
    write_time = 0

    _monitor = Monitor(enable_print=False, enable_log=False, save_data=True)
    _monitor.disable_clearconsole()
    _monitor.set_delay(5)
    _monitor.start()
    try:
        # All buffers are allocated with the shape of the first one.
        buffer_shape = buffers[0].get_shape()
        # The factor 2 equals the item size in bytes of np.float16.
        # NOTE(review): this passes a byte count to add_voxels() - confirm
        # that this is the unit the tracker expects.
        buffer_size = buffer_shape[0] * buffer_shape[1] * buffer_shape[2] * 2
        voxel_tracker.add_voxels(buffer_size)

        try:
            # for each read buffer
            for buffer_index in range(nb_buffers):
                # A fresh array is handed to process_buffer for every buffer
                # (never the previous one); np.empty does not modify the shape
                # it is given, so no defensive copy of the shape is needed.
                buffer_data = np.empty(tuple(buffer_shape), dtype=np.float16)

                print("\nBUFFER ", buffer_index, '/', nb_buffers)
                if DEBUG:
                    print_mem_info()

                buffer = buffers[buffer_index]
                nb_opening_seeks_tmp, nb_inside_seeks_tmp, t1, t2 = process_buffer(
                    buffer_data, arrays_dict, buffers, buffer, voxel_tracker,
                    buffers_to_infiles, buffer_to_outfiles, cache)

                read_time += t1
                write_time += t2
                nb_infile_openings += nb_opening_seeks_tmp
                nb_infile_inside_seeks += nb_inside_seeks_tmp

                if DEBUG:
                    print("End of buffer - Memory info:")
                    print_mem_info()

                    # In debug mode only the first two buffers are processed.
                    if buffer_index == 1:
                        sys.exit()

                # Drop this function's reference to the processed buffer now,
                # instead of allocating an unused replacement after the last
                # iteration.
                del buffer_data
        finally:
            file_manager.close_infiles()
    finally:
        _monitor.stop()

    ram_pile, swap_pile = _monitor.get_mem_piles()
    return [read_time, write_time], ram_pile, swap_pile, nb_infile_openings, nb_infile_inside_seeks, voxel_tracker
def _run_with_memory_monitor(monitor_cls, func, *func_args):
    """Call ``func(*func_args)`` while a monitor instance is running.

    Arguments:
        monitor_cls: the Monitor class to instantiate (passed in because it is
            imported inside ``experiment``).
        func: the callable to execute and time.
        *func_args: positional arguments forwarded to ``func``.

    Returns:
        A tuple ``(result, elapsed, piles)``: the return value of ``func``,
        the wall-clock duration of the call as measured with ``time.time()``,
        and the value of ``get_mem_piles()`` read after the monitor stopped.
    """
    monitor = monitor_cls(enable_print=False, enable_log=False, save_data=True)
    monitor.disable_clearconsole()
    monitor.set_delay(15)
    monitor.start()
    try:
        start = time.time()
        result = func(*func_args)
        elapsed = time.time() - start
    finally:
        # Stop the monitor even if the algorithm raises.
        monitor.stop()
    return result, elapsed, monitor.get_mem_piles()


def experiment(args):
    """Execute every run of the selected case with the selected model.

    Note:
    - data type is np.float16

    Arguments:
        args: options object. The attributes read here are ``paths_config``,
            ``cases_config``, ``case_name``, ``model``, ``file_format``,
            ``addition``, ``distributed``, ``overwrite``, ``verify`` and, for
            the "clustered" model, ``clustered_mem``. The object is also
            forwarded to ``write_memory_pile`` and ``write_voxel_history``.

    Returns:
        A list with one row per run:
        ``[case_name, ref, model, t, tpp, tread, twrite, seeks_data[0],
        seeks_data[1], seeks_data[2], seeks_data[3], max_voxels, success]``.

    Raises:
        ValueError: if ``args.model`` is not "baseline", "keep" or "clustered".
    """
    paths = load_json(args.paths_config)

    # Extend sys.path with the configured PYTHONPATH entries before the
    # project imports below are executed.
    for key, value in paths.items():
        if "PYTHONPATH" in key:
            sys.path.insert(0, value)

    from monitor.monitor import Monitor
    from repartition_experiments.scripts_exp.exp_utils import create_empty_dir, verify_results
    from repartition_experiments.algorithms.baseline_algorithm import baseline_rechunk
    from repartition_experiments.algorithms.keep_algorithm import keep_algorithm, get_input_aggregate
    from repartition_experiments.algorithms.utils import get_file_manager
    from repartition_experiments.algorithms.clustered_reads import clustered_reads

    # setting
    cases = load_json(args.cases_config)
    bpv = 2  # bytes per voxel; 2 is the item size of np.float16
    indir_path = os.path.join(paths["ssd_path"], 'indir')
    outdir_path = os.path.join(paths["ssd_path"], 'outdir')

    # creating empty output directories
    create_empty_dir(outdir_path)
    if args.distributed:
        print("Distributed mode -> creating the output directories")
        for i in range(6):
            dirpath = '/disk' + str(i) + '/gtimothee'
            create_empty_dir(os.path.join(dirpath, 'output'))
    fm = get_file_manager(args.file_format)
    if args.overwrite:
        fm.remove_all(paths["ssd_path"])

    # transform cases into tuples + perform sanity check
    case = cases[args.case_name]
    for run in case:
        R, O, I, B = (tuple(run[key]) for key in ("R", "O", "I", "B"))
        if args.case_name.split('_')[0] == "case 1":
            # For "case 1" the buffer shape comes from get_input_aggregate and
            # volumes 1 to 7 are kept, overriding the configured values.
            lambd = get_input_aggregate(O, I)
            B = (lambd[0], lambd[1], lambd[2])
            run["volumestokeep"] = list(range(1, 8))

        run["R"] = R
        run["O"] = O
        run["I"] = I
        run["B"] = B

        # Best-effort check that each shape evenly divides R: problems are
        # reported on stdout but never abort the experiment. An explicit test
        # is used rather than `assert`, which is removed under `python -O`.
        for shape_to_test in [O, I, B]:
            for dim in range(3):
                try:
                    remainder = R[dim] % shape_to_test[dim]
                except (ArithmeticError, LookupError, TypeError, ValueError) as e:
                    print(R, shape_to_test)
                    print(e)
                else:
                    if remainder != 0:
                        print(R, shape_to_test)
                        print(f"R[{dim}]={R[dim]} is not a multiple of "
                              f"{shape_to_test[dim]}")

    random.shuffle(case)
    results = list()
    for run in case:
        R, O, I, B = run["R"], run["O"], run["I"], run["B"]
        volumestokeep = run["volumestokeep"]
        ref = run["ref"]
        print(
            f'Case being processed: (ref: {ref}) {R}, {I}, {O}, {B}, {volumestokeep}'
        )
        filename = f'{R[0]}_{R[1]}_{R[2]}_original.hdf5'
        origarr_filepath = os.path.join(paths["ssd_path"], filename)

        # resplit
        print("processing...")

        flush_cache()
        print("cache flushed")

        if args.model == "baseline":
            tpp = 0
            (tread, twrite, seeks_data), t, piles = _run_with_memory_monitor(
                Monitor, baseline_rechunk, indir_path, outdir_path, O, I, R,
                args.file_format, args.addition, args.distributed)
            voxel_tracker = None
            max_voxels = 0
        elif args.model == "keep":
            print("Running keep...")
            t = time.time()
            tpp, tread, twrite, seeks_data, voxel_tracker, piles = keep_algorithm(
                R, O, I, B, volumestokeep, args.file_format, outdir_path,
                indir_path, args.addition, args.distributed)
            # tpp, as returned by keep_algorithm, is excluded from the
            # reported processing time.
            t = time.time() - t - tpp
            max_voxels = voxel_tracker.get_max()
        elif args.model == "clustered":
            tpp = 0
            m = args.clustered_mem * 1000000000  # one GIG
            (tread, twrite, seeks_data), t, piles = _run_with_memory_monitor(
                Monitor, clustered_reads, outdir_path, R, I, bpv, m,
                args.file_format, indir_path)
            voxel_tracker = None
            max_voxels = 0
        else:
            raise ValueError("Bad model name")

        print(f"Processing time: {t}")
        print(f"Read time: {tread}")
        print(f"Write time: {twrite}")

        # verify and clean output
        print("verifying results....")
        if args.verify:
            # NOTE(review): this compares the full case name with "case 3"
            # whereas the "case 1" test above compares only the part before
            # the first '_' - confirm which one is intended.
            split_merge = args.case_name == "case 3"
            success = verify_results(outdir_path, origarr_filepath, R, O,
                                     args.file_format, args.addition,
                                     split_merge)
        else:
            success = True
        print("successful run: ", success)

        results.append([
            args.case_name, ref, args.model, t, tpp, tread, twrite,
            seeks_data[0], seeks_data[1], seeks_data[2], seeks_data[3],
            max_voxels, success
        ])
        create_empty_dir(outdir_path)

        write_memory_pile(piles[0], piles[1], ref, args)
        if voxel_tracker is not None:
            write_voxel_history(voxel_tracker, ref, args)

    return results