Example #1
def delete_analysis(base_path):
    """
    Delete distributed analysis sets from a FileHandler.

    Parameters
    ----------
    base_path : str or pathlib.Path
        Base path of FileHandler output

    Notes
    -----
    This function is parallelized over sets, and so can be effectively
    parallelized up to the number of distributed sets.

    """
    base_path = pathlib.Path(base_path)
    logger.info("Deleting files from {}".format(base_path))

    set_paths = get_assigned_sets(base_path, distributed=True)
    for set_path in set_paths:
        set_path = pathlib.Path(set_path)
        logger.info("Deleting set {}".format(set_path))

        set_stem = set_path.stem
        proc_paths = set_path.glob("{}_p*.h5".format(set_stem))
        proc_paths = natural_sort(proc_paths)

        for proc_path in proc_paths:
            proc_path.unlink()
        set_path.rmdir()
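A minimal usage sketch for the function above, run under MPI so each rank only removes the sets assigned to it; the module name post_tools and the "snapshots" output directory are assumptions, not part of the original code.

from post_tools import delete_analysis  # hypothetical module holding the function above

# Each MPI rank deletes only the distributed sets handed to it by get_assigned_sets.
delete_analysis("snapshots")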
Example #2
def visit_writes(set_paths, function, **kw):
    """
    Apply function to writes from a list of analysis sets.

    Parameters
    ----------
    set_paths : list of str or pathlib.Path
        List of set paths
    function : function(set_path, start, count, **kw)
        A function on an HDF5 file, start index, and count.

    Other keyword arguments are passed on to `function`.

    Notes
    -----
    This function is parallelized over writes, and so can be effectively
    parallelized up to the number of writes from all specified sets.

    """
    set_paths = natural_sort(str(sp) for sp in set_paths)
    arg_list = zip(set_paths, *get_assigned_writes(set_paths))
    for set_path, start, count in arg_list:
        if count:
            logger.info("Visiting set {} (start: {}, end: {})".format(
                set_path, start, start + count))
            function(set_path, start, count, **kw)
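A hedged usage sketch for visit_writes: print the simulation times of each rank's assigned writes, reading the 'scales/sim_time' dataset used in Example #7 (assumed to exist in your merged files); the module name post_tools is also an assumption.

import h5py

from post_tools import get_all_sets, visit_writes  # hypothetical module holding the functions above

def print_sim_times(set_path, start, count):
    # Called once per assigned (set, start, count) block.
    with h5py.File(str(set_path), mode='r') as file:
        times = file['scales/sim_time'][start:start+count]
        print(set_path, times)

set_paths = get_all_sets("snapshots")  # merged sets, as in Example #5
visit_writes(set_paths, print_sim_times)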
Example #3
def merge_distributed_set(set_path, cleanup=False):
    """
    Merge a distributed analysis set from a FileHandler.

    Parameters
    ----------
    set_path : str or pathlib.Path
        Path to distributed analysis set folder
    cleanup : bool, optional
        Delete distributed files after merging (default: False)

    """
    set_path = pathlib.Path(set_path)
    logger.info("Merging set {}".format(set_path))

    set_stem = set_path.stem
    proc_paths = set_path.glob("{}_p*.h5".format(set_stem))
    proc_paths = natural_sort(proc_paths)
    joint_path = set_path.parent.joinpath("{}.h5".format(set_stem))

    # Create joint file, overwriting if it already exists
    with h5py.File(str(joint_path), mode='w') as joint_file:
        # Setup joint file based on first process file (arbitrary)
        merge_setup(joint_file, proc_paths[0])
        # Merge data from all process files
        for proc_path in proc_paths:
            merge_data(joint_file, proc_path)
    # Cleanup after completed merge, if directed
    if cleanup:
        for proc_path in proc_paths:
            proc_path.unlink()
        set_path.rmdir()
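A usage sketch combining this function with get_all_sets from Example #5: merge every distributed set under one handler directory and remove the per-process files afterwards (the module name and the "snapshots" directory are assumptions).

from post_tools import get_all_sets, merge_distributed_set  # hypothetical module

for set_path in get_all_sets("snapshots", distributed=True):
    # Writes <set_stem>.h5 next to each distributed folder (see joint_path above).
    merge_distributed_set(set_path, cleanup=True)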
Example #4
def get_all_writes(set_paths):
    """
    Get write numbers from a list of analysis sets.

    Parameters
    ----------
    set_paths : list of str or pathlib.Path
        List of set paths

    """
    set_paths = natural_sort(str(sp) for sp in set_paths)
    writes = []
    for set_path in set_paths:
        with h5py.File(str(set_path), mode='r') as file:
            writes.append(file.attrs['writes'])
    return writes
Example #5
def get_all_sets(base_path, distributed=False, wrap=False):
    """
    Get all analysis sets from a FileHandler.

    Parameters
    ----------
    base_path : str or pathlib.Path
        Base path of FileHandler output
    distributed : bool, optional
        Return distributed sets instead of merged sets (default: False)
    wrap : bool, optional

    """
    base_path = pathlib.Path(base_path)
    base_stem = base_path.stem

    if distributed:
        set_paths = base_path.glob("{}_*".format(base_stem))
        set_paths = filter(lambda path: path.is_dir(), set_paths)
    else:
        set_paths = base_path.glob("{}_*.h5".format(base_stem))
    set_paths = natural_sort(set_paths)
    return set_paths
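A quick usage sketch for the function above, listing merged versus distributed sets for one handler (the "snapshots" directory name and post_tools module are assumptions).

from post_tools import get_all_sets  # hypothetical module holding the function above

merged = get_all_sets("snapshots")                         # snapshots/snapshots_*.h5 files
distributed = get_all_sets("snapshots", distributed=True)  # snapshots/snapshots_* folders
print(len(merged), "merged sets,", len(distributed), "distributed folders")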
Example #6
def get_assigned_writes(set_paths):
    """
    Divide writes from a list of analysis sets between MPI processes.

    Parameters
    ----------
    set_paths : list of str or pathlib.Path
        List of set paths

    """
    set_paths = natural_sort(str(sp) for sp in set_paths)
    # Distribute all writes in blocks
    writes = get_all_writes(set_paths)
    block = int(np.ceil(sum(writes) / MPI_SIZE))
    proc_start = MPI_RANK * block
    # Find set start/end indices
    writes = np.array(writes)
    set_ends = np.cumsum(writes)
    set_starts = set_ends - writes
    # Find proc start indices and counts for each set
    starts = np.clip(proc_start, a_min=set_starts, a_max=set_ends)
    counts = np.clip(proc_start + block, a_min=set_starts,
                     a_max=set_ends) - starts
    return starts - set_starts, counts
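To make the block assignment above concrete, here is a standalone sketch of the same arithmetic without MPI or file I/O, using three sets with 10, 10, and 5 writes split across two ranks (the numbers are purely illustrative).

import numpy as np

writes = np.array([10, 10, 5])               # writes per set
size = 2                                     # stand-in for MPI_SIZE
block = int(np.ceil(writes.sum() / size))    # 13 writes per rank
set_ends = np.cumsum(writes)                 # [10, 20, 25]
set_starts = set_ends - writes               # [ 0, 10, 20]

for rank in range(size):
    proc_start = rank * block
    starts = np.clip(proc_start, a_min=set_starts, a_max=set_ends)
    counts = np.clip(proc_start + block, a_min=set_starts, a_max=set_ends) - starts
    print(rank, starts - set_starts, counts)
    # rank 0 -> starts [0 0 0],  counts [10 3 0]
    # rank 1 -> starts [10 3 0], counts [0 7 5]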
Example #7
        data_dir += '/'
        output_path = pathlib.Path(data_dir).absolute()

    if args['--datacube']:
        datacube_filename = args['--datacube']
    else:
        datacube_filename = data_dir + 'time_lat_datacube.h5'

    start_time = time.time()
    if args['--case']:
        file_glob = args['--case'] + '/slices/slices_s*.h5'
        files = glob.glob(file_glob)
    else:
        files = args['<files>']
    from dedalus.tools.general import natural_sort
    files = natural_sort(files)

    data, times, theta, r = read_data(files)

    if MPI.COMM_WORLD.rank == 0:
        f_cube = h5py.File(datacube_filename, 'w')
        f_cube['scales/r'] = r
        f_cube['scales/theta'] = theta
        f_cube['scales/sim_time'] = times
        for task in data:
            f_cube['tasks/{:s}'.format(task)] = data[task]
        f_cube.close()
        end_time = time.time()
        logger.info("time to build datacube {:g}sec".format(end_time -
                                                            start_time))
Example #8
    for task in tasks:
        logger.info(task)
        freq, time, spectrum_freq = frequency_spectrum(task, files[start:end])
        task_group.create_dataset(name=task, data=spectrum_freq)

    scale_group.create_dataset(name='f', data=freq)
    scale_group.create_dataset(name='t', data=time)

    spectra_f.close()


if __name__ == "__main__":

    args = docopt(__doc__)

    files = natural_sort(args['<files>'])

    if args['--start'] is None:
        start = 0
    else:
        start = int(args['--start'])

    if args['--end'] is None:
        end = -1
    else:
        end = int(args['--end'])

    if args['--output'] is None:
        file = files[0]
        index_under = file.rfind('_')
        new_file = file[:index_under] + '_freq.h5'
    else:
        new_file = args['--output']

    calculate_spectrum(files, start, end, new_file)
Example #9
    rank = MPI.COMM_WORLD.rank
    size = MPI.COMM_WORLD.size

    dir = sys.argv[1]

    if len(sys.argv) > 2:
        num_start = int(sys.argv[2])
    else:
        num_start = 1

    if len(sys.argv) > 3:
        num_end = int(sys.argv[3])
    else:
        # Count the distributed output folders to determine num_end
        base_path = pathlib.Path(sys.argv[1]).absolute()
        folder_paths = base_path.glob('%s_f*' % base_path.stem)
        folder_paths = general.natural_sort(folder_paths)
        num_end = len(folder_paths) + (num_start - 1)


    for i in range(num_start + rank, num_end + 1, size):
        filename = dir + "/" + dir + "_s" + str(i) + ".h5"
        print(filename)
        visit(filename)
#      proc = multiprocessing.Process(target=visit, args=(filename,main))
#      proc.daemon=True
#      proc.start()
#      proc.join()