def delete_analysis(base_path):
    """
    Delete distributed analysis sets from a FileHandler.

    Parameters
    ----------
    base_path : str or pathlib.Path
        Base path of FileHandler output

    Notes
    -----
    This function is parallelized over sets, and so can be effectively
    parallelized up to the number of distributed sets.

    """
    base_path = pathlib.Path(base_path)
    logger.info("Deleting files from {}".format(base_path))
    set_paths = get_assigned_sets(base_path, distributed=True)
    for set_path in set_paths:
        set_path = pathlib.Path(set_path)
        logger.info("Deleting set {}".format(set_path))
        set_stem = set_path.stem
        proc_paths = set_path.glob("{}_p*.h5".format(set_stem))
        proc_paths = natural_sort(proc_paths)
        for proc_path in proc_paths:
            proc_path.unlink()
        set_path.rmdir()
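# Illustrative usage sketch (not part of the module): removing the distributed
# sets of a hypothetical "snapshots" FileHandler, e.g. after merging. Run under
# MPI so each process deletes only its assigned sets.
#
#   delete_analysis("snapshots")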
def visit_writes(set_paths, function, **kw):
    """
    Apply function to writes from a list of analysis sets.

    Parameters
    ----------
    set_paths : list of str or pathlib.Path
        List of set paths
    function : function(set_path, start, count, **kw)
        A function on an HDF5 file, start index, and count.

    Other keyword arguments are passed on to `function`.

    Notes
    -----
    This function is parallelized over writes, and so can be effectively
    parallelized up to the number of writes from all specified sets.

    """
    set_paths = natural_sort(str(sp) for sp in set_paths)
    arg_list = zip(set_paths, *get_assigned_writes(set_paths))
    for set_path, start, count in arg_list:
        if count:
            logger.info("Visiting set {} (start: {}, end: {})".format(set_path, start, start + count))
            function(set_path, start, count, **kw)
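# Illustrative usage sketch (not part of the module): any callable accepting
# (set_path, start, count, **kw) can be passed to visit_writes. The callback
# name, the output keyword, and the "snapshots" path below are hypothetical.
#
#   def process_writes(set_path, start, count, output='frames'):
#       with h5py.File(str(set_path), mode='r') as file:
#           for index in range(start, start + count):
#               ...  # operate on the tasks in `file` at this write index
#
#   visit_writes(get_all_sets("snapshots"), process_writes, output="frames")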
def merge_distributed_set(set_path, cleanup=False):
    """
    Merge a distributed analysis set from a FileHandler.

    Parameters
    ----------
    set_path : str or pathlib.Path
        Path to distributed analysis set folder
    cleanup : bool, optional
        Delete distributed files after merging (default: False)

    """
    set_path = pathlib.Path(set_path)
    logger.info("Merging set {}".format(set_path))

    set_stem = set_path.stem
    proc_paths = set_path.glob("{}_p*.h5".format(set_stem))
    proc_paths = natural_sort(proc_paths)
    joint_path = set_path.parent.joinpath("{}.h5".format(set_stem))

    # Create joint file, overwriting if it already exists
    with h5py.File(str(joint_path), mode='w') as joint_file:
        # Setup joint file based on first process file (arbitrary)
        merge_setup(joint_file, proc_paths[0])
        # Merge data from all process files
        for proc_path in proc_paths:
            merge_data(joint_file, proc_path)
    # Cleanup after completed merge, if directed
    if cleanup:
        for proc_path in proc_paths:
            proc_path.unlink()
        set_path.rmdir()
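# Illustrative usage sketch: merging every distributed set of a hypothetical
# "snapshots" FileHandler. Run under MPI, each process merges only its
# assigned sets via get_assigned_sets.
#
#   for set_path in get_assigned_sets("snapshots", distributed=True):
#       merge_distributed_set(set_path, cleanup=True)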
def get_all_writes(set_paths):
    """
    Get write numbers from a list of analysis sets.

    Parameters
    ----------
    set_paths : list of str or pathlib.Path
        List of set paths

    """
    set_paths = natural_sort(str(sp) for sp in set_paths)
    writes = []
    for set_path in set_paths:
        with h5py.File(str(set_path), mode='r') as file:
            writes.append(file.attrs['writes'])
    return writes
def get_all_sets(base_path, distributed=False, wrap=False):
    """
    Get all analysis sets from a FileHandler.

    Parameters
    ----------
    base_path : str or pathlib.Path
        Base path of FileHandler output
    distributed : bool, optional
        Return distributed sets instead of merged sets (default: False)
    wrap : bool, optional
        Currently unused (default: False)

    """
    base_path = pathlib.Path(base_path)
    base_stem = base_path.stem
    if distributed:
        set_paths = base_path.glob("{}_*".format(base_stem))
        set_paths = filter(lambda path: path.is_dir(), set_paths)
    else:
        set_paths = base_path.glob("{}_*.h5".format(base_stem))
    set_paths = natural_sort(set_paths)
    return set_paths
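# Illustrative usage sketch (hypothetical "snapshots" directory): with
# distributed=True the per-set folders are returned; by default the merged
# set files are returned.
#
#   merged_sets = get_all_sets("snapshots")                   # e.g. [snapshots/snapshots_s1.h5, ...]
#   dist_sets = get_all_sets("snapshots", distributed=True)   # e.g. [snapshots/snapshots_s1, ...]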
def get_assigned_writes(set_paths):
    """
    Divide writes from a list of analysis sets between MPI processes.

    Parameters
    ----------
    set_paths : list of str or pathlib.Path
        List of set paths

    """
    set_paths = natural_sort(str(sp) for sp in set_paths)
    # Distribute all writes in blocks
    writes = get_all_writes(set_paths)
    block = int(np.ceil(sum(writes) / MPI_SIZE))
    proc_start = MPI_RANK * block
    # Find set start/end indices
    writes = np.array(writes)
    set_ends = np.cumsum(writes)
    set_starts = set_ends - writes
    # Find proc start indices and counts for each set
    starts = np.clip(proc_start, a_min=set_starts, a_max=set_ends)
    counts = np.clip(proc_start + block, a_min=set_starts, a_max=set_ends) - starts
    return starts - set_starts, counts
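# Worked example (hypothetical numbers) of the block distribution above:
# writes = [10, 10, 5] across three sets (25 total) with MPI_SIZE = 2 gives
# block = ceil(25 / 2) = 13.
#   Rank 0 covers global writes [0, 13):  local starts = [0, 0, 0],  counts = [10, 3, 0]
#   Rank 1 covers global writes [13, 25): local starts = [10, 3, 0], counts = [0, 7, 5]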
data_dir += '/'
output_path = pathlib.Path(data_dir).absolute()

if args['--datacube']:
    datacube_filename = args['--datacube']
else:
    datacube_filename = data_dir + 'time_lat_datacube.h5'

start_time = time.time()
if args['--case']:
    file_glob = args['--case'] + '/slices/slices_s*.h5'
    files = glob.glob(file_glob)
else:
    files = args['<files>']

from dedalus.tools.general import natural_sort
files = natural_sort(files)
data, times, theta, r = read_data(files)

if MPI.COMM_WORLD.rank == 0:
    f_cube = h5py.File(datacube_filename, 'w')
    f_cube['scales/r'] = r
    f_cube['scales/theta'] = theta
    f_cube['scales/sim_time'] = times
    for task in data:
        f_cube['tasks/{:s}'.format(task)] = data[task]
    f_cube.close()

end_time = time.time()
logger.info("time to build datacube: {:g} sec".format(end_time - start_time))
    for task in tasks:
        logger.info(task)
        freq, time, spectrum_freq = frequency_spectrum(task, files[start:end])
        task_group.create_dataset(name=task, data=spectrum_freq)
    scale_group.create_dataset(name='f', data=freq)
    scale_group.create_dataset(name='t', data=time)
    spectra_f.close()


if __name__ == "__main__":
    args = docopt(__doc__)
    files = natural_sort(args['<files>'])
    if args['--start'] is None:
        start = 0
    else:
        start = int(args['--start'])
    if args['--end'] is None:
        end = -1
    else:
        end = int(args['--end'])
    if args['--output'] is None:
        file = files[0]
        index_under = file.rfind('_')
        new_file = file[:index_under] + '_freq.h5'
    else:
        new_file = args['--output']
    calculate_spectrum(files, start, end, new_file)
rank = MPI.COMM_WORLD.rank
size = MPI.COMM_WORLD.size

dir = sys.argv[1]
if len(sys.argv) > 2:
    num_start = int(sys.argv[2])
else:
    num_start = 1
if len(sys.argv) > 3:
    num_end = int(sys.argv[3])
else:
    # Find number of distributed set folders
    base_path = pathlib.Path(sys.argv[1]).absolute()
    folder_paths = base_path.glob('%s_f*' % base_path.stem)
    folder_paths = general.natural_sort(folder_paths)
    num_end = len(folder_paths) + (num_start - 1)

for i in range(num_start + rank, num_end + 1, size):
    filename = dir + "/" + dir + "_s" + str(i) + ".h5"
    print(filename)
    visit(filename)
    # proc = multiprocessing.Process(target=visit, args=(filename, main))
    # proc.daemon = True
    # proc.start()
    # proc.join()