def write_h5_averages(
    aver,
    file_name="xy",
    datadir="data/averages",
    nt=None,
    precision="d",
    indx=None,
    trange=None,
    quiet=True,
    append=False,
    procdim=None,
    dim=None,
    aver_by_proc=False,
    proc=-1,
    driver=None,
    comm=None,
    rank=0,
    size=1,
    overwrite=False,
    nproc=1,
):
    """
    Write an hdf5 format averages dataset given as an Averages object.

    We assume by default that a run simulation directory has already been
    constructed and start completed successfully in h5 format so that
    files dim, grid and param files are already present.
    If not the contents of these will need to be supplied as dictionaries
    along with persist if included.

    The file '<datadir>/<file_name>.h5' receives a dataset 'last' holding
    nt - 1, and one group per iteration named '0' ... str(nt - 1). Each
    group holds a 'time' dataset and one dataset per averaged variable.

    call signature:

    write_h5_averages(aver, file_name='xy', datadir='data/averages',
                      nt=None, precision='d', indx=None, trange=None,
                      quiet=True, append=False, procdim=None, dim=None,
                      aver_by_proc=False, proc=-1, driver=None, comm=None,
                      rank=0, size=1, overwrite=False, nproc=1)

    Keyword arguments:

    *aver*:
      Averages object.
      Must be of shape [n_vars, n1] for averages across 'xy', 'xz' or 'yz'.
      Must be of shape [n_vars, n1, n2] for averages across 'y', 'z'.

    *file_name*:
      Name of the snapshot file to be written, e.g. 'xy', 'xz', 'yz',
      'y', 'z'. It is also the attribute of *aver* that holds the data.

    *datadir*:
      Directory where the data is stored. It is created, including any
      missing parent directories, if it does not exist.

    *nt*:
      Number of iterations to record. Defaults to aver.t.shape[0].

    *precision*:
      Single 'f' or double 'd' precision.

    *indx*
      Restrict iterative range to be written. A list is used as is, any
      other truthy value is wrapped in a list, and a falsy value selects
      all of range(nt).

    *trange*:
      Restrict time range to be written. Currently not used by this
      function.

    *quiet*:
      Suppress progress output if True.

    *append*
      For large binary files the data may need to be appended iteratively.
      Selects file status 'a' instead of 'w'.

    *procdim*
      Dim object of processor *proc*; read with read.dim(proc=proc) if
      not supplied and *aver_by_proc* is active.

    *dim*
      Dim object required if the large binary files are supplied in chunks;
      read with read.dim() if not supplied and *aver_by_proc* is active.

    *aver_by_proc*
      Write only the slab belonging to *proc* into datasets of global
      size. Only honoured for file_name 'y' or 'z'.

    *proc*
      Processor index used to locate the slab when *aver_by_proc* is active.

    *driver*, *comm*, *rank*, *size*, *overwrite*
      Passed through to open_h5, group_h5 and dataset_h5.

    *nproc*
      Overwritten internally when *aver_by_proc* is active; otherwise unused.

    Returns -1 if the current directory is not a simulation directory,
    otherwise None.
    """
    import os
    import sys
    from os.path import join, exists

    import numpy as np

    from pencil import read
    from pencil.io import open_h5, group_h5, dataset_h5
    from pencil import is_sim_dir

    # test if simulation directory
    if not is_sim_dir():
        print("ERROR: Directory needs to be a simulation")
        sys.stdout.flush()
        return -1
    if not exists(datadir):
        # makedirs also creates missing parents (the default 'data/averages'
        # is nested); exist_ok tolerates another process creating the
        # directory between the check and the call.
        os.makedirs(datadir, exist_ok=True)
    # open file for writing data
    filename = join(datadir, file_name + ".h5")
    state = "a" if append else "w"
    if not quiet:
        print("rank", rank, "saving " + filename)
        sys.stdout.flush()
    # writing by processor only applies to the 2D ('y' and 'z') averages
    if file_name not in ("y", "z"):
        aver_by_proc = False
    if aver_by_proc:
        n1, n2 = None, None
        if not dim:
            dim = read.dim()
        if not procdim:
            procdim = read.dim(proc=proc)
        # n1 is the global extent of the first axis, nn the local extent
        if file_name == "y":
            nproc = dim.nprocz
            n1 = dim.nz
            nn = procdim.nz
        if file_name == "z":
            nproc = dim.nprocy
            n1 = dim.ny
            nn = procdim.ny
        n2 = dim.nx
    # number of iterations to record
    if not nt:
        nt = aver.t.shape[0]
    averages = getattr(aver, file_name)
    is_2d = file_name in ("y", "z")
    with open_h5(
        filename, state, driver=driver, comm=comm, overwrite=overwrite, rank=rank
    ) as ds:
        if indx:
            if not isinstance(indx, list):
                indx = [indx]
        else:
            indx = list(range(0, nt))
        if not quiet:
            print("rank", rank, "nt", nt, "indx", indx)
            sys.stdout.flush()
        dataset_h5(
            ds,
            "last",
            status=state,
            data=(nt - 1,),
            dtype="i",
            overwrite=overwrite,
            rank=rank,
            comm=comm,
            size=size,
        )
        # First create every group and dataset, then fill them; the order
        # of these creation calls is kept as in the original implementation.
        for it in range(0, nt):
            group_h5(
                ds,
                str(it),
                status=state,
                delete=False,
                overwrite=overwrite,
                rank=rank,
                size=size,
            )
        for it in range(0, nt):
            dataset_h5(
                ds[str(it)],
                "time",
                status=state,
                shape=(1,),
                dtype=precision,
                overwrite=overwrite,
                rank=rank,
                comm=comm,
                size=size,
            )
        for key in averages.__dict__.keys():
            data = getattr(averages, key)
            if is_2d:
                data = np.swapaxes(data, 1, 2)
            for it in range(0, nt):
                if aver_by_proc:
                    # dataset of global size; each proc fills its own slab
                    dataset_h5(
                        ds[str(it)],
                        key,
                        status=state,
                        shape=(n1, n2),
                        dtype=precision,
                        overwrite=overwrite,
                        rank=rank,
                        comm=comm,
                        size=size,
                    )
                else:
                    dataset_h5(
                        ds[str(it)],
                        key,
                        status=state,
                        shape=data[0].shape,
                        dtype=precision,
                        overwrite=overwrite,
                        rank=rank,
                        comm=comm,
                        size=size,
                    )
        # aver.t and the data arrays are indexed relative to indx[0]
        for it in indx:
            ds[str(it)]["time"][:] = aver.t[it - indx[0]]
        for key in averages.__dict__.keys():
            # key needs to be broadcast as order of keys may vary on each
            # process causing segmentation fault
            data = getattr(averages, key)
            if is_2d:
                data = np.swapaxes(data, 1, 2)
            if not quiet:
                print("writing", key, "on rank", rank)
                sys.stdout.flush()
            for it in indx:
                if aver_by_proc:
                    ds[str(it)][key][proc * nn : (proc + 1) * nn] = data[it - indx[0]]
                else:
                    ds[str(it)][key][:] = data[it - indx[0]]
    if not quiet:
        print(filename + " written on rank {}".format(rank))
        sys.stdout.flush()
def write_h5_grid(
    file_name="grid",
    datadir="data",
    precision="d",
    nghost=3,
    settings=None,
    param=None,
    grid=None,
    unit=None,
    quiet=True,
    driver=None,
    comm=None,
    overwrite=False,
    rank=0,
):
    """
    Write the grid information as hdf5.

    We assume by default that a run simulation directory has already been
    constructed, but start has not been executed in h5 format so that
    binary sim files dim, grid and param files are already present in the sim
    directory, or provided from an old binary sim source directory as inputs.

    The file '<datadir>/<file_name>.h5' is opened with status 'w' and
    receives the groups 'settings', 'grid' and 'unit'.

    call signature:

    write_h5_grid(file_name='grid', datadir='data', precision='d', nghost=3,
                  settings=None, param=None, grid=None, unit=None, quiet=True,
                  driver=None, comm=None, overwrite=False, rank=0)

    Keyword arguments:

    *file_name*:
      Prefix of the file name to be written, 'grid'.

    *datadir*:
      Directory where 'grid.h5' is stored.

    *precision*:
      Single 'f' or double 'd' precision.

    *nghost*:
      Number of ghost zones.

    *settings*:
      Optional dictionary of persistent variable. If None it is built from
      read.dim() together with *precision*, *nghost* and a version number.

    *param*:
      Optional Param object. If None it is read with read.param() and the
      derived units (mass, energy, time, flux) are added to it. A supplied
      object is used as is and must already provide every 'unit_*'
      attribute written here.

    *grid*:
      Optional Pencil Grid object of grid parameters. If None it is read
      with read.grid(); a supplied object is checked for missing keys and
      any problem is reported on stdout.

    *unit*:
      Optional dictionary of simulation units. Currently not used by this
      function; units are taken from *param*.

    *quiet*:
      Option to print output. Currently not used by this function.

    *driver*, *comm*, *overwrite*, *rank*:
      Passed through to open_h5.
    """
    import sys
    from os.path import join

    import numpy as np

    from pencil import read
    from pencil.io import open_h5, group_h5, dataset_h5
    from pencil import is_sim_dir

    # test if simulation directory
    # (only reported; execution continues as in the original implementation)
    if not is_sim_dir():
        print("ERROR: Directory needs to be a simulation")
        sys.stdout.flush()
    #
    if settings is None:
        settings = {}
        skeys = [
            "l1",
            "l2",
            "m1",
            "m2",
            "n1",
            "n2",
            "nx",
            "ny",
            "nz",
            "mx",
            "my",
            "mz",
            "nprocx",
            "nprocy",
            "nprocz",
            "maux",
            "mglobal",
            "mvar",
            "precision",
        ]
        dim = read.dim()
        for key in skeys:
            settings[key] = getattr(dim, key)
        # the precision read from dim is replaced by the requested one
        settings["precision"] = precision.encode()
        settings["nghost"] = nghost
        settings["version"] = np.int32(0)
    gkeys = [
        "x",
        "y",
        "z",
        "Lx",
        "Ly",
        "Lz",
        "dx",
        "dy",
        "dz",
        "dx_1",
        "dy_1",
        "dz_1",
        "dx_tilde",
        "dy_tilde",
        "dz_tilde",
    ]
    if grid is None:
        grid = read.grid(quiet=True)
    else:
        gd_err = False
        for key in gkeys:
            if key not in grid.__dict__:
                print("ERROR: key " + key + " missing from grid")
                sys.stdout.flush()
                gd_err = True
        if gd_err:
            print("ERROR: grid incomplete")
            sys.stdout.flush()
    ukeys = [
        "length",
        "velocity",
        "density",
        "magnetic",
        "time",
        "temperature",
        "flux",
        "energy",
        "mass",
        "system",
    ]
    if param is None:
        param = read.param(quiet=True)
        # derived units; each line may depend on the ones set before it
        param.unit_mass = param.unit_density * param.unit_length**3
        param.unit_energy = param.unit_mass * param.unit_velocity**2
        param.unit_time = param.unit_length / param.unit_velocity
        param.unit_flux = param.unit_mass / param.unit_time**3
        param.unit_system = param.unit_system.encode()
    # open file for writing data
    filename = join(datadir, file_name + ".h5")
    with open_h5(
        filename, "w", driver=driver, comm=comm, overwrite=overwrite, rank=rank
    ) as ds:
        # add settings, each stored as a one-element dataset
        sets_grp = group_h5(ds, "settings", status="w")
        for key in settings.keys():
            dataset_h5(sets_grp, key, status="w", data=(settings[key],))
        # add grid
        grid_grp = group_h5(ds, "grid", status="w")
        for key in gkeys:
            dataset_h5(grid_grp, key, status="w", data=getattr(grid, key))
        # domain origin taken from the start parameter xyz0
        xyz0 = getattr(param, "xyz0")
        dataset_h5(grid_grp, "Ox", status="w", data=(xyz0[0],))
        dataset_h5(grid_grp, "Oy", status="w", data=(xyz0[1],))
        dataset_h5(grid_grp, "Oz", status="w", data=(xyz0[2],))
        # add physical units
        unit_grp = group_h5(ds, "unit", status="w")
        for key in ukeys:
            value = getattr(param, "unit_" + key)
            if "system" in key:
                # the unit system is wrapped in a one-element tuple
                dataset_h5(unit_grp, key, status="w", data=(value,))
            else:
                dataset_h5(unit_grp, key, status="w", data=value)
def src2dst_remesh(
    src,
    dst,
    h5in="var.h5",
    h5out="var.h5",
    multxyz=[2, 2, 2],
    fracxyz=[1, 1, 1],
    srcghost=3,
    dstghost=3,
    srcdatadir="data/allprocs",
    dstdatadir="data/allprocs",
    dstprecision=[b"D"],
    lsymmetric=True,
    quiet=True,
    check_grid=True,
    OVERWRITE=False,
    optionals=True,
    nmin=32,
    rename_submit_script=False,
    MBmin=5.0,
    ncpus=[1, 1, 1],
    start_optionals=False,
    hostfile=None,
    submit_new=False,
    chunksize=1000.0,
    lfs=False,
    MB=1,
    count=1,
    size=1,
    rank=0,
    comm=None,
):
    """
    src2dst_remesh(src, dst, h5in='var.h5', h5out='var.h5', multxyz=[2, 2, 2],
                   fracxyz=[1, 1, 1], srcghost=3, dstghost=3,
                   srcdatadir='data/allprocs', dstdatadir='data/allprocs',
                   dstprecision=[b'D'], lsymmetric=True, quiet=True,
                   check_grid=True, OVERWRITE=False, optionals=True, nmin=32,
                   rename_submit_script=False, MBmin=5.0, ncpus=[1, 1, 1],
                   start_optionals=False, hostfile=None, submit_new=False,
                   chunksize=1000.0, lfs=False, MB=1, count=1, size=1, rank=0,
                   comm=None)

    Copy a simulation snapshot from src to dst on a remeshed grid.

    The destination simulation is created as a copy of the source if it does
    not exist. The destination grid and settings are derived with
    get_dstgrid, a processor layout is proposed with cpu_optimal and, unless
    check_grid is True, every variable in the source 'data' group is
    remeshed with local_remesh and written to the destination file.

    Parameters
    ----------
    src : string
        Source relative or absolute path to source simulation.

    dst : string
        Destination relative or absolute path to destination simulation.

    h5in : string
        Source simulation data file to be copied and remeshed.

    h5out : string
        Destination simulation file to be written.

    multxyz : list
        Factors by which to multiply old sim dimensions yxz order.

    fracxyz : list
        Factors by which to divide old sim dimensions yxz order.

    srcghost : int
        Number of ghost zones from the source order of accuracy (mx-nx)/2.

    dstghost : int
        Number of ghost zones for the destination order of accuracy (mx-nx)/2.

    srcdatadir : string
        Path from source simulation directory to data.

    dstdatadir : string
        Path from destination simulation directory to data.

    dstprecision : list
        Floating point precision settings [b'S'] or [b'D'].

    lsymmetric : bool
        Option to make non-periodic grid symmetric about old sim centre.
        Otherwise the lower boundary is retained from old sim grid.

    quiet : bool
        Flag for switching off output.

    check_grid : bool
        Flag to run check on grid and cpu layout before executing remesh.
        When True the function returns after reporting the layout options.

    OVERWRITE : bool
        Flag to overwrite existing simulation directory and files in dst.

    optionals : bool
        Copy simulation files with True or specify list of names (string) for
        additional files from src sim directory.

    nmin : int
        Minimum length along coordinate after splitting by proc.

    rename_submit_script : bool
        Edit lines in submission files copied from src to dst.
        Not yet operational.

    MBmin : float
        Minimum size in MB of data on a single proc of ncpus total processes.

    ncpus : ndarray
        Array of nprocx, nprocy, and nprocz to apply for new simulation.
        The default [1, 1, 1] is replaced by the layout in factors[1]
        returned by cpu_optimal.

    start_optionals : bool
        Copy simulation files output by start.x with True or specify list of
        names (string) for additional files from src sim data directory.

    hostfile : string
        Specify name of host config file argument in pc_build.
        Not yet operational.

    submit_new : bool
        Execute changes to submission files, compile and run simulation.
        Not yet operational.

    chunksize : float
        Size in megabytes of snapshot variable before chunked remesh is used.

    lfs : bool
        Flag to set the striping for large file sizes to improve IO
        efficiency.

    MB : float
        Size of data to write contiguously before moving to new OST on lustre.

    count : int
        Number of OSTs across which the data will be shared for IO operations.

    size : int
        Number of MPI processes

    rank : int
        ID of processor

    comm :
        MPI library calls

    Returns
    -------
    1 if dstprecision is not valid, if src is not a simulation directory,
    or if check_grid is True; otherwise None.
    """
    import h5py
    import os
    from os.path import join, abspath
    import time

    import numpy as np

    from pencil import read
    from pencil.io import mkdir
    from pencil.sim import simulation
    from pencil.math import cpu_optimal
    from pencil import is_sim_dir

    start_time = time.time()
    print("started at {}".format(time.ctime(start_time)))
    # set dtype from precision
    if dstprecision[0] == b"D":
        dtype = np.float64
    elif dstprecision[0] == b"S":
        dtype = np.float32
    else:
        # dstprecision is a list, so it is formatted rather than concatenated
        print("precision {} not valid".format(dstprecision))
        return 1

    if is_sim_dir(src):
        srcsim = simulation(src, quiet=quiet)
    else:
        print('src2dst_remesh ERROR: src"' + src + '" is not a valid simulation path')
        return 1

    if is_sim_dir(dst):
        dstsim = simulation(dst, quiet=quiet)
    else:
        # create the destination as a copy of the source simulation
        dstname = str.split(dst, "/")[-1]
        dstpath = str.strip(dst, dstname)
        if len(dstpath) == 0:
            dstpath = str.strip(srcsim.path, srcsim.name)
        dstsim = srcsim.copy(
            path_root=dstpath,
            name=dstname,
            quiet=quiet,
            OVERWRITE=OVERWRITE,
            optionals=optionals,
            start_optionals=start_optionals,
            rename_submit_script=rename_submit_script,
        )
    print("opening src file and dst file on rank{}".format(rank))
    with open_h5(
        join(srcsim.path, srcdatadir, h5in), "r", rank=rank, comm=comm
    ) as srch5:
        with open_h5(
            join(dstsim.path, dstdatadir, h5out),
            "w",
            lfs=lfs,
            MB=MB,
            count=count,
            rank=rank,
            comm=comm,
        ) as dsth5:
            # apply settings and grid to dst h5 files
            get_dstgrid(
                srch5,
                srcsim.param,
                dsth5,
                ncpus=ncpus,
                multxyz=multxyz,
                fracxyz=fracxyz,
                srcghost=srcghost,
                dstghost=dstghost,
                dtype=dtype,
                lsymmetric=lsymmetric,
                quiet=quiet,
            )
            print("get_dstgrid completed on rank {}".format(rank))
            # use settings to determine available proc dist then set ncpus
            factors = cpu_optimal(
                dsth5["settings/nx"][0],
                dsth5["settings/ny"][0],
                dsth5["settings/nz"][0],
                mvar=dsth5["settings/mvar"][0],
                maux=dsth5["settings/maux"][0],
                par=srcsim.param,
                nmin=nmin,
                MBmin=MBmin,
            )
            print(
                "remesh check grid: optional cpus upto min grid of"
                + "nmin={}\n".format(nmin)
                + "cpu options {}\n".format(factors)
                + "new mesh: {}, {}, {}\n".format(
                    dsth5["settings/nx"][0],
                    dsth5["settings/ny"][0],
                    dsth5["settings/nz"][0],
                )
                + 'To execute remesh set "check_grid=False".'
            )
            if ncpus == [1, 1, 1]:
                ncpus = [factors[1][0], factors[1][1], factors[1][2]]
            dsth5["settings/nprocx"][0] = ncpus[0]
            dsth5["settings/nprocy"][0] = ncpus[1]
            dsth5["settings/nprocz"][0] = ncpus[2]
            nprocs = ncpus[0] * ncpus[1] * ncpus[2]
            srcprocs = (
                srch5["settings/nprocx"][0]
                * srch5["settings/nprocy"][0]
                * srch5["settings/nprocz"][0]
            )
            if srcprocs > nprocs:
                print(
                    "\n**********************************************************\n"
                    + "remesh WARNING: {} procs reduced from {}.\n".format(
                        nprocs, srcprocs
                    )
                    + "Review multxyz {} and fracxyz {} for more\n".format(
                        multxyz, fracxyz
                    )
                    + "efficient parallel processing options."
                    + "\n**********************************************************\n"
                )
            if check_grid:
                return 1
            # copy the units, casting floating point values to dtype
            group = group_h5(dsth5, "unit", status="w")
            for key in srch5["unit"].keys():
                value = srch5["unit"][key][()]
                if isinstance(value, (np.float64, np.float32)):
                    dset = dataset_h5(
                        group,
                        key,
                        status="w",
                        data=value,
                        overwrite=True,
                        dtype=dtype,
                    )
                else:
                    dset = dataset_h5(
                        group,
                        key,
                        status="w",
                        data=value,
                        overwrite=True,
                    )
            # write a stand-alone grid.h5 next to the snapshot; the handle
            # is closed even if one of the copies fails
            gridh5 = open_h5(join(dstsim.datadir, "grid.h5"), status="w")
            try:
                dsth5.copy("settings", gridh5)
                dsth5.copy("grid", gridh5)
                dsth5.copy("unit", gridh5)
            finally:
                gridh5.close()
            if "persist" in srch5.keys():
                group = group_h5(dsth5, "persist", status="w")
                for key in srch5["persist"].keys():
                    # replicate the first source value for every new proc
                    tmp = np.zeros(nprocs)
                    tmp[:] = srch5["persist"][key][0]
                    if isinstance(
                        srch5["persist"][key][()], (np.float64, np.float32)
                    ):
                        dset = dataset_h5(
                            group,
                            key,
                            status="w",
                            data=tmp,
                            overwrite=True,
                            dtype=dtype,
                        )
                    else:
                        dset = dataset_h5(
                            group, key, status="w", data=tmp, overwrite=True
                        )
            dset = dataset_h5(
                dsth5, "time", status="w", data=srch5["time"][()], dtype=dtype
            )
            nx, ny, nz = (
                dsth5["settings"]["nx"][0],
                dsth5["settings"]["ny"][0],
                dsth5["settings"]["nz"][0],
            )
            # size in MB of one variable at 8 bytes per point; the divisor
            # is parenthesised so the result is megabytes, matching the
            # units of chunksize
            dstchunksize = 8 * nx * ny * nz / (1024 * 1024)
            lchunks = False
            if dstchunksize > chunksize:
                lchunks = True
                nchunks = cpu_optimal(nx, ny, nz, mvar=1, maux=0, MBmin=chunksize)[1]
                print("nchunks {}".format(nchunks))
                # interior destination indices of each chunk along x, y, z
                indx = np.array_split(np.arange(nx) + dstghost, nchunks[0])
                indy = np.array_split(np.arange(ny) + dstghost, nchunks[1])
                indz = np.array_split(np.arange(nz) + dstghost, nchunks[2])
                mx, my, mz = (
                    dsth5["settings"]["mx"][0],
                    dsth5["settings"]["my"][0],
                    dsth5["settings"]["mz"][0],
                )
                if not quiet:
                    print("nx {}, ny {}, nz {}".format(nx, ny, nz))
                    print("mx {}, my {}, mz {}".format(mx, my, mz))
            group = group_h5(dsth5, "data", status="w")
            for key in srch5["data"].keys():
                print("remeshing " + key)
                if not lchunks:
                    var = local_remesh(
                        srch5["data"][key][()],
                        srch5["grid"]["x"],
                        srch5["grid"]["y"],
                        srch5["grid"]["z"],
                        dsth5["grid"]["x"],
                        dsth5["grid"]["y"],
                        dsth5["grid"]["z"],
                        quiet=quiet,
                    )
                    print("writing " + key + " shape {}".format(var.shape))
                    dset = dataset_h5(
                        group, key, status="w", data=var, overwrite=True, dtype=dtype
                    )
                else:
                    dset = dataset_h5(
                        group,
                        key,
                        status="w",
                        shape=[mz, my, mx],
                        overwrite=True,
                        dtype=dtype,
                    )
                    print("writing " + key + " shape {}".format([mz, my, mx]))
                    # For each axis: (n1, n2) is the destination range of
                    # the chunk including ghost zones, (srcn1, srcn2) the
                    # source range enclosing it, (n1out, n2out) the slice
                    # written to the file and (varn1, varn2) the matching
                    # slice of the remeshed chunk. Ghost zones are only
                    # written for the first and last chunk along an axis.
                    for iz in range(nchunks[2]):
                        n1, n2 = indz[iz][0] - dstghost, indz[iz][-1] + dstghost
                        srcn1 = np.max(
                            np.where(srch5["grid/z"][()] < dsth5["grid/z"][n1])
                        )
                        srcn2 = np.min(
                            np.where(srch5["grid/z"][()] > dsth5["grid/z"][n2])
                        )
                        n1out = n1 + dstghost
                        n2out = n2 - dstghost + 1
                        varn1 = dstghost
                        varn2 = -dstghost
                        if iz == 0:
                            n1out = 0
                            varn1 = 0
                        if iz == nchunks[2] - 1:
                            n2out = n2 + 1
                            varn2 = n2 + 1
                        if not quiet:
                            print(
                                "n1 {}, n2 {}, srcn1 {}, srcn2 {}".format(
                                    n1, n2, srcn1, srcn2
                                )
                            )
                        for iy in range(nchunks[1]):
                            m1, m2 = indy[iy][0] - dstghost, indy[iy][-1] + dstghost
                            srcm1 = np.max(
                                np.where(srch5["grid/y"][()] < dsth5["grid/y"][m1])
                            )
                            srcm2 = np.min(
                                np.where(srch5["grid/y"][()] > dsth5["grid/y"][m2])
                            )
                            m1out = m1 + dstghost
                            m2out = m2 - dstghost + 1
                            varm1 = dstghost
                            varm2 = -dstghost
                            if iy == 0:
                                m1out = 0
                                varm1 = 0
                            if iy == nchunks[1] - 1:
                                m2out = m2 + 1
                                varm2 = m2 + 1
                            if not quiet:
                                print(
                                    "m1 {}, m2 {}, srcm1 {}, srcm2 {}".format(
                                        m1, m2, srcm1, srcm2
                                    )
                                )
                            for ix in range(nchunks[0]):
                                l1, l2 = (
                                    indx[ix][0] - dstghost,
                                    indx[ix][-1] + dstghost,
                                )
                                srcl1 = np.max(
                                    np.where(
                                        srch5["grid/x"][()] < dsth5["grid/x"][l1]
                                    )
                                )
                                srcl2 = np.min(
                                    np.where(
                                        srch5["grid/x"][()] > dsth5["grid/x"][l2]
                                    )
                                )
                                l1out = l1 + dstghost
                                l2out = l2 - dstghost + 1
                                varl1 = dstghost
                                varl2 = -dstghost
                                if ix == 0:
                                    l1out = 0
                                    varl1 = 0
                                if ix == nchunks[0] - 1:
                                    l2out = l2 + 1
                                    varl2 = l2 + 1
                                if not quiet:
                                    print(
                                        "l1 {}, l2 {}, srcl1 {}, srcl2 {}".format(
                                            l1, l2, srcl1, srcl2
                                        )
                                    )
                                if not quiet:
                                    print(
                                        "remeshing "
                                        + key
                                        + " chunk {}".format([iz, iy, ix])
                                    )
                                var = local_remesh(
                                    srch5["data"][key][
                                        srcn1 : srcn2 + 1,
                                        srcm1 : srcm2 + 1,
                                        srcl1 : srcl2 + 1,
                                    ],
                                    srch5["grid"]["x"][srcl1 : srcl2 + 1],
                                    srch5["grid"]["y"][srcm1 : srcm2 + 1],
                                    srch5["grid"]["z"][srcn1 : srcn2 + 1],
                                    dsth5["grid"]["x"][l1 : l2 + 1],
                                    dsth5["grid"]["y"][m1 : m2 + 1],
                                    dsth5["grid"]["z"][n1 : n2 + 1],
                                    quiet=quiet,
                                )
                                if not quiet:
                                    print(
                                        "writing "
                                        + key
                                        + " shape {} chunk {}".format(
                                            var.shape, [iz, iy, ix]
                                        )
                                    )
                                dset[n1out:n2out, m1out:m2out, l1out:l2out] = dtype(
                                    var[varn1:varn2, varm1:varm2, varl1:varl2]
                                )
    # propagate the new layout and mesh size to the destination source files
    dstsim.update()
    dstsim.change_value_in_file("src/cparam.local", "ncpus", str(nprocs))
    dstsim.change_value_in_file("src/cparam.local", "nprocx", str(ncpus[0]))
    dstsim.change_value_in_file("src/cparam.local", "nprocy", str(ncpus[1]))
    dstsim.change_value_in_file("src/cparam.local", "nprocz", str(ncpus[2]))
    dstsim.change_value_in_file("src/cparam.local", "nxgrid", str(dstsim.dim.nxgrid))
    # dstsim.change_value_in_file('src/cparam.local','nygrid',
    #                                                    str(dstsim.dim.nygrid))
    dstsim.change_value_in_file("src/cparam.local", "nzgrid", str(dstsim.dim.nzgrid))

    # cmd = 'source '+join(srcsim.path,'src','.moduleinfo')
    # os.system(cmd)
    # os.chdir(dstsim.path)
    # cmd = 'pc_setupsrc; make cleann'
    # os.system(cmd)
    # cmd = 'pc_build'
    # if hostfile: cmd = cmd + ' -f '+hostfile
    # process = sub.Popen(cmd.split(),stdout=sub.PIPE)
    # process = sub.Popen(cmd.split(),stdout=sub.PIPE)
    # output, error = process.communicate()
    # print(cmd,output,error)
    if srcprocs > nprocs:
        print(
            "\n**********************************************************\n"
            + "remesh WARNING: {} procs reduced from {}.\n".format(nprocs, srcprocs)
            + "Review multxyz {} and fracxyz {} for more\n".format(multxyz, fracxyz)
            + "efficient parallel processing options."
            + "\n**********************************************************\n"
        )
    end_time = time.time()
    print(
        "end at {} after {} seconds".format(time.ctime(end_time), end_time - start_time)
    )
def write_h5_snapshot(
    snapshot,
    file_name="VAR0",
    datadir="data/allprocs",
    precision="d",
    nghost=3,
    persist=None,
    settings=None,
    param=None,
    grid=None,
    lghosts=False,
    indx=None,
    proc=None,
    ipx=None,
    ipy=None,
    ipz=None,
    procdim=None,
    unit=None,
    t=None,
    x=None,
    y=None,
    z=None,
    state="a",
    quiet=True,
    lshear=False,
    driver=None,
    comm=None,
    overwrite=False,
    rank=0,
    size=1,
):
    """
    Write a snapshot given as numpy array.

    We assume by default that a run simulation directory has already been
    constructed and start completed successfully in h5 format so that
    files dim, grid and param files are already present.
    If not the contents of these will need to be supplied as dictionaries
    along with persist if included.

    call signature:

    write_h5_snapshot(snapshot, file_name='VAR0', datadir='data/allprocs',
                      precision='d', nghost=3, persist=None, settings=None,
                      param=None, grid=None, lghosts=False, indx=None,
                      unit=None, t=None, x=None, y=None, z=None, procdim=None,
                      quiet=True, lshear=False, driver=None, comm=None)

    Keyword arguments:

    *snapshot*:
      Numpy array containing the snapshot.
      Must be of shape [nvar, nz, ny, nx] without boundaries or.
      Must be of shape [nvar, mz, my, mx] with boundaries for lghosts=True.

    *file_name*:
      Name of the snapshot file to be written, e.g. VAR0 or var. It is
      joined to *datadir* as given; no extension is appended.

    *datadir*:
      Directory where the data is stored.

    *precision*:
      Single 'f' or double 'd' precision.

    *persist*:
      optional dictionary of persistent variable.

    *settings*:
      optional dictionary of persistent variable. If None it is built from
      read.dim() together with *precision*, *nghost* and a version number.

    *param*:
      optional Param object. If None it is read with read.param() and the
      derived units (mass, energy, time, flux) are added to it. A supplied
      object is used as is.

    *grid*:
      optional Pencil Grid object of grid parameters.

    *nghost*:
      Number of ghost zones.

    *lghosts*:
      If True the snapshot contains the ghost zones.

    *indx*
      Index object of index for each variable in f-array

    *proc*:
      If not None the file is opened with status 'a', otherwise 'w'.

    *ipx*, *ipy*, *ipz*:
      Overwritten from *procdim* when that is supplied; otherwise unused.

    *procdim*:
      Dim object of one processor. If supplied, datasets of global size
      are created and only the slab of this processor is written.

    *unit*:
      Optional dictionary of simulation units. Currently not used by this
      function; units are taken from *param*.

    *quiet*:
      Option to print output.

    *t*:
      Time of the snapshot. Defaults to 0.

    *xyz*:
      xyz arrays of the domain with ghost zones.
      This will normally be obtained from Grid object, but facility to
      redefine an alternative grid value.

    *state*:
      Ignored; the file status is derived from *proc*.

    *lshear*:
      Flag for the shear. Currently not used by this function.

    *driver*
      File driver for hdf5 io for use in serial or MPI parallel.

    *comm*
      MPI objects supplied if driver is 'mpio'.

    *overwrite*
      flag to replace existing h5 snapshot file.

    *rank*
      rank of process with root=0.

    *size*
      Passed through to open_h5, group_h5 and dataset_h5.

    Returns -1 if the precision is not understood, if the grid size is not
    divisible by the processor layout, or if a supplied x, y or z array has
    the wrong length; otherwise None.
    """
    import sys
    from os.path import join

    import numpy as np

    from pencil import read
    from pencil.io import open_h5, group_h5, dataset_h5
    from pencil import is_sim_dir

    # variables in the index object that do not map to a single f-array slot
    skipped_keys = ["uu", "keys", "aa", "KR_Frad", "uun", "gg", "bb"]

    def _setting_int(name):
        # tolerate scalar or one-element array values in settings
        return int(np.ravel(settings[name])[0])

    # test if simulation directory
    # (only reported; execution continues as in the original implementation)
    if not is_sim_dir():
        print("ERROR: Directory needs to be a simulation")
        sys.stdout.flush()
    if indx is None:
        indx = read.index()
    #
    # dim is only read when settings are not supplied
    dim = None
    if settings is None:
        settings = {}
        skeys = [
            "l1",
            "l2",
            "m1",
            "m2",
            "n1",
            "n2",
            "nx",
            "ny",
            "nz",
            "mx",
            "my",
            "mz",
            "nprocx",
            "nprocy",
            "nprocz",
            "maux",
            "mglobal",
            "mvar",
            "precision",
        ]
        dim = read.dim()
        for key in skeys:
            settings[key] = getattr(dim, key)
        # the precision read from dim is replaced by the requested one
        settings["precision"] = precision.encode()
        settings["nghost"] = nghost
        settings["version"] = np.int32(0)
    nprocs = settings["nprocx"] * settings["nprocy"] * settings["nprocz"]
    gkeys = [
        "x",
        "y",
        "z",
        "Lx",
        "Ly",
        "Lz",
        "dx",
        "dy",
        "dz",
        "dx_1",
        "dy_1",
        "dz_1",
        "dx_tilde",
        "dy_tilde",
        "dz_tilde",
    ]
    if grid is None:
        grid = read.grid(quiet=True)
    else:
        gd_err = False
        for key in gkeys:
            if key not in grid.__dict__:
                print("ERROR: key " + key + " missing from grid")
                sys.stdout.flush()
                gd_err = True
        if gd_err:
            print("ERROR: grid incomplete")
            sys.stdout.flush()
    ukeys = [
        "length",
        "velocity",
        "density",
        "magnetic",
        "time",
        "temperature",
        "flux",
        "energy",
        "mass",
        "system",
    ]
    if param is None:
        param = read.param(quiet=True)
        # derived units; each line may depend on the ones set before it
        param.unit_mass = param.unit_density * param.unit_length**3
        param.unit_energy = param.unit_mass * param.unit_velocity**2
        param.unit_time = param.unit_length / param.unit_velocity
        param.unit_flux = param.unit_mass / param.unit_time**3
        param.unit_system = param.unit_system.encode()
    # check whether the snapshot matches the simulation shape
    # The spatial axes are ordered (z, y, x). A per-processor snapshot
    # (procdim supplied) has the local shape and is not checked here.
    # A mismatch is only reported, it does not abort the write.
    if not procdim:
        if lghosts:
            expected = tuple(_setting_int(k) for k in ("mz", "my", "mx"))
            suffix = "does not match simulation dimensions with ghosts."
        else:
            expected = tuple(_setting_int(k) for k in ("nz", "ny", "nx"))
            suffix = "does not match simulation dimensions without ghosts."
        if tuple(snapshot.shape[1:4]) != expected:
            print("ERROR: snapshot shape {} ".format(snapshot.shape) + suffix)
            sys.stdout.flush()
    # Determine the precision used and ensure snapshot has correct data_type.
    if precision == "f":
        data_type = np.float32
        snapshot = np.float32(snapshot)
    elif precision == "d":
        data_type = np.float64
        snapshot = np.float64(snapshot)
    else:
        print(
            "ERROR: Precision {0} not understood.".format(precision)
            + " Must be either 'f' or 'd'"
        )
        sys.stdout.flush()
        return -1
    # Check that the shape does not conflict with the proc numbers.
    if (
        (settings["nx"] % settings["nprocx"] > 0)
        or (settings["ny"] % settings["nprocy"] > 0)
        or (settings["nz"] % settings["nprocz"] > 0)
    ):
        print(
            "ERROR: Shape of the input array is not compatible with the "
            + "cpu layout. Make sure that nproci devides ni."
        )
        sys.stdout.flush()
        return -1
    # Check the shape of the xyz arrays if specified and overwrite grid values.
    # 'is not None' is required: comparing an array with != None is
    # evaluated element-wise and cannot be used as a condition.
    if x is not None:
        if len(x) != settings["mx"]:
            print("ERROR: x array is incompatible with the shape of snapshot.")
            sys.stdout.flush()
            return -1
        grid.x = data_type(x)
    if y is not None:
        if len(y) != settings["my"]:
            print("ERROR: y array is incompatible with the shape of snapshot.")
            sys.stdout.flush()
            return -1
        grid.y = data_type(y)
    if z is not None:
        if len(z) != settings["mz"]:
            print("ERROR: z array is incompatible with the shape of snapshot.")
            sys.stdout.flush()
            return -1
        grid.z = data_type(z)
    # Define a time.
    if t is None:
        t = data_type(0.0)
    # making use of pc_hdf5 functionality:
    state = "a" if proc is not None else "w"
    filename = join(datadir, file_name)
    print("write_h5_snapshot: filename =", filename)
    with open_h5(
        filename,
        state,
        driver=driver,
        comm=comm,
        overwrite=overwrite,
        rank=rank,
        size=size,
    ) as ds:
        data_grp = group_h5(
            ds,
            "data",
            status=state,
            delete=False,
            overwrite=overwrite,
            rank=rank,
            size=size,
        )
        if not procdim:
            if not lghosts:
                # interior index bounds; taken from settings when dim was
                # not read because settings were supplied by the caller
                if dim is not None:
                    bounds = (dim.n1, dim.n2, dim.m1, dim.m2, dim.l1, dim.l2)
                else:
                    bounds = tuple(
                        _setting_int(k)
                        for k in ("n1", "n2", "m1", "m2", "l1", "l2")
                    )
                interior = (
                    slice(bounds[0], bounds[1] + 1),
                    slice(bounds[2], bounds[3] + 1),
                    slice(bounds[4], bounds[5] + 1),
                )
            for key in indx.__dict__.keys():
                if key in skipped_keys:
                    continue
                # index values are 1-based positions in the f-array
                var = np.array(snapshot[getattr(indx, key) - 1])
                # create ghost zones if required
                if not lghosts:
                    tmp_arr = np.zeros(
                        [
                            snapshot.shape[1] + 2 * nghost,
                            snapshot.shape[2] + 2 * nghost,
                            snapshot.shape[3] + 2 * nghost,
                        ]
                    )
                    tmp_arr[interior] = var
                    dataset_h5(
                        data_grp,
                        key,
                        status=state,
                        data=tmp_arr,
                        dtype=data_type,
                        overwrite=overwrite,
                        rank=rank,
                        comm=comm,
                        size=size,
                    )
                else:
                    dataset_h5(
                        data_grp,
                        key,
                        status=state,
                        data=var,
                        dtype=data_type,
                        overwrite=overwrite,
                        rank=rank,
                        comm=comm,
                        size=size,
                    )
        else:
            # create datasets of global size, then write the local slab
            for key in indx.__dict__.keys():
                if key in skipped_keys:
                    continue
                dataset_h5(
                    data_grp,
                    key,
                    status=state,
                    shape=(settings["mz"], settings["my"], settings["mx"]),
                    dtype=data_type,
                    rank=rank,
                    comm=comm,
                    size=size,
                )
            # adjust indices to include ghost zones at boundaries
            l1, m1, n1 = procdim.l1, procdim.m1, procdim.n1
            if procdim.ipx == 0:
                l1 = 0
            if procdim.ipy == 0:
                m1 = 0
            if procdim.ipz == 0:
                n1 = 0
            l2, m2, n2 = procdim.l2, procdim.m2, procdim.n2
            if procdim.ipx == settings["nprocx"] - 1:
                l2 = procdim.l2 + settings["nghost"]
            if procdim.ipy == settings["nprocy"] - 1:
                m2 = procdim.m2 + settings["nghost"]
            if procdim.ipz == settings["nprocz"] - 1:
                n2 = procdim.n2 + settings["nghost"]
            nx, ny, nz = procdim.nx, procdim.ny, procdim.nz
            ipx, ipy, ipz = procdim.ipx, procdim.ipy, procdim.ipz
            for key in indx.__dict__.keys():
                if key in skipped_keys:
                    continue
                tmp_arr = np.array(snapshot[getattr(indx, key) - 1])
                # the local slab is offset by the processor position
                data_grp[key][
                    n1 + ipz * nz : n2 + ipz * nz + 1,
                    m1 + ipy * ny : m2 + ipy * ny + 1,
                    l1 + ipx * nx : l2 + ipx * nx + 1,
                ] = tmp_arr[n1 : n2 + 1, m1 : m2 + 1, l1 : l2 + 1]
        dataset_h5(
            ds,
            "time",
            status=state,
            data=np.array(t),
            size=size,
            dtype=data_type,
            rank=rank,
            comm=comm,
            overwrite=overwrite,
        )
        # add settings
        sets_grp = group_h5(
            ds,
            "settings",
            status=state,
            delete=False,
            overwrite=overwrite,
            rank=rank,
            size=size,
        )
        for key in settings.keys():
            # the precision entry is written without a dtype,
            # every other setting with the floating point data_type
            dataset_h5(
                sets_grp,
                key,
                status=state,
                data=(settings[key],),
                dtype=None if "precision" in key else data_type,
                rank=rank,
                comm=comm,
                size=size,
                overwrite=overwrite,
            )
        # add grid
        grid_grp = group_h5(
            ds,
            "grid",
            status=state,
            delete=False,
            overwrite=overwrite,
            rank=rank,
            size=size,
        )
        for key in gkeys:
            dataset_h5(
                grid_grp,
                key,
                status=state,
                data=getattr(grid, key),
                dtype=data_type,
                rank=rank,
                comm=comm,
                size=size,
                overwrite=overwrite,
            )
        # domain origin taken from the start parameter xyz0
        xyz0 = getattr(param, "xyz0")
        for axis, name in enumerate(("Ox", "Oy", "Oz")):
            dataset_h5(
                grid_grp,
                name,
                status=state,
                data=(xyz0[axis],),
                dtype=data_type,
                rank=rank,
                comm=comm,
                size=size,
                overwrite=overwrite,
            )
        # add physical units
        unit_grp = group_h5(
            ds,
            "unit",
            status=state,
            delete=False,
            overwrite=overwrite,
            rank=rank,
            size=size,
        )
        for key in ukeys:
            value = getattr(param, "unit_" + key)
            if "system" in key:
                # the unit system is wrapped in a one-element tuple
                dataset_h5(
                    unit_grp,
                    key,
                    status=state,
                    data=(value,),
                    rank=rank,
                    comm=comm,
                    size=size,
                    overwrite=overwrite,
                )
            else:
                dataset_h5(
                    unit_grp,
                    key,
                    status=state,
                    data=value,
                    rank=rank,
                    comm=comm,
                    size=size,
                    overwrite=overwrite,
                )
        # add optional persistent data
        if persist is not None:
            pers_grp = group_h5(
                ds,
                "persist",
                status=state,
                size=size,
                delete=False,
                overwrite=overwrite,
                rank=rank,
            )
            for key in persist.keys():
                if not quiet:
                    print(key, type(persist[key][()]))
                    sys.stdout.flush()
                # one entry per processor, all set to the supplied value
                arr = np.empty(nprocs, dtype=type(persist[key][()]))
                arr[:] = persist[key][()]
                dataset_h5(
                    pers_grp,
                    key,
                    status=state,
                    data=(arr),
                    size=size,
                    dtype=data_type,
                    rank=rank,
                    comm=comm,
                    overwrite=overwrite,
                )
def _strip_suffix(path, suffix):
    """Return *path* with a trailing *suffix* removed (unchanged if absent)."""
    if suffix and path.endswith(suffix):
        return path[:len(path) - len(suffix)]
    return path


def _reduced_procs_warning(nprocs, srcprocs, multxyz, fracxyz):
    """Build the banner shown when dst uses fewer processes than src."""
    return (
        '\n**********************************************************\n'+
        'remesh WARNING: {} procs reduced from {}.\n'.format(
            nprocs, srcprocs)+
        'Review multxyz {} and fracxyz {} for more\n'.format(
            multxyz, fracxyz)+
        'efficient parallel processing options.'+
        '\n**********************************************************\n')


def _axis_chunks(indices, dstghost, src_coord, dst_coord):
    """Compute the index bookkeeping of every chunk along one axis.

    *indices* is the list of index arrays produced by ``np.array_split`` for
    the interior points (already offset by *dstghost*).  For each chunk a
    tuple ``(i1, i2, src1, src2, out1, out2, var1, var2)`` is returned:

    * ``i1:i2+1``       dst range to remesh (chunk padded by the ghost width),
    * ``src1:src2+1``   src range bracketing that dst range,
    * ``out1:out2``     dst range actually written for this chunk,
    * ``var1:var2``     matching range inside the remeshed chunk array.
    """
    nchunk = len(indices)
    chunks = []
    for ichunk, ind in enumerate(indices):
        i1 = ind[0] - dstghost
        i2 = ind[-1] + dstghost
        # Last src point below / first src point above the dst chunk; fall
        # back to the src array ends when the dst chunk extends past them.
        below = np.flatnonzero(src_coord < dst_coord[i1])
        above = np.flatnonzero(src_coord > dst_coord[i2])
        src1 = below[-1] if below.size else 0
        src2 = above[0] if above.size else len(src_coord) - 1
        length = i2 - i1 + 1
        out1, out2 = i1 + dstghost, i2 - dstghost + 1
        # Positive end index: "-dstghost" would select nothing for dstghost=0.
        var1, var2 = dstghost, length - dstghost
        if ichunk == 0:
            # first chunk also supplies the lower ghost zone
            out1, var1 = 0, 0
        if ichunk == nchunk - 1:
            # last chunk also supplies the upper ghost zone
            out2, var2 = i2 + 1, length
        chunks.append((i1, i2, src1, src2, out1, out2, var1, var2))
    return chunks


def src2dst_remesh(src, dst, h5in='var.h5', h5out='var.h5',
                   multxyz=[2,2,2], fracxyz=[1,1,1], srcghost=3, dstghost=3,
                   srcdatadir='data/allprocs', dstdatadir='data/allprocs',
                   dstprecision=[b'D'], lsymmetric=True, quiet=True,
                   check_grid=True, OVERWRITE=False, optionals=True, nmin=32,
                   rename_submit_script=False, MBmin=5.0, ncpus=[1,1,1],
                   start_optionals=False, hostfile=None, submit_new=False,
                   chunksize=1000.0, lfs=False, MB=1, count=1, size=1,
                   rank=0, comm=None
                  ):
    """
    Copy an h5 snapshot from a source simulation to a destination simulation
    on a new mesh, creating the destination simulation if it does not exist.

    Call signature:

    src2dst_remesh(src, dst, h5in='var.h5', h5out='var.h5',
                   multxyz=[2,2,2], fracxyz=[1,1,1], srcghost=3, dstghost=3,
                   srcdatadir='data/allprocs', dstdatadir='data/allprocs',
                   dstprecision=[b'D'], lsymmetric=True, quiet=True,
                   check_grid=True, OVERWRITE=False, optionals=True, nmin=32,
                   rename_submit_script=False, MBmin=5.0, ncpus=[1,1,1],
                   start_optionals=False, hostfile=None, submit_new=False)

    Keyword arguments:

    *src*:
      String relative or absolute path to source simulation.

    *dst*:
      String relative or absolute path to destination simulation.

    *h5in*:
      Source simulation data file to be copied and remeshed.

    *h5out*:
      Destination simulation file to be written.

    *multxyz*:
      Factors by which to multiply old sim dimensions (originally documented
      as "yxz order"; presumably x, y, z -- confirm against get_dstgrid).

    *fracxyz*:
      Factors by which to divide old sim dimensions (same ordering caveat as
      multxyz).

    *srcghost*:
      Number of ghost zones from the source order of accuracy (mx-nx)/2.

    *dstghost*:
      Number of ghost zones for the destination order of accuracy (mx-nx)/2.

    *srcdatadir*:
      Path from source simulation directory to data.

    *dstdatadir*:
      Path from destination simulation directory to data.

    *dstprecision*:
      Floating point precision settings [b'S'] or [b'D'].

    *lsymmetric*:
      Option to make non-periodic grid symmetric about old sim centre.
      Otherwise the lower boundary is retained from old sim grid.

    *quiet*:
      Flag for switching off output.

    *check_grid*:
      Flag to run check on grid and cpu layout before executing remesh.
      When True the function returns 1 right after reporting the layout.

    *OVERWRITE*:
      Flag to overwrite existing simulation directory and files in dst.

    *optionals*:
      Copy simulation files with True or specify list of names (string) for
      additional files from src sim directory.

    *nmin*:
      Minimum length along coordinate after splitting by proc.

    *rename_submit_script*:
      Edit lines in submission files copied from src to dst.
      Not yet operational.

    *MBmin*:
      Minimum size in MB of data on a single proc of ncpus total processes.

    *ncpus*:
      Array of nprocx, nprocy, and nprocz to apply for new simulation.
      The default [1,1,1] selects the layout suggested by cpu_optimal.

    *start_optionals*:
      Copy simulation files output by start.x with True or specify list of
      names (string) for additional files from src sim data directory.

    *hostfile*:
      Specify name of host config file argument in pc_build.
      Not yet operational (currently unused).

    *submit_new*:
      Execute changes to submission files, compile and run simulation.
      Not yet operational (currently unused).

    *chunksize*:
      Size in megabytes of snapshot variable before chunked remesh is used.

    *lfs*:
      Flag to set the striping for large file sizes to improve IO efficiency.

    *MB*:
      Size of data to write contiguously before moving to new OST on lustre.

    *count*:
      Number of OSTs across which the data will be shared for IO operations.

    *comm*:
      MPI library calls.

    *rank*:
      Integer ID of processor.

    *size*:
      Number of MPI processes (currently unused).

    Returns:
      1 if the precision is invalid, src is not a simulation directory, or
      check_grid is True; None after a completed remesh.
    """

    from os.path import join
    import time
    from pencil.io import open_h5, group_h5, dataset_h5
    from pencil.sim import simulation
    from pencil.math import cpu_optimal
    from pencil import is_sim_dir

    start_time = time.time()
    print('started at {}'.format(time.ctime(start_time)))

    # set dtype from precision
    if dstprecision[0] == b'D':
        dtype = np.float64
    elif dstprecision[0] == b'S':
        dtype = np.float32
    else:
        # dstprecision is a list, so it must be formatted, not concatenated
        print('precision {} not valid'.format(dstprecision))
        return 1

    if is_sim_dir(src):
        srcsim = simulation(src,quiet=quiet)
    else:
        print('src2dst_remesh ERROR: src"'+src+
              '" is not a valid simulation path')
        return 1

    if is_sim_dir(dst):
        dstsim = simulation(dst,quiet=quiet)
    else:
        # Split dst into parent path and simulation name.  str.strip() cannot
        # be used here: it strips a *set of characters* from both ends, which
        # corrupts relative paths such as 'sims/sim2'.
        dst_trimmed = dst.rstrip('/') or dst
        dstname = dst_trimmed.split('/')[-1]
        dstpath = _strip_suffix(dst_trimmed, dstname)
        if not dstpath:
            # bare name given: create dst next to the source simulation
            dstpath = _strip_suffix(srcsim.path, srcsim.name)
        dstsim = srcsim.copy(path_root=dstpath, name=dstname, quiet=quiet,
                             OVERWRITE=OVERWRITE, optionals=optionals,
                             start_optionals=start_optionals,
                             rename_submit_script=rename_submit_script)

    print('opening src file and dst file on rank{}'.format(rank))
    with open_h5(join(srcsim.path,srcdatadir,h5in),'r',
                 rank=rank,comm=comm) as srch5:
        with open_h5(join(dstsim.path,dstdatadir,h5out),'w',lfs=lfs,MB=MB,
                     count=count,rank=rank,comm=comm) as dsth5:
            # apply settings and grid to dst h5 files
            get_dstgrid(srch5, srcsim.param, dsth5, ncpus=ncpus,
                        multxyz=multxyz, fracxyz=fracxyz,
                        srcghost=srcghost, dstghost=dstghost, dtype=dtype,
                        lsymmetric=lsymmetric, quiet=quiet)
            print('get_dstgrid completed on rank {}'.format(rank))

            nx = dsth5['settings/nx'][0]
            ny = dsth5['settings/ny'][0]
            nz = dsth5['settings/nz'][0]

            # use settings to determine available proc dist then set ncpus
            factors = cpu_optimal(
                nx, ny, nz,
                mvar=dsth5['settings/mvar'][0],
                maux=dsth5['settings/maux'][0],
                par=srcsim.param, nmin=nmin, MBmin=MBmin)
            print('remesh check grid: optional cpus upto min grid of'+
                  'nmin={}\n'.format(nmin)+
                  'cpu options {}\n'.format(factors)+
                  'new mesh: {}, {}, {}\n'.format(nx, ny, nz)+
                  'To execute remesh set "check_grid=False".')
            if list(ncpus) == [1,1,1]:
                ncpus = [factors[1][0],factors[1][1],factors[1][2]]
            dsth5['settings/nprocx'][0] = ncpus[0]
            dsth5['settings/nprocy'][0] = ncpus[1]
            dsth5['settings/nprocz'][0] = ncpus[2]
            nprocs = ncpus[0]*ncpus[1]*ncpus[2]
            srcprocs = srch5['settings/nprocx'][0]*\
                       srch5['settings/nprocy'][0]*\
                       srch5['settings/nprocz'][0]
            if srcprocs > nprocs:
                print(_reduced_procs_warning(nprocs, srcprocs,
                                             multxyz, fracxyz))
            if check_grid:
                return 1

            # physical units: floats follow the destination precision
            group = group_h5(dsth5, 'unit', status='w')
            for key in srch5['unit'].keys():
                value = srch5['unit'][key][()]
                if isinstance(value, (np.float64, np.float32)):
                    dataset_h5(group, key, status='w', data=value,
                               overwrite=True, dtype=dtype)
                else:
                    dataset_h5(group, key, status='w', data=value,
                               overwrite=True)

            # mirror settings, grid and units into the dst grid.h5; the
            # context manager closes the file even if a copy fails
            with open_h5(join(dstsim.datadir,'grid.h5'),
                         status='w') as gridh5:
                dsth5.copy('settings', gridh5)
                dsth5.copy('grid', gridh5)
                dsth5.copy('unit', gridh5)

            if 'persist' in srch5.keys():
                group = group_h5(dsth5, 'persist', status='w')
                for key in srch5['persist'].keys():
                    # replicate the first src value across all dst procs
                    tmp = np.zeros(nprocs)
                    tmp[:] = srch5['persist'][key][0]
                    value = srch5['persist'][key][()]
                    # NOTE(review): value is normally an array here, so the
                    # dtype branch is rarely taken -- confirm the intent.
                    if isinstance(value, (np.float64, np.float32)):
                        dataset_h5(group, key, status='w', data=tmp,
                                   overwrite=True, dtype=dtype)
                    else:
                        dataset_h5(group, key, status='w', data=tmp,
                                   overwrite=True)

            dataset_h5(dsth5, 'time', status='w',
                       data=srch5['time'][()], dtype=dtype)

            # size of one dst variable in MB (8 bytes per value); the
            # divisor must be parenthesised to be comparable with chunksize
            dstchunksize = 8*nx*ny*nz/(1024*1024)
            lchunks = dstchunksize > chunksize
            if lchunks:
                nchunks = cpu_optimal(nx,ny,nz,mvar=1,maux=0,
                                      MBmin=chunksize)[1]
                print('nchunks {}'.format(nchunks))
                indx = np.array_split(np.arange(nx)+dstghost,nchunks[0])
                indy = np.array_split(np.arange(ny)+dstghost,nchunks[1])
                indz = np.array_split(np.arange(nz)+dstghost,nchunks[2])
                # chunk bounds depend only on the grids, not on the
                # variable: read the grids once and precompute them
                xchunks = _axis_chunks(indx, dstghost,
                                       srch5['grid/x'][()],
                                       dsth5['grid/x'][()])
                ychunks = _axis_chunks(indy, dstghost,
                                       srch5['grid/y'][()],
                                       dsth5['grid/y'][()])
                zchunks = _axis_chunks(indz, dstghost,
                                       srch5['grid/z'][()],
                                       dsth5['grid/z'][()])

            mx = dsth5['settings']['mx'][0]
            my = dsth5['settings']['my'][0]
            mz = dsth5['settings']['mz'][0]
            if not quiet:
                print('nx {}, ny {}, nz {}'.format(nx, ny, nz))
                print('mx {}, my {}, mz {}'.format(mx, my, mz))

            group = group_h5(dsth5, 'data', status='w')
            for key in srch5['data'].keys():
                print('remeshing '+key)
                if not lchunks:
                    var = local_remesh(srch5['data'][key][()],
                                       srch5['grid']['x'],
                                       srch5['grid']['y'],
                                       srch5['grid']['z'],
                                       dsth5['grid']['x'],
                                       dsth5['grid']['y'],
                                       dsth5['grid']['z'],
                                       quiet=quiet)
                    print('writing '+key+' shape {}'.format(var.shape))
                    dataset_h5(group, key, status='w', data=var,
                               overwrite=True, dtype=dtype)
                    continue

                dset = dataset_h5(group, key, status='w',
                                  shape=[mz,my,mx], overwrite=True,
                                  dtype=dtype)
                print('writing '+key+' shape {}'.format([mz,my,mx]))
                for iz, (n1, n2, srcn1, srcn2,
                         n1out, n2out, varn1, varn2) in enumerate(zchunks):
                    if not quiet:
                        print('n1 {}, n2 {}, srcn1 {}, srcn2 {}'.format(
                            n1, n2, srcn1, srcn2))
                    for iy, (m1, m2, srcm1, srcm2,
                             m1out, m2out, varm1, varm2) in enumerate(ychunks):
                        if not quiet:
                            print('m1 {}, m2 {}, srcm1 {}, srcm2 {}'.format(
                                m1, m2, srcm1, srcm2))
                        for ix, (l1, l2, srcl1, srcl2,
                                 l1out, l2out,
                                 varl1, varl2) in enumerate(xchunks):
                            if not quiet:
                                print(
                                    'l1 {}, l2 {}, srcl1 {}, srcl2 {}'.format(
                                        l1, l2, srcl1, srcl2))
                            if not quiet:
                                print('remeshing '+key+' chunk {}'.format(
                                    [iz,iy,ix]))
                            var = local_remesh(
                                srch5['data'][key][srcn1:srcn2+1,
                                                   srcm1:srcm2+1,
                                                   srcl1:srcl2+1],
                                srch5['grid']['x'][srcl1:srcl2+1],
                                srch5['grid']['y'][srcm1:srcm2+1],
                                srch5['grid']['z'][srcn1:srcn2+1],
                                dsth5['grid']['x'][l1:l2+1],
                                dsth5['grid']['y'][m1:m2+1],
                                dsth5['grid']['z'][n1:n2+1],
                                quiet=quiet
                            )
                            if not quiet:
                                print('writing '+key+
                                      ' shape {} chunk {}'.format(
                                          var.shape, [iz,iy,ix]))
                            # drop the overlap shared with neighbouring
                            # chunks, keeping only this chunk's own points
                            dset[n1out:n2out,
                                 m1out:m2out,
                                 l1out:l2out] = dtype(var[
                                     varn1:varn2,
                                     varm1:varm2,
                                     varl1:varl2])

    # both h5 files are closed here; refresh dst and record the new layout
    dstsim.update()
    dstsim.change_value_in_file('src/cparam.local','ncpus', str(nprocs))
    dstsim.change_value_in_file('src/cparam.local','nprocx',str(ncpus[0]))
    dstsim.change_value_in_file('src/cparam.local','nprocy',str(ncpus[1]))
    dstsim.change_value_in_file('src/cparam.local','nprocz',str(ncpus[2]))
    dstsim.change_value_in_file('src/cparam.local','nxgrid',
                                str(dstsim.dim.nxgrid))
    #dstsim.change_value_in_file('src/cparam.local','nygrid',
    #                                                    str(dstsim.dim.nygrid))
    dstsim.change_value_in_file('src/cparam.local','nzgrid',
                                str(dstsim.dim.nzgrid))

    # TODO: build/submit of the new simulation is not yet operational:
    #cmd = 'source '+join(srcsim.path,'src','.moduleinfo')
    #os.system(cmd)
    #os.chdir(dstsim.path)
    #cmd = 'pc_setupsrc; make cleann'
    #os.system(cmd)
    #cmd = 'pc_build'
    #if hostfile: cmd = cmd + ' -f '+hostfile
    #process = sub.Popen(cmd.split(),stdout=sub.PIPE)
    #output, error = process.communicate()
    #print(cmd,output,error)

    if srcprocs > nprocs:
        print(_reduced_procs_warning(nprocs, srcprocs, multxyz, fracxyz))
    end_time = time.time()
    print('end at {} after {} seconds'.format(
        time.ctime(end_time),end_time-start_time))