def average_main(filename, maskname, varname, latname, lonname,
                 depname=None, timname=None, gridval=None, Nproc=1,
                 ind=None):
    """
    Computes the mask-weighted average of a variable for a file or a set
    of splitted files and saves the result to ``filename + '_mean.nc'``.

    Parameters
    ----------
    filename : str
        Name of the file to be open.
    maskname : str
        Name of the mask file to compute the average.
    varname : str
        Name of the var to be averaged.
    latname : str
        Name of the latitude variable.
    lonname : str
        Name of the longitude variable.
    depname : str, optional
        Name of the depth variable. The default is None.
    timname : str, optional
        Name of the time variable. The default is None.
    gridval : optional
        Passed through to ``average_1file``. The default is None.
    Nproc : int, optional
        If bigger than 1 will run in parallel. The default is 1.
    ind : tuple, optional
        If the file(s) to be loaded come(s) from a partition from breakds
        a tuple with the index must be provided (Z, Y, X). Where X, Y, Z
        are the respective index in each dimension (floats or array
        like). The default is None.

    Returns
    -------
    None.
    """
    if ind is None:
        # Single file: average_1file returns weighted sums plus weights,
        # so divide here to get the actual averages.
        f_var, f_lat, f_lon, weight, weight0, tim, dep = \
            average_1file(filename, maskname, varname, latname, lonname,
                          depname, timname, gridval)
        f_var = f_var / weight
        f_lat = f_lat / weight0
        f_lon = f_lon / weight0
    else:
        def avg_kji(kji):
            # Average one partition file; the per-file results are
            # collected in `outs`, shared through the closure.
            k, j, i = kji
            fname = filename + '_' + str(k) + '_' + str(j) + '_' + str(i)
            mname = maskname + '_' + str(k) + '_' + str(j) + '_' + str(i)
            outs.append(
                average_1file(fname, mname, varname, latname, lonname,
                              depname, timname, gridval))
            return 1

        # Get iterables for the 3 index and call combinations
        indk = int2iterable(ind[0])
        indj = int2iterable(ind[1])
        indi = int2iterable(ind[2])
        dep = np.array([])
        f_var = np.array([])
        for k in indk:
            outs = []
            kji_com = [(k, j, i) for j in indj for i in indi]
            totl = len(kji_com)
            # Run in parallel
            if Nproc > 1:
                kji_com = split_iterable(kji_com, Nproc)
                print("Processing {} files with {} cores".format(totl, Nproc))
                totl = len(kji_com)
                for i, kji_ in enumerate(kji_com):
                    # BUG FIX: progress used to be
                    # 100. * i * (k + 1) / totl, which exceeds 100% for
                    # k >= 1; use the same formula as den_main/mask_main.
                    print("\t{:.2f}%".format(100. * i / totl))
                    output = []
                    for kji in kji_:
                        run_paral = dask.delayed(avg_kji)(kji)
                        output.append(run_paral)
                    total = dask.delayed(sum)(output)
                    total.compute()
            # Run in series
            else:
                print("Processing {} files".format(totl))
                for i, kji_ in enumerate(kji_com):
                    print("\t{:.2f}%".format(100. * i / totl))
                    avg_kji(kji_)
            # Combine the partial weighted sums of this depth level (k).
            tav_val = np.zeros_like(outs[0][0])
            tav_lon, tav_lat = 0., 0.
            tweight, tweight0 = np.zeros_like(outs[0][3]), 0.
            for av_val, av_lat, av_lon, weight, weight0, _, _ in outs:
                tav_val += av_val
                tav_lat += av_lat
                tav_lon += av_lon
                tweight0 += weight0
                tweight += weight
            dep = np.append(dep, outs[0][6])
            # Avoid division by zero where the mask selected no points.
            tweight[tweight == 0.] = 1
            if len(f_var.shape) == 2:
                f_var = np.append(f_var, tav_val / tweight, axis=-1)
            elif f_var.shape[0] != 0:
                f_var = np.append(f_var, tav_val / tweight)
            else:
                f_var = tav_val / tweight
            f_lat = tav_lat / tweight0
            f_lon = tav_lon / tweight0
        # Time axis is taken from the last processed partition; it is
        # presumably identical across partitions — TODO confirm.
        tim = outs[0][5]
    #############
    # SAVE DATA #
    #############
    ds = {lonname: ((), f_lon), latname: ((), f_lat)}
    if timname is not None:
        ds[timname] = (('t'), tim)
    if depname is not None:
        ds[depname] = (('z'), dep)
    if len(f_var.shape) == 2:
        ds[varname] = (('t', 'z'), f_var)
    else:
        ds[varname] = (('t'), f_var)
    ds = xr.Dataset(ds)
    ds.to_netcdf(filename + '_mean.nc')
def stra_main(file_den, file_str, bpnt, denname, latname, lonname, depname, strname, timname=None, Nproc=1, ind=None, H=5000): if ind is None: stra_1file(file_den, file_str, bpnt, denname, latname, lonname, depname, strname, timname, H) else: def str_kji(kji): k, j, i = kji fden = file_den + '_' + str(k) + '_' + str(j) + '_' + str(i) fstr = file_str + '_' + str(k) + '_' + str(j) + '_' + str(i) stra_1file(fden, fstr, bpnt, denname, latname, lonname, depname, strname, timname, H) return 1 # Get iterables for the 3 index and call combinations indk = int2iterable(ind[0]) indj = int2iterable(ind[1]) indi = int2iterable(ind[2]) for k in indk: kji_com = [(k, j, i) for j in indj for i in indi] totl = len(kji_com) # Run in parallel if Nproc > 1: kji_com = split_iterable(kji_com, Nproc) print("Processing {} files with {} cores".format(totl, Nproc)) totl = len(kji_com) for i, kji_ in enumerate(kji_com): print("\t{:.2f}%".format(100. * i * (k + 1) / totl)) output = [] for kji in kji_: run_paral = dask.delayed(str_kji)(kji) output.append(run_paral) total = dask.delayed(sum)(output) total.compute() # Run in series else: print("Processing {} files".format(totl)) for i, kji_ in enumerate(kji_com): print("\t{:.2f}%".format(100. * i * (k + 1) / totl)) str_kji(kji_) [strval], lats, lons, tim, _ = load_main(file_str, [strname], latname, lonname, None, timname, ind) ds = { lonname: (('y', 'x'), lons), latname: (('y', 'x'), lats), strname: (('t', 'z', 'y', 'x'), strval) } if timname is not None: ds[timname] = (('t'), tim) ds = xr.Dataset(ds) ds.to_netcdf(file_str + '.nc')
def load_split(filename, varnames, latname, lonname, depname, timname, ind): """ Load splitted data. Check the documentation of load_main for more information. """ # Transform the floats to iterables indk = int2iterable(ind[0]) indj = int2iterable(ind[1]) indi = int2iterable(ind[2]) tim_in = None nvars = len(varnames) # Loop appending the values vars_k = [None for v in range(nvars)] dep_k = None for k in indk: vars_j = [None for var in varnames] lat_j, lon_j = None, None for j in indj: vars_i = [None for var in varnames] lat_i, lon_i = None, None for i in indi: filenamekji = filename + '_' + str(k) + '_' + str( j) + '_' + str(i) with xr.open_dataset(filenamekji + '.nc') as data: for v in range(nvars): if vars_i[v] is None: vars_i[v] = data[varnames[v]].values else: vars_i[v] = np.append(vars_i[v], data[varnames[v]].values, axis=-1) if lat_i is None: lat_i = data[latname].values lon_i = data[lonname].values else: lat_i = np.append(lat_i, data[latname].values, axis=-1) lon_i = np.append(lon_i, data[lonname].values, axis=-1) if depname is not None: dep_in = data[depname].values if timname is not None: tim_in = data[timname].values for v in range(nvars): if vars_j[v] is None: vars_j[v] = vars_i[v] else: vars_j[v] = np.append(vars_j[v], vars_i[v], axis=-2) if lat_j is None: lat_j = lat_i lon_j = lon_i else: lat_j = np.append(lat_j, lat_i, axis=-2) lon_j = np.append(lon_j, lon_i, axis=-2) for v in range(nvars): if len(vars_j[v].shape) == 3 or vars_k[v] is None: vars_k[v] = vars_j[v] else: vars_k[v] = np.append(vars_k[v], vars_j[v], axis=-3) if depname is not None: if dep_k is None: dep_k = dep_in else: dep_k = np.append(dep_k, dep_in) return vars_k, lat_j, lon_j, tim_in, dep_k
def den_main(filename_tem, filename_sal, savename, temname, salname, depname, denname, latname, lonname, pressure_lvl=False, mlat=None, Nproc=1, timname=None, ind=None): """ Computes the density profile of a file or a set splitted files. Saves the file as the input format. Parameters ---------- filename_tem : str Name of the temperature data file. filename_sal : str Name of the salinity data file. savename : str Name to the saving file. temname : str Name of the temperature variable. temname : str Name of the salinity variable. latname : str Name of the latitude variable. lonname : str Name of the longitude variable. pressure_lvl : Bool, optional True if the depth variable is the pressure level. The default is False. mlat : float, optional If provided, mean latitude is used to compute pressure levels. The default is None. Nproc : int, optional If bigger than 1 will run in parallel. The default is 1. timname : str, optional Name of the time variable. The default is None. ind : tuple, optional If the file or files to be loaded come from a partition from breakds a tuple with the index must be provided (Z, Y, X). Where X, Y, Z are the respective index in each dimension (floats or array like). In that case the loaded data will be appended. Returns ------- None. 
""" # Get data from 1 file if ind is None: den_1file(filename_tem, filename_sal, savename, temname, salname, depname, denname, latname, lonname, pressure_lvl, mlat, timname) # Get data from splitted files else: def den_kji(kji): k, j, i = kji fname_tem = filename_tem+'_'+str(k)+'_'+str(j)+'_'+str(i) fname_sal = filename_sal+'_'+str(k)+'_'+str(j)+'_'+str(i) fsave_den = savename+'_'+str(k)+'_'+str(j)+'_'+str(i) den_1file(fname_tem, fname_sal, fsave_den, temname, salname, depname, denname, latname, lonname, pressure_lvl, mlat, timname) return 1 # Get iterables for the 3 index and call combinations indk = int2iterable(ind[0]) indj = int2iterable(ind[1]) indi = int2iterable(ind[2]) kji_com = [(k, j, i) for k in indk for j in indj for i in indi] totl = len(kji_com) # Run in parallel if Nproc > 1: kji_com = split_iterable(kji_com, Nproc) print("Processing {} files with {} cores".format(totl, Nproc)) totl = len(kji_com) for i, kji_ in enumerate(kji_com): print("\t{:.2f}%".format(100.*i/totl)) output = [] for kji in kji_: run_paral = dask.delayed(den_kji)(kji) output.append(run_paral) total = dask.delayed(sum)(output) total.compute() # Run in series else: print("Processing {} files".format(totl)) for i, kji_ in enumerate(kji_com): print("\t{:.2f}%".format(100.*i/totl)) den_kji(kji_)
def mask_main(filename, savename, latname, lonname, Nproc=1, ind=None,
              Nb=None):
    """
    Creates a mask of area values for a file or a set of splitted files.
    If one file is used this can be automatically splitted using ind_out.

    Parameters
    ----------
    filename : str
        Name of the file to be open.
    savename : str
        Name of the file to be saved.
    latname : str
        Name of the latitude variable.
    lonname : str
        Name of the longitude variable.
    Nproc : int, optional
        If bigger than 1 will run in parallel. The default is 1.
    ind : tuple, optional
        If the file(s) to be loaded come(s) from a partition from breakds
        a tuple with the index must be provided (Z, Y, X). Where X, Y, Z
        are the respective index in each dimension (floats or array
        like). The default is None.
    Nb : tuple of 3 int, optional
        Number of partitions to be made in the z, y and x axis. Only used
        if ind is None and Nb not None. The default is None.

    Returns
    -------
    None.
    """
    if ind is None:
        # Single (possibly auto-partitioned) file.
        mask_1file(filename, savename, latname, lonname, ind, Nb)
        return

    def _mask_one(kji):
        # Build the mask for the partition identified by (k, j, i).
        k, j, i = kji
        suffix = '_' + str(k) + '_' + str(j) + '_' + str(i)
        mask_1file(filename + suffix, savename + suffix, latname, lonname,
                   ind, None)
        return 1

    # Expand every index spec into an iterable and enumerate all
    # partition combinations.
    zs = int2iterable(ind[0])
    ys = int2iterable(ind[1])
    xs = int2iterable(ind[2])
    combos = [(k, j, i) for k in zs for j in ys for i in xs]
    n_files = len(combos)

    if Nproc > 1:
        # Parallel: group the jobs into chunks of Nproc and let dask
        # execute each chunk concurrently.
        chunks = split_iterable(combos, Nproc)
        print("Processing {} files with {} cores".format(n_files, Nproc))
        n_chunks = len(chunks)
        for idx, chunk in enumerate(chunks):
            print("\t{:.2f}%".format(100. * idx / n_chunks))
            delayed_jobs = [dask.delayed(_mask_one)(c) for c in chunk]
            dask.delayed(sum)(delayed_jobs).compute()
    else:
        # Serial fallback.
        print("Processing {} files".format(n_files))
        for idx, combo in enumerate(combos):
            print("\t{:.2f}%".format(100. * idx / n_files))
            _mask_one(combo)