def plot(self, var_name, t=None, depth_idx=None, **kwargs):
    """ Plot 2D data on a curvilinear grid. See `plot.plot_data` for details. """
    title = var_name

    # get a valid time object
    t_obj = self.get_time_obj(t)

    # extract data
    data = getattr(self, var_name)

    # handle depth
    depth_key = get_depth_dim(data)
    if depth_key is not None:
        my_depth_idx = 0 if depth_key == 'depth' else -1
        my_depth_idx = my_depth_idx if depth_idx is None else depth_idx
        data = data.isel(**{depth_key: my_depth_idx})
        title += ', depth {:d}'.format(my_depth_idx)

    # handle time
    if 'time' in data.dims:
        data = data.sel(time=t_obj, method='nearest')
        real_time = self.time.sel(time=t_obj, method='nearest').values.item()
        title += ', time {}'.format(real_time)

    # handle doy
    if 'doy' in data.dims:
        my_doy = 0 if t is None else t
        data = data.isel(doy=my_doy)
        title += ', doy {:d}'.format(my_doy)

    return plot_data(self.grid, data, title=title, **kwargs)
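# `get_depth_dim` is used throughout this section but not defined here. A
# minimal sketch of its assumed behavior (not necessarily the project's
# implementation): return the name of the vertical dimension, checking the
# terrain-following 's_rho' before the interpolated 'depth', or None for 2D data.
def get_depth_dim(data):
    """ SKETCH: return the vertical dimension of `data`, or None. """
    for key in ('s_rho', 'depth'):
        if key in data.dims:
            return key
    return None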
def eke_at_depth(ds, climatology, depth_idx, time_slice=slice(None, None), days_around=30):
    """
    Calculate EKE and associated data at a given depth index.

    This function is efficient when the whole domain x time fits into memory.
    """
    # get times
    times = np.squeeze(ds['time'].values)

    # get depth key
    depth_key = get_depth_dim(ds)

    # load raw data
    raw_u = ds['u'].isel(**{depth_key: depth_idx, 'time': time_slice}).values
    print('Raw u loaded')
    raw_v = ds['v'].isel(**{depth_key: depth_idx, 'time': time_slice}).values
    print('Raw v loaded')

    # load climatological data
    clim_u = climatology['u_b'].isel(**{depth_key: depth_idx}).values
    print('Clim u loaded')
    clim_v = climatology['v_b'].isel(**{depth_key: depth_idx}).values
    print('Clim v loaded')

    # allocate memory for the primes: prime = value - climatology
    prime_u = raw_u.copy()
    prime_v = raw_v.copy()

    # allocate memory for results
    eke_b = np.zeros_like(clim_u)
    num_items = np.zeros(clim_u.shape[0])

    # loop times
    for t_idx, t_obj in progressbar.progressbar(enumerate(times)):
        # get doys and weights
        doy = t_obj.dayofyr - 1
        doys = get_doys(t_obj, ds, days_around)
        doys_weights = get_triangular_weights(doys)

        # calculate the climatological value
        c_u = np.nansum(clim_u[doys] * doys_weights[:, None, None], axis=0)
        c_v = np.nansum(clim_v[doys] * doys_weights[:, None, None], axis=0)

        # calculate prime values
        prime_u[t_idx] -= c_u
        prime_v[t_idx] -= c_v
        num_items[doy] += 1

        # accumulate the climatological EKE
        eke_b[doy] += 0.5 * ((prime_u[t_idx]**2) + (prime_v[t_idx]**2))

    # normalize the climatological EKE
    eke_b /= num_items[:, None, None]
    print('Calculated primes')

    # calculate eke
    eke = 0.5 * ((prime_u**2) + (prime_v**2))

    # calculate ke
    ke = 0.5 * (raw_u**2 + raw_v**2)

    return eke, eke_b, prime_u, prime_v, ke
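# `get_doys` and `get_triangular_weights` are referenced above but not shown.
# A minimal sketch of the assumed behavior: `get_doys` returns the day-of-year
# indices in a +/- days_around window (wrapping around a 365-day year, matching
# the 365-entry climatology above), and the weights form a normalized triangle
# peaking at the central doy.
def get_doys(t_obj, ds, days_around):
    """ SKETCH: doy indices around the doy of t_obj, wrapping at year end. """
    doy = t_obj.dayofyr - 1
    return np.array([(doy + off) % 365
                     for off in range(-days_around, days_around + 1)])

def get_triangular_weights(doys):
    """ SKETCH: triangular weights over the doy window, summing to 1. """
    half = len(doys) // 2
    weights = 1.0 - np.abs(np.arange(len(doys)) - half) / (half + 1.0)
    return weights / weights.sum()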
def calculate_climatology_at_depth(ds, variable, depth_idx, days_around=0, with_std=True):
    """
    Calculate the climatology of a dataset ds for a variable at depth depth_idx.

    Use days_around for smoothing. Returns the climatology, its standard
    deviation (or None), and the number of items per doy.
    """
    # load raw data into memory
    depth_key = get_depth_dim(ds)
    assert (depth_key is not None) or depth_idx is None
    if depth_key is not None:
        data_t = ds[variable].isel(**{depth_key: depth_idx}).values
    else:
        data_t = ds[variable].values

    # get the number of days (number of doys)
    num_days = get_num_days(ds)
    new_shape = tuple([num_days] + list(data_t.shape[1:]))

    # set up data arrays for the climatology, its standard deviation and the number of items
    data_clim = np.empty(new_shape, dtype=float)
    data_clim_std = None if not with_std else np.empty(new_shape, dtype=float)
    data_clim_num = np.empty((num_days, ), dtype=int)

    # loop doys
    for doy in range(num_days):
        # create boolean mask
        mask = get_doy_mask(doy, days_around, ds)

        # get data
        data = data_t[mask]
        assert data.shape[0] > 0, (data.shape, np.count_nonzero(mask))

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "Degrees of freedom <= 0 for slice")
            warnings.filterwarnings("ignore", "Mean of empty slice")

            # calculate statistics
            data_clim[doy] = np.nanmean(data, axis=0)
            if with_std:
                data_clim_std[doy] = np.nanstd(data, axis=0)
        data_clim_num[doy] = data.shape[0]

    return data_clim, data_clim_std, data_clim_num
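# `get_doy_mask` is referenced above but not shown. A minimal sketch of the
# assumed behavior (assuming cftime-like time objects with a `dayofyr`
# attribute and a 365-day year, as used elsewhere in this section): mark all
# time steps whose doy lies within +/- days_around of `doy`.
def get_doy_mask(doy, days_around, ds):
    """ SKETCH: boolean mask over ds['time'] for a smoothed doy window. """
    doys = np.array([t.dayofyr - 1 for t in np.squeeze(ds['time'].values)])
    dist = np.abs(doys - doy)
    dist = np.minimum(dist, 365 - dist)  # circular distance within the year
    return dist <= days_around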
def add_data(self, path, variables=None, time_calendar=None, time_raw=None,
             time_units=None, var_prefix='', time_offset=0, time_from=None):
    """ Add a netCDF dataset to the object. """
    if path in self._data:
        print('WARNING: this path has been loaded already')
        return

    # avoid a mutable default argument
    if variables is None:
        variables = []

    # open dataset
    data = open_dataset(path,
                        variables,
                        time_calendar=time_calendar,
                        time_raw=time_raw,
                        time_offset=time_offset,
                        time_from=time_from,
                        time_units=time_units,
                        eta_rho_slice=self._eta_slice,
                        xi_rho_slice=self._xi_slice,
                        s_rho_slice=self._s_rho_slice)
    depth_key = get_depth_dim(data)

    # do some consistency tests...
    # ... check depth
    assert self._depth_key is None or depth_key is None or depth_key == self._depth_key
    assert self._depth_num is None or depth_key is None or \
        data.dims[depth_key] == self._depth_num

    # ... check that the time axis is the same
    if 'time' in data:
        assert self.time is None or \
            (len(self.time) == len(data['time']) and
             abs((self.time[0] - data['time'][0]).values) < np.timedelta64(1, 'h') and
             abs((self.time[-1] - data['time'][-1]).values) < np.timedelta64(1, 'h')), \
            (self.time[0], data.time[0], self.time[-1], data.time[-1])
        self.time = data.time

    # save depth key (s_rho vs depth)
    if depth_key is not None:
        self._depth_key = depth_key
        self._depth_num = data.dims[depth_key]
        if depth_key == 'depth' and 'depth' in data.coords:
            setattr(self, 'depth', data.depth)

    # save full object for direct access
    self._data[path] = data
    self._prefixes[path] = var_prefix

    # save each single variable
    for var_name in data:
        if getattr(self, var_prefix + var_name, None) is not None:
            print('WARNING:', var_prefix + var_name,
                  'already exists. Consider changing `var_prefix`.')
            continue
        setattr(self, var_prefix + var_name, data[var_name])
        self._reverse_data[var_prefix + var_name] = path
        self._reverse_prefix[var_prefix + var_name] = var_prefix
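# Hypothetical usage (the container instance and file name are illustrative,
# not from this section): after `add_data`, each variable is reachable as an
# attribute, optionally prefixed.
#
#   model.add_data('avg.nc', variables=['u', 'v'], var_prefix='raw_')
#   model.raw_u  # xarray DataArray for 'u', sliced to the configured domain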
def psd(self, var_name, t, depth_idx=None, aggregation_mode='mean',
        scaling='density', dim='eta', min_doy=0, max_doy=365):
    """
    Calculate a PSD on the data. Multiple depth levels are averaged.

    Variables can be combined by putting a + between them (useful, for
    example, for horizontal velocities). See `psd.do_multidim_welch` for details.
    """
    # handle lists of depth idxs:
    # calculate the psd for each depth and average y and d_y
    if isinstance(depth_idx, (list, np.ndarray)):
        all_y = []
        all_d_y = []
        all_t = None
        all_x = None
        for d_idx in depth_idx:
            all_t, all_x, y, d_y = self.psd(var_name,
                                            t,
                                            depth_idx=d_idx,
                                            aggregation_mode=aggregation_mode,
                                            scaling=scaling,
                                            dim=dim,
                                            min_doy=min_doy,
                                            max_doy=max_doy)
            all_y.append(y)
            all_d_y.append(d_y)
        return all_t, all_x, np.nanmean(all_y, axis=0), np.nanmean(all_d_y, axis=0)

    # handle u+v/v+u:
    # calculate the psd for u and v separately and add them together
    if '+' in var_name:
        dg_time = None
        dg_x = None
        dg_y = None
        dg_dy = None
        for var in var_name.split('+'):
            d = self.psd(var,
                         t,
                         depth_idx=depth_idx,
                         aggregation_mode=aggregation_mode,
                         scaling=scaling,
                         dim=dim,
                         min_doy=min_doy,
                         max_doy=max_doy)

            # is first var
            if dg_x is None:
                dg_time = d[0]
                dg_x = d[1]
                dg_y = d[2]
                dg_dy = d[3]**2
            else:
                assert (d[1] == dg_x).all()
                dg_y += d[2]
                dg_dy += d[3]**2
        return dg_time, dg_x, dg_y, np.sqrt(dg_dy)

    # the slices must be set first
    if dim not in self._psd_slices:
        raise RuntimeError(
            'You need to set a subdomain using set_psd_slice for this dim first')

    # get subdomain info
    subdomain_slice = self._psd_slices[dim]['subdomain']
    const_dim_dist = self._psd_slices[dim]['const_dim_dist']
    axis = self._psd_slices[dim]['axis']

    list_real_t = []
    list_y = []
    list_d_y = []
    prev_x = None

    # if t is not a list of t_obj/t_idxs, create a list of length 1
    t_list = t
    if not isinstance(t, (list, np.ndarray)):
        t_list = [t]

    # loop over times and average the results
    for t_el in t_list:
        data = getattr(self, var_name)

        # get data by t_obj
        if 'time' in data.dims:
            t_obj = self.get_time_obj(t_el)
            doy = t_obj.dayofyr - 1
            if doy < min_doy or doy >= max_doy:
                continue
            data = data.sel(time=t_obj, method='nearest')
            real_t = self.time.sel(time=t_obj, method='nearest').values.item()

        # get data by doy
        if 'doy' in data.dims:
            data = data.isel(doy=t_el)
            real_t = t_el

        # get correct depth
        depth_key = get_depth_dim(data)
        if depth_key is not None:
            my_depth_idx = 0 if depth_key == 'depth' else -1
            my_depth_idx = my_depth_idx if depth_idx is None else depth_idx
            data = data.isel(**{depth_key: my_depth_idx})

        # extract the variable values and slice to the subdomain
        var = np.squeeze(data.values.copy())
        var = var[subdomain_slice]

        # do PSD
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message="nperseg = 256 is greater")
            warnings.filterwarnings("ignore", message="divide by zero")
            x, y, d_y = do_multidim_welch(var, const_dim_dist, axis,
                                          aggregation_mode, scaling)

        # check that the x values are actually the same
        if prev_x is not None:
            assert (prev_x == x).all()
        else:
            prev_x = x

        # save data
        list_real_t.append(real_t)
        list_y.append(y)
        list_d_y.append(d_y)

    return list_real_t, prev_x, np.nanmean(list_y, axis=0), np.nanmean(list_d_y, axis=0)
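# `do_multidim_welch` is referenced above but not shown. A minimal sketch of
# the assumed behavior using scipy.signal.welch: compute a Welch PSD along
# `axis` of the 2D subdomain, then aggregate the per-row spectra. The exact
# aggregation and the meaning of d_y (here: the spread across rows) are
# assumptions.
from scipy import signal

def do_multidim_welch(var, const_dim_dist, axis, aggregation_mode, scaling):
    """ SKETCH: Welch PSD along `axis`, aggregated over the other axis. """
    # fs is 1 / grid spacing, so x comes out in cycles per unit distance
    x, psds = signal.welch(var, fs=1.0 / const_dim_dist, axis=axis, scaling=scaling)
    other_axis = 1 if axis == 0 else 0
    agg_fn = np.nanmean if aggregation_mode == 'mean' else np.nanmedian
    y = agg_fn(psds, axis=other_axis)
    d_y = np.nanstd(psds, axis=other_axis)
    return x, y, d_y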
def create_climatology_output(output_path, dataset, variables, days_around, with_std=False):
    """ Create a writeable netCDF4 object with the climatology structure. """
    # check if the output exists already
    res = check_output_path(output_path)
    if res is not None:
        # check if days_around matches
        ds_days_around = res.getncattr('days_around')
        if ds_days_around != days_around:
            raise RuntimeError(
                'Found different days_around attribute in file: {} (requested: {})'.format(
                    ds_days_around, days_around))
        return res

    # get the number of days per year
    max_days = get_num_days(dataset)

    # get all dimensions
    dims = {}
    for variable in variables:
        for i, dim in enumerate(dataset[variable].dims):
            if dim != "time" and dim != "doy":
                dims[dim] = dataset[variable].shape[i]

    # get depth key
    depth_key = get_depth_dim(dataset)

    # set up output dataset
    ds_out = nc.Dataset(output_path, 'w')  # pylint: disable=no-member
    ds_out.setncatts(dataset.attrs)

    # save the number of days_around used for this climatology
    ds_out.setncattr('days_around', days_around)

    chunksizes = {'doy': 1}

    # create dimensions
    ds_out.createDimension("doy", max_days)
    for dim in dims:
        ds_out.createDimension(dim, dims[dim])
        chunksizes[dim] = dims[dim] if dim != depth_key else min(10, dims[depth_key])
    print('Chunking:', chunksizes)

    for variable in variables:
        dimensions = ['doy'] + [
            dim for dim in dataset[variable].dims if dim != "time" and dim != "doy"
        ]
        variable_chunking = tuple(chunksizes[dim] for dim in dimensions)

        # only the variable itself, or also its standard deviation
        variable_names = [variable + '_b'] if not with_std else [
            variable + '_b', variable + '_std'
        ]

        # create all the variables
        for variable_name in variable_names:
            ds_out.createVariable(variable_name,
                                  float,
                                  zlib=True,
                                  dimensions=tuple(dimensions),
                                  fill_value=np.nan,
                                  chunksizes=variable_chunking)
            for key in dataset[variable].attrs:
                setattr(ds_out.variables[variable_name], key,
                        dataset[variable].attrs[key])

        ds_out.createVariable(variable + '_num',
                              int,
                              zlib=True,
                              dimensions=("doy", ),
                              fill_value=0)

    return ds_out
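# `check_output_path` is assumed to return an existing output file reopened
# for appending, or None if the file still has to be created. A minimal
# sketch under that assumption:
import os

def check_output_path(output_path):
    """ SKETCH: reopen an existing output file, or return None. """
    if not os.path.exists(output_path):
        return None
    return nc.Dataset(output_path, 'a')  # pylint: disable=no-member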
"--output", type=str, help="Output path for smoothed climatology", required=True) parser.add_argument( "--days-around", type=int, help="Number of days around doy for smoothed climatology", default=0) args = parser.parse_args() # get initial dataset dsf = get_dataset(args) depth_key = get_depth_dim(dsf) depth_idxs = [None] if depth_key is None else list( range(dsf.dims[depth_key])) print(depth_idxs) # get output netCDF file out = create_climatology_output(args.output, dsf, args.variables, args.days_around, with_std=False) # loop vars for var in args.variables: print('Start calculation of', var)
def create_eke_output(output_path, dataset):
    """ Create a writeable netCDF4 object with the eke structure. """
    # check if the output exists already
    res = check_output_path(output_path)
    if res is not None:
        return res

    # get depth key
    depth_key = get_depth_dim(dataset)

    # get all dimensions (u is assumed to have shape (time, depth, eta_rho, xi_rho))
    dims = {
        'eta_rho': dataset['u'].shape[2],
        'xi_rho': dataset['u'].shape[3],
        'time': dataset['u'].shape[0],
        'doy': 365
    }

    # set up output dataset
    ds_out = nc.Dataset(output_path, 'w')  # pylint: disable=no-member
    ds_out.setncatts(dataset.attrs)

    # create dimensions
    chunksizes = {}
    for dim in dims:
        ds_out.createDimension(dim, dims[dim])
        chunksizes[dim] = dims[dim]
    chunksizes['time'] = 30
    chunksizes['doy'] = 30
    print('Chunking:', chunksizes)

    # create time variable
    ds_out.createVariable('time',
                          float,
                          zlib=True,
                          dimensions=('time', ),
                          fill_value=np.nan)
    ds_out.variables['time'].units = dataset.time.attrs['units']
    ds_out.variables['time'].calendar = dataset.time.attrs['calendar']

    # save time values
    conv_fn = partial(date2num,
                      units=dataset.time.attrs['units'],
                      calendar=dataset.time.attrs['calendar'])
    ds_out['time'][:] = np.array([conv_fn(t_obj) for t_obj in dataset.time.values])

    # create variables with the correct chunking
    for variable in ['eke', 'eke_b', 'u_p', 'v_p', 'ke']:
        dimensions = ['time', 'eta_rho', 'xi_rho']
        if variable == 'eke_b':
            dimensions[0] = 'doy'
        variable_chunking = tuple(chunksizes[dim] for dim in dimensions)
        ds_out.createVariable(variable,
                              float,
                              zlib=True,
                              dimensions=tuple(dimensions),
                              fill_value=np.nan,
                              chunksizes=variable_chunking)

    return ds_out
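# Hypothetical usage, tying the two EKE helpers together (the file name and
# climatology dataset are illustrative): write the arrays returned by
# `eke_at_depth` into the layout created by `create_eke_output`.
#
#   out = create_eke_output('eke.nc', ds)
#   eke, eke_b, u_p, v_p, ke = eke_at_depth(ds, climatology, depth_idx=0)
#   out['eke'][:], out['eke_b'][:] = eke, eke_b
#   out['u_p'][:], out['v_p'][:] = u_p, v_p
#   out['ke'][:] = ke
#   out.close()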