def __init__(self, time=None, location=None, frequency=None, direction=None, energy=None, spreading=None, time_units='s', location_units='m', frequency_units='Hz', direction_units='deg', energy_units='m^2/Hz', spreading_units='deg', time_var='time', location_var='location', frequency_var='frequency', direction_var='direction', energy_var='energy', spreading_var='spreading', frequency_convention='absolute', direction_convention='nautical', spreading_convention='cosine', spectral=True, directional=True, attrs={}, crs=None, **kwargs): '''Initialize class Sets dimensions, converts coordinates and fills the dataset, if data is provided. Parameters ---------- time : iterable, optional Time coordinates, each item can be a datetime object or float location : iterable of 2-tuples, optional Location coordinates, each item is a 2-tuple with x- and y-coordinates frequency : iterable, optional Frequency cooridinates direction : iterable, optional Direction coordinates energy : matrix, optional Wave energy time_units : str, optional Units of time coordinates (default: s) location_units : str, optional Units of location coordinates (default: m) frequency_units : str, optional Units of frequency coordinates (default: Hz) direction_units : str, optional Units of direction coordinates (default: deg) energy_units : str, optional Units of wave energy (default: m^2/Hz) time_var : str, optional Name of time variable (default: time) location_var : str, optional Name of location variable (default: location) frequency_var : str, optional Name of frequency variable (default: frequency) direction_var : str, optional Name of direction variable (default: direction) energy_var : str, optional Name of wave energy variable (default: energy) frequency_convention : str, optional Convention of frequency definition (default: absolute) direction_convention : str, optional Convention of direction definition (default: nautical) attrs : dict-like, optional Global attributes crs : str, optional Proj4 specification of local coordinate reference system kwargs : dict, optional Additional options passed to the xarray.Dataset initialization method See Also -------- oceanwaves.OceanWaves.reinitialize ''' dims = [] coords = OrderedDict() data_vars = OrderedDict() # simplify dimensions time = np.asarray(time) location = np.asarray(location) frequency = np.asarray(frequency, dtype=np.float) direction = np.asarray(direction, dtype=np.float) spreading = np.asarray(spreading, dtype=np.float) energy = np.asarray(energy, dtype=np.float) # simplify units time_units = simplify(time_units) location_units = simplify(location_units) frequency_units = simplify(frequency_units) direction_units = simplify(direction_units) energy_units = simplify(energy_units) # determine object dimensions if self._isvalid(time): dims.append(time_var) coords[time_var] = xr.Variable( time_var, time ) # only set time units if given. otherwise a datetime # object is assumed that is encoded by xarray. setting # units manually in that case would raise an exception if # the dataset is written to CF-compatible netCDF. 
if time_units is None or time_units != '': coords[time_var].attrs.update(dict(units=time_units)) if self._isvalid(location): dims.append(location_var) coords[location_var] = xr.Variable( location_var, np.arange(len(location)) ) x, y = list(zip(*location)) coords['%s_x' % location_var] = xr.Variable( location_var, np.asarray(x), attrs=dict(units=location_units) ) coords['%s_y' % location_var] = xr.Variable( location_var, np.asarray(y), attrs=dict(units=location_units) ) coords['%s_lat' % location_var] = xr.Variable( location_var, np.asarray(x) + np.nan, attrs=dict(units='degN') ) coords['%s_lon' % location_var] = xr.Variable( location_var, np.asarray(y) + np.nan, attrs=dict(units='degE') ) if self._isvalid(frequency, mask=frequency>0) and spectral: dims.append(frequency_var) coords[frequency_var] = xr.Variable( frequency_var, frequency[frequency>0], attrs=dict(units=frequency_units) ) if self._isvalid(direction) and directional: dims.append(direction_var) coords[direction_var] = xr.Variable( direction_var, direction, attrs=dict(units=direction_units) ) # determine object shape shp = tuple([len(c) for k, c in coords.items() if k in dims]) # initialize energy variable data_vars[energy_var] = xr.DataArray( np.nan + np.zeros(shp), dims=dims, coords=coords, attrs=dict(units=energy_units) ) # store parameterized frequencies if not spectral: if self._isvalid(frequency): data_vars[frequency_var] = xr.DataArray( frequency, dims=dims, coords=coords, attrs=dict(units=direction_units) ) # store parameterized directions if not directional: if self._isvalid(direction): data_vars[direction_var] = xr.DataArray( direction, dims=dims, coords=coords, attrs=dict(units=direction_units) ) if self._isvalid(spreading): data_vars[spreading_var] = xr.DataArray( spreading, dims=dims, coords=coords, attrs=dict(units=spreading_units) ) # collect global attributes attrs.update(dict( _init=kwargs.copy(), _crs=crs, _names=dict( time = time_var, location = location_var, frequency = frequency_var, direction = direction_var, spreading = spreading_var, energy = energy_var ), _units=dict( time = time_units, location = location_units, frequency = frequency_units, direction = direction_units, energy = energy_units ), _conventions=dict( frequency = frequency_convention, direction = direction_convention, spreading = spreading_convention ) )) # initialize empty object super(OceanWaves, self).__init__( data_vars=data_vars, coords=coords, attrs=attrs, **kwargs ) # set wave energy if self._isvalid(energy): self['_energy'] = dims, energy.reshape(shp) # convert coordinates self.convert_coordinates(crs)
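# A minimal, self-contained sketch of the coordinate pattern used by the
# initializer above: wrap each axis in an xr.Variable carrying a 'units'
# attribute, then build the energy DataArray on top of those coordinates.
# The axis values and the variable name 'energy' are illustrative only.
import numpy as np
import xarray as xr

time = np.arange(3.0)                     # assumed numeric time axis, units 's'
frequency = np.linspace(0.05, 0.5, 10)    # Hz
energy = np.random.rand(3, 10)            # m^2/Hz

coords = {
    'time': xr.Variable('time', time, attrs={'units': 's'}),
    'frequency': xr.Variable('frequency', frequency, attrs={'units': 'Hz'}),
}
ds = xr.Dataset(
    {'energy': xr.DataArray(energy, dims=('time', 'frequency'),
                            coords=coords, attrs={'units': 'm^2/Hz'})}
)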
def create_data_array_from_record( record: GradsRecordHandler, parameter, level, level_dim_name=None, latitude_direction="degree_north", ) -> Optional[xr.DataArray]: grads_ctl = record.grads_ctl # values file_path = grads_ctl.get_data_file_path(record.record_info) with open(file_path, "rb") as f: values = record.load_data(f) # coords lons = grads_ctl.xdef["values"] lats = grads_ctl.ydef["values"] if latitude_direction == "degree_north": values = np.flip(values, 0) lats = lats[::-1] coords = {} coords["latitude"] = xr.Variable( "latitude", lats, attrs={ "units": latitude_direction, "standard_name": "latitude", "long_name": "latitude" }, ) coords["longitude"] = xr.Variable("longitude", lons, attrs={ "units": "degrees_east", "standard_name": "longitude", "long_name": "longitude" }) coords[level_dim_name] = level coords["valid_time"] = record.record_info["valid_time"] if grads_ctl.start_time is not None and grads_ctl.forecast_time is not None: coords["start_time"] = grads_ctl.start_time coords["forecast_time"] = grads_ctl.forecast_time # dims dims = ("latitude", "longitude") # attrs data_attrs = {"description": record.record_info["description"]} data = xr.DataArray( values, dims=dims, coords=coords, attrs=data_attrs, name=parameter, ) return data
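# A hedged, self-contained sketch of the same wrapping step with synthetic
# values: a 2D field becomes a DataArray with CF-style latitude/longitude
# coordinate Variables, flipping latitude to south-to-north as the
# "degree_north" branch above does. The name 't2m' is an assumption.
import numpy as np
import xarray as xr

lats = np.linspace(90, -90, 19)     # stored north-to-south, as in many GrADS files
lons = np.linspace(0, 350, 36)
values = np.random.rand(lats.size, lons.size)

values = np.flip(values, 0)         # reorder to south-to-north
lats = lats[::-1]

da = xr.DataArray(
    values,
    dims=("latitude", "longitude"),
    coords={
        "latitude": xr.Variable("latitude", lats,
                                attrs={"units": "degrees_north"}),
        "longitude": xr.Variable("longitude", lons,
                                 attrs={"units": "degrees_east"}),
    },
    name="t2m",
)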
def setUp(self):
    self.data = sparse.random((4, 6), random_state=0, density=0.5)
    self.var = xr.Variable(("x", "y"), self.data)
def compute(self, data, selected_indexes):
    observed = data.time.values[selected_indexes] - np.datetime64(self._since)
    days_since = observed.astype('timedelta64[D]').astype('int16')
    return self._var_name, xarray.Variable(('y', 'x'), days_since)
def compute(self, data, selected_indexes):
    return self._var_name, xarray.Variable(
        ('y', 'x'), data.source.values[selected_indexes])
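# Both compute() methods above return a (name, Variable) pair on a ('y', 'x')
# grid built by fancy-indexing per-pixel time indices. A self-contained
# equivalent of the "days since" variant, with synthetic inputs:
import numpy as np
import xarray

time = np.array(['2020-01-05', '2020-02-01'], dtype='datetime64[ns]')
selected_indexes = np.array([[0, 1], [1, 0]])   # per-pixel index into time
observed = time[selected_indexes] - np.datetime64('2020-01-01')
days_since = observed.astype('timedelta64[D]').astype('int16')
var = xarray.Variable(('y', 'x'), days_since)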
def get_dataset(self, varnames=None, iter_start=None, iter_stop=None, iter_step=None, k_levels=None, k_chunksize=1, type='faces'): """ Create an xarray Dataset object for this model. Parameters ---------- *varnames : list of strings, optional The variables to include, e.g. ``['Salt', 'Theta']``. Otherwise include all known variables. iter_start : int, optional Starting iteration number. Otherwise use model default. Follows standard `range` conventions. (inclusive) iter_start : int, optional Stopping iteration number. Otherwise use model default. Follows standard `range` conventions. (exclusive) iter_step : int, optional Iteration number stepsize. Otherwise use model default. k_levels : list of ints, optional Vertical levels to extract. Default is to get them all k_chunksize : int, optional How many vertical levels per Dask chunk. type : {'faces', 'latlon'}, optional What type of dataset to create Returns ------- ds : xarray.Dataset """ def _if_not_none(a, b): if a is None: return b else: return a iter_start = _if_not_none(iter_start, self.iter_start) iter_stop = _if_not_none(iter_stop, self.iter_stop) iter_step = _if_not_none(iter_step, self.iter_step) iter_params = [iter_start, iter_stop, iter_step] if any([a is None for a in iter_params]): raise ValueError("The parameters `iter_start`, `iter_stop` " "and `iter_step` must be defined either by the " "model class or as argument. Instead got %r " % iter_params) iters = np.arange(*iter_params) varnames = varnames or self.varnames ds = self._make_coords_faces(iters) if type == 'latlon': ds = _faces_coords_to_latlon(ds) k_levels = k_levels or np.arange(self.nz) ds = ds.sel(k=k_levels, k_l=k_levels, k_u=k_levels, k_p1=k_levels) # get the data in facet form data_facets = { vname: self._get_facet_data(vname, iters, k_levels, k_chunksize) for vname in varnames } # transform it into faces or latlon data_transformers = { 'faces': _all_facets_to_faces, 'latlon': _all_facets_to_latlon } transformer = data_transformers[type] data = transformer(data_facets, _VAR_METADATA) variables = {} for vname in varnames: meta = _VAR_METADATA[vname] dims = meta['dims'] if type == 'faces': dims = _add_face_to_dims(dims) dims = [ 'time', ] + dims attrs = meta['attrs'] variables[vname] = xr.Variable(dims, data[vname], attrs) ds = ds.update(variables) return ds
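# Hypothetical call of the method above; 'model' is an assumed, already
# constructed model instance and the variable names are illustrative:
# ds = model.get_dataset(varnames=['Theta'], k_levels=[0, 1, 2],
#                        k_chunksize=2, type='latlon')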
def _extend_1d_vertical_to_2d(cls, vertical_variable, reference_var):
    shape = reference_var.shape[-2:]
    var_reshaped = np.resize(vertical_variable, shape[::-1])
    var_reshaped = np.moveaxis(var_reshaped, 0, 1)
    return xr.Variable(reference_var.dims[-2:], var_reshaped)
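# Self-contained sketch of the helper above with synthetic inputs: the 1D
# profile (one value per row of the last-but-one dimension) is repeated along
# the last dimension and wrapped in a Variable on the reference dims.
import numpy as np
import xarray as xr

reference_var = xr.Variable(('z', 'y', 'x'), np.zeros((4, 3, 5)))
vertical_variable = np.arange(3.0)                         # one value per 'y' row

shape = reference_var.shape[-2:]                           # (3, 5)
var_reshaped = np.resize(vertical_variable, shape[::-1])   # (5, 3), profile per row
var_reshaped = np.moveaxis(var_reshaped, 0, 1)             # back to (3, 5)
var2d = xr.Variable(reference_var.dims[-2:], var_reshaped)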
axes = (0, 3)
dims = ('f', 'plev', 'lat', 'k')
ff, kk, spectrum, *powers = climpy.power2d(
    *params[flux], dx=dt, dy=dlon, axes=axes,
    wintype=wintype, nperseg=days.size, coherence=False,
)

# Coordinates
if out is None:
    f = xr.Variable(
        ('f',), ff, {'long_name': 'frequency', 'units': 'cycles/day'}
    )
    k = xr.Variable(
        ('k',), kk, {'long_name': 'zonal wavenumber', 'units': 'none'}
    )
    out = xr.Dataset(
        {}, coords={'f': f, 'plev': plev, 'lat': lat, 'k': k},
    )

# Save to file
if powers:  # non-empty, i.e. we got a *co*-spectrum
    # Save power spectra, but make sure not to do so twice
    _, P1, P2 = powers
    for i, P in enumerate((P1, P2)):
        if shorts_i[i] not in out:
def test_apply_output_core_dimension(): def stack_negative(obj): def func(x): return xr.core.npcompat.stack([x, -x], axis=-1) result = apply_ufunc(func, obj, output_core_dims=[['sign']]) if isinstance(result, (xr.Dataset, xr.DataArray)): result.coords['sign'] = [1, -1] return result array = np.array([[1, 2], [3, 4]]) variable = xr.Variable(['x', 'y'], array) data_array = xr.DataArray(variable, {'x': ['a', 'b'], 'y': [-1, -2]}) dataset = xr.Dataset({'data': data_array}) stacked_array = np.array([[[1, -1], [2, -2]], [[3, -3], [4, -4]]]) stacked_variable = xr.Variable(['x', 'y', 'sign'], stacked_array) stacked_coords = {'x': ['a', 'b'], 'y': [-1, -2], 'sign': [1, -1]} stacked_data_array = xr.DataArray(stacked_variable, stacked_coords) stacked_dataset = xr.Dataset({'data': stacked_data_array}) assert_identical(stacked_array, stack_negative(array)) assert_identical(stacked_variable, stack_negative(variable)) assert_identical(stacked_data_array, stack_negative(data_array)) assert_identical(stacked_dataset, stack_negative(dataset)) assert_identical(stacked_data_array, stack_negative(data_array.groupby('x'))) assert_identical(stacked_dataset, stack_negative(dataset.groupby('x'))) def original_and_stack_negative(obj): def func(x): return (x, xr.core.npcompat.stack([x, -x], axis=-1)) result = apply_ufunc(func, obj, output_core_dims=[[], ['sign']]) if isinstance(result[1], (xr.Dataset, xr.DataArray)): result[1].coords['sign'] = [1, -1] return result out0, out1 = original_and_stack_negative(array) assert_identical(array, out0) assert_identical(stacked_array, out1) out0, out1 = original_and_stack_negative(variable) assert_identical(variable, out0) assert_identical(stacked_variable, out1) out0, out1 = original_and_stack_negative(data_array) assert_identical(data_array, out0) assert_identical(stacked_data_array, out1) out0, out1 = original_and_stack_negative(dataset) assert_identical(dataset, out0) assert_identical(stacked_dataset, out1) out0, out1 = original_and_stack_negative(data_array.groupby('x')) assert_identical(data_array, out0) assert_identical(stacked_data_array, out1) out0, out1 = original_and_stack_negative(dataset.groupby('x')) assert_identical(dataset, out0) assert_identical(stacked_dataset, out1)
def calc_anom(self, variable, window=1, smooth=1, groupby='dayofyear', clim=None): """ Creates a new variable with name "anom" from variable. Anomalies are computed for each grid point and time step as the departure from a climatology. Parameters ---------- variable : string Input variable. window : int, optional number of timesteps for running mean. The default is 1. smooth : int, optional number of timesteps for smoothing anomaly field. The default is 1. clim : string, optional If None: Calculate (long-term) climatological mean from input variable with groupby operation and running window. If string: path + dataname. Will be opened with xr.open_dataarray() If xarray.DataArray: containing the climatology. Will be regridded to resolution of input variable. groupby : string xarray “group by” operations. The default is dayofyear. Returns ------- xarray.Dataset: float An xarray Dataset object containing the anomalie field. """ # Set up dimensions logger.info("Set up dimensions...") if hasattr(self, '_time_name'): # print names logger.info("\n time: '{}'\n" " longitude: '{}'\n" " latitude: '{}'\n".format(self._time_name, self._longitude_name, self._latitude_name)) pass else: self.set_up() # step 1: calculate clim if clim is None: logger.info( 'Calculating climatological mean from {}...'.format(variable)) clim_mean = self.calc_clim(variable=variable, window=window, groupby=groupby) clim = 'from {} with running window time steps {}'.format( variable, window) else: logger.info('Reading climatological mean from {}...'.format(clim)) # if string, load data if isinstance(clim, str): clim_mean = xr.open_dataarray(clim) else: # clim is xarray.DataArray clim_mean = clim # check time dimension if groupby not in clim_mean.dims: clim_mean = clim_mean.groupby(self._time_name + '.' + groupby) # regrid - grid dimensions in clim must have same name as in input variable clim_mean = clim_mean.reindex(**{ self._latitude_name: self.ds[self._latitude_name], self._longitude_name: self.ds[self._longitude_name] }, method='nearest') # step 2: calculate and create new variable anomaly self.ds['anom'] = xr.Variable( self.ds[variable].dims, (self.ds[variable].groupby(self._time_name + '.' + groupby) - clim_mean).rolling(time=smooth, center=True).mean( ), # [variable] at end if error because of frozen dimensions attrs={ 'units': self.ds[variable].attrs['units'], 'long_name': self.ds[variable].attrs['long_name'] + ' Anomaly', 'standard_name': self.ds[variable].attrs['long_name'] + ' anomaly', 'history': ' '.join([ 'Calculated from {} with input attributes:', 'smoothing time steps = {},', 'climatology = {}.' ]).format(variable, smooth, clim) }) logger.info('Calculating Anomaly... DONE')
def run_contrack(self, variable, threshold, gorl, overlap, persistence, twosided=True): """ Spatial and temporal tracking of closed contours. Parameters ---------- variable : string input variable. threshold : int threshold value to detect contours. gorl : string find contours that are greater or lower than threshold value [>, >=, <, >=, ge,le,gt,lt]. overlap : int overlapping fraction of two contours between two time steps [0-1]. persistence : int temporal persistence (in time steps) of the contour life time twosided = True : bool, optional if true twosided (forward and backward) overlap test, otherwise just forward (more transient contours) Returns ------- xarray.Dataset: float An xarray Dataset object containing the flag field. Each unique feature has a unique label/flag. """ logger.info("\nRun ConTrack \n" "########### \n" " threshold: {} {} \n" " overlap: {} \n" " persistence: {} time steps".format( gorl, threshold, overlap, persistence)) # Set up dimensions logger.info("Set up dimensions...") if hasattr(self, '_time_name'): # print names logger.info("\n time: '{}'\n" " longitude: '{}'\n" " latitude: '{}'\n".format(self._time_name, self._longitude_name, self._latitude_name)) pass else: self.set_up() # step 1: define closed contours (greater or less than threshold) logger.info("Find individual contours...") if gorl == '>=' or gorl == 'ge': flag = xr.where(self.ds[variable] >= threshold, 1, 0) elif gorl == '<=' or gorl == 'le': flag = xr.where(self.ds[variable] <= threshold, 1, 0) elif gorl == '>' or gorl == 'gt': flag = xr.where(self.ds[variable] > threshold, 1, 0) elif gorl == '<' or gorl == 'lt': flag = xr.where(self.ds[variable] < threshold, 1, 0) else: errmsg = ' Please select from [>, >=, <, >=] for gorl' raise ValueError(errmsg) # set order of dimension to (time,lat,lon) dims = self.ds[variable].dims sort = [ dims.index(dim) for dim in [self._time_name, self._latitude_name, self._longitude_name] ] flag = flag.transpose(dims[sort[0]], dims[sort[1]], dims[sort[2]]) # step 2: identify individual contours (only along x and y) flag, num_features = ndimage.label( flag.data, structure=np.array([[[0, 0, 0], [0, 0, 0], [0, 0, 0]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[0, 0, 0], [0, 0, 0], [0, 0, 0]]]) ) # comment: can lead to memory error... better to loop over each time step? # periodic boundry: allow contours to cross date border # comment: what if dimension index not in order (time,lat,lon)? 
--> self.ds[variable].dims.index(self._latitude_name) for tt in range(len(self.ds[self._time_name])): for yy in range(len(self.ds[self._latitude_name])): if flag[tt, yy, 0] > 0 and flag[tt, yy, -1] > 0 and ( flag[tt, yy, 0] > flag[tt, yy, -1]): # downstream flag[tt][flag[tt] == flag[tt, yy, 0]] = flag[tt, yy, -1] if flag[tt, yy, 0] > 0 and flag[tt, yy, -1] > 0 and ( flag[tt, yy, 0] < flag[tt, yy, -1]): # upstream flag[tt][flag[tt] == flag[tt, yy, -1]] = flag[tt, yy, 0] #step 3: overlapping logger.info("Apply overlap...") weight_lat = np.cos(self.ds[self._latitude_name].data * np.pi / 180) weight_grid = np.ones( (self.ds.dims[self._latitude_name], self.ds.dims[self._longitude_name])) * np.array( (111 * self._dlat * 111 * self._dlon * weight_lat)).astype( np.float32)[:, None] for tt in range(1, len(self.ds[self._time_name]) - 1): # loop over individual contours slices = ndimage.find_objects(flag[tt]) label = 0 for slice_ in slices: label = label + 1 if slice_ is None: #no feature with this flag/label continue # calculate values areacon = np.sum( weight_grid[slice_][flag[tt][slice_] == label]) areaover_forward = np.sum( weight_grid[slice_][(flag[tt][slice_] == label) & (flag[tt + 1][slice_] >= 1)]) areaover_backward = np.sum( weight_grid[slice_][(flag[tt][slice_] == label) & (flag[tt - 1][slice_] >= 1)]) fraction_backward = (1 / areacon) * areaover_backward fraction_forward = (1 / areacon) * areaover_forward # apply overlap criterion forward and backward if twosided: # middle if fraction_backward != 0 and fraction_forward != 0: if (fraction_backward < overlap) or (fraction_forward < overlap): flag[tt][slice_][(flag[tt][slice_] == label)] = 0. # decay if fraction_backward != 0 and fraction_forward == 0: if (fraction_backward < overlap): flag[tt][slice_][(flag[tt][slice_] == label)] = 0. # onset if fraction_backward == 0 and fraction_forward != 0: if (fraction_forward < overlap): flag[tt][slice_][(flag[tt][slice_] == label)] = 0. # apply overlap criterion only forward (capture also more transient features) else: if (fraction_forward < overlap): flag[tt][slice_][(flag[tt][slice_] == label)] = 0. # step 4: persistency # find features along time axis logger.info("Apply persistence...") flag = xr.where(flag >= 1, 1, 0) flag, num_features = ndimage.label( flag, structure=np.array([[[0, 0, 0], [0, 1, 0], [0, 0, 0]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[0, 0, 0], [0, 1, 0], [0, 0, 0]] ])) # comment: can lead to memory error... # periodic boundry: allow features to cross date border slices = ndimage.find_objects(flag) for tt in range(len(self.ds[self._time_name])): for yy in range(len(self.ds[self._latitude_name])): if flag[tt, yy, 0] > 0 and flag[tt, yy, -1] > 0 and ( flag[tt, yy, 0] > flag[tt, yy, -1]): # downstream slice_ = slices[flag[tt, yy, 0] - 1] flag[slice_][(flag[slice_] == flag[tt, yy, 0])] = flag[tt, yy, -1] if flag[tt, yy, 0] > 0 and flag[tt, yy, -1] > 0 and ( flag[tt, yy, 0] < flag[tt, yy, -1]): # upstream slice_ = slices[flag[tt, yy, 0] - 1] flag[slice_][(flag[slice_] == flag[tt, yy, -1])] = flag[tt, yy, 0] # check for persistance, remove features with lifetime < persistance label = 0 for slice_ in ndimage.find_objects(flag): label = label + 1 if slice_ is None: #no feature with this flag continue if (slice_[0].stop - slice_[0].start) < persistence: flag[slice_][(flag[slice_] == label)] = 0. 
# step 5: create new variable flag logger.info("Create new variable 'flag'...") self.ds['flag'] = xr.Variable( self.ds[variable].dims, flag.transpose(sort), attrs={ 'units': 'flag', 'long_name': 'contrack flag', 'standard_name': 'contrack flag', 'history': ' '.join([ 'Calculated from {} with input attributes:', 'threshold = {} {},', 'overlap fraction = {},', 'persistence time steps = {}.', 'twosided = {}' ]).format(variable, gorl, threshold, overlap, persistence, twosided), 'reference': 'https://github.com/steidani/ConTrack' }) num_features = len(np.unique(flag)) - 1 # don't count 0 logger.info("Running contrack... DONE\n" "{} contours tracked".format(num_features))
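# The labelling step above hinges on the structuring element passed to
# ndimage.label: an all-zero first and last plane means features are connected
# only within a time step. A toy (time, lat, lon) sketch of that idea:
import numpy as np
from scipy import ndimage

flag = np.zeros((2, 4, 6), dtype=int)
flag[0, 1:3, 1:3] = 1               # one blob at t=0
flag[1, 1:3, 4:6] = 1               # another blob at t=1

structure = np.zeros((3, 3, 3), dtype=int)
structure[1] = 1                    # connect only within the same time plane
labels, num = ndimage.label(flag, structure=structure)
# num == 2: the blobs stay separate because no connectivity crosses time steps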
def construct_ds_from_dict(info, expt_labels, vars = ["rld", "rlu"]): ''' Contruct an xarray Dataset with global-mean up- and down fluxes for each realization and each experiement. Mean fluxes are supplemented with net fluxes, absorption, and forcing The location of the data is constructed from the keys in argument info, depending on whether the data is remote (assumed to be the Earth System) or local Parameters: info (dict): a dictionary with keys name, location, institution, physics, forcing, and realization expt_labels (array of strings): Names of the experiments, length needs to match size of "experiment" dimension in files vars = ["rld", "rlu"]: variables to be read ''' # # info is a list of dictionaries # out = xr.concat([open_one_file_set(i, vars) for i in info], dim=pd.Index([i["realization"] for i in info], name="realization")) out["forcing_index"] = xr.Variable(dims="realization", data=[i["forcing"] for i in info]) out["physics_index"] = xr.Variable(dims="realization", data=[i["physics"] for i in info]) out = out.assign_coords(expt=expt_labels) # # Weighted mean across profiles - profiles_weights should be the same across all realizations # x = (out * out.profile_weight/out.profile_weight.sum(dim='site')).sum(dim='site') # Profile weight depend on site but we've averaged over all those x = x.drop("profile_weight") # Variable attributes get lost in that reduction for v in x.variables: x[v].attrs = out[v].attrs out = x toa = out.isel(expt=0).plev.argmin().values sfc = out.isel(expt=0).plev.argmax().values if "rld" in out: band = "l" # # Net flux; atmospheric absorption # net = out["r" + band + "d"] - out["r" + band + "u"] net.attrs = {"standard_name":"net_downward_longwave_flux_in_air", "variable_name":"rln", "units":out["r" + band + "u"].attrs["units"], "cell_methods":out["r" + band + "u"].attrs["cell_methods"]} out["r" + band + "n"] = net out["r" + band + "a"] = net.sel(level=toa) - net.sel(level=sfc) out["r" + band + "a"].attrs = \ {"standard_name":"atmosphere_net_rate_of_absorption_of_longwave_energy", "variable_name":"rla", "units":out["r" + band + "u"].attrs["units"], "cell_methods":out["r" + band + "u"].attrs["cell_methods"]} out = compute_forcing(out, band) if "rsd" in out: band = "s" # # Net flux; atmospheric absorption # net = out["r" + band + "d"] - out["r" + band + "u"] net.attrs = {"standard_name":"net_downward_shortwave_flux_in_air", "variable_name":"rln", "units":out["r" + band + "u"].attrs["units"], "cell_methods":out["r" + band + "u"].attrs["cell_methods"]} out["r" + band + "n"] = net out["r" + band + "a"] = net.sel(level=toa) - net.sel(level=sfc) out["r" + band + "a"].attrs = \ {"standard_name":"atmosphere_net_rate_of_absorption_of_shortwave_energy", "variable_name":"rla", "units":out["r" + band + "u"].attrs["units"], "cell_methods":out["r" + band + "u"].attrs["cell_methods"]} out = compute_forcing(out, band) return(out)
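# A minimal sketch of the net-flux and absorption bookkeeping above, on
# synthetic up/down fluxes; level 0 stands in for TOA and the last level for
# the surface, mirroring the plev argmin/argmax logic.
import numpy as np
import xarray as xr

level = np.arange(4)
rld = xr.DataArray(100 * np.random.rand(4), dims='level', coords={'level': level})
rlu = xr.DataArray(100 * np.random.rand(4), dims='level', coords={'level': level})

rln = rld - rlu                              # net downward flux
rla = rln.sel(level=0) - rln.sel(level=3)    # TOA minus surface: column absorption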
def quantity(x): return x has_pint = False def test_allclose_regression(): x = xr.DataArray(1.01) y = xr.DataArray(1.02) xr.testing.assert_allclose(x, y, atol=0.01) @pytest.mark.parametrize( "obj1,obj2", ( pytest.param(xr.Variable("x", [1e-17, 2]), xr.Variable("x", [0, 3]), id="Variable"), pytest.param( xr.DataArray([1e-17, 2], dims="x"), xr.DataArray([0, 3], dims="x"), id="DataArray", ), pytest.param( xr.Dataset({ "a": ("x", [1e-17, 2]), "b": ("y", [-2e-18, 2]) }), xr.Dataset({ "a": ("x", [0, 2]), "b": ("y", [0, 1])
import pytest import xarray as xr def test_allclose_regression(): x = xr.DataArray(1.01) y = xr.DataArray(1.02) xr.testing.assert_allclose(x, y, atol=0.01) @pytest.mark.parametrize( "obj1,obj2", ( pytest.param( xr.Variable("x", [1e-17, 2]), xr.Variable("x", [0, 3]), id="Variable", ), pytest.param( xr.DataArray([1e-17, 2], dims="x"), xr.DataArray([0, 3], dims="x"), id="DataArray", ), pytest.param( xr.Dataset({"a": ("x", [1e-17, 2]), "b": ("y", [-2e-18, 2])}), xr.Dataset({"a": ("x", [0, 2]), "b": ("y", [0, 1])}), id="Dataset", ), ), ) def test_assert_allclose(obj1, obj2): with pytest.raises(AssertionError):
def __init__(self, filename, fields=[], categories=[], fix_cf=False, mode='r', endian='>', diaginfo_file='', tracerinfo_file='', use_mmap=False, dask_delayed=False): # Track the metadata accompanying this dataset. dir_path = os.path.abspath(os.path.dirname(filename)) if not dir_path: dir_path = os.getcwd() if not tracerinfo_file: tracerinfo_file = os.path.join(dir_path, 'tracerinfo.dat') if not os.path.exists(tracerinfo_file): tracerinfo_file = '' self.tracerinfo_file = tracerinfo_file if not diaginfo_file: diaginfo_file = os.path.join(dir_path, 'diaginfo.dat') if not os.path.exists(diaginfo_file): diaginfo_file = '' self.diaginfo_file = diaginfo_file self.filename = filename self.fsize = os.path.getsize(self.filename) self.mode = mode if not mode.startswith('r'): raise ValueError( "Currently only know how to 'r(b)'ead bpch files.") # Check endianness flag if endian not in ['>', '<', '=']: raise ValueError("Invalid byte order (endian={})".format(endian)) self.endian = endian # Open the raw output file, but don't yet read all the data self._mmap = use_mmap self._dask = dask_delayed self._bpch = BPCHFile(self.filename, self.mode, self.endian, tracerinfo_file=tracerinfo_file, diaginfo_file=diaginfo_file, eager=False, use_mmap=self._mmap, dask_delayed=self._dask) self.fields = fields self.categories = categories # Peek into the raw output file and read the header and metadata # so that we can get a head start at building the output grid self._bpch._read_metadata() self._bpch._read_header() # Parse the binary file and prepare to add variables to the DataStore self._bpch._read_var_data() # Create storage dicts for variables and attributes, to be used later # when xarray needs to access the data self._variables = OrderedDict() self._attributes = OrderedDict() self._attributes.update(self._bpch._attributes) self._dimensions = [d for d in BASE_DIMENSIONS] # Begin constructing the coordinate dimensions shared by the # output dataset variables dim_coords = {} self.ctm_info = CTMGrid.from_model(self._attributes['modelname'], resolution=self._attributes['res']) # Add vertical dimensions self._dimensions.append(dict(dims=[ 'lev', ], attrs={'axis': 'Z'})) self._dimensions.append(dict(dims=[ 'lev_trop', ], attrs={'axis': 'Z'})) self._dimensions.append(dict(dims=[ 'lev_edge', ], attrs={'axis': 'Z'})) eta_centers = self.ctm_info.eta_centers sigma_centers = self.ctm_info.sigma_centers # Add time dimensions self._dimensions.append( dict(dims=[ 'time', ], attrs={ 'axis': 'T', 'long_name': 'time', 'standard_name': 'time' })) # Add lat/lon dimensions self._dimensions.append( dict(dims=[ 'lon', ], attrs={ 'axis': 'X', 'long_name': 'longitude coordinate', 'standard_name': 'longitude' })) self._dimensions.append( dict(dims=[ 'lat', ], attrs={ 'axis': 'y', 'long_name': 'latitude coordinate', 'standard_name': 'latitude' })) if eta_centers is not None: lev_vals = eta_centers lev_attrs = { 'standard_name': 'atmosphere_hybrid_sigma_pressure_coordinate', 'axis': 'Z' } else: lev_vals = sigma_centers lev_attrs = { 'standard_name': 'atmosphere_hybrid_sigma_pressure_coordinate', 'axis': 'Z' } self._variables['lev'] = xr.Variable([ 'lev', ], lev_vals, lev_attrs) ## Latitude / Longitude # TODO: Add lon/lat bounds # Detect if we're on a nested grid; in that case, we'll have a displaced # origin set in the variable attributes we previously read ref_key = list(self._bpch.var_attrs.keys())[0] ref_attrs = self._bpch.var_attrs[ref_key] self.is_nested = (ref_attrs['origin'] != (1, 1, 1)) lon_centers = self.ctm_info.lon_centers 
lat_centers = self.ctm_info.lat_centers if self.is_nested: ix, iy, _ = ref_attrs['origin'] nx, ny, *_ = ref_attrs['original_shape'] # Correct i{x,y} for IDL->Python indexing (1-indexed -> 0-indexed) ix -= 1 iy -= 1 lon_centers = lon_centers[ix:ix + nx] lat_centers = lat_centers[iy:iy + ny] self._variables['lon'] = xr.Variable(['lon'], lon_centers, { 'long_name': 'longitude', 'units': 'degrees_east' }) self._variables['lat'] = xr.Variable(['lat'], lat_centers, { 'long_name': 'latitude', 'units': 'degrees_north' }) # TODO: Fix longitudes if ctm_grid.center180 # Add variables from the parsed BPCH file to our DataStore for vname in list(self._bpch.var_data.keys()): var_data = self._bpch.var_data[vname] var_attr = self._bpch.var_attrs[vname] if fields and (var_attr['name'] not in fields): continue if categories and (var_attr['category'] not in categories): continue # Process dimensions dims = [ 'time', 'lon', 'lat', ] dshape = var_attr['original_shape'] if len(dshape) == 3: # Process the vertical coordinate. A few things can happen here: # 1) We have cell-centered values on the "Nlayer" grid; we can take these variables and map them to 'lev' # 2) We have edge value on an "Nlayer" + 1 grid; we can take these and use them with 'lev_edge' # 3) We have troposphere values on "Ntrop"; we can take these and use them with 'lev_trop', but we won't have coordinate information yet # All other cases we do not handle yet; this includes the aircraft emissions and a few other things. Note that tracer sources do not have a vertical coord to worry about! nlev = dshape[-1] grid_nlev = self.ctm_info.Nlayers grid_ntrop = self.ctm_info.Ntrop try: if nlev == grid_nlev: dims.append('lev') elif nlev == grid_nlev + 1: dims.append('lev_edge') elif nlev == grid_ntrop: dims.append('lev_trop') else: continue except AttributeError: warnings.warn("Couldn't resolve grid_spec vertical layout") continue # xarray Variables are thin wrappers for numpy.ndarrays, or really # any object that extends the ndarray interface. A critical part of # the original ndarray interface is that the underlying data has to # be contiguous in memory. We can enforce this to happen by # concatenating each bundle in the variable data bundles we read # from the bpch file data = self._concat([v.data for v in var_data]) # Is the variable time-invariant? If it is, kill the time dim. # Here, we mean it only as one sample in the dataset. if data.shape[0] == 1: dims = dims[1:] data = data.squeeze() # Create a variable containing this data var = xr.Variable(dims, data, var_attr) # Shuffle dims for CF/COARDS compliance if requested # TODO: For this to work, we have to force a load of the data. # Is there a way to re-write BPCHDataProxy so that that's not # necessary? # Actually, we can't even force a load becase var.data is a # numpy.ndarray. Weird. # if fix_dims: # target_dims = [d for d in DIM_ORDER_PRIORITY if d in dims] # var = var.transpose(*target_dims) self._variables[vname] = var # Try to add a time dimension # TODO: Time units? if (len(var_data) > 1) and 'time' not in self._variables: time_bnds = np.asarray([v.time for v in var_data]) times = time_bnds[:, 0] self._variables['time'] = xr.Variable( [ 'time', ], times, { 'bounds': 'time_bnds', 'units': cf.CTM_TIME_UNIT_STR }) self._variables['time_bnds'] = xr.Variable( ['time', 'nv'], time_bnds, {'units': cf.CTM_TIME_UNIT_STR}) self._variables['nv'] = xr.Variable([ 'nv', ], [0, 1])
to_save_ds = xr.Dataset(coords={"time": obs}) for var in dicts.nc_meta.keys(): # v = var f2.create_variable(to_save_ds, var, variables[var]) ### ---------- adding the sonde_id var to the dataset --------- ##### sonde_id = (status_ds.swap_dims({ "sonde_id": "launch_time" }).sel(launch_time=sonde_ds[i].launch_time.values).sonde_id.values) attrs = { "descripion": "unique sonde ID", "long_name": "sonde identifier", "cf_role": "trajectory_id", } sonde_id_var = xr.Variable([], sonde_id, attrs=attrs) to_save_ds["sonde_id"] = sonde_id_var # file name file_name = ( "EUREC4A_JOANNE" # + str(Platform) + "_Dropsonde-RD41_" + str(sonde_id) + "_Level_2" + "_v" + str(joanne.__version__) + ".nc") save_directory = "/Users/geet/Documents/JOANNE/Data/Level_2/" comp = dict( zlib=True, complevel=4, fletcher32=True, _FillValue=np.finfo("float32").max,
def faces_dataset_to_latlon(ds, metric_vector_pairs=[('dxC', 'dyC'), ('dyG', 'dxG')]): """Transform a 13-face LLC xarray Dataset into a rectancular grid, discarding the Arctic. Parameters ---------- ds : xarray.Dataset A 13-face LLC dataset metric_vector_pairs : list, optional Pairs of variables that are positive-definite metrics located at grid edges. Returns ------- out : xarray.Dataset Transformed rectangular dataset """ coord_vars = list(ds.coords) ds_new = _faces_coords_to_latlon(ds) vector_pairs = [] scalars = [] vnames = list(ds.reset_coords().variables) for vname in vnames: try: mate = ds[vname].attrs['mate'] vector_pairs.append((vname, mate)) vnames.remove(mate) except KeyError: pass all_vector_components = [ inner for outer in (vector_pairs + metric_vector_pairs) for inner in outer ] scalars = [vname for vname in vnames if vname not in all_vector_components] data_vars = {} for vname in scalars: if vname == 'face' or vname in ds_new: continue if 'face' in ds[vname].dims: data = _faces_to_latlon_scalar(ds[vname].data) dims = _drop_facedim(ds[vname].dims) else: data = ds[vname].data dims = ds[vname].dims data_vars[vname] = xr.Variable(dims, data, ds[vname].attrs) for vname_u, vname_v in vector_pairs: data_u, data_v = _faces_to_latlon_vector(ds[vname_u].data, ds[vname_v].data) data_vars[vname_u] = xr.Variable(_drop_facedim(ds[vname_u].dims), data_u, ds[vname_u].attrs) data_vars[vname_v] = xr.Variable(_drop_facedim(ds[vname_v].dims), data_v, ds[vname_v].attrs) for vname_u, vname_v in metric_vector_pairs: data_u, data_v = _faces_to_latlon_vector(ds[vname_u].data, ds[vname_v].data, metric=True) data_vars[vname_u] = xr.Variable(_drop_facedim(ds[vname_u].dims), data_u, ds[vname_u].attrs) data_vars[vname_v] = xr.Variable(_drop_facedim(ds[vname_v].dims), data_v, ds[vname_v].attrs) ds_new = ds_new.update(data_vars) ds_new = ds_new.set_coords([c for c in coord_vars if c in ds_new]) return ds_new
def test_diff_array_repr(self): da_a = xr.DataArray( np.array([[1, 2, 3], [4, 5, 6]], dtype="int64"), dims=("x", "y"), coords={ "x": np.array(["a", "b"], dtype="U1"), "y": np.array([1, 2, 3], dtype="int64"), }, attrs={"units": "m", "description": "desc"}, ) da_b = xr.DataArray( np.array([1, 2], dtype="int64"), dims="x", coords={ "x": np.array(["a", "c"], dtype="U1"), "label": ("x", np.array([1, 2], dtype="int64")), }, attrs={"units": "kg"}, ) byteorder = "<" if sys.byteorder == "little" else ">" expected = dedent( """\ Left and right DataArray objects are not identical Differing dimensions: (x: 2, y: 3) != (x: 2) Differing values: L array([[1, 2, 3], [4, 5, 6]], dtype=int64) R array([1, 2], dtype=int64) Differing coordinates: L * x (x) %cU1 'a' 'b' R * x (x) %cU1 'a' 'c' Coordinates only on the left object: * y (y) int64 1 2 3 Coordinates only on the right object: label (x) int64 1 2 Differing attributes: L units: m R units: kg Attributes only on the left object: description: desc""" % (byteorder, byteorder) ) actual = formatting.diff_array_repr(da_a, da_b, "identical") try: assert actual == expected except AssertionError: # depending on platform, dtype may not be shown in numpy array repr assert actual == expected.replace(", dtype=int64", "") va = xr.Variable( "x", np.array([1, 2, 3], dtype="int64"), {"title": "test Variable"} ) vb = xr.Variable(("x", "y"), np.array([[1, 2, 3], [4, 5, 6]], dtype="int64")) expected = dedent( """\ Left and right Variable objects are not equal Differing dimensions: (x: 3) != (x: 2, y: 3) Differing values: L array([1, 2, 3], dtype=int64) R array([[1, 2, 3], [4, 5, 6]], dtype=int64)""" ) actual = formatting.diff_array_repr(va, vb, "equals") try: assert actual == expected except AssertionError: assert actual == expected.replace(", dtype=int64", "")
def write_crs(self, input_crs=None, grid_mapping_name=None, inplace=False): """ Write the CRS to the dataset in a CF compliant manner. Parameters ---------- input_crs: object Anything accepted by `rasterio.crs.CRS.from_user_input`. grid_mapping_name: str, optional Name of the grid_mapping coordinate to store the CRS information in. Default is the grid_mapping name of the dataset. inplace: bool, optional If True, it will write to the existing dataset. Default is False. Returns ------- :obj:`xarray.Dataset` | :obj:`xarray.DataArray`: Modified dataset with CF compliant CRS information. Examples -------- Write the CRS of the current `xarray` object: >>> raster.rio.write_crs("epsg:4326", inplace=True) Write the CRS on a copy: >>> raster = raster.rio.write_crs("epsg:4326") """ if input_crs is not None: data_obj = self.set_crs(input_crs, inplace=inplace) else: data_obj = self._get_obj(inplace=inplace) # get original transform transform = self._cached_transform() # remove old grid maping coordinate if exists grid_mapping_name = (self.grid_mapping if grid_mapping_name is None else grid_mapping_name) try: del data_obj.coords[grid_mapping_name] except KeyError: pass if data_obj.rio.crs is None: raise MissingCRS( "CRS not found. Please set the CRS with 'rio.write_crs()'.") # add grid mapping coordinate data_obj.coords[grid_mapping_name] = xarray.Variable((), 0) if get_option(EXPORT_GRID_MAPPING): grid_map_attrs = pyproj.CRS.from_user_input( data_obj.rio.crs).to_cf() else: grid_map_attrs = {} # spatial_ref is for compatibility with GDAL crs_wkt = data_obj.rio.crs.to_wkt() grid_map_attrs["spatial_ref"] = crs_wkt grid_map_attrs["crs_wkt"] = crs_wkt if transform is not None: grid_map_attrs["GeoTransform"] = " ".join( [str(item) for item in transform.to_gdal()]) data_obj.coords[grid_mapping_name].rio.set_attrs(grid_map_attrs, inplace=True) return data_obj.rio.write_grid_mapping( grid_mapping_name=grid_mapping_name, inplace=True)
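# A short usage sketch of the accessor above (assumes rioxarray is installed);
# the raster values and coordinates are synthetic.
import numpy as np
import xarray
import rioxarray  # noqa: F401  (registers the .rio accessor)

raster = xarray.DataArray(
    np.zeros((2, 3)),
    dims=("y", "x"),
    coords={"y": [1.0, 0.0], "x": [0.0, 1.0, 2.0]},
)
raster = raster.rio.write_crs("EPSG:4326")
# a scalar grid-mapping coordinate (by default 'spatial_ref') now holds the CRS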
def test_CFMaskCoder_decode():
    original = xr.Variable(("x",), [0, -1, 1], {"_FillValue": -1})
    expected = xr.Variable(("x",), [0, np.nan, 1])
    coder = variables.CFMaskCoder()
    encoded = coder.decode(original)
    assert_identical(expected, encoded)
def _interpolate_to_raster(cls, variable, biggest_variable):
    shape = biggest_variable.shape
    full_size_array = resample_2d(variable.values, shape[1], shape[0])
    # xr.Variable expects dimension *names* as its first argument, not the
    # shape tuple; use the dims of the reference variable instead.
    return xr.Variable(biggest_variable.dims, full_size_array)
def test_coder_roundtrip():
    original = xr.Variable(("x",), [0.0, np.nan, 1.0])
    coder = variables.CFMaskCoder()
    roundtripped = coder.decode(coder.encode(original))
    assert_identical(original, roundtripped)
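# Public-API counterpart of the two coder tests above: decode_cf replaces the
# _FillValue sentinel with NaN, which is what CFMaskCoder.decode does
# internally. The variable name 'v' is illustrative.
import numpy as np
import xarray as xr

encoded = xr.Dataset({"v": ("x", [0, -1, 1], {"_FillValue": -1})})
decoded = xr.decode_cf(encoded)
# decoded["v"].values -> [0.0, nan, 1.0]; the fill value moves into .encoding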
def compute(self, data, selected_indexes):
    observed = data.time.values[selected_indexes]
    observed_date = xarray.Variable(('y', 'x'), datetime64_to_inttime(observed))
    return self._var_name, observed_date
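# datetime64_to_inttime is a project-specific helper; a plausible stand-in
# converts the observation times to integer seconds since the epoch before
# wrapping them in a ('y', 'x') Variable, as compute() does above.
import numpy as np
import xarray

observed = np.array([['2020-01-01', '2020-01-02'],
                     ['2020-01-03', '2020-01-04']], dtype='datetime64[s]')
observed_date = xarray.Variable(('y', 'x'), observed.astype('int64'))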
def load_single(file, drop_ghost=True, use_dask=True, var_list="all", ini_file=None): """Load a single step file and generate an xarray Dataset Parameters ---------- file : str or Path Location of the file to load drop_ghost : bool, optional Drop all of the ghost cells, by default True var_list : List, optional Load only a specific set of variables, by default 'all' Returns ------- xarray Dataset """ if var_list == "all": var_list = [ "density", "pressure", "sound_speed", "x_velocity", "y_velocity", "ghost_cell", "deposited_energy", "deposited_power", ] data_vars = {} space_dims = ("i", "j") if not file.endswith(".h5"): raise Exception("Step files must be .h5 files") h5 = h5py.File(file, "r") for v in var_list: try: h5[f"/{v}"].shape except KeyError: continue if use_dask: chunk_size = h5[f"/{v}"].shape array = da.from_array(h5[f"/{v}"], chunks=chunk_size) array = da.transpose(array) else: array = h5[f"/{v}"][()].T.astype(np.float32) try: long_name = var_dict[v]["long_name"] except Exception: long_name = "" try: description = h5[f"/{v}"].attrs["description"].decode("utf-8") except Exception: description = "" try: standard_name = var_dict[v]["standard_name"] except Exception: standard_name = "" try: units = h5[f"/{v}"].attrs["units"].decode("utf-8") except Exception: units = "" data_vars[f"{v}"] = xr.Variable( space_dims, array, attrs={ "units": units, "description": description, "long_name": long_name, "standard_name": standard_name, }, ) x = h5[f"/x"][()].T.astype(np.float32) x_units = h5[f"/x"].attrs["units"].decode("utf-8") y = h5[f"/y"][()].T.astype(np.float32) # Get the cell centers dy = (np.diff(x[0, :]) / 2.0)[0] dx = (np.diff(y[:, 0]) / 2.0)[0] # cell center locations xc = x[:-1, 0] + dx yc = y[0, :-1] + dy coords = { "time": h5[f"/time"][()].astype(np.float32), "x": (["i"], xc), "y": (["j"], yc), } time_units = h5[f"/time"].attrs["units"].decode("utf-8") # Get the details about the CATO build info_attr = {} info = [ "build_type", "compile_hostname", "compile_os", "compiler_flags", "compiler_version", "git_changes", "git_hash", "git_ref", "version", ] for v in info: try: info_attr[v] = h5["/cato_info"].attrs[f"{v}"].decode("utf-8") except Exception: pass attr_dict = info_attr attr_dict["time_units"] = time_units attr_dict["space_units"] = x_units if ini_file: input_dict = read_ini(ini_file) attr_dict.update(input_dict) ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attr_dict) if ini_file: try: ds.attrs["title"] = ds.attrs["general_title"] except Exception: pass if drop_ghost: try: ds = ds.where(ds["ghost_cell"] == 0, drop=True) return ds.drop("ghost_cell") except KeyError: return ds else: return ds
for f in filenames_albedo: tif_file = rasterio.open(tif_dir + f) out_name = tmp_dir + "/" + Path(f).stem + ".tif" modis_functions.tif_clip(tif_file, tile_shp, out_name) for f in filenames_qc: tif_file = rasterio.open(tif_dir + f) out_name = tmp_dir + "/" + Path(f).stem + ".tif" modis_functions.tif_clip(tif_file, tile_shp, out_name) # Definition of the flags and criteria for choosing them is presented in: # https://daac.ornl.gov/ABOVE/guides/Albedo_Boreal_North_America.html # https://doi.org/10.1111/gcb.14888 summer_flag = [0, 1, 2, 4, 5, 6, 16, 17, 18, 20, 21, 22] winter_flag = [0, 1, 2, 3, 4, 5, 6, 7, 15, 16, 17, 18, 19, 20, 21, 22, 23] date_xr = xr.Variable("time", date) a = xr.open_rasterio(tmp_dir + "/" + filenames_albedo[0]) chunks = {"x": int(a.sizes["x"]), "y": int(a.sizes["x"]), "band": 1} da_albedo_init = xr.open_rasterio(tmp_dir + "/" + filenames_albedo[0], chunks=chunks) da_qc_init = xr.open_rasterio(tmp_dir + "/" + filenames_qc[0], chunks=chunks) if 5 <= date[0].month <= 9: # Summer months da_albedo_init = da_albedo_init.where(da_qc_init.isin(summer_flag)) else: # Winter months da_albedo_init = da_albedo_init.where(da_qc_init.isin(winter_flag)) ds_init = da_albedo_init.to_dataset(name="Albedo") ds_init = ds_init.assign_coords({"time": date_xr[0]})
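# The QC filtering above keeps albedo pixels whose quality flag is in the
# accepted list and drops the rest as NaN. The same pattern on synthetic data:
import numpy as np
import xarray as xr

albedo = xr.DataArray(np.random.rand(4, 4), dims=("y", "x"))
qc = xr.DataArray(np.random.randint(0, 25, size=(4, 4)), dims=("y", "x"))

summer_flag = [0, 1, 2, 4, 5, 6, 16, 17, 18, 20, 21, 22]
albedo_masked = albedo.where(qc.isin(summer_flag))   # rejected pixels become NaN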
def __init__(self, data_dir, grid_dir=None, iternum=None, delta_t=1, read_grid=True, file_prefixes=None, ref_date=None, calendar=None, geometry='sphericalpolar', endian='>', ignore_unknown_vars=False, default_dtype=np.dtype('f4'), nx=None, ny=None, nz=None, llc_method="smallchunks"): """ This is not a user-facing class. See open_mdsdataset for argument documentation. The only ones which are distinct are. Parameters ---------- iternum : int, optional The iteration timestep number to read. file_prefixes : list The prefixes of the data files to be read. """ self.geometry = geometry.lower() allowed_geometries = [ 'cartesian', 'sphericalpolar', 'llc', 'curvilinear' ] if self.geometry not in allowed_geometries: raise ValueError('Unexpected value for parameter `geometry`. ' 'It must be one of the following: %s' % allowed_geometries) # the directory where the files live self.data_dir = data_dir self.grid_dir = grid_dir if (grid_dir is not None) else data_dir self._ignore_unknown_vars = ignore_unknown_vars # The endianness of the files # By default, MITgcm does big endian if endian not in ['>', '<', '=']: raise ValueError("Invalid byte order (endian=%s)" % endian) self.endian = endian if default_dtype is not None: self.default_dtype = np.dtype(default_dtype).newbyteorder(endian) else: self.default_dtype = default_dtype # storage dicts for variables and attributes self._variables = xr.core.pycompat.OrderedDict() self._attributes = xr.core.pycompat.OrderedDict() self._dimensions = [] # the dimensions are theoretically the same for all datasets [self._dimensions.append(k) for k in dimensions] self.llc = (self.geometry == 'llc') # TODO: and maybe here a check for the presence of layers? # we don't need to know ny if using llc if self.llc and (nx is not None): ny = nx # Now we need to figure out the dimensions of the numerical domain, # nx, ny, nz # nface is the number of llc faces if (nz is not None) and (ny is not None) and (nz is not None): # we have been passed enough information to determine the # dimensions without reading any files self.nz, self.ny, self.nx = nz, ny, nx self.nface = LLC_NUM_FACES if self.llc else None else: # have to peek at the grid file metadata self.nz, self.nface, self.ny, self.nx = (_guess_model_dimensions( self.grid_dir, self.llc)) self.layers = _guess_layers(data_dir) if self.llc: nyraw = self.nx * self.nface else: nyraw = self.ny self.default_shape_3D = (self.nz, nyraw, self.nx) self.default_shape_2D = (nyraw, self.nx) self.llc_method = llc_method # Now set up the corresponding coordinates. # Rather than assuming the dimension names, we use Comodo conventions # to parse the dimension metdata. 
# http://pycomodo.forge.imag.fr/norm.html irange = np.arange(self.nx) jrange = np.arange(self.ny) krange = np.arange(self.nz) krange_p1 = np.arange(self.nz + 1) # the keys are `standard_name` attribute dimension_data = { "x_grid_index": irange, "x_grid_index_at_u_location": irange, "x_grid_index_at_f_location": irange, "y_grid_index": jrange, "y_grid_index_at_v_location": jrange, "y_grid_index_at_f_location": jrange, "z_grid_index": krange, "z_grid_index_at_lower_w_location": krange, "z_grid_index_at_upper_w_location": krange, "z_grid_index_at_w_location": krange_p1, } for dim in self._dimensions: dim_meta = dimensions[dim] dims = dim_meta['dims'] attrs = dim_meta['attrs'] data = dimension_data[attrs['standard_name']] dim_variable = xr.Variable(dims, data, attrs) self._variables[dim] = dim_variable # possibly add the llc dimension # seems sloppy to hard code this here # TODO: move this metadata to variables.py if self.llc: self._dimensions.append(LLC_FACE_DIMNAME) data = np.arange(self.nface) attrs = {'standard_name': 'face_index'} dims = [LLC_FACE_DIMNAME] self._variables[LLC_FACE_DIMNAME] = xr.Variable(dims, data, attrs) # do the same for layers for layer_name, n_layer in self.layers.items(): for suffix, offset in zip(['bounds', 'center', 'interface'], [0, -1, -2]): # e.g. "layer_1RHO_bounds" # dimname = 'layer_' + layer_name + '_' + suffix # e.g. "l1_b" dimname = 'l' + layer_name[0] + '_' + suffix[0] self._dimensions.append(dimname) data = np.arange(n_layer + offset) # we should figure out a way to properly populate the layers # attributes attrs = { 'standard_name': layer_name + '_layer_grid_index_at_layer_' + suffix, 'swap_dim': 'layer_' + layer_name + '_' + suffix } dim_variable = xr.Variable([dimname], data, attrs) self._variables[dimname] = dim_variable # maybe add a time dimension if iternum is not None: self.time_dim_name = 'time' self._dimensions.append(self.time_dim_name) # a variable for iteration number self._variables['iter'] = xr.Variable( (self.time_dim_name, ), [iternum], { 'standard_name': 'timestep', 'long_name': 'model timestep number' }) self._variables[ self.time_dim_name] = _iternum_to_datetime_variable( iternum, delta_t, ref_date, calendar, self.time_dim_name) # build lookup tables for variable metadata self._all_grid_variables = _get_all_grid_variables( self.geometry, self.layers) self._all_data_variables = _get_all_data_variables( self.data_dir, self.layers) # The rest of the data has to be read from disk. # The list `prefixes` specifies file prefixes from which to infer # The problem with this is that some prefixes are single variables # while some are multi-variable diagnostics files. prefixes = [] if read_grid: prefixes = prefixes + list(self._all_grid_variables.keys()) # add data files prefixes = ( prefixes + _get_all_matching_prefixes(data_dir, iternum, file_prefixes)) for p in prefixes: # use a generator to loop through the variables in each file for (vname, dims, data, attrs) in self.load_from_prefix(p, iternum): # print(vname, dims, data.shape) #Sizes of grid variables can vary between mitgcm versions. Check for #such inconsistency and correct if so (vname, dims, data, attrs) = self.fix_inconsistent_variables( vname, dims, data, attrs) thisvar = xr.Variable(dims, data, attrs) self._variables[vname] = thisvar
def driver(args): gridpath = './script_files/' #args.gridpath #'/glade/work/gmarques/cesm/mom6_input_files/tx0.66v1/salinity_restoring' if not os.path.isdir(gridpath): print('Creating a directory to place SCRIP files: {} ... \n'.format(gridpath)) os.system('mkdir '+gridpath) esmlab.config.set({'regrid.gridfile-directory': gridpath}) # src and dst grids if args.src_grid_name == 'WOA_01': src_grid_name = 'WOA_01_SCRIP' os.system('ln -s /glade/work/gmarques/cesm/datasets/WOA18/WOA_01_SCRIP.nc '+gridpath) elif args.src_grid_name == 'WOA_04': src_grid_name = 'WOA_04_SCRIP' os.system('ln -s /glade/work/gmarques/cesm/datasets/WOA18/WOA_04_SCRIP.nc '+gridpath) else: raise ValueError('The source grid name provided, {}, is not supported. Please use WOA_01 or WOA_04.'.format(args.src_grid_name)) if args.dst_grid_name == 'tx0.66v1': dst_grid_name = 'tx0.66v1_SCRIP_190314' os.system('ln -s /glade/work/altuntas/mom.input/tx0.66v1/gen_grid_190314/tx0.66v1_SCRIP_190314.nc '+gridpath) # prototype for the restoring file for the tx0.66v1 grid ds_out = xr.open_dataset('/glade/p/cesmdata/cseg/inputdata/ocn/mom/tx0.66v1/salt_restore_tx0.66v1_180828.nc', decode_times=False) ds_out['theta0'] = xr.Variable(dims=('TIME','LAT','LON'), data = np.zeros(ds_out.salt.shape)) elif args.dst_grid_name == 'tx0.1v3': dst_grid_name = 'tx0.1v3_SCRIP_200721' os.system('ln -s /glade/work/gmarques/Projects/MOM_tx0.1_v3/tx0.1v3_SCRIP_200721.nc '+gridpath) # prototype for the restoring file for the tx0.66v1 grid ds_out = xr.open_dataset('/glade/work/gmarques/Projects/MOM_tx0.1_v3/salt_restore_tx0.1v3_200807.nc', decode_times=False) ds_out['theta0'] = xr.Variable(dims=('TIME','LAT','LON'), data = np.zeros(ds_out.salt.shape)) else: raise ValueError('The destination grid name provided, {}, is not supported. Only tx0.66v1 is supported at this point. 
'.format(args.dst_grid_name)) # generate weights R_bilinear = esmlab_regrid.regridder(name_grid_src=src_grid_name, name_grid_dst=dst_grid_name, method='bilinear', overwrite_existing=True) ########################################################################### # WOA salinity file with land fill, created using create_filled_sfc.py woa = xr.open_dataset(args.infile, decode_times=False) # average between two-layers (depth = 0 and depth = 10, depth indices 0 and 2) woa_s_an_surface_ave = woa.s_an.isel(depth=[0,2]).mean('depth') woa_theta0_surface_ave = woa.theta0.isel(depth=[0,2]).mean('depth') # regrid and compare against original for m in range(len(woa.time)): ds_out.salt[m,:] = R_bilinear(woa_s_an_surface_ave[m,:]).rename({'lat':'LAT', 'lon':'LON'}) ds_out.theta0[m,:] = R_bilinear(woa_theta0_surface_ave[m,:]).rename({'lat':'LAT', 'lon':'LON'}) ########################################################################### # Global attrs ds_out.attrs['title'] = 'surface salinity and potential temperature from WOA filled over continents' ds_out.attrs['src_file'] = args.infile ds_out.attrs['src_grid_name'] = args.src_grid_name ds_out.attrs['dst_grid_name'] = args.dst_grid_name ds_out.attrs['author'] = args.author ds_out.attrs['date'] = datetime.now().isoformat() ds_out.attrs['created_using'] = os.path.basename(__file__) + ' -path_out ' + args.path_out + ' -author ' + \ args.author + ' -infile ' + args.infile + ' -src_grid_name ' + args.src_grid_name + \ ' -dst_grid_name ' + args.dst_grid_name ds_out.attrs['url'] = os.path.basename(__file__) + ' can be found at https://github.com/NCAR/WOA_MOM6' ds_out.attrs['git_hash'] = str(subprocess.check_output(["git", "describe","--always"]).strip()) # save fname = 'state_restore_{}_{}{}{}.nc'.format(args.dst_grid_name, datetime.now().isoformat()[0:4],datetime.now().isoformat()[5:7], datetime.now().isoformat()[8:10]) ds_out.to_netcdf(args.path_out+fname) print('Done!') return
plev_xr = data['plev'] lat_xr = data['lat'] k_xr = data['k'] plev = plev_xr.values lat = lat_xr.values k = k_xr.values f = data['f'].values # Transpose # NOTE: This is to speed things up when looping over the array data = data.transpose('lat', 'plev', 'k', 'f') # Output dataset cph_q = np.arange(-50, 50.01, 1.0) # try to reduce the resolution! cph_xr = xr.Variable(('c', ), cph_q, { 'long_name': 'phase speed', 'units': 'm/s' }) data_c = xr.Dataset({}, coords={ 'lat': lat_xr, 'plev': plev_xr, 'k': k_xr, 'c': cph_xr }) # Iterate over names, lattiudes, longitudes # Problem is that 'phase speed' is actually a line through the wavenumber, # frequency, and latitude dimensions! # As suggested by Randel and Held, *interpolate* from omega by k # to phase speed by k (basically interpolates to diagonal lines in # an omega by k plot, since cph == omega/k). See:
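# A toy version of the interpolation described in the comment above, for one
# zonal wavenumber: with cph = omega / k, a spectrum sampled on frequencies f
# can be re-sampled onto the fixed phase-speed bins cph_q. Values are
# synthetic and no Jacobian weighting is applied here.
import numpy as np

k = 3.0                             # a single zonal wavenumber
f = np.linspace(0.05, 2.0, 50)      # frequencies
power_f = np.exp(-f)                # toy spectrum along the frequency axis

cph_q = np.arange(-50, 50.01, 1.0)  # target phase-speed bins, as above
c_of_f = f / k                      # phase speed of each sampled frequency
power_c = np.interp(cph_q, c_of_f, power_f, left=0.0, right=0.0)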
def make_xrvar(dim_lengths):
    return xr.Variable(tuple(dim_lengths.keys()),
                       make_sparray(shape=tuple(dim_lengths.values())))
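# make_sparray is defined elsewhere in the test module; assuming it returns a
# sparse.COO array of the requested shape, make_xrvar({'x': 10, 'y': 5}) yields
# a Variable with dims ('x', 'y') and shape (10, 5). An equivalent sketch
# using sparse.random directly:
import sparse
import xarray as xr

dim_lengths = {'x': 10, 'y': 5}
data = sparse.random(tuple(dim_lengths.values()), density=0.5, random_state=0)
var = xr.Variable(tuple(dim_lengths.keys()), data)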
def get_dataset(self, varnames=None, iter_start=None, iter_stop=None, iter_step=None, iters=None, k_levels=None, k_chunksize=1, type='faces', read_grid=True, grid_vars_to_coords=True): """ Create an xarray Dataset object for this model. Parameters ---------- *varnames : list of strings, optional The variables to include, e.g. ``['Salt', 'Theta']``. Otherwise include all known variables. iter_start : int, optional Starting iteration number. Otherwise use model default. Follows standard `range` conventions. (inclusive) iter_stop : int, optional Stopping iteration number. Otherwise use model default. Follows standard `range` conventions. (exclusive) iter_step : int, optional Iteration number stepsize. Otherwise use model default. iters : list of ints, optional Specific iteration numbers in a list, possibly with nonuniform spacing. Either provide this or the iter parameters above. k_levels : list of ints, optional Vertical levels to extract. Default is to get them all k_chunksize : int, optional How many vertical levels per Dask chunk. type : {'faces', 'latlon'}, optional What type of dataset to create read_grid : bool, optional Whether to read the grid info grid_vars_to_coords : bool, optional Whether to promote grid variables to coordinate status Returns ------- ds : xarray.Dataset """ def _if_not_none(a, b): if a is None: return b else: return a user_iter_params = [iter_start, iter_stop, iter_step] attribute_iter_params = [ self.iter_start, self.iter_stop, self.iter_step ] # If the user has specified some iter params: if any([a is not None for a in user_iter_params]): # If iters is also set we have a problem if iters is not None: raise ValueError( "Only `iters` or the parameters `iter_start`, `iters_stop`, " "and `iter_step` can be provided. Both were provided") # Otherwise we can override any missing values iter_start = _if_not_none(iter_start, self.iter_start) iter_stop = _if_not_none(iter_stop, self.iter_stop) iter_step = _if_not_none(iter_step, self.iter_step) iter_params = [iter_start, iter_stop, iter_step] if any([a is None for a in iter_params]): raise ValueError( "The parameters `iter_start`, `iter_stop`, " "and `iter_step` must be defined either by the " "model class or as argument. Instead got %r " % iter_params) # Otherwise try loading from the user set iters elif iters is not None: pass # Now have a go at using the attribute derived iteration parameters elif all([a is not None for a in attribute_iter_params]): iter_params = attribute_iter_params # Now try using the attribute derived iters elif self.iters is not None: iters = self.iters # Now give up else: raise ValueError( "The parameters `iter_start`, `iter_stop`, " "and `iter_step`, or `iters` must be defined either by the " "model class or as argument") # Check the iter_start and iter_step if iters is None: self._check_iter_start(iter_params[0]) self._check_iter_step(iter_params[2]) iters = np.arange(*iter_params) else: self._check_iters(iters) iters = np.array(iters) varnames = varnames or self.varnames # grid stuff read_grid = read_grid and len(self.grid_varnames) != 0 if read_grid and self.store.grid_path is None: raise TypeError( 'Cannot read grid if grid_path is not specified in filestore (e.g. llcreader.known_models)' ) grid_vars_to_coords = grid_vars_to_coords and read_grid grid_varnames = self.grid_varnames if read_grid else [] ds = self._make_coords_faces(iters) if type == 'latlon': if self.domain == 'aste': raise TypeError( 'Swapping to lat/lon not available for ASTE. Must regrid or interpolate.' 
) ds = _faces_coords_to_latlon(ds) k_levels = k_levels or list(range(self.nz)) kp1_levels = self._get_kp1_levels(k_levels) ds = ds.sel(k=k_levels, k_l=k_levels, k_u=k_levels, k_p1=kp1_levels) # get the data in facet form data_facets = { vname: self._get_facet_data(vname, iters, k_levels, k_chunksize) for vname in varnames } # get the grid in facet form # do separately for vertical coords on kp1_levels grid_facets = {} for vname in grid_varnames: my_k_levels = k_levels if _VAR_METADATA[vname]['dims'] != [ 'k_p1' ] else kp1_levels grid_facets[vname] = self._get_facet_data(vname, None, my_k_levels, k_chunksize) # transform it into faces or latlon data_transformers = { 'faces': _all_facets_to_faces, 'latlon': _all_facets_to_latlon } transformer = data_transformers[type] data = transformer(data_facets, _VAR_METADATA, self.nface) # separate horizontal and vertical grid variables hgrid_facets = { key: grid_facets[key] for key in grid_varnames if not _is_vgrid(key) } vgrid_facets = { key: grid_facets[key] for key in grid_varnames if _is_vgrid(key) } # do not transform vertical grid variables data.update(transformer(hgrid_facets, _VAR_METADATA, self.nface)) data.update(vgrid_facets) variables = {} gridlist = ['Zl', 'Zu'] if read_grid else [] for vname in varnames + grid_varnames: meta = _VAR_METADATA[vname] dims = meta['dims'] if type == 'faces': dims = _add_face_to_dims(dims) dims = [ 'time', ] + dims if vname not in grid_varnames else dims attrs = meta['attrs'] # Handle grid names different from filenames fname = vname vname = meta['real_name'] if 'real_name' in meta else vname if fname in grid_varnames: gridlist.append(vname) variables[vname] = xr.Variable(dims, data[fname], attrs) # handle vertical coordinate after the fact if read_grid and 'RF' in grid_varnames: ki = np.array([list(kp1_levels).index(x) for x in k_levels]) for zv, sl in zip(['Zl', 'Zu'], [ki, ki + 1]): variables[zv] = xr.Variable(_VAR_METADATA[zv]['dims'], data['RF'][sl], _VAR_METADATA[zv]['attrs']) ds = ds.update(variables) if grid_vars_to_coords: ds = ds.set_coords(gridlist) return ds