def test_gradient_xarray_implicit_axes(test_da_xy):
    """Test the 2D gradient calculation with a 2D DataArray and no axes specified."""
    data = test_da_xy.isel(time=0, isobaric=2)
    deriv_y, deriv_x = gradient(data)

    truth_x = xr.full_like(data, -6.993007e-07)
    truth_x.attrs['units'] = 'kelvin / meter'

    truth_y = xr.full_like(data, -2.797203e-06)
    truth_y.attrs['units'] = 'kelvin / meter'

    xr.testing.assert_allclose(deriv_x, truth_x)
    assert deriv_x.metpy.units == truth_x.metpy.units

    xr.testing.assert_allclose(deriv_y, truth_y)
    assert deriv_y.metpy.units == truth_y.metpy.units
def test_first_derivative_xarray_time_and_default_axis(test_da_xy):
    """Test first derivative with an xarray.DataArray over time as default first dimension."""
    deriv = first_derivative(test_da_xy)
    truth = xr.full_like(test_da_xy, -0.000777000777)
    truth.attrs['units'] = 'kelvin / second'

    xr.testing.assert_allclose(deriv, truth)
    assert deriv.metpy.units == truth.metpy.units
def test_gradient_xarray(test_da_xy):
    """Test the 3D gradient calculation with a 4D DataArray in each axis usage."""
    deriv_x, deriv_y, deriv_p = gradient(test_da_xy, axes=('x', 'y', 'isobaric'))
    deriv_x_alt1, deriv_y_alt1, deriv_p_alt1 = gradient(test_da_xy,
                                                        axes=('x', 'y', 'vertical'))
    deriv_x_alt2, deriv_y_alt2, deriv_p_alt2 = gradient(test_da_xy, axes=(3, 2, 1))

    truth_x = xr.full_like(test_da_xy, -6.993007e-07)
    truth_x.attrs['units'] = 'kelvin / meter'

    truth_y = xr.full_like(test_da_xy, -2.797203e-06)
    truth_y.attrs['units'] = 'kelvin / meter'

    partial = xr.DataArray(
        np.array([0.04129204, 0.03330003, 0.02264402]),
        coords=(('isobaric', test_da_xy['isobaric']),)
    )
    _, truth_p = xr.broadcast(test_da_xy, partial)
    truth_p.coords['crs'] = test_da_xy['crs']
    truth_p.attrs['units'] = 'kelvin / hectopascal'

    # Assert results match expectations
    xr.testing.assert_allclose(deriv_x, truth_x)
    assert deriv_x.metpy.units == truth_x.metpy.units

    xr.testing.assert_allclose(deriv_y, truth_y)
    assert deriv_y.metpy.units == truth_y.metpy.units

    xr.testing.assert_allclose(deriv_p, truth_p)
    assert deriv_p.metpy.units == truth_p.metpy.units

    # Assert alternative specifications give same results
    xr.testing.assert_identical(deriv_x_alt1, deriv_x)
    xr.testing.assert_identical(deriv_y_alt1, deriv_y)
    xr.testing.assert_identical(deriv_p_alt1, deriv_p)

    xr.testing.assert_identical(deriv_x_alt2, deriv_x)
    xr.testing.assert_identical(deriv_y_alt2, deriv_y)
    xr.testing.assert_identical(deriv_p_alt2, deriv_p)
def tx90p(tasmax, t90, freq="YS"):
    r"""Number of days with daily maximum temperature over the 90th percentile.

    Parameters
    ----------
    tasmax : xarray.DataArray
        Maximum daily temperature [℃] or [K]
    t90 : xarray.DataArray
        90th percentile of daily maximum temperature [℃] or [K]
    freq : str, optional
        Resampling frequency

    Returns
    -------
    xarray.DataArray
        Count of days with daily maximum temperature above the 90th percentile [days]

    Notes
    -----
    The 90th percentile should be computed for a 5 day window centered on each calendar day
    for a reference period.

    Example
    -------
    >>> t90 = percentile_doy(historical_tasmax, per=0.9)
    >>> hot_days = tx90p(tasmax, t90)
    """
    if "dayofyear" not in t90.coords.keys():
        raise AttributeError("t90 should have dayofyear coordinates.")

    t90 = utils.convert_units_to(t90, tasmax)

    # adjustment of t90 to the doy range of tasmax
    t90 = utils.adjust_doy_calendar(t90, tasmax)

    # create array of percentile with the shape and coords of tasmax
    thresh = xr.full_like(tasmax, np.nan)
    doy = thresh.time.dt.dayofyear.values
    thresh.data = t90.sel(dayofyear=doy)

    # compute the hot days
    over = tasmax > thresh

    return over.resample(time=freq).sum(dim="time")
def get_dataset(self, **kwargs):
    """Get a dataset with cost, ctrl, and sensitivity fields by iteration.

    Parameters
    ----------
    kwargs : dict, optional
        passed to xmitgcm.open_mdsdataset

    Returns
    -------
    ds : xarray.Dataset
        with cost, ctrl, and sensitivity fields by iteration
    """
    cost = self.read_cost_function()
    ctrl = self.read_the_xx(**kwargs)
    both = xr.merge([cost, ctrl])

    # flag which simuls correspond to Newton iterations
    _, simuls = self.read_m1qn3()
    both['simulIsIter'] = xr.full_like(both.simul, True, dtype=bool)
    for mysimul in both.simul.values:
        if mysimul not in simuls:
            both['simulIsIter'].loc[{'simul': mysimul}] = False

    iters = []
    i = 0
    for k in both.simul.values:
        if both.simulIsIter.sel(simul=k).values:
            myiter = i
            i += 1
        else:
            myiter = np.nan
        iters.append(myiter)

    both['iters'] = xr.DataArray(np.asarray(iters),
                                 coords=both.simul.coords,
                                 dims=both.simul.dims)
    both = both.set_coords('iters')

    # cleanup
    both['simulIsIter'] = xr.where(np.isnan(both['simulIsIter']),
                                   False,
                                   both['simulIsIter']).astype(bool)
    both['iters'] = both.iters.where(both.simulIsIter)
    both['simulAtMaxIter'] = both.simul.where(both.simulIsIter).max('simul')
    return both
def tn10p(tasmin, t10, freq='YS'):
    r"""Number of days with daily minimum temperature below the 10th percentile.

    Parameters
    ----------
    tasmin : xarray.DataArray
        Minimum daily temperature [℃] or [K]
    t10 : xarray.DataArray
        10th percentile of daily minimum temperature [℃] or [K]
    freq : str, optional
        Resampling frequency

    Returns
    -------
    xarray.DataArray
        Count of days with daily minimum temperature below the 10th percentile [days]

    Notes
    -----
    The 10th percentile should be computed for a 5 day window centered on each calendar day
    for a reference period.

    Example
    -------
    >>> t10 = percentile_doy(historical_tasmin, per=0.1)
    >>> cold_days = tn10p(tasmin, t10)
    """
    if 'dayofyear' not in t10.coords.keys():
        raise AttributeError("t10 should have dayofyear coordinates.")

    t10 = utils.convert_units_to(t10, tasmin)

    # adjustment of t10 to the doy range of tasmin
    t10 = utils.adjust_doy_calendar(t10, tasmin)

    # create array of percentile with the shape and coords of tasmin
    thresh = xr.full_like(tasmin, np.nan)
    doy = thresh.time.dt.dayofyear.values
    thresh.data = t10.sel(dayofyear=doy)

    # compute the cold days
    below = (tasmin < thresh)

    return below.resample(time=freq).sum(dim='time')
def rank_variable(array):
    """Rank an array assigning values 1,2,3 ... to array elements going from
    biggest to smallest.

    Parameters
    ----------
    array: xarray DataArray
        input variable

    Returns
    -------
    rank: a rank order array
    """
    rank_values = len(array) - array.values.argsort().argsort()
    rank = xr.full_like(array, np.nan)
    rank[:] = rank_values
    return rank
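# A minimal usage sketch (hypothetical data, not from the source) showing how
# rank_variable assigns rank 1 to the largest element:
#
#   da = xr.DataArray([3.0, 1.0, 2.0], dims='x')
#   rank_variable(da).values   # -> array([1., 3., 2.])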
def reduce_chunked(self, xs, output):
    """Computes the maximum across a chunk

    Parameters
    ----------
    xs : iterable
        Iterable of sources

    Returns
    -------
    UnitsDataArray
        Maximum of the source data over dims
    """
    # note: np.fmax ignores NaNs, np.maximum propagates NaNs
    y = xr.full_like(output, np.nan)
    for x in xs:
        y = np.fmax(y, x.max(dim=self._dims))
    return y
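# Why np.fmax rather than np.maximum (illustrative of standard NumPy behaviour):
#
#   np.maximum(np.nan, 1.0)   # -> nan  (NaN propagates)
#   np.fmax(np.nan, 1.0)      # -> 1.0  (NaN ignored unless both inputs are NaN)
#
# Seeding y with NaN therefore lets the first chunk's maximum pass through unchanged.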
def full_like(other, fill_value=np.nan, add_coords={}, replace_vars=[]):
    data = xr.full_like(other, fill_value)
    if isinstance(data, xr.DataArray):
        data = data.to_dataset()

    for k, v in add_coords.items():
        data[k] = v
        data[k].assign_coords()

    if replace_vars:
        data = data.drop(data.data_vars.keys())
        dims = data.dims.keys()
        shape = tuple(data.dims.values())
        empty = np.zeros(shape=shape)
        empty[empty == 0] = fill_value
        empty = xr.DataArray(empty, coords=data.coords, dims=data.dims)
        for var in replace_vars:
            data[var] = empty.copy()

    return data
def align_concat_fill(*args, **kwargs):
    #
    # Put Datasets together
    #
    import numpy as np
    import xarray as xr

    xr.set_options(keep_attrs=True)
    #
    # align dimensions
    #
    args = xr.align(*args, join='outer', copy=False)
    message("Alignment completed", **kwargs)
    #
    # align datavars
    #
    dummy = xr.Dataset()
    for iset in args:
        ivars = list(iset.data_vars)
        for ivar in ivars:
            if ivar not in dummy.data_vars:
                dummy[ivar] = xr.full_like(iset[ivar], np.nan)  # copies attributes as well

    message("Initialization completed", dummy, **kwargs)
    #
    # add missing variables
    #
    for iset in args:
        for ivar in list(dummy.data_vars):
            if ivar not in iset.data_vars:
                iset[ivar] = dummy[ivar]
    #
    # concat
    #
    out = xr.concat(args, **kwargs)
    message("Concatenation completed", **kwargs)
    #
    # restore some Attributes, as they are removed by concat 'equal'
    #
    for ivar in out.data_vars:
        out[ivar].attrs.update(dict(dummy[ivar].attrs))
    return out
def time_discretization(model, max_perlen, endtime, starttime=None,
                        n_timesteps_p1=1, n_timesteps_rest=1, **kwargs):
    """
    Collect all unique times and subdivide.

    Adapted from the function in imod/wq/model.py
    """
    model.use_cftime = model._use_cftime()

    times = []
    for pkg in model.values():
        if "time" in pkg.coords:
            times.append(pkg["time"].values)

    # TODO: check that endtime is later than all other times.
    times.append(imod.wq.timeutil.to_datetime(endtime, model.use_cftime))
    if starttime is not None:
        times.append(imod.wq.timeutil.to_datetime(starttime, model.use_cftime))

    # np.unique also sorts
    times = np.unique(np.hstack(times))
    duration = imod.wq.timeutil.timestep_duration(times, use_cftime=True)

    # Update times, ensuring that max_perlen is not exceeded.
    nper_extra = [int(d / (max_perlen * 365.25)) for d in duration]
    nu_times = add_timesteps(max_perlen, times, nper_extra)
    nu_duration = imod.wq.timeutil.timestep_duration(nu_times, use_cftime=True)

    # Generate time discretization, just rely on default arguments
    # Probably won't be used that much anyway?
    timestep_duration = xr.DataArray(nu_duration,
                                     coords={"time": np.array(nu_times)[:-1]},
                                     dims=("time",))
    n_timesteps = xr.full_like(timestep_duration, n_timesteps_rest).astype(np.int64)
    n_timesteps[0] = n_timesteps_p1
    model["time_discretization"] = imod.wq.TimeDiscretization(
        timestep_duration=timestep_duration, n_timesteps=n_timesteps, **kwargs)
def add_empty_first_date_array(ds, time_slice, first_query_date):
    def create_new_date_string(first_query_date, time_slice):
        yr = first_query_date.split('-')[0]
        mt = '{:02d}'.format(time_slice)
        dy = first_query_date.split('-')[2]
        return yr + '-' + mt + '-' + dy

    # tell user
    print('Inserting empty first date, if required, to improve resampling. Please wait.')

    # for specific month or season, correct month
    if time_slice == 'q1':
        first_query_date = np.datetime64(create_new_date_string(first_query_date, 1))
    elif time_slice == 'q2':
        first_query_date = np.datetime64(create_new_date_string(first_query_date, 4))
    elif time_slice == 'q3':
        first_query_date = np.datetime64(create_new_date_string(first_query_date, 7))
    elif time_slice == 'q4':
        first_query_date = np.datetime64(create_new_date_string(first_query_date, 10))
    elif isinstance(time_slice, int):
        first_query_date = np.datetime64(create_new_date_string(first_query_date, time_slice))
    else:
        first_query_date = np.datetime64(first_query_date)

    # check if already exists
    if ds['time'].isel(time=0) == first_query_date:
        return ds

    # generate empty and modify date
    empty_date = xr.full_like(ds.isel(time=0), fill_value=np.nan)
    empty_date['time'] = first_query_date

    # combine and sort
    ds = xr.concat([ds, empty_date], dim='time').sortby('time')

    return ds
def met_data(self):
    if self._met_data is None:
        if self.domain is None:
            self._domain = io.read_domain(self.params).isel(**self._domain_slice)
        self._met_data = io.read_met_data(self.params, self._domain)
        self._met_data['elev'] = self.domain['elev']
        self._met_data['lat'] = self.domain['lat']
        self._met_data['lon'] = self.domain['lon']

        # process constant_vars
        constant_vars = self.params.get('constant_vars', None)
        if constant_vars:
            da_template = self._met_data[list(self._met_data)[0]]
            for var in constant_vars.keys():
                self._met_data[var] = xr.full_like(da_template, float(constant_vars[var]))

        self._validate_force_times(force_times=self._met_data['time'])
    return self._met_data
def get_variable(self, run: str, varname: Hashable) -> xr.DataArray:
    """Query a collection of diagnostics for a given run and variable

    Args:
        run: name of the run to query
        varname: variable to extract from the expected run

    Returns:
        varname of run if present, otherwise nans with the expected metadata
    """
    if varname in self._varnames[run]:
        return self._get_run(run)[varname]
    else:
        for run in self._varnames:
            if varname in self._varnames[run]:
                template = self._get_run(run)[varname]
                return xr.full_like(template, np.nan)
        raise ValueError(f"{varname} not found.")
def map_property_onto_objects(objects, object_file, property):
    base_name, objects_mask = object_file.split('.objects.')

    object_properties = genesis.objects.get_data(base_name, mask_identifier=objects_mask)
    object_property = object_properties[property]
    N_objects = len(object_properties.object_id)

    properties_mapped = xr.full_like(objects, fill_value=np.nan,
                                     dtype=object_property.dtype)
    properties_mapped.attrs.update(object_property.attrs)
    properties_mapped.name = object_property.name

    print("Mapping {} onto {} objects...".format(property, N_objects))
    for object_id in tqdm.tqdm(object_properties.object_id):
        if object_id == 0:
            continue
        v = object_property.sel(object_id=object_id).values
        properties_mapped = properties_mapped.where(objects != object_id, other=v)

    return properties_mapped
def run(self):
    if self.profiles_data is None:
        self.prepare_data()

    self.main_flag = xr.full_like(self.profiles_data['Temperature'], 2, dtype='i8')
    self.main_flag.name = "Main flag"

    indx_fail = np.array([], dtype='i8')
    indx_suspect = np.array([], dtype='i8')
    indx_pass = np.array([], dtype='i8')

    for test in self.qctests:
        self.qctests[test].apply(self.profiles_data)
        flag = self.qctests[test].get_flag()
        self.flag_dict[test] = flag
        indx_fail = np.append(indx_fail, np.argwhere(flag.data == 4))
        indx_suspect = np.append(indx_suspect, np.argwhere(flag.data == 3))
        indx_pass = np.append(indx_pass, np.argwhere(flag.data == 1))

    self.main_flag.data[np.unique(indx_pass)] = 1
    self.main_flag.data[np.unique(indx_suspect)] = 3
    self.main_flag.data[np.unique(indx_fail)] = 4
def get_area_domain(dset, directory='./', write=0, radius=6378.137):
    """Input is a VOLCAT dataset. Use after volcat.open_dataset(fname).
    Provide directory for where the area netcdf should be located. Default is cwd.
    Default is to NOT write the area array to a netcdf file.
    Output is an xarray of the same size with the corresponding area (km2) of each grid cell.
    Converts degrees to kilometers using a radius of 6378.137 km."""
    import xarray as xr
    import numpy as np
    from math import pi

    d2r = pi / 180.0     # convert degrees to radians
    d2km = radius * d2r  # convert degree latitude to kilometers

    ash_mass = dset.ash_mass        # pulls out ash mass array
    ash_mass = ash_mass[0, :, :]    # removes time dimension
    lat = ash_mass.latitude
    lon = ash_mass.longitude
    latrad = lat * d2r              # creating latitude array in radians
    coslat = np.cos(latrad) * d2km * d2km  # grouping constant multiplication outside of loop

    # Creates an array copy of ash_mass filled with the fill value
    area = xr.full_like(ash_mass, ash_mass._FillValue)
    shape = np.shape(area)

    # Begins looping through each element of array
    i = 0
    while i < (shape[0] - 1):
        j = 0
        while j < (shape[1] - 1):
            area[i, j] = abs(lat[i, j] - lat[i + 1, j]) * abs(
                abs(lon[i, j]) - abs(lon[i, j + 1])) * coslat[i, j]
            j += 1
        i += 1

    if write == 1:
        # Reformatting array attributes before writing to netcdf
        area.name = 'area'
        area.attrs['long_name'] = 'area of each lat/lon grid box'
        area.attrs['units'] = 'km^2'
        area.to_netcdf(directory + 'area_whole_domain.nc')  # writes area array to netcdf

    return area
def first_run_after_date(
    da: xr.DataArray,
    window: int,
    date: str = "07-01",
    dim: str = "time",
    coord: Optional[Union[bool, str]] = "dayofyear",
):
    """Return the index of the first item of the first run after a given date.

    Parameters
    ----------
    da : xr.DataArray
        Input N-dimensional DataArray (boolean)
    window : int
        Minimum duration of consecutive run to accumulate values.
    date : str
        The date after which to look for the run.
    dim : str
        Dimension along which to calculate consecutive run (default: 'time').
    coord : Optional[Union[bool, str]]
        If not False, the function returns values along `dim` instead of indexes.
        If `dim` has a datetime dtype, `coord` can also be a str of the name of the
        DateTimeAccessor object to use (ex: 'dayofyear').

    Returns
    -------
    out : xr.DataArray
        Index (or coordinate if `coord` is not False) of first item in the first valid run.
        Returns np.nan if there are no valid runs.
    """
    after_date = datetime.strptime(date, "%m-%d").timetuple().tm_yday

    mid_idx = np.where(da.time.dt.dayofyear == after_date)[0]
    if mid_idx.size == 0:  # The date is not within the group. Happens at boundaries.
        return xr.full_like(da.isel(time=0), np.nan, float).drop_vars("time")

    return first_run(
        da.where(da.time >= da.time[mid_idx][0]),
        window=window,
        dim=dim,
        coord=coord,
    )
def classify_landform(ds, elevation_levels=[], TYPE='SIMPLE'):
    """Subdivide landform classes by aspect class."""
    SHAPE = ds['mask'].shape

    lf_cl = np.ma.masked_array(np.ones_like(ds['mask'].values), mask=ds['mask'].values)

    # depending on the classification scheme we need different slope classes that
    # have an aspect component
    if TYPE == 'SIMPLE':
        aspect_lf = [3]
    elif TYPE == 'WEISS':
        aspect_lf = [2, 3, 5]
    else:
        log.error('Currently only classification schemes WEISS, SIMPLE supported.')

    ds.tile.set('classification', TYPE.lower())

    aspect_lfs = (ds['aspect_class'].to_masked_array() > 0) & \
                 (np.in1d(ds['landform'].to_masked_array(), aspect_lf).reshape(SHAPE))

    lf_cl = np.ma.where(aspect_lfs,
                        ds['landform'] * 10 + ds['aspect_class'],
                        ds['landform'] * 10).filled(NODATA)

    lf_cl = np.ma.masked_where(ds['mask'] == 0, lf_cl)

    # if we have elevation levels subdivide the landform classes
    ele = ds['elevation'].to_masked_array()
    if len(elevation_levels) > 0:
        # add global elevation step attribute (second element, first is lower boundary)
        ds.tile.set('elevation_step', elevation_levels[1])
        for i, (lb, ub) in enumerate(zip(elevation_levels[:-1], elevation_levels[1:])):
            lf_cl = np.ma.where(((ele >= lb) & (ele < ub)), lf_cl + (i + 1) * 100, lf_cl)

    # special encoding (force output as Int16)
    ENCODING_INT = dict(ENCODING)
    ENCODING_INT.update({'dtype': np.int16})

    lf_cl = np.ma.masked_where(ds['mask'] == 0, lf_cl)

    da_lf_cl = xr.full_like(ds['landform'], np.nan)
    ds['landform_class'] = da_lf_cl
    ds['landform_class'][:] = lf_cl
    ds['landform_class'].tile.update_encoding(ENCODING_INT)

    return ds
def _qx_to_lx(qx):
    r"""
    Computes :math:`l_x` based on :math:`q_x`, where :math:`q_x` already contains
    the 95-100 (33) and 100-105 (44) age groups.  Also computes :math:`l_x` for
    105-110 (45), and then sets :math:`l_x` for 110+ to be 0.

    Args:
        qx (xr.DataArray): Probability of dying.

    Returns:
        (xr.DataArray): lx.
    """
    if tuple(qx["age_group_id"].values[-2:]) != (33, 44):
        raise ValueError("qx must have age group ids 33 and 44")

    px = 1.0 - qx  # now we have survival all the way to the 100-105 (44) age group

    # Because l{x+n} = lx * px, we can compute all lx's if we start with
    # l_0 = 1 and iteratively apply the px's of higher age groups.
    # So we compute l_105-110, since we have p_100-105 from extrapolated qx.

    # We start with a set of lx's that are all 1.0
    lx = xr.full_like(px, 1)
    # now expand lx to have age group 105-110 (45)
    lx = expand_dimensions(lx, fill_value=1, age_group_id=[45])

    # Since l{x+n} = lx * px, we take the cumulative product of px down age groups
    # and apply the product to ages[1:] (since ages[0] has lx = 1.0)
    ages = lx["age_group_id"]
    ppx = px.cumprod(dim="age_group_id")   # the cumulative product of px
    ppx.coords["age_group_id"] = ages[1:]  # need to correspond to ages[1:]
    lx.loc[dict(age_group_id=ages[1:])] *= ppx  # lx all the way to 105-110

    # now artificially set lx to be 0 for the 110+ age group.
    lx = expand_dimensions(lx, fill_value=0, age_group_id=[148])

    assert (lx.sel(age_group_id=2) == 1).all()
    assert tuple(lx['age_group_id'].values[-4:]) == (33, 44, 45, 148), \
        "final lx should have age group ids 33, 44, 45, and 148."

    return lx
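# Illustrative arithmetic (hypothetical numbers, not from the source) for the
# cumulative-product step above, i.e. l_{x+n} = l_x * p_x applied repeatedly:
#
#   qx = [0.1, 0.2, 0.3]            ->  px = [0.9, 0.8, 0.7]
#   cumprod(px) = [0.9, 0.72, 0.504]
#   lx = [1.0, 0.9, 0.72, 0.504]    # l_0 = 1, later entries are the running product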
def extract_Sulphate(self):
    gefs = xr.open_dataset(self.f, engine='pynio')
    tmp1 = gefs.PMTF_P48_L105_GLL0_A62006
    sulp = tmp1
    sulp_size = xr.full_like(sulp, 0.139)

    sulp = sulp.assign_coords({"lv_HYBL0": ("lv_HYBL0", self.p[::-1].values)})
    sulp_size = sulp_size.assign_coords({"lv_HYBL0": ("lv_HYBL0", self.p[::-1].values)})

    pnew = np.arange(1000, 90, -10)

    sulp.sel(lv_HYBL0=pnew, method="nearest").sel(
        lon_0=np.arange(250, 300, 1)).sel(lat_0=np.arange(20, 60, 1))[20].plot()
    plt.show()

    sulp_size.sel(lv_HYBL0=pnew, method="nearest").sel(
        lon_0=np.arange(250, 300, 1)).sel(lat_0=np.arange(20, 60, 1))[20].plot()
    plt.show()

    return (sulp, sulp_size)
def first_run_after_date(
    da: xr.DataArray,
    window: int,
    date: Optional[DayOfYearStr] = "07-01",
    dim: str = "time",
    coord: Optional[Union[bool, str]] = "dayofyear",
) -> xr.DataArray:
    """Return the index of the first item of the first run after a given date.

    Parameters
    ----------
    da : xr.DataArray
        Input N-dimensional DataArray (boolean).
    window : int
        Minimum duration of consecutive run to accumulate values.
    date : DayOfYearStr
        The date after which to look for the run.
    dim : str
        Dimension along which to calculate consecutive run (default: 'time').
    coord : Optional[Union[bool, str]]
        If not False, the function returns values along `dim` instead of indexes.
        If `dim` has a datetime dtype, `coord` can also be a str of the name of the
        DateTimeAccessor object to use (ex: 'dayofyear').

    Returns
    -------
    xr.DataArray
        Index (or coordinate if `coord` is not False) of first item in the first valid run.
        Returns np.nan if there are no valid runs.
    """
    mid_idx = index_of_date(da[dim], date, max_idxs=1, default=0)
    if mid_idx.size == 0:  # The date is not within the group. Happens at boundaries.
        return xr.full_like(da.isel({dim: 0}), np.nan, float).drop_vars(dim)

    return first_run(
        da.where(da[dim] >= da[dim][mid_idx][0]),
        window=window,
        dim=dim,
        coord=coord,
    )
def fit(self, X, *args, **kwargs):
    """Fit the model

    Fit all the transforms one after the other and transform the data,
    then fit the transformed data using the final estimator.

    Parameters
    ----------
    X : xarray.DataArray or xarray.Dataset
        Training data. Must fulfill input requirements of first step of
        the pipeline. If an xarray.Dataset is passed, it will be converted
        to an array using `to_array()`.
    y : xarray.DataArray, optional
        Training targets. Must fulfill label requirements for all steps of
        the pipeline.
    feature_dim : str, optional
        Name of feature dimension.
    **fit_params : dict of string -> object
        Parameters passed to the ``fit`` method of this model. If the
        model is a sklearn Pipeline, parameters can be passed to each step,
        where each parameter name is prefixed such that parameter ``p`` for
        step ``s`` has key ``s__p``.
    """
    kws = {'along_dim': self._dim, 'feature_dim': DEFAULT_FEATURE_DIM}
    kws.update(kwargs)

    assert len(args) <= 1
    args = list(args)
    args.append(self._model)

    X = self._to_feature_x(X, feature_dim=kws['feature_dim'])

    if X.chunks:
        reduce_dims = [self._dim, kws['feature_dim']]
        mask = _make_mask(X, reduce_dims)
        template = xr.full_like(mask, None, dtype=object)

        self._models = xr.map_blocks(_fit_wrapper, X, args=args, kwargs=kws,
                                     template=template)
    else:
        self._models = _fit_wrapper(X, *args, **kws)
def column_ozone(filepath, lat, lon, altitude, time):
    ozone = xr.open_dataset(filepath)
    ma = 48
    mo3 = 28.97
    MMR = ozone.O3
    VMR = MMR / (ma / mo3) * 1e6
    DELP = ozone.DELP
    g0 = 9.80665    # m/s2
    T0 = 273.15     # K
    k = 1.3807e-23  # J/K/molecule
    p0 = 1.01325e5  # Pa
    Na = 6.022e23
    R = 287.3       # J/Kg/K

    vmr_indx = np.where(3.28084 * ozone.H[0, :, 200, 200].values / 1000 >= altitude)
    constant = 10 * (R * T0) / (g0 * p0)

    Column = np.zeros([8, len(vmr_indx[0]), 361, 576])
    for i in vmr_indx[0]:
        Column[:, i, :, :] = 0.5 * (VMR[:, i, :, :] + VMR[:, i + 1, :, :]) * DELP[:, i, :, :]

    O3 = 1e-2 * constant * np.sum(Column, axis=1)
    O3_x = xr.full_like(VMR[:, 0, :, :], O3)
    C_O3 = O3_x.interp(lat=lat, lon=lon, time=time)
    return C_O3.values
def create_template(like_da: xr.DataArray, var_names: Iterable[str],
                    fill_val: float = np.nan) -> xr.Dataset:
    """Create an empty dataset with the given variables in it

    Parameters
    ----------
    like_da : xr.DataArray
        Template variable.
    var_names : list-like
        List of variable names
    fill_val : float, optional
        Fill value for the new variables (default: NaN).

    Returns
    -------
    ds : xr.Dataset
        Template dataset
    """
    ds = xr.Dataset()
    for v in var_names:
        ds[v] = xr.full_like(like_da, fill_val, dtype=np.float32)
    return ds
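# Usage sketch (hypothetical variable names): build an empty output dataset shaped
# like an existing DataArray `precip`, assuming it carries the desired dims/coords:
#
#   template = create_template(precip, ['t_max', 't_min'])
#   list(template.data_vars)   # -> ['t_max', 't_min'], each NaN-filled float32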
def calc_radar_mask(data_flag, clutter_threshold=4):
    assert data_flag.dims == ("time", "height")

    # 0: clear, 1: maybe, 2: certain, -1: unknown
    radar_mask = xarray.full_like(data_flag.time, Cloud_flag.clear, dtype=int)
    radar_mask[(data_flag == Radar_flag.good).any("height")] = Cloud_flag.probably

    cloud_mask2d = (data_flag == Radar_flag.good).transpose("time", "height").values
    cloud_mask8bit = cloud_mask2d.astype(np.uint8)
    number, markers = cv2.connectedComponents(cloud_mask8bit)
    assert np.all(
        (markers == 0) == (cloud_mask2d == 0)
    ), "Cloud free should be marked with 0. This seems not to be the case."

    IDs, count = np.unique(markers, return_counts=True)
    IDs = IDs[1:]      # the first ID is the background, not needed here
    count = count[1:]  # the first ID is the background, not needed here

    potential_clouds = np.where(markers != 0)  # indices of potential cloudy pixels

    # remove clouds that are smaller than n pixel
    invalid_cloud_IDs = set(
        IDs[count < clutter_threshold]
    )  # set of echos that are too small and that are likely to be clutter
    for x, y in zip(*potential_clouds):
        if markers[x, y] in invalid_cloud_IDs:
            markers[x, y] = 0  # mark as cloud free

    certainly_cloudy = (markers != 0).any(axis=1)
    assert (radar_mask[certainly_cloudy] == Cloud_flag.probably).all()
    radar_mask[certainly_cloudy] = Cloud_flag.certain

    no_echo_signal = data_flag != Radar_flag.good
    no_clear_signal = data_flag != Radar_flag.clear
    radar_mask[(no_echo_signal & no_clear_signal).all("height")] = Cloud_flag.unknown

    return radar_mask
def _preprocess_weights(a, dim, new_dim, weights):
    """Preprocesses weights array to prepare for numpy computation.

    Parameters
    ----------
    a : xarray.Dataset or xarray.DataArray
        One of the arrays over which the function will be applied.
    dim : str, list
        The original dimension(s) to apply the function along.
    new_dim : str
        The newly named dimension after running ``_preprocess_dims``
    weights : xarray.Dataset or xarray.DataArray
        Weights to apply to function, matching the dimension size of ``new_dim``.
    """
    if weights is None:
        return xr.full_like(a, None)  # Return nan weighting array.
    else:
        # Throw error if there are negative weights.
        if weights.min() < 0:
            raise ValueError(
                'Weights has a minimum below 0. Please submit a weights array '
                'of positive numbers.')
        # Scale weights to vary from 0 to 1.
        weights = weights / weights.max()
        # Check that the weights array has the same size
        # dimension(s) as those being applied over.
        drop_dims = {k: 0 for k in a.dims if k not in new_dim}
        if dict(weights.sizes) != dict(a.isel(drop_dims).sizes):
            raise ValueError(
                f'weights dimension(s) {dim} of size {dict(weights.sizes)} '
                f"does not match DataArray's size "
                f'{dict(a.isel(drop_dims).sizes)}')
        if dict(weights.sizes) != dict(a.sizes):
            # Broadcast weights to full size of main object.
            _, weights = xr.broadcast(a, weights)
        return weights
def to_cross(
    data: xr.DataArray,
    tile: Hashable = "tile",
    x: Hashable = "grid_xt",
    y: Hashable = "grid_yt",
) -> xr.DataArray:
    """Combine tiles into a single 2D field resembling a "cross"

    Useful for quickly plotting maps or applying other 2D image processing
    techniques.

    See Also:
        Weyn J, Durran D, and Caruana, 2019
        https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2019MS001705
    """
    tiles = []
    dims = [y, x]
    rotation_plan = {
        (spec.x, spec.y): (spec.origin, tile_num)
        for tile_num, spec in TOPOLOGY.items()
    }
    data = data.drop_vars(dims + [tile], errors="ignore").transpose(..., tile, y, x)
    null = xr.full_like(data.isel({tile: 0}), np.nan)
    for j in range(3):
        row = []
        for i in range(4):
            if (i, j) in rotation_plan:
                origin, tile_num = rotation_plan[(i, j)]
                arr = rotate(data.isel({tile: tile_num}), origin, SW, dims=dims)
            else:
                arr = null
            row.append(arr)
        tiles.append(row)
    return xr.combine_nested(tiles, concat_dim=dims)
def effr_ice(ice, temp):
    min_qi = 1.e-8
    gfdl_tice = 273.16
    gfdl_beta = 1.22
    gfdl_reimin = 10.0
    gfdl_reimax = 150.0

    result = xr.full_like(ice, 0.0)
    ice_positive = xr.where(ice > min_qi, ice, min_qi)

    result = xr.where(
        temp[1:] - gfdl_tice >= -30.0,
        gfdl_beta / 9.387 * np.exp((1 - 0.969) * np.log(1.0e3 * ice_positive)) * 1.0e3,
        result)
    result = xr.where(
        temp[1:] - gfdl_tice < -30.0,
        gfdl_beta / 9.208 * np.exp((1 - 0.945) * np.log(1.0e3 * ice_positive)) * 1.0e3,
        result)
    result = xr.where(
        temp[1:] - gfdl_tice < -40.0,
        gfdl_beta / 9.337 * np.exp((1 - 0.920) * np.log(1.0e3 * ice_positive)) * 1.0e3,
        result)
    result = xr.where(
        temp[1:] - gfdl_tice < -50.0,
        gfdl_beta / 9.917 * np.exp((1 - 0.891) * np.log(1.0e3 * ice_positive)) * 1.0e3,
        result)

    result = xr.where(result < gfdl_reimin, gfdl_reimin, result)
    result = xr.where(result > gfdl_reimax, gfdl_reimax, result)
    result = xr.where(ice > min_qi, result, 0)

    return result * 2
def test_weights(ones, ndims):
    dims = ones.dims
    if ones.ndim < ndims:
        pytest.skip(
            "Don't need to test when number of dimension combinations "
            "exceeds the number of array dimensions"
        )

    bins = np.array([0, 0.9, 1.1, 2])
    bins_c = 0.5 * (bins[1:] + bins[:-1])

    weight_value = 0.5

    def _check_result(h, d):
        other_dims = [dim for dim in ones.dims if dim not in d]
        if len(other_dims) > 0:
            assert set(other_dims) <= set(h.dims)
        # check that all values are in the central bin
        h_sum = h.sum(other_dims)
        h_sum_expected = xr.DataArray(
            [0, weight_value * ones.size, 0],
            dims=["ones_bin"],
            coords={"ones_bin": ("ones_bin", bins_c)},
            name="histogram_ones",
        )
        xr.testing.assert_identical(h_sum, h_sum_expected)

    # get every possible combination of sub-dimensions
    for n_combinations in range(ones.ndim):
        for weight_dims in combinations(dims, n_combinations):
            i_selector = {dim: 0 for dim in weight_dims}
            weights = xr.full_like(ones.isel(**i_selector), weight_value)
            for nc in range(ndims):
                for d in combinations(dims, nc + 1):
                    h = histogram(ones, weights=weights, bins=[bins], dim=d)
                    _check_result(h, d)
def get_pop_region_mask_za(
    mask_type='3d',
    grid_name='POP_gx1v7',
):
    """return a region mask for zonal averaging"""
    mask3d = pop_tools.region_mask_3d(grid_name, mask_name='Pacific-Indian-Atlantic')
    nregion = len(mask3d.region)

    if mask_type.lower() == '3d':
        return mask3d

    elif mask_type.lower() == '2d':
        mask2d = xr.full_like(mask3d.isel(region=0), fill_value=0, dtype=np.int32)
        for i in range(1, nregion):
            # skip first index because "za" puts the global field in there
            mask2d = xr.where(mask3d.isel(region=i) == 1, i, mask2d)
        mask2d.name = 'REGION_MASK'
        return mask2d

    raise ValueError(
        f'unknown mask type: {mask_type}\nexpecting either "2d" or "3d"')
def get_index(
    self,
    da: Union[xr.DataArray, xr.Dataset],
    interp: Optional[Union[bool, str]] = None,
):
    """Return the group index of each element along the main dimension.

    Parameters
    ----------
    da : Union[xr.DataArray, xr.Dataset]
        The input array/dataset for which the group index is returned.
        It must have Grouper.dim as a coordinate.
    interp : Union[bool, str]
        Argument `interp` defaults to `self.interp`. If True, the returned index
        can be used for interpolation. For month grouping, integer values represent
        the middle of the month, all other days are linearly interpolated in between.

    Returns
    -------
    xr.DataArray
        The index of each element along `Grouper.dim`.
        If `Grouper.dim` is `time` and `Grouper.prop` is None, a uniform array of True is returned.
        If `Grouper.prop` is a time accessor (month, dayofyear, etc), a numerical array is returned,
        with a special case of `month` and `interp=True`.
        If `Grouper.dim` is not `time`, the dim is simply returned.
    """
    if self.prop is None:
        if self.dim == "time":
            return xr.full_like(da[self.dim], True, dtype=bool)
        return da[self.dim]

    ind = da.indexes[self.dim]
    i = getattr(ind, self.prop)

    if not np.issubdtype(i.dtype, np.integer):
        raise ValueError(
            f"Index {self.name} is not of type int (rather {i.dtype}), "
            f"but {self.__class__.__name__} requires integer indexes."
        )

    interp = (
        (interp or self.interp)
        if not isinstance(interp, str)
        else interp != "nearest"
    )
    if interp:
        if self.dim == "time":
            if self.prop == "month":
                i = ind.month - 0.5 + ind.day / ind.days_in_month
            elif self.prop == "dayofyear":
                i = ind.dayofyear
            else:
                raise NotImplementedError
        else:
            raise NotImplementedError

    xi = xr.DataArray(
        i,
        dims=self.dim,
        coords={self.dim: da.coords[self.dim]},
        name=self.dim + " group index",
    )

    # Expand dimensions of index to match the dimensions of da
    # We want vectorized indexing with no broadcasting
    # xi = xi.broadcast_like(da)
    xi.name = self.prop
    return xi
def get_slope(dem, *resolution):
    x, y = np.gradient(dem, *resolution)
    slope = np.arctan(np.sqrt(x * x + y * y)) * 180 / np.pi
    xr_slope = xr.full_like(dem, slope)
    return xr_slope
def convert_to_xarray(self, data: Dict) -> Union[xr.Dataset, Tuple[xr.Dataset, xr.Dataset]]:
    """
    Parameters
    ----------
    data
        Data from the ``load_data`` function

    Returns
    -------
        data formatted to an xarray Dataset
    """
    # split up the fields into ones of different sizes and optional returns
    fields = dict()

    # not currently returned
    fields['geometry'] = ['Tan_Alt', 'Tan_Lat', 'Tan_Lon']
    fields['flags'] = ['InfVec', 'Dropped']
    fields['profile_flags'] = ['ProfileInfVec']

    # always returned - 1 per profile
    fields['general'] = ['Event_Num', 'Lat', 'Lon', 'Beta', 'Duration', 'Type_Sat',
                         'Type_Tan', 'Trop_Height']

    # optional return parameters
    fields['background'] = ['NMC_Pres', 'NMC_Temp', 'NMC_Dens', 'NMC_Dens_Err',
                            'Density', 'Density_Err']
    fields['ozone'] = ['O3', 'O3_Err']
    fields['no2'] = ['NO2', 'NO2_Err']
    fields['h2o'] = ['H2O', 'H2O_Err']
    fields['aerosol'] = ['Ext386', 'Ext452', 'Ext525', 'Ext1020',
                         'Ext386_Err', 'Ext452_Err', 'Ext525_Err', 'Ext1020_Err']
    fields['particle_size'] = ['SurfDen', 'Radius', 'SurfDen_Err', 'Radius_Err']

    xr_data = []
    index_flags = self.convert_index_bit_flags(data)
    species_flags = self.convert_species_bit_flags(data)
    time = pd.to_timedelta(data['mjd'], 'D') + pd.Timestamp('1858-11-17')

    data['Trop_Height'] = data['Trop_Height'].flatten()
    for key in fields['general']:
        xr_data.append(xr.DataArray(data[key], coords=[time], dims=['time'], name=key))

    if 'aerosol' in self.species or self.filter_ozone:  # we need aerosol to filter ozone
        altitude = data['Alt_Grid'][0:80]
        wavel = np.array([386.0, 452.0, 525.0, 1020.0])
        ext = np.array([data['Ext386'], data['Ext452'], data['Ext525'], data['Ext1020']])
        xr_data.append(xr.DataArray(ext, coords=[wavel, time, altitude],
                                    dims=['wavelength', 'time', 'Alt_Grid'], name='Ext'))
        ext = np.array([data['Ext386_Err'], data['Ext452_Err'], data['Ext525_Err'],
                        data['Ext1020_Err']])
        xr_data.append(xr.DataArray(ext, coords=[wavel, time, altitude],
                                    dims=['wavelength', 'time', 'Alt_Grid'], name='Ext_Err'))
        for key in fields['particle_size']:
            xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
                                        dims=['time', 'Alt_Grid'], name=key))

    if 'no2' in self.species:
        altitude = data['Alt_Grid'][0:100]
        for key in fields['no2']:
            xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
                                        dims=['time', 'Alt_Grid'], name=key))

    if 'h2o' in self.species:
        altitude = data['Alt_Grid'][0:100]
        for key in fields['h2o']:
            xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
                                        dims=['time', 'Alt_Grid'], name=key))

    if any(i in ['ozone', 'o3'] for i in self.species):
        altitude = data['Alt_Grid'][0:140]
        for key in fields['ozone']:
            xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
                                        dims=['time', 'Alt_Grid'], name=key))

    if 'background' in self.species:
        altitude = data['Alt_Grid'][0:140]
        for key in fields['background']:
            xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
                                        dims=['time', 'Alt_Grid'], name=key))

    xr_data = xr.merge(xr_data)

    if self.enumerate_flags:
        xr_data = xr.merge([xr_data, index_flags, species_flags])

    for var in xr_data.variables.keys():
        if xr_data[var].dtype == 'float32' or 'Err' in var:
            xr_data[var] = xr_data[var].where(xr_data[var] != data['FillVal'])

    # determine cloud filter for aerosol data
    cloud_filter = xr.full_like(species_flags.Cloud_Bit_1, fill_value=True, dtype=bool)
    min_alt = (xr_data.Alt_Grid * (species_flags.Cloud_Bit_1 &
                                   species_flags.Cloud_Bit_2)).max(dim='Alt_Grid')
    cloud_filter = cloud_filter.where(cloud_filter.Alt_Grid > min_alt)
    xr_data['cloud_filter'] = np.isnan(cloud_filter)

    # determine valid ozone altitudes
    if any(i in ['ozone', 'o3'] for i in self.species):
        # add an ozone filter field for convenience
        ozone_good = xr.full_like(species_flags.Cloud_Bit_1, fill_value=True, dtype=bool)

        # Exclusion of all data points with an uncertainty estimate of 300% or greater
        ozone_good = ozone_good.where(xr_data.O3_Err < 30000)

        # Exclusion of all profiles with an uncertainty greater than 10% between 30 and 50 km
        no_good = (xr_data.O3_Err > 1000) & (xr_data.Alt_Grid > 30) & (xr_data.Alt_Grid < 50)
        ozone_good = ozone_good.where(~no_good)

        # Exclusion of all data points at and below the altitude where the aerosol extinction
        # value exceeds 0.006 km^-1
        # NOTE: the wavelength to use as the filter is not specified in the documentation,
        # so I have chosen the wavelength with the smallest extinction and therefore the
        # strictest filtering
        min_alt = (xr_data.Alt_Grid *
                   (xr_data.Ext.sel(wavelength=1020) > 0.006)).max(dim='Alt_Grid')
        ozone_good = ozone_good.where(xr_data.Alt_Grid > min_alt)

        # Exclusion of all data points at and below the altitude where both the 525 nm aerosol
        # extinction value exceeds 0.001 km^-1 and the 525/1020 extinction ratio falls below 1.4
        min_alt = (xr_data.Alt_Grid *
                   ((xr_data.Ext.sel(wavelength=525) > 0.001) &
                    ((xr_data.Ext.sel(wavelength=525) /
                      xr_data.Ext.sel(wavelength=1020)) < 1.4))).max(dim='Alt_Grid')
        ozone_good = ozone_good.where(xr_data.Alt_Grid > min_alt)

        # Exclusion of all data points below 35 km with a 200% or larger uncertainty estimate
        no_good = (xr_data.O3_Err > 20000) & (xr_data.Alt_Grid < 35)
        ozone_good = ~np.isnan(ozone_good.where(~no_good))
        xr_data['ozone_filter'] = ozone_good

    if self.filter_aerosol:
        xr_data['Ext'] = xr_data.Ext.where(~xr_data.cloud_filter)

    if self.filter_ozone:
        xr_data['O3'] = xr_data.O3.where(ozone_good)

    # drop aerosol if not requested
    if self.filter_ozone and not ('aerosol' in self.species):
        xr_data.drop(['Ext', 'Ext_Err', 'wavelength'])

    if self.normalize_percent_error:
        for var in xr_data.variables.keys():
            if 'Err' in var:  # put error units back into percent
                xr_data[var] = (xr_data[var] / 100).astype('float32')

    xr_data = xr_data.transpose('time', 'Alt_Grid', 'wavelength')
    xr_data = self.apply_cf_conventions(xr_data)

    if self.return_separate_flags:
        return xr_data, xr.merge([index_flags, species_flags])
    else:
        return xr_data