def load_flags(self, view='in'):
    """Load flags for this product.

    This will include entries such as whether the pixels are land, ocean,
    twilight, day etc. Data will be returned in the 1km grid resolution.

    Args:
        view: which view to load flags for, e.g. 'in', 'io' etc.
    """
    flags_path = os.path.join(self.path, 'flags_{}.nc'.format(view))
    excluded = [
        'confidence_orphan_{}', 'pointing_orphan_{}', 'pointing_{}',
        'cloud_orphan_{}', 'bayes_orphan_{}', 'probability_cloud_dual_{}'
    ]
    excluded = [e.format(view) for e in excluded]
    flags = xr.open_dataset(flags_path, decode_times=False,
                            drop_variables=excluded, engine='h5netcdf')
    confidence_var = 'confidence_{}'.format(view)
    flag_masks = flags[confidence_var].attrs['flag_masks']
    flag_meanings = flags[confidence_var].attrs['flag_meanings'].split()
    flag_map = dict(zip(flag_meanings, flag_masks))

    # Expand each confidence bit into its own 0/1 variable.
    expanded_flags = {}
    for key, bit in flag_map.items():
        msk = flags[confidence_var] & bit
        msk = xr.where(msk > 0, 1, 0)
        expanded_flags[key] = msk
    flags = flags.assign(**expanded_flags)
    return flags
def get_dataset(self, key, info): """Get the dataset.""" dims = ['y', 'x'] if self[key['name']].ndim == 3: dims = ['y', 'x', 'selection'] data = self[key['name']] if key['name'] in 'wvc_row_time': data = data.rename({data.dims[0]: 'y'}) else: dim_map = { curr_dim: new_dim for curr_dim, new_dim in zip(data.dims, dims) } data = data.rename(dim_map) data = self._mask_data(key['name'], data) data = self._scale_data(key['name'], data) if key['name'] in 'wvc_lon': data = xr.where(data > 180, data - 360., data) data.attrs.update(info) data.attrs.update(self.get_metadata()) data.attrs.update(self.get_variable_metadata()) if "Platform_ShortName" in data.attrs: data.attrs.update( {'platform_name': data.attrs['Platform_ShortName']}) return data
def get_icefront(ds, grid, Zlev=0):
    """Return a field marking where the ice front is.

    Parameters
    ----------
    ds : xarray Dataset
        with 'maskC' and 'maskCtrlI' fields
    grid : xgcm Grid
        object to diff with
    Zlev : int, optional
        vertical level of 'maskC' to use (default 0)

    Returns
    -------
    icefront : xarray DataArray
        True where icefront, False otherwise
    """
    maskCmI = 1 * ds['maskC'].isel(Z=Zlev) - 1 * ds['maskCtrlI'].isel(Z=0)
    icex = grid.diff(maskCmI, 'X', boundary='fill') != 0
    icey = grid.diff(maskCmI, 'Y', boundary='fill') != 0
    icex, icey = grid.interp_2d_vector({'X': icex, 'Y': icey},
                                       boundary='fill').values()
    icefront = ((icey + icex != 0) * ds['maskC'].isel(Z=Zlev))

    # Hacks for PIG!!
    icefront = xr.where((ds.YC > -74.55) & (ds.XC > -101.75),
                        False, True * icefront)
    icefront = icefront.where(icefront.XC != icefront.XC.min(), False)
    return icefront
def calc_rh2(wrf_xr):
    """
    Calculate relative humidity at 2 meters using the saturation vapour
    pressure formula from the FAO56 manual. Uses 2 m temperature and
    2 m dew point temperature.

    Parameters
    ----------
    wrf_xr : xr.Dataset
        Dataset containing 2 m temperature ('t2m') and 2 m dew point
        temperature ('d2m'), both in Kelvin.

    Returns
    -------
    xr.DataArray
    """
    # Convert from Kelvin to Celsius
    t = wrf_xr['t2m'] - 273.15
    dew = wrf_xr['d2m'] - 273.15

    # Saturation and actual vapour pressure (FAO56 style)
    eo = 0.6108*np.exp((17.27*t)/(t + 237.3))
    ea = 0.6108*np.exp((17.27*dew)/(dew + 237.3))

    rh = (ea/eo) * 100
    rh = xr.where(rh > 100, 100, rh)

    return rh
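# A minimal usage sketch for calc_rh2 (not from the original source): build a
# tiny synthetic dataset with 't2m' and 'd2m' in Kelvin, matching the variable
# names the function body reads, and check the capped relative humidity.
import numpy as np
import xarray as xr

demo = xr.Dataset({
    't2m': ('time', np.array([293.15, 303.15])),  # 20 degC and 30 degC
    'd2m': ('time', np.array([288.15, 302.15])),  # 15 degC and 29 degC dew point
})
rh = calc_rh2(demo)
print(rh.values)  # roughly [72.9, 94.4] percent, never above 100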
def mask_and_fill(xr_obj, valid_range, fill_value): """Masks xarray object using specified valid range and fills nan values with fill_value to use as RandomForestClassifier feature input. Parameters ---------- xr_obj : xarray DataArray DataArray with values to mask and fill valid_range : tuple The min and max valid range for the data. All pixels with values outside of this range will be masked. fill_value : integer Value to replace nan values Returns ------- masked_filled_array : xarray DataArray DataArray with masked and filled values. """ # Define and apply mask mask = ((xr_obj < valid_range[0]) | (xr_obj > valid_range[1])) masked_array = xr_obj.where(~xr.where(mask, True, False)) # Fill nan values with specified fill_value masked_filled_array = masked_array.fillna(fill_value) return masked_filled_array
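# A small usage sketch for mask_and_fill (illustrative values only): pixels
# outside the valid range become NaN and are then replaced by the fill value,
# which keeps the array dense for scikit-learn style feature inputs.
import xarray as xr

arr = xr.DataArray([[-10, 5, 50], [120, 99, 0]], dims=('y', 'x'))
filled = mask_and_fill(arr, valid_range=(0, 100), fill_value=-999)
print(filled.values)
# [[-999.    5.   50.]
#  [-999.   99.    0.]]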
def func_conv_data(datasets, region, start_year, end_year, reg_dict,
                   month_no=9, regrid=None):
    da_t = sel_time(datasets['thetao'].thetao, start_year, end_year)
    da_s = sel_time(datasets['so'].so, start_year, end_year)
    da_dens = sel_time(dens0(da_s, da_t), start_year, end_year, month=month_no)

    if regrid is not None:
        da_dens = func_regrid(da_dens, regrid)
        da_t = func_regrid(da_t, regrid)
        da_s = func_regrid(da_s, regrid)

    da_dens = select_region(da_dens, region, reg_dict)
    da_t = select_region(da_t, region, reg_dict)
    da_s = select_region(da_s, region, reg_dict)

    da_mld = xr_func_mld(da_dens)
    # Keep mixed-layer depths at or below the convection threshold; NaN elsewhere.
    conv = xr.where(da_mld >= reg_dict[region]['convdepth'], da_mld, np.nan)
    conv_area = conv.mean(dim='time', skipna=True)

    da_t_conv = select_conv_area_data(da_t, conv_area)
    da_s_conv = select_conv_area_data(da_s, conv_area)

    conv_area = add_region_attrs(conv_area, region, reg_dict)
    da_t_conv = add_region_attrs(da_t_conv, region, reg_dict)
    da_s_conv = add_region_attrs(da_s_conv, region, reg_dict)

    ind_t = da_t_conv.interp(lev=da_t_conv.conv_index)
    conv_ind = (ind_t - ind_t.mean('year')) / ind_t.std('year') * -1

    return conv_area, da_t_conv, da_s_conv, conv_ind
def calc_dist_in_direction_cluster(turbines, prevail_wind_direction, bin_size_deg=15): """Same as calc_dist_in_direction(), but intended for one cluster only. Calculates a squared distance matrix (and a squared direction matrix) and therefore RAM usage is O(len(turbines)^2). Parameters ---------- turbines : xr.DataSet as returned by load_turbines() prevail_wind_direction : xr.DataArray (dim = turbines) will be used to orientate distances relative to prevailing wind direction, pass an xr.DataArray with zeros to get distances per absolute directions (not relative to prevailing wind direction) bin_size_deg : float size of direction bins in degrees Returns ------- xr.DataArray dims: turbines, direction direction is relative to prevail_wind_direction, i.e. 0° = in prevailing wind direction, and otherwise counter-clockwise relative to 0° """ directions = calc_directions(turbines, prevail_wind_direction) # directions is actually not used here, because bins and range are provided (except for dtype) bin_edges = np.histogram_bin_edges(directions, bins=360 // bin_size_deg, range=(-np.pi, np.pi)) num_bins = len(bin_edges) - 1 # Attention, fencepost problem! # np.digitize does not return the n-th bin, but the n+1-th bin! # This is not a symmetric matrix, directions get flipped by 180° if dims is provided in wrong # order, but it is not at all clear how xarray defines the order (probably the order of # usage of dims 'targets' and 'turbines' in the arctan2() call above). bin_idcs = np.digitize(directions, bin_edges) - 1 bin_idcs = xr.DataArray( bin_idcs, dims=('targets', 'turbines'), # targets = closest turbines coords={'turbines': turbines.turbines}) locations = turbine_locations(turbines) distances = geolocation_distances(locations) # set distance to itself to INF to avoid zero distance minimums later distances[np.diag_indices_from(distances)] = np.inf distances = xr.DataArray(distances, dims=('turbines', 'targets'), coords={'turbines': turbines.turbines}) bin_centers = edges_to_center(bin_edges) direction_bins = xr.DataArray(np.arange(num_bins), dims='direction', coords={'direction': bin_centers}) return xr.where(bin_idcs == direction_bins, distances, np.inf).min(dim='targets')
def make_site_capacity_array(site_df, time, included_days_of_week=None): """Returns an array of daily site capacities. Args: site_df: pd.DataFrame of site information, containing a "capacity" column with *weekly* site capacity. time: xr.DataArray of times to restrict to (expected to be c.time). included_days_of_week: optional list of integers (0 through 6) indicating which days of the week sites recruit participants (Monday through Sunday, respectively). Returns: An xr.DataArray of shape [location, time], the number of participants a site can recruit on each day. """ if included_days_of_week is None: included_days_of_week = list(range(7)) site_capacity = site_df.capacity.to_xarray() / len(included_days_of_week) site_capacity = site_capacity.broadcast_like(time).astype('float') excluded_days = [d for d in range(7) if d not in included_days_of_week] is_excluded = xr.apply_ufunc( lambda x: pd.to_datetime(x).dayofweek.isin(excluded_days), site_capacity.time) site_capacity = xr.where(is_excluded, 0.0, site_capacity) # Can't recruit before the activation date activation_date = site_df.start_date.to_xarray() for l in activation_date.location.values: date = activation_date.loc[l] site_capacity.loc[site_capacity.time < date, l] = 0.0 return site_capacity.transpose('location', 'time')
def if_then_else(condition, val_if_true, val_if_false): """ Implements Vensim's IF THEN ELSE function. Parameters ---------- condition: bool or xarray.DataArray of bools val_if_true: function Value to evaluate and return when condition is true. val_if_false: function Value to evaluate and return when condition is false. Returns ------- The value depending on the condition. """ if isinstance(condition, xr.DataArray): if condition.all(): return val_if_true() elif not condition.any(): return val_if_false() return xr.where(condition, val_if_true(), val_if_false()) return val_if_true() if condition else val_if_false()
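# A short usage sketch (not from PySD itself): with a DataArray condition the
# branches are supplied as callables and xr.where is used when the condition
# is mixed; with a plain bool, only the matching branch is ever evaluated.
import xarray as xr

cond = xr.DataArray([True, False, True], dims='x')
result = if_then_else(cond, lambda: 10, lambda: -10)
print(result.values)  # [ 10 -10  10]

# Only the chosen branch runs, so the division by zero below is never reached.
print(if_then_else(False, lambda: 1 / 0, lambda: 42))  # 42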
def getMask(self, label, var): """Given the region label and a ILAMB.Variable, return a mask appropriate for that variable. Parameters ---------- label : str the unique region identifier var : ILAMB.Variable.Variable the variable to which we would like to apply a mask Returns ------- mask : numpy.ndarray a boolean array appropriate for masking the input variable data """ rdata = Regions._regions[label] rtype = rdata[0] if rtype == 0: rtype, rname, rlat, rlon = rdata lat = var.ds[var.lat_name] lon = var.ds[var.lon_name] if lon.max() > 180: rlon = (np.asarray(rlon) + 360) % 360 out = xr.where((lat >= rlat[0]) * (lat <= rlat[1]) * (lon >= rlon[0]) * (lon <= rlon[1]), False, True) return out elif rtype == 1: rtype, rname, da = rdata out = da.interp(lat=var.ds[var.lat_name], lon=var.ds[var.lon_name], method='nearest') == False return out msg = "Region type #%d not recognized" % rtype raise ValueError(msg)
def calc_land_surface_water_demand(lai_nc, etref_nc, p_nc, lu_nc, output=None, chunksize=None):
    '''calculate water demand of land surface based on LAI

    lai_nc: str
        path to LAI netcdf file
    etref_nc: str
        path to ET reference netcdf file
    p_nc: str
        path to Precipitation netcdf file
    lu_nc: str
        path to LU netcdf file (invariant or monthly)
    output: str, optional
        path of the netCDF file to write; defaults to 'water_demand.nc'
        next to lai_nc
    chunksize: optional
        chunk sizes passed to the netCDF reader

    return
    output: str
        path to the saved netCDF file holding the water demand datacube
    '''
    #read netcdf data
    lai = cf.open_nc(lai_nc, chunksize=chunksize, layer=0)
    et_reference = cf.open_nc(etref_nc, chunksize=chunksize, layer=0)
    lu = cf.open_nc(lu_nc, chunksize=chunksize, layer=0)
    p = cf.open_nc(p_nc, chunksize=chunksize, layer=0)

    #calculate Potential ET using LAI-based crop coefficient Kc
    #(numpy ufuncs work directly on DataArrays; xr.ufuncs was removed in recent xarray releases)
    kc = xr.where(
        lu.isin([4, 5, 30, 23, 24, 63]),   #mask water classes
        1.4,                               #water KC
        (1 - np.exp(-0.5 * lai)) / 0.76)   #non-water KC
    et_potential = kc * et_reference

    #calculate land surface water demand as the gap between potential ET and effective rainfall
    phi = et_potential / p
    effective_rainfall = np.sqrt(phi * np.tanh(1 / phi)
                                 * (1 - np.exp(-phi))) * p
    demand = et_potential - effective_rainfall

    #add attributes to demand data
    demand.name = 'land_surface_water_demand'
    demand = demand.transpose('time', 'latitude', 'longitude')
    demand.attrs = {
        'units': p.attrs['units'],
        'source': 'Potential ET - Effective Rainfall',
        'quantity': 'land_surface_water_demand'
    }

    #save results
    if output is None:
        output = os.path.join(os.path.dirname(lai_nc), 'water_demand.nc')
    comp = dict(zlib=True, complevel=9, least_significant_digit=2,
                chunksizes=chunksize)
    demand.load().to_netcdf(output, encoding={'land_surface_water_demand': comp})
    return output
def xidz(numerator, denominator, value_if_denom_is_zero):
    """
    Implements Vensim's XIDZ function.
    This function executes a division, robust to the denominator being zero.
    In the case of a (near-)zero denominator, the final argument is returned.

    Parameters
    ----------
    numerator: float or xarray.DataArray
    denominator: float or xarray.DataArray
        Components of the division operation
    value_if_denom_is_zero: float or xarray.DataArray
        The value to return if the denominator is zero

    Returns
    -------
    numerator / denominator if abs(denominator) >= small_vensim (1e-6);
    otherwise, returns value_if_denom_is_zero
    """
    if isinstance(denominator, xr.DataArray):
        return xr.where(np.abs(denominator) < small_vensim,
                        value_if_denom_is_zero,
                        numerator * 1.0 / denominator)

    if abs(denominator) < small_vensim:
        return value_if_denom_is_zero
    else:
        return numerator * 1.0 / denominator
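# A usage sketch for xidz. `small_vensim` is a module-level constant; it is
# defined here only so the demo is self-contained, assuming the 1e-6 value
# described in the docstring.
import numpy as np
import xarray as xr

small_vensim = 1e-6  # assumption: mirrors the module-level constant

den = xr.DataArray([2.0, 0.0, 4.0], dims='x')
# Note: both branches of xr.where are evaluated, so numpy may emit a
# divide-by-zero RuntimeWarning for the zero entry; the result is still safe.
print(xidz(8.0, den, value_if_denom_is_zero=-1.0).values)  # [ 4. -1.  2.]
print(xidz(8.0, 0.0, value_if_denom_is_zero=-1.0))         # -1.0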
def keep_longest_run(da: xr.DataArray, dim: str = "time") -> xr.DataArray: """Keep the longest run along a dimension. Parameters ---------- da : xr.DataArray Boolean array. dim : str Dimension along which to check for the longest run. Returns ------- xr.DataArray Boolean array similar to da but with only one run, the (first) longest. """ # Get run lengths rls = rle(da, dim) out = xr.where( # Construct an integer array and find the max rls[dim].copy(data=np.arange(rls[dim].size)) == rls.argmax(dim), rls + 1, # Add one to the First longest run rls, ) out = out.ffill(dim) == out.max(dim) return da.copy( data=out.transpose(*da.dims).data) # Keep everything the same
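# A usage sketch for keep_longest_run, assuming the `rle` helper from xclim's
# run-length utilities (xclim.indices.run_length) is in scope, as in the module
# this function comes from. Only the longest run of True values survives.
import pandas as pd
import xarray as xr

da = xr.DataArray(
    [True, True, False, True, True, True, False, True],
    dims='time',
    coords={'time': pd.date_range('2000-01-01', periods=8, freq='D')},
)
print(keep_longest_run(da).values)
# expected: [False False False  True  True  True False False]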
def reduce_precision(ds: xr.Dataset, fp16=False) -> None: """Remove false precision IN PLACE to facilitate downstream compression. Two methods are conversion to float16 and back (3.3 decimal digits precision) and binary rounding to fixed precision. The appropriate levels were determined in './data/merge and rechunk.ipynb'. **Hardcoded** for current iteration of MERRA-2 data. Parameters ---------- ds : xr.Dataset dataset of MERRA-2 fp16 : bool, optional If True, use fp16 conversion method. If False, use fixed precision rounding method. By default False """ ds[["lat", "lon"]].astype(np.float32) ds["PS"] = binary_round(ds["PS"], decimal_digits=-1).astype(np.int32) mask = ds["PRECTOTCORR"] <= -14 # assumes log10 applied first! ds["PRECTOTCORR"] = xr.where( mask, 0, ds["PRECTOTCORR"] ) # threshold tiny floats to 0 if fp16: f16 = ["GHLAND", "RHOA", "PRECTOTCORR", "RISFC", "TS", "T10M"] ds.update(ds[f16].astype(np.float16).astype(np.float32)) for dec, cols in {1: ["WDIR50M"], 3: ["WS50M"]}.items(): ds.update(binary_round(ds[cols], decimal_digits=dec)) else: prec = { 1: ["GHLAND", "RISFC", "TS", "T10M", "WDIR50M"], 2: ["PRECTOTCORR"], 3: ["RHOA", "WS50M"], } for dec, cols in prec.items(): ds.update(binary_round(ds[cols], decimal_digits=dec))
def evaluate( self, rho: DataArray, R: DataArray, t: Optional[DataArray] = None, R_0: Optional[DataArray] = None, ) -> DataArray: """Evaluate the function at a location defined using (R, z) coordinates""" if t is None: if "t" in rho.coords: t = rho.coords["t"] elif self.time is not None: t = self.time elif "t" not in rho.coords and ( "t" in self.sym_coords or "t" in self.asym_coords ): rho = rho.expand_dims(t=t) symmetric = broadcast_spline( self.symmetric_emissivity, self.sym_dims, self.sym_coords, rho ) asymmetric = broadcast_spline( self.asymmetry_parameter, self.asym_dims, self.asym_coords, rho ) if R_0 is None: R_0 = cast(DataArray, self.transform.equilibrium.R_hfs(rho, t)[0]) result = symmetric * np.exp(asymmetric * (R**2 - R_0**2)) # Ensure round-off error doesn't result in any values below 0 return where(result < 0.0, 0.0, result).fillna(0.0)
def test_gradient(od, axesList): varNameList = ["sinZ", "sinY", "sinX", "sintime"] if axesList == "wrong": with pytest.raises(ValueError): gradient(od, varNameList=varNameList, axesList=axesList) else: grad_ds = gradient(od, varNameList=varNameList, axesList=axesList) if axesList is None: axesList = list(od.grid_coords.keys()) # sin' = cos for varName in varNameList: for axis in axesList: gradName = "d" + varName + "_" + "d" + axis var = grad_ds[gradName] if axis not in varName: assert var.min().values == grad_ds[gradName].max( ).values == 0 else: check = od.dataset["cos" + axis].where(var) mask = xr.where( np.logical_or(check.isnull(), var.isnull()), 0, 1) assert_allclose( var.where(mask, drop=True).values, check.where(mask, drop=True).values, 1.0e-3, )
def strategy(data): # calc weights: close = data.sel(field="close") is_liquid = data.sel(field='is_liquid') ma_slow = qnta.lwma(close, 50) ma_fast = qnta.lwma(close, 10) return xr.where(ma_fast > ma_slow, 1, -1) * is_liquid
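# A self-contained sketch of the same crossover idea, using plain rolling means
# as a stand-in for qnta.lwma (the Quantiacs toolbox and its data layout are
# not assumed here): long (+1) when the fast average is above the slow one,
# short (-1) otherwise, zeroed out where the asset is not liquid.
import numpy as np
import xarray as xr

close = xr.DataArray(np.linspace(100, 120, 60), dims='time')
is_liquid = xr.ones_like(close)

ma_slow = close.rolling(time=50, min_periods=1).mean()
ma_fast = close.rolling(time=10, min_periods=1).mean()
weights = xr.where(ma_fast > ma_slow, 1, -1) * is_liquid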
def raster2masked_xr(raster_path, valid_range=None):
    """Converts a raster to an xarray object and masks invalid values.

    Parameters
    -----------
    raster_path : string
        a path to the raster file
    valid_range : tuple
        the (min, max) valid range for the data; all pixels with values
        outside of this range will be masked

    Returns
    -----------
    xr_object : xarray object
        xarray with masked values outside valid range
    xr_counts : pandas data frame
        a count of all unique elements, helps locate possible artifacts
    """
    # open raster as rxr
    xr_object = rxr.open_rasterio(raster_path, masked=True,
                                  parse_coordinates=False)

    # mask values
    if valid_range is not None:
        mask = ((xr_object < valid_range[0]) | (xr_object > valid_range[1]))
        xr_object = xr_object.where(~xr.where(mask, True, False))

    # generate data frame of unique values and counts
    xr_df = xr_object.to_dataframe(name='unique values')
    xr_counts = xr_df.value_counts()

    return xr_object, xr_counts
def theta_l_detailed(tt, pp, qt, ql, qi): """theta_l: becomes theta for a dry parcel The default calculation is used whenever there is significant qv In the absence of qv but presence of ql (usually very high in the atmosphere, a correction is made to prevent division by zero.""" theta_l = xr.where( ql + qi < 0.999 * qt, (tt * (tt / tref)**(qt * (cpv - cpd) / cpd + ql * (cpl - cpv) / cpd + qi * (cpi - cpv) / cpd) * (pp / (pref * (1 + ((qt - ql - qi) * rv) / ((1 - qt) * rd))))**(-(1 - qt) * rd / cpd) * (pp / (pref * (1 + ((1 - qt) * rd) / ((qt - ql - qi) * rv))))**(-(qt - ql - qi) * rv / cpd) * np.exp(-ql * (srv - srl) / cpd - qi * (srv - sri) / cpd) * (1.0 / (1.0 + (qt * rv) / ((1.0 - qt) * rd)))**((1.0 - qt) * rd / cpd) * (1.0 / (1.0 + ((1.0 - qt) * rd) / (qt * rv)))**((qt) * rv / cpd)), (tt * (tt / tref)**(qt * (cpv - cpd) / cpd + ql * (cpl - cpv) / cpd + qi * (cpi - cpv) / cpd) * (pp / (pref * (1 + ((qt - ql - qi) * rv) / ((1 - qt) * rd))))**(-(1 - qt) * rd / cpd) * np.exp(-ql * (srv - srl) / cpd - qi * (srv - sri) / cpd) * (1.0 / (1.0 + (qt * rv) / ((1.0 - qt) * rd)))**((1.0 - qt) * rd / cpd) * (1.0 / (1.0 + ((1.0 - qt) * rd) / (qt * rv)))**((qt) * rv / cpd)), ) return theta_l
def aggregate_by_lu_dictionary(dts, LU, lu_dictionary, how='sum'):
    '''aggregate dataset by LU class categories
    '''
    data = []  # create empty data list
    LU = dts * 0 + LU    # trick: to keep the same time dimension
    dts = LU * 0 + dts   # trick: to keep the same time dimension
    for key in lu_dictionary:  # aggregate total fluxes per each LU class
        classes = lu_dictionary[key]
        dts_lu = xr.where(LU.isin(classes), dts, np.nan)  # mask only this LU class
        if how == 'sum':
            df_lu = dts_lu.sum(dim=['latitude', 'longitude']).to_dataframe()  # sum of all pixels in LU class
        elif how == 'mean':
            df_lu = dts_lu.mean(dim=['latitude', 'longitude']).to_dataframe()  # mean of all pixels in LU class
        if len(df_lu.columns) > 1:
            df_lu.columns = ['{0}-{1}'.format(key, col) for col in df_lu.columns]  # rename columns with variable
        else:
            df_lu.columns = ['{0}'.format(key) for col in df_lu.columns]  # rename column
        data.append(df_lu)  # append data list by LU class
    df = pd.concat(data, axis=1)  # merge all results into one dataframe
    return df
def aggregate_by_lu_unique(dts, LU, how='sum'):
    '''aggregate dataset by unique LU classes in LU map(s)
    '''
    unique_LU = np.unique(LU)  # get unique land-use classes
    unique_LU = unique_LU[~np.isnan(unique_LU)]  # exclude nan
    data = []  # create empty data list
    LU = dts * 0 + LU    # trick: to keep the same time dimension
    dts = LU * 0 + dts   # trick: to keep the same time dimension
    for lucl in unique_LU:  # aggregate total fluxes per each LU class
        dts_lu = xr.where(LU == lucl, dts, np.nan)  # mask only this LU class
        if how == 'sum':
            df_lu = dts_lu.sum(dim=['latitude', 'longitude']).to_dataframe()  # sum of all pixels in LU class
        elif how == 'mean':
            df_lu = dts_lu.mean(dim=['latitude', 'longitude']).to_dataframe()  # mean of all pixels in LU class
        if len(df_lu.columns) > 1:
            df_lu.columns = ['{0}-{1}'.format(lucl, col) for col in df_lu.columns]  # rename columns with variable
        else:
            df_lu.columns = ['{0}'.format(lucl) for col in df_lu.columns]  # rename column
        data.append(df_lu)  # append data list by LU class
    df = pd.concat(data, axis=1)  # merge all results into one dataframe
    return df
def putna(left, right, xar, scalar=None):
    '''
    Put NaN in an xarray where the values lie in the open interval
    `(left, right)`.

    Parameters
    ==========

    left, right:
        Extremes of the interval where the values must be set to NaN
    xar :
        Xarray
    scalar :
        If set, all entries not satisfying the condition are set equal
        to `scalar` instead of being kept

    Returns
    =======

    Modified array
    '''
    # Use `is None` so that a scalar of 0 (or False) is not silently ignored.
    if scalar is not None:
        out = scalar
    else:
        out = xar

    return xr.where((xar < right) & (xar > left), np.nan, out)
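# A usage sketch for putna (illustrative values): entries strictly inside the
# interval become NaN; with `scalar` given (including 0), everything outside
# the interval is replaced by that scalar.
import numpy as np
import xarray as xr

xar = xr.DataArray([0.0, 1.0, 2.0, 3.0, 4.0], dims='x')
print(putna(1.0, 3.0, xar).values)              # [ 0.  1. nan  3.  4.]
print(putna(1.0, 3.0, xar, scalar=0.0).values)  # [ 0.  0. nan  0.  0.]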
def test_divergence(od, varNameList): # Add units if None not in varNameList: for varName in varNameList: od._ds[varName].attrs["units"] = "m/s" # Compute divergence dive_ds = divergence(od, iName=varNameList[0], jName=varNameList[1], kName=varNameList[2]) # sin' = cos for varName in varNameList: if varName is not None: axis = varName[-1] diveName = "d" + varName + "_" + "d" + axis var = dive_ds[diveName] coords = {coord[0]: var[coord] for coord in var.coords} coords["Z"], coords["Y"], coords["X"] = xr.broadcast( coords["Z"], coords["Y"], coords["X"]) check = np.cos(coords[axis]) mask = xr.where(np.logical_or(check.isnull(), var.isnull()), 0, 1) # Assert using numpy var = var.where(mask, drop=True).values check = check.where(mask, drop=True).values assert_array_almost_equal(var, check, 1.0e-3)
def count_genotypes( ds: Dataset, dim: Dimension, call_genotype: Hashable = variables.call_genotype, call_genotype_mask: Hashable = variables.call_genotype_mask, merge: bool = True, ) -> Dataset: variables.validate( ds, { call_genotype_mask: variables.call_genotype_mask_spec, call_genotype: variables.call_genotype_spec, }, ) odim = _swap(dim)[:-1] M, G = ds[call_genotype_mask].any(dim="ploidy"), ds[call_genotype] n_hom_ref = (G == 0).all(dim="ploidy") n_hom_alt = ((G > 0) & (G[..., 0] == G)).all(dim="ploidy") n_non_ref = (G > 0).any(dim="ploidy") n_het = ~(n_hom_alt | n_hom_ref) # This would 0 out the `het` case with any missing calls agg = lambda x: xr.where(M, False, x).sum( dim=dim) # type: ignore[no-untyped-call] new_ds = create_dataset({ f"{odim}_n_het": agg(n_het), # type: ignore[no-untyped-call] f"{odim}_n_hom_ref": agg(n_hom_ref), # type: ignore[no-untyped-call] f"{odim}_n_hom_alt": agg(n_hom_alt), # type: ignore[no-untyped-call] f"{odim}_n_non_ref": agg(n_non_ref), # type: ignore[no-untyped-call] }) return conditional_merge_datasets(ds, new_ds, merge)
def _create_and_log_figures(self, results: dict, experiment_logger: Logger, epoch: int):
    basins = list(results.keys())
    random.shuffle(basins)
    for target_var in self.cfg.target_variables:
        max_figures = min(self.cfg.validate_n_random_basins,
                          self.cfg.log_n_figures,
                          len(basins))
        for freq in results[basins[0]].keys():
            figures = []
            for i in range(max_figures):
                # use a distinct name so the xarray module alias is not shadowed
                basin_xr = results[basins[i]][freq]['xr']
                obs = basin_xr[f"{target_var}_obs"].values
                sim = basin_xr[f"{target_var}_sim"].values

                # clip negative predictions to zero, if variable is listed in config 'clip_targets_to_zero'
                if target_var in self.cfg.clip_targets_to_zero:
                    sim = xarray.where(sim < 0, 0, sim)

                figures.append(
                    self._get_plots(
                        obs,
                        sim,
                        title=f"{target_var} - Basin {basins[i]} - Epoch {epoch} - Frequency {freq}")[0])
            # make sure the preamble is a valid file name
            experiment_logger.log_figures(figures,
                                          freq,
                                          preamble=re.sub(r"[^A-Za-z0-9\._\-]+", "", target_var))
def mask_lake(data_dir, shp, testname, domain): geo_path = f'/home/zzhzhao/Model/tests/{testname}/WPS/geo_em.d{domain:0>2d}.nc' lu = salem.open_wrf_dataset(os.path.join( data_dir, geo_path))['LU_INDEX'].isel(time=0) lu_lake = lu.salem.roi(shape=shp) mask = xr.where(lu_lake.notnull(), True, False) return mask
def save_cmip_mean(models, p, experiment, y1, y2, cmipstr, subset_y1, subset_y2): ProgressBar().register() era5_data = xr.open_dataset( "/g/data/eg3/ab4502/ExtremeWind/aus/regrid_1.5/ERA5__mean_lr36_historical_1979_2005.nc" ) out_hist = load_model_data(models, p, lsm=False,\ force_cmip_regrid=True,\ experiment=experiment, era5_y1=y1, era5_y2=y2,\ y1=y1, y2=y2, save=False, era5_data=era5_data) print("Computing mean/median for " + p + "...") out_means = [] for i in np.arange(len(models)): sub = out_hist[i].sel({ "time": (out_hist[i]["time.year"] >= subset_y1) & (out_hist[i]["time.year"] <= subset_y2) }) sub = xr.where(np.isinf(sub), np.nan, sub) out_means.append(sub.mean("time", skipna=True).values) for i in np.arange(len(out_means)): plt.contourf(out_means[i]) plt.colorbar() plt.savefig("/g/data/eg3/ab4502/figs/CMIP/" + experiment + "/" + models[i][0] + "_" + p + ".png") plt.close() mean = np.mean(np.stack(out_means), axis=0) print("Saving...") xr.Dataset(data_vars={p:(("lat","lon"), mean)}, \ coords={"lat":out_hist[0].lat.values, "lon":out_hist[0].lon.values}).\ to_netcdf("/g/data/eg3/ab4502/ExtremeWind/aus/regrid_1.5/"+p+\ "_"+str(subset_y1)+"_"+str(subset_y2)+"_ensemble_mean_"+cmipstr+".nc",\ engine="h5netcdf")
def run_length_with_date( da: xr.DataArray, window: int, date: str = "07-01", dim: str = "time", ) -> xr.DataArray: """Return the length of the longest consecutive run of True values found to be semi-continuous before and after a given date. Parameters ---------- da : xr.DataArray Input N-dimensional DataArray (boolean) window : int Minimum duration of consecutive run to accumulate values. date: The date that a run must include to be considered valid. dim : str Dimension along which to calculate consecutive run (default: 'time'). Returns ------- xr.DataArray Length of longest run of True values along a given dimension inclusive of a given date. Notes ----- The run can include holes of False or NaN values, so long as they do not exceed the window size. """ include_date = datetime.strptime(date, "%m-%d").timetuple().tm_yday mid_index = np.where(da.time.dt.dayofyear == include_date)[0] if mid_index.size == 0: # The date is not within the group. Happens at boundaries. return xr.full_like(da.isel(time=0), np.nan, float).drop_vars("time") end = first_run( (~da).where(da.time >= da.time[mid_index][0]), window=window, dim=dim, ) beg = first_run(da, window=window, dim=dim) sl = end - beg sl = xr.where(beg.isnull() & end.notnull(), 0, sl) # If series is never triggered sl = xr.where( beg.notnull() & end.isnull(), da.time.size - beg, sl) # If series is not ended by end of resample time frequency return sl.where(sl >= 0)
def calc_correlation_field(xda, mask, dimlist=['Z', 'YC'], n_shift=15, mask_in_betweens=False): """calculate the correlation field for each shifted distance Parameters ---------- xda : xarray.DataArray The field to compute correlations on, over the 'sample' dimension mask : xarra.DataArray True/False inside/outside of domain dimlist : list of str denoting dimensions to compute shifted correlations n_shift : int number of shifts to do mask_in_betweens : bool, optional if True, then if there is a portion of the domain such that for a particular dimension, there is a gap between two points, ignore all points with larger correlation length than where the gap occurs doesn't affect results much """ xds = xr.Dataset() shifty = np.arange(-n_shift, n_shift + 1) shifty = xr.DataArray(shifty, coords={'shifty': shifty}, dims=('shifty', )) xds['shifty'] = shifty for dim in dimlist: corrfld = f'corr_{dim.lower()}' template = xda.isel(sample=0).drop('sample') xds[corrfld] = xr.zeros_like(shifty * template) x_deviation = (xda - xda.mean('sample')).where(mask) x_ssr = np.sqrt((x_deviation**2).sum('sample')) for s in shifty.values: y_deviation = x_deviation.shift({dim: s}) numerator = (x_deviation * y_deviation).sum('sample') y_ssr = np.sqrt((y_deviation**2).sum('sample')) denominator = x_ssr * y_ssr xds[corrfld].loc[{'shifty': s}] = numerator / denominator if mask_in_betweens: for dim in dimlist: corrfld = f'corr_{dim.lower()}' for s in shifty.values: if s < 0: bigger_than = shifty < s else: bigger_than = shifty > s imnan = np.isnan(xds[corrfld].sel(shifty=s)) xds[corrfld] = xr.where(bigger_than * imnan, np.nan, xds[corrfld]) return xds
def latitude_temperature_index( tas: xarray.DataArray, lat: xarray.DataArray, lat_factor: float = 75, freq: str = "YS", ) -> xarray.DataArray: """Latitude-Temperature Index. Mean temperature of the warmest month with a latitude-based scaling factor. Used for categorizing winegrowing regions. Parameters ---------- tas: xarray.DataArray Mean daily temperature. lat: xarray.DataArray Latitude coordinate. lat_factor: float Latitude factor. Maximum poleward latitude. Default: 75. freq : str Resampling frequency. Returns ------- xarray.DataArray, [unitless] Latitude Temperature Index. Notes ----- The latitude factor of `75` is provided for examining the poleward expansion of winegrowing climates under scenarios of climate change. For comparing 20th century/observed historical records, the original scale factor of `60` is more appropriate. Let :math:`Tn_{j}` be the average temperature for a given month :math:`j`, :math:`lat_{f}` be the latitude factor, and :math:`lat` be the latitude of the area of interest. Then the Latitude-Temperature Index (:math:`LTI`) is: .. math:: LTI = max(TN_{j}: j = 1..12)(lat_f - |lat|) References ---------- Indice originally published in Jackson, D. I., & Cherry, N. J. (1988). Prediction of a District’s Grape-Ripening Capacity Using a Latitude-Temperature Index (LTI). American Journal of Enology and Viticulture, 39(1), 19‑28. Modified latitude factor from Kenny, G. J., & Shao, J. (1992). An assessment of a latitude-temperature index for predicting climate suitability for grapes in Europe. Journal of Horticultural Science, 67(2), 239‑246. https://doi.org/10.1080/00221589.1992.11516243 """ tas = convert_units_to(tas, "degC") tas = tas.resample(time="MS").mean(dim="time") mtwm = tas.resample(time=freq).max(dim="time") lat_mask = (abs(lat) >= 0) & (abs(lat) <= lat_factor) lat_coeff = xarray.where(lat_mask, lat_factor - abs(lat), 0) lti = mtwm * lat_coeff lti.attrs["units"] = "" return lti
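# A small sketch of just the latitude scaling term used above (not the full
# index, which needs unit-aware temperature handling via xclim): poleward of
# lat_factor the coefficient drops to zero.
import xarray as xr

lat = xr.DataArray([0.0, 45.0, 60.0, 80.0], dims='lat')
lat_factor = 75
lat_coeff = xr.where((abs(lat) >= 0) & (abs(lat) <= lat_factor),
                     lat_factor - abs(lat), 0)
print(lat_coeff.values)  # [75. 30. 15.  0.]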
def test_poles_datum(self): import xarray as xr h5f = h5py.File(FILENAME_DATA, 'r') orig_lon = to_da(h5f['lon_1km']) lon1 = orig_lon + 180 lon1 = xr.where(lon1 > 180, lon1 - 360, lon1) lat1 = to_da(h5f['lat_1km']) satz1 = to_da(h5f['satz_1km']) lat5 = lat1[2::5, 2::5] lon5 = lon1[2::5, 2::5] satz5 = satz1[2::5, 2::5] lons, lats = modis_5km_to_1km(lon5, lat5, satz5) lons = lons + 180 lons = xr.where(lons > 180, lons - 360, lons) self.assertTrue(np.allclose(orig_lon, lons, atol=1e-2)) self.assertTrue(np.allclose(lat1, lats, atol=1e-2))
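# The shift-and-wrap pattern used in the test above, shown on its own
# (illustrative values): add 180 degrees and wrap the result back into the
# [-180, 180] range with xr.where.
import xarray as xr

lon = xr.DataArray([-170.0, -10.0, 0.0, 90.0, 179.0], dims='x')
shifted = lon + 180
shifted = xr.where(shifted > 180, shifted - 360, shifted)
print(shifted.values)  # [ 10. 170. 180. -90.  -1.]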
def contour_plot(data, threshold=None, contourLevels=None, colbar=True,
                 logscale=False, aspectration='equal', units=None):
    """ contourf adjusted for the xarray PIV dataset, creates a
    contour map for the data['w'] property.
    Input:
        data : xarray PIV DataArray, converted automatically using .isel(t=0)
        threshold : a threshold value, default is None (no data clipping)
        contourLevels : number of contour levels, default is None
        colbar : boolean (default is True) show/hide colorbar
        logscale : boolean (default is False) create in linear/log scale
        aspectration : string, 'equal' is the default
    """
    data = dataset_to_array(data)

    if units is not None:
        lUnits = units[0]  # ['m' 'm' 'mm/s' 'mm/s']
        # velUnits = units[2]
        # tUnits = velUnits.split('/')[1] # make it 's' or 'dt'
    else:
        # lUnits, velUnits = '', ''
        lUnits = ''

    f, ax = plt.subplots()

    if threshold is not None:
        data['w'] = xr.where(data['w'] > threshold, threshold, data['w'])

    m = np.amax(abs(data['w']))
    if contourLevels is None:
        levels = np.linspace(-m, m, 30)
    else:
        levels = np.linspace(-contourLevels, contourLevels, 30)

    if logscale:
        # plt.colors does not exist; LogNorm lives in matplotlib.colors
        from matplotlib.colors import LogNorm
        c = ax.contourf(data.x, data.y, np.abs(data['w']), levels=levels,
                        cmap=plt.get_cmap('RdYlBu'), norm=LogNorm())
    else:
        c = ax.contourf(data.x, data.y, data['w'], levels=levels,
                        cmap=plt.get_cmap('RdYlBu'))

    plt.xlabel('x [' + lUnits + ']')
    plt.ylabel('y [' + lUnits + ']')
    if colbar:
        cbar = plt.colorbar(c)
        cbar.set_label(r'$\omega$ [s$^{-1}$]')
    ax.set_aspect(aspectration)

    return f, ax
def test_where(): cond = xr.DataArray([True, False], dims='x') actual = xr.where(cond, 1, 0) expected = xr.DataArray([1, 0], dims='x') assert_identical(expected, actual)
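# A follow-up check in the same style (an assumption, not part of the original
# test suite; it relies on the assert_identical helper already imported by the
# surrounding test module): xr.where also accepts a DataArray condition with a
# DataArray branch and a scalar branch.
def test_where_scalar_branch():
    data = xr.DataArray([1.0, 2.0, 3.0], dims='x')
    actual = xr.where(data > 1.5, data, 0.0)
    expected = xr.DataArray([0.0, 2.0, 3.0], dims='x')
    assert_identical(expected, actual)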
def daily_insolation(lat, day, orb=const.orb_present, S0=const.S0, day_type=1): """Compute daily average insolation given latitude, time of year and orbital parameters. Orbital parameters can be interpolated to any time in the last 5 Myears with ``climlab.solar.orbital.OrbitalTable`` (see example above). Longer orbital tables are available with ``climlab.solar.orbital.LongOrbitalTable`` Inputs can be scalar, ``numpy.ndarray``, or ``xarray.DataArray``. The return value will be ``numpy.ndarray`` if **all** the inputs are ``numpy``. Otherwise ``xarray.DataArray``. **Function-call argument** \n :param array lat: Latitude in degrees (-90 to 90). :param array day: Indicator of time of year. See argument ``day_type`` for details about format. :param dict orb: a dictionary with three members (as provided by ``climlab.solar.orbital.OrbitalTable``) * ``'ecc'`` - eccentricity * unit: dimensionless * default value: ``0.017236`` * ``'long_peri'`` - longitude of perihelion (precession angle) * unit: degrees * default value: ``281.37`` * ``'obliquity'`` - obliquity angle * unit: degrees * default value: ``23.446`` :param float S0: solar constant \n - unit: :math:`\\textrm{W}/\\textrm{m}^2` \n - default value: ``1365.2`` :param int day_type: Convention for specifying time of year (+/- 1,2) [optional]. *day_type=1* (default): day input is calendar day (1-365.24), where day 1 is January first. The calendar is referenced to the vernal equinox which always occurs at day 80. *day_type=2:* day input is solar longitude (0-360 degrees). Solar longitude is the angle of the Earth's orbit measured from spring equinox (21 March). Note that calendar days and solar longitude are not linearly related because, by Kepler's Second Law, Earth's angular velocity varies according to its distance from the sun. :raises: :exc:`ValueError` if day_type is neither 1 nor 2 :returns: Daily average solar radiation in unit :math:`\\textrm{W}/\\textrm{m}^2`. Dimensions of output are ``(lat.size, day.size, ecc.size)`` :rtype: array Code is fully vectorized to handle array input for all arguments. \n Orbital arguments should all have the same sizes. This is automatic if computed from :func:`~climlab.solar.orbital.OrbitalTable.lookup_parameters` For more information about computation of solar insolation see the :ref:`Tutorial` chapter. """ # Inputs can be scalar, numpy vector, or xarray.DataArray. # If numpy, convert to xarray so that it will broadcast correctly lat_is_xarray = True day_is_xarray = True if type(lat) is np.ndarray: lat_is_xarray = False lat = xr.DataArray(lat, coords=[lat], dims=['lat']) if type(day) is np.ndarray: day_is_xarray = False day = xr.DataArray(day, coords=[day], dims=['day']) ecc = orb['ecc'] long_peri = orb['long_peri'] obliquity = orb['obliquity'] # Convert precession angle and latitude to radians phi = deg2rad( lat ) # lambda_long (solar longitude) is the angular distance along Earth's orbit measured from spring equinox (21 March) if day_type==1: # calendar days lambda_long = solar_longitude(day,orb) elif day_type==2: #solar longitude (1-360) is specified in input, no need to convert days to longitude lambda_long = deg2rad(day) else: raise ValueError('Invalid day_type.') # Compute declination angle of the sun delta = arcsin(sin(deg2rad(obliquity)) * sin(lambda_long)) # suppress warning message generated by arccos here! 
oldsettings = np.seterr(invalid='ignore') # Compute Ho, the hour angle at sunrise / sunset # Check for no sunrise or no sunset: Berger 1978 eqn (8),(9) Ho = xr.where( abs(delta)-pi/2+abs(phi) < 0., # there is sunset/sunrise arccos(-tan(phi)*tan(delta)), # otherwise figure out if it's all night or all day xr.where(phi*delta>0., pi, 0.) ) # this is not really the daily average cosine of the zenith angle... # it's the integral from sunrise to sunset of that quantity... coszen = Ho*sin(phi)*sin(delta) + cos(phi)*cos(delta)*sin(Ho) # Compute insolation: Berger 1978 eq (10) Fsw = S0/pi*( (1+ecc*cos(lambda_long -deg2rad(long_peri)))**2 / (1-ecc**2)**2 * coszen) if not (lat_is_xarray or day_is_xarray): # Dimensional ordering consistent with previous numpy code return Fsw.transpose().values else: return Fsw
def quiver(data, arrScale=25.0, threshold=None, nthArr=1,
           contourLevels=None, colbar=True, logscale=False,
           aspectratio='equal', colbar_orient='vertical', units=None):
    """
    Generates a quiver plot of a 'data' xarray DataArray object
    (single frame from a dataset)
    Inputs:
        data - xarray DataArray of the type defined in pivpy, one of the
               frames in the Dataset selected by default using .isel(t=0)
        threshold - values above the threshold will be set equal to threshold
        arrScale - use to change arrow scales
        nthArr - use to plot only every nth arrow from the array
        contourLevels - use to specify the maximum value (abs) of contour plots
        colbar - True/False whether to generate a colorbar or not
        logscale - if True then the colorbar is on a log scale
        aspectratio - set 'auto' or 'equal' for the plot's appearance
        colbar_orient - 'horizontal' or 'vertical' orientation of the
                        colorbar (if colbar is True)
    Outputs:
        none
    Usage:
        graphics.quiver(data, arrScale=0.2, threshold=None, nthArr=1)
    """
    data = dataset_to_array(data)

    x = data.x
    y = data.y
    u = data.u
    v = data.v

    if units is not None:
        lUnits = units[0]  # ['m' 'm' 'mm/s' 'mm/s']
        velUnits = units[2]
        tUnits = velUnits.split('/')[1]  # make it 's' or 'dt'
    else:
        lUnits, velUnits, tUnits = '', '', ''

    if threshold is not None:
        data['u'] = xr.where(data['u'] > threshold, threshold, data['u'])
        data['v'] = xr.where(data['v'] > threshold, threshold, data['v'])

    S = np.array(np.sqrt(u**2 + v**2))

    fig = plt.get_fignums()
    if len(fig) == 0:  # if no figure is open
        fig, ax = plt.subplots()  # open a new figure
    else:
        ax = plt.gca()

    if contourLevels is None:
        levels = np.linspace(0, np.max(S.flatten()), 30)  # default contour levels up to max of S
    else:
        levels = np.linspace(0, contourLevels, 30)

    if logscale:
        # plt.colors does not exist; LogNorm lives in matplotlib.colors
        from matplotlib.colors import LogNorm
        c = ax.contourf(x, y, S, alpha=0.8, cmap=plt.get_cmap("Blues"),
                        levels=levels, norm=LogNorm())
    else:
        c = ax.contourf(x, y, S, alpha=0.8, cmap=plt.get_cmap("Blues"),
                        levels=levels)

    if colbar:
        cbar = plt.colorbar(c, orientation=colbar_orient)
        cbar.set_label(r'$\left| \, V \, \right|$ [' + lUnits +
                       r' $\cdot$ ' + tUnits + r'$^{-1}$]')

    ax.quiver(x[::nthArr], y[::nthArr],
              u[::nthArr, ::nthArr], v[::nthArr, ::nthArr],
              units='width', scale=np.max(S * arrScale), headwidth=2)
    ax.set_xlabel('x (' + lUnits + ')')
    ax.set_ylabel('y (' + lUnits + ')')
    ax.set_aspect(aspectratio)

    return fig, ax
def read_dataset(self, dataset_key, info): h5f = self.h5f channel = chans_dict[dataset_key.name] chan_dict = dict([(key.split("-")[1], key) for key in h5f["All_Data"].keys() if key.startswith("VIIRS")]) h5rads = h5f["All_Data"][chan_dict[channel]]["Radiance"] chunks = h5rads.chunks or CHUNK_SIZE rads = xr.DataArray(da.from_array(h5rads, chunks=chunks), name=dataset_key.name, dims=['y', 'x']).astype(np.float32) h5attrs = h5rads.attrs # scans = h5f["All_Data"]["NumberOfScans"][0] # if channel in ("M9", ): # arr = rads[:scans * 16, :].astype(np.float32) # arr[arr > 65526] = np.nan # arr = np.ma.masked_array(arr, mask=arr_mask) # else: # arr = np.ma.masked_greater(rads[:scans * 16, :].astype(np.float32), # 65526) rads = rads.where(rads <= 65526) try: rads = xr.where(rads <= h5attrs['Threshold'], rads * h5attrs['RadianceScaleLow'] + h5attrs['RadianceOffsetLow'], rads * h5attrs['RadianceScaleHigh'] + h5attrs['RadianceOffsetHigh']) except (KeyError, AttributeError): logger.info("Missing attribute for scaling of %s.", channel) pass unit = "W m-2 sr-1 μm-1" if dataset_key.calibration == 'counts': raise NotImplementedError("Can't get counts from this data") if dataset_key.calibration in ['reflectance', 'brightness_temperature']: # do calibrate try: # First guess: VIS or NIR data a_vis = h5attrs['EquivalentWidth'] b_vis = h5attrs['IntegratedSolarIrradiance'] dse = h5attrs['EarthSunDistanceNormalised'] rads *= 100 * np.pi * a_vis / b_vis * (dse**2) unit = "%" except KeyError: # Maybe it's IR data? try: a_ir = h5attrs['BandCorrectionCoefficientA'] b_ir = h5attrs['BandCorrectionCoefficientB'] lambda_c = h5attrs['CentralWaveLength'] rads *= 1e6 rads = (h * c) / (k * lambda_c * xu.log(1 + (2 * h * c ** 2) / ((lambda_c ** 5) * rads))) rads *= a_ir rads += b_ir unit = "K" except KeyError: logger.warning("Calibration failed.") elif dataset_key.calibration != 'radiance': raise ValueError("Calibration parameter should be radiance, " "reflectance or brightness_temperature") rads = rads.clip(min=0) rads.attrs = self.mda rads.attrs['units'] = unit return rads