def absolute_momentum(u_wind, v_wind, index='index'):
    r"""Calculate cross-sectional absolute momentum (also called pseudoangular momentum).

    As given in [Schultz1999]_, absolute momentum (also called pseudoangular momentum) is
    given by

    .. math:: M = v + fx

    where :math:`v` is the along-front component of the wind and :math:`x` is the cross-front
    distance. Applied to a cross-section taken perpendicular to the front, :math:`v` becomes
    the normal component of the wind and :math:`x` the tangential distance.

    If using this calculation in assessing symmetric instability, geostrophic wind should be
    used so that geostrophic absolute momentum :math:`\left(M_g\right)` is obtained, as
    described in [Schultz1999]_.

    Parameters
    ----------
    u_wind : `xarray.DataArray`
        The input DataArray of the x-component (in terms of data projection) of the wind.
    v_wind : `xarray.DataArray`
        The input DataArray of the y-component (in terms of data projection) of the wind.

    Returns
    -------
    absolute_momentum : `xarray.DataArray`
        The absolute momentum

    Notes
    -----
    The coordinates of `u_wind` and `v_wind` must match.

    """
    # Get the normal component of the wind
    norm_wind = normal_component(u_wind, v_wind, index=index)
    norm_wind.metpy.convert_units('m/s')

    # Get other pieces of calculation (all as ndarrays matching shape of norm_wind)
    latitude = latitude_from_cross_section(norm_wind)  # in degrees_north
    _, latitude = xr.broadcast(norm_wind, latitude)
    f = coriolis_parameter(np.deg2rad(latitude.values)).magnitude  # in 1/s
    x, y = distances_from_cross_section(norm_wind)
    x.metpy.convert_units('meters')
    y.metpy.convert_units('meters')
    _, x, y = xr.broadcast(norm_wind, x, y)
    distance = np.hypot(x, y).values  # in meters

    m = norm_wind + f * distance
    m.attrs = {'units': norm_wind.attrs['units']}
    return m
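A minimal, self-contained sketch of the broadcasting pattern used above, with made-up data and plain xarray/numpy in place of the MetPy helpers: a 1-D latitude profile is broadcast against the 2-D cross-section wind so the Coriolis parameter and the along-section distance line up element-wise before forming M = v + f*x.

import numpy as np
import xarray as xr

# Hypothetical cross-section wind: dims (isobaric, index), in m/s
wind = xr.DataArray(np.full((3, 4), 10.0), dims=('isobaric', 'index'),
                    coords={'isobaric': [850., 700., 500.], 'index': np.arange(4)})
# Hypothetical latitude along the cross section, degrees north
latitude = xr.DataArray([30., 35., 40., 45.], dims='index',
                        coords={'index': np.arange(4)})

_, lat2d = xr.broadcast(wind, latitude)            # latitude now (isobaric, index)
f = 2 * 7.2921e-5 * np.sin(np.deg2rad(lat2d))      # Coriolis parameter, 1/s
distance = xr.DataArray(np.linspace(0., 4e5, 4), dims='index')  # metres along section
m = wind + f * distance                            # absolute momentum, m/s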
def _get_dates_for_extremes(extr_vals: xarray.DataArray,
                            current_data_chunk: xarray.DataArray,
                            extr_dates: xarray.DataArray = None):
    """
    Helper method to determine the times when the extreme values occur.

    :param extr_vals: extreme values found so far
    :param current_data_chunk: data chunk currently being scanned
    :param extr_dates: previously determined dates of the extremes, if any
    """
    t3d, _ = xarray.broadcast(current_data_chunk.t, current_data_chunk)

    if extr_dates is None:
        result_dates = t3d[0, :, :].copy()
    else:
        result_dates = extr_dates

    tis, xis, yis = np.where(extr_vals == current_data_chunk)
    npvals = t3d.values

    # Equivalent (slower) loop form:
    # for ti, xi, yi in zip(tis, xis, yis):
    #     result_dates[xi, yi] = npvals[ti, xi, yi]
    result_dates.values[xis, yis] = npvals[tis, xis, yis]

    return result_dates
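The same broadcast-then-index idea in isolation, with made-up data: replicate the 1-D time coordinate over the full array shape, then pick out the time step at which each column attains its maximum.

import numpy as np
import xarray as xr

data = xr.DataArray(np.random.rand(5, 3, 4), dims=('t', 'x', 'y'),
                    coords={'t': np.arange(5)})
extr_vals = data.max(dim='t')                      # (x, y) maxima
t3d, _ = xr.broadcast(data.t, data)                # time replicated to (t, x, y)
tis, xis, yis = np.where((data == extr_vals).values)
times_of_max = t3d.values[tis, xis, yis]           # time index of each maximum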
def data_for_reg_calcs(values_for_reg_arr):
    lat = [-10., 1., 10., 20.]
    lon = [1., 10.]
    sfc_area = [0.5, 1., 0.5, 0.25]
    land_mask = [1., 1., 0., 1.]

    lat = xr.DataArray(lat, dims=[LAT_STR], coords=[lat])
    lon = xr.DataArray(lon, dims=[LON_STR], coords=[lon])
    sfc_area = xr.DataArray(sfc_area, dims=[LAT_STR], coords=[lat])
    land_mask = xr.DataArray(land_mask, dims=[LAT_STR], coords=[lat])

    sfc_area, _ = xr.broadcast(sfc_area, lon)
    land_mask, _ = xr.broadcast(land_mask, lon)

    da = xr.DataArray(values_for_reg_arr, coords=[lat, lon])
    da.coords[SFC_AREA_STR] = sfc_area
    da.coords[LAND_MASK_STR] = land_mask
    return da
def test_laplacian_xarray_lonlat(test_da_lonlat):
    """Test laplacian with an xarray.DataArray on a lonlat grid."""
    laplac = laplacian(test_da_lonlat, axes=('lat', 'lon'))

    # Build the xarray of the desired values
    partial = xr.DataArray(
        np.array([1.67155420e-14, 1.67155420e-14, 1.74268211e-14, 1.74268211e-14]),
        coords=(('lat', test_da_lonlat['lat']),)
    )
    _, truth = xr.broadcast(test_da_lonlat, partial)
    truth.coords['crs'] = test_da_lonlat['crs']
    truth.attrs['units'] = 'kelvin / meter^2'

    xr.testing.assert_allclose(laplac, truth)
    assert laplac.metpy.units == truth.metpy.units
def test_second_derivative_xarray_lonlat(test_da_lonlat):
    """Test second derivative with an xarray.DataArray on a lonlat grid."""
    deriv = second_derivative(test_da_lonlat, axis='lat')

    # Build the xarray of the desired values
    partial = xr.DataArray(
        np.array([1.67155420e-14, 1.67155420e-14, 1.74268211e-14, 1.74268211e-14]),
        coords=(('lat', test_da_lonlat['lat']),)
    )
    _, truth = xr.broadcast(test_da_lonlat, partial)
    truth.coords['crs'] = test_da_lonlat['crs']
    truth.attrs['units'] = 'kelvin / meter^2'

    xr.testing.assert_allclose(deriv, truth)
    assert deriv.metpy.units == truth.metpy.units
def test_first_derivative_xarray_lonlat(test_da_lonlat):
    """Test first derivative with an xarray.DataArray on a lonlat grid in each axis usage."""
    deriv = first_derivative(test_da_lonlat, axis='lon')  # dimension coordinate name
    deriv_alt1 = first_derivative(test_da_lonlat, axis='x')  # axis type
    deriv_alt2 = first_derivative(test_da_lonlat, axis=-1)  # axis number

    # Build the xarray of the desired values
    partial = xr.DataArray(
        np.array([-3.30782978e-06, -3.42816074e-06, -3.57012948e-06, -3.73759364e-06]),
        coords=(('lat', test_da_lonlat['lat']),)
    )
    _, truth = xr.broadcast(test_da_lonlat, partial)
    truth.coords['crs'] = test_da_lonlat['crs']
    truth.attrs['units'] = 'kelvin / meter'

    # Assert result matches expectation
    xr.testing.assert_allclose(deriv, truth)
    assert deriv.metpy.units == truth.metpy.units

    # Assert alternative specifications give same result
    xr.testing.assert_identical(deriv_alt1, deriv)
    xr.testing.assert_identical(deriv_alt2, deriv)
def test_gradient_xarray(test_da_xy):
    """Test the 3D gradient calculation with a 4D DataArray in each axis usage."""
    deriv_x, deriv_y, deriv_p = gradient(test_da_xy, axes=('x', 'y', 'isobaric'))
    deriv_x_alt1, deriv_y_alt1, deriv_p_alt1 = gradient(test_da_xy,
                                                        axes=('x', 'y', 'vertical'))
    deriv_x_alt2, deriv_y_alt2, deriv_p_alt2 = gradient(test_da_xy, axes=(3, 2, 1))

    truth_x = xr.full_like(test_da_xy, -6.993007e-07)
    truth_x.attrs['units'] = 'kelvin / meter'

    truth_y = xr.full_like(test_da_xy, -2.797203e-06)
    truth_y.attrs['units'] = 'kelvin / meter'

    partial = xr.DataArray(
        np.array([0.04129204, 0.03330003, 0.02264402]),
        coords=(('isobaric', test_da_xy['isobaric']),)
    )
    _, truth_p = xr.broadcast(test_da_xy, partial)
    truth_p.coords['crs'] = test_da_xy['crs']
    truth_p.attrs['units'] = 'kelvin / hectopascal'

    # Assert results match expectations
    xr.testing.assert_allclose(deriv_x, truth_x)
    assert deriv_x.metpy.units == truth_x.metpy.units
    xr.testing.assert_allclose(deriv_y, truth_y)
    assert deriv_y.metpy.units == truth_y.metpy.units
    xr.testing.assert_allclose(deriv_p, truth_p)
    assert deriv_p.metpy.units == truth_p.metpy.units

    # Assert alternative specifications give same results
    xr.testing.assert_identical(deriv_x_alt1, deriv_x)
    xr.testing.assert_identical(deriv_y_alt1, deriv_y)
    xr.testing.assert_identical(deriv_p_alt1, deriv_p)
    xr.testing.assert_identical(deriv_x_alt2, deriv_x)
    xr.testing.assert_identical(deriv_y_alt2, deriv_y)
    xr.testing.assert_identical(deriv_p_alt2, deriv_p)
from glob import glob

import numpy as np
import pandas as pd
import xarray as xr


def noisePower(h, p0):
    return p0 * h**2


filesW = glob(
    '/data/data_hatpro/jue/data/joyrad94/l0/201511/2*/joyrad94_joyce_2015112*.nc'
)

for fw in filesW:
    print(fw)
    ncfile = xr.open_dataset(fw, drop_variables='velocity')

    Ze = ncfile['Ze']
    Ze = Ze.where(Ze != -999.0)
    Zlin = 10.0**(0.1 * Ze)

    # Broadcast the 1-D time and range coordinates to the 2-D shape of Ze
    t, r = xr.broadcast(Ze.time, Ze.range)

    df = pd.DataFrame()
    df['Hgt'] = r.data.flatten()
    df['Ze'] = Ze.data.flatten()

    spec = ncfile.spec
    spec = spec.where(spec != -999.0)
    speclin = 10.0**(0.1 * spec)
    Zg = speclin.sum(dim='velocity', skipna=True)

    N = 10.0 * np.log10(Zg - Zlin).data.flatten()
    N[~np.isfinite(N)] = np.nan
    df['N'] = N

    df.dropna(inplace=True, subset=['Ze'])
    df.to_hdf('joyrad94snr.h5', key='stat', mode='a', append=True)

print('done')

df = pd.read_hdf('joyrad94snr.h5', key='stat')
def rle(da: xr.DataArray, dim: str = "time", max_chunk: int = 1_000_000) -> xr.DataArray:
    """Generate basic run length function.

    Parameters
    ----------
    da : xr.DataArray
    dim : str
    max_chunk : int

    Returns
    -------
    xr.DataArray
        Values are 0 where da is False (out of runs), are N on the first day of a run,
        where N is the length of that run, and are NaN on the other days of the runs.
    """
    use_dask = isinstance(da.data, dsk.Array)
    n = len(da[dim])
    # Need to chunk here to ensure the broadcasting is not made in memory
    i = xr.DataArray(np.arange(da[dim].size), dims=dim)
    if use_dask:
        i = i.chunk({dim: -1})

    ind, da = xr.broadcast(i, da)
    if use_dask:
        # Rechunk, but with broadcasted da
        ind = ind.chunk(da.chunks)

    b = ind.where(~da)  # find indexes where false
    end1 = (
        da.where(b[dim] == b[dim][-1], drop=True) * 0 + n
    )  # add additional end value index (deal with end cases)
    start1 = (
        da.where(b[dim] == b[dim][0], drop=True) * 0 - 1
    )  # add additional start index (deal with end cases)
    b = xr.concat([start1, b, end1], dim)

    # Ensure bfill operates on entire (unchunked) time dimension
    # Determine appropriate chunk size for other dims - do not exceed
    # 'max_chunk' total size per chunk (default 1_000_000)
    ndims = len(b.shape)
    if use_dask:
        chunk_dim = b[dim].size
        # divide extra dims into equal size
        # Note: even if calculated chunksize > dim.size, result will have chunk == dim.size
        chunksize_ex_dims = None  # TODO: This raises type assignment errors in mypy
        if ndims > 1:
            chunksize_ex_dims = np.round(
                np.power(max_chunk / chunk_dim, 1 / (ndims - 1))
            )
        chunks = dict()
        chunks[dim] = -1
        for dd in b.dims:
            if dd != dim:
                chunks[dd] = chunksize_ex_dims
        b = b.chunk(chunks)

    # back fill nans with first position after
    z = b.bfill(dim=dim)
    # calculate lengths
    d = z.diff(dim=dim) - 1
    d = d.where(d >= 0)
    d = d.isel({dim: slice(None, -1)}).where(da, 0)
    return d
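A hypothetical call of the function above on a small in-memory boolean series (no dask chunking involved), assuming the function and its module imports (numpy, xarray, dask) are available; per the docstring, the result is 0 outside runs, the run length on a run's first step, and NaN on the remaining steps of a run.

import numpy as np
import xarray as xr

wet = xr.DataArray([False, True, True, True, False, True], dims="time",
                   coords={"time": np.arange(6)})
run_lengths = rle(wet)   # per the docstring, roughly: [0, 3, nan, nan, 0, 1]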
def r2(a, b, dim=None, weights=None, skipna=False, keep_attrs=False): """R^2 (coefficient of determination) score. We first take the total sum of squares of our known vector, a. .. math:: SS_{\\mathrm{tot}} = \\sum_{i=1}^{n} (a_{i} - \\bar{a})^{2} Next, we take the sum of squares of the error between our known vector a and the predicted vector, b. .. math:: SS_{\\mathrm{res}} = \\sum_{i=1}^{n} (a_{i} - b_{i})^{2} Lastly we compute the coefficient of determiniation using these two terms. .. math:: R^{2} = 1 - \\frac{SS_{\\mathrm{res}}}{SS_{\\mathrm{tot}}} .. note:: The coefficient of determination is *not* symmetric. In other words, ``r2(a, b) != r2(b, a)``. Be careful and note that by our convention, ``b`` is the modeled/predicted vector and ``a`` is the observed vector. Parameters ---------- a : xarray.Dataset or xarray.DataArray Labeled array(s) over which to apply the function. b : xarray.Dataset or xarray.DataArray Labeled array(s) over which to apply the function. dim : str, list The dimension(s) to apply the correlation along. Note that this dimension will be reduced as a result. Defaults to None reducing all dimensions. weights : xarray.Dataset or xarray.DataArray or None Weights matching dimensions of ``dim`` to apply during the function. skipna : bool If True, skip NaNs when computing function. keep_attrs : bool If True, the attributes (attrs) will be copied from the first input to the new one. If False (default), the new object will be returned without attributes. Returns ------- xarray.DataArray or xarray.Dataset R^2 (coefficient of determination) score. See Also -------- sklearn.metrics.r2_score References ---------- https://en.wikipedia.org/wiki/Coefficient_of_determination Examples -------- >>> import numpy as np >>> import xarray as xr >>> from xskillscore import r2 >>> a = xr.DataArray(np.random.rand(5, 3, 3), dims=['time', 'x', 'y']) >>> b = xr.DataArray(np.random.rand(5, 3, 3), dims=['time', 'x', 'y']) >>> r2(a, b, dim='time') """ _fail_if_dim_empty(dim) dim, _ = _preprocess_dims(dim, a) a, b = xr.broadcast(a, b, exclude=dim) a, b, new_dim, weights = _stack_input_if_needed(a, b, dim, weights) weights = _preprocess_weights(a, dim, new_dim, weights) input_core_dims = _determine_input_core_dims(new_dim, weights) return xr.apply_ufunc( _r2, a, b, weights, input_core_dims=input_core_dims, kwargs={ "axis": -1, "skipna": skipna }, dask="parallelized", output_dtypes=[float], keep_attrs=keep_attrs, )
def reproject_xy_to_wgs84( src_dataset: xr.Dataset, src_xy_var_names: Tuple[str, str], src_xy_tp_var_names: Tuple[str, str] = None, src_xy_crs: str = None, src_xy_gcp_step: Union[int, Tuple[int, int]] = 10, src_xy_tp_gcp_step: Union[int, Tuple[int, int]] = 1, dst_size: Tuple[int, int] = None, dst_region: CoordRange = None, dst_resampling: Union[str, Dict[str, str]] = DEFAULT_RESAMPLING, include_xy_vars: bool = False, include_non_spatial_vars: bool = False) -> xr.Dataset: """ Reprojection of xarray datasets with 2D geo-coding, e.g. with variables lon(y,x), lat(y, x) to EPSG:4326 (WGS-84) coordinate reference system. If *dst_resampling* is a string, it provides the default resampling for all variables. If *dst_resampling* is a dictionary, it provides a mapping from variable names to the desired resampling for that variable. The resampling may be one of the following up-sampling algorithms: * ``Nearest`` * ``Bilinear`` * ``Cubic`` * ``CubicSpline`` * ``Lanczos`` Or one of the down-sampling algorithms: * ``Average`` * ``Min`` * ``Max`` * ``Median`` * ``Mode`` * ``Q1`` * ``Q3`` :param src_dataset: :param src_xy_var_names: :param src_xy_tp_var_names: :param src_xy_crs: :param src_xy_gcp_step: :param src_xy_tp_gcp_step: :param dst_size: :param dst_region: :param dst_resampling: The spatial resampling algorithm. Either a string that provides the default resampling algorithm name or a dictionary that maps variable names to per-variable resampling algorithm names. :param include_non_spatial_vars: :param include_xy_vars: Whether to include the variables given by *src_xy_var_names*. Useful for projection-validation. :return: the reprojected dataset """ x_name, y_name = src_xy_var_names tp_x_name, tp_y_name = src_xy_tp_var_names or (None, None) # Set defaults src_xy_crs = src_xy_crs or CRS_WKT_EPSG_4326 gcp_i_step, gcp_j_step = (src_xy_gcp_step, src_xy_gcp_step) if isinstance(src_xy_gcp_step, int) \ else src_xy_gcp_step tp_gcp_i_step, tp_gcp_j_step = (src_xy_tp_gcp_step, src_xy_tp_gcp_step) if src_xy_tp_gcp_step is None or isinstance( src_xy_tp_gcp_step, int) \ else src_xy_tp_gcp_step dst_width, dst_height = dst_size _assert(src_dataset is not None) _assert(dst_width > 1) _assert(dst_height > 1) _assert(gcp_i_step > 0) _assert(gcp_j_step > 0) _assert(x_name in src_dataset) _assert(y_name in src_dataset) x_var = src_dataset[x_name] y_var = src_dataset[y_name] if len(x_var.dims) == 1 and len(y_var.dims) == 1: y_var, x_var = xr.broadcast(y_var, x_var) _assert(len(x_var.dims) == 2) _assert(y_var.dims == x_var.dims) _assert(x_var.shape[-1] >= 2) _assert(x_var.shape[-2] >= 2) _assert(y_var.shape == x_var.shape) src_width = x_var.shape[-1] src_height = x_var.shape[-2] dst_region = _ensure_valid_region(dst_region, GLOBAL_GEO_EXTENT, x_var, y_var) dst_x1, dst_y1, dst_x2, dst_y2 = dst_region dst_res = max((dst_x2 - dst_x1) / dst_width, (dst_y2 - dst_y1) / dst_height) _assert(dst_res > 0) dst_geo_transform = (dst_x1, dst_res, 0.0, dst_y2, 0.0, -dst_res) # Extract GCPs from full-res lon/lat 2D variables gcps = _get_gcps(x_var, y_var, gcp_i_step, gcp_j_step) if tp_x_name and tp_y_name and tp_x_name in src_dataset and tp_y_name in src_dataset: # If there are tie-point variables in the src_dataset tp_x_var = src_dataset[tp_x_name] tp_y_var = src_dataset[tp_y_name] _assert(len(tp_x_var.shape) == 2) _assert(tp_x_var.shape == tp_y_var.shape) tp_width = tp_x_var.shape[-1] tp_height = tp_x_var.shape[-2] _assert(tp_gcp_i_step is not None and tp_gcp_i_step > 0) _assert(tp_gcp_j_step is not None and tp_gcp_j_step > 0) # 
Extract GCPs also from tie-point lon/lat 2D variables tp_gcps = _get_gcps(tp_x_var, tp_y_var, tp_gcp_i_step, tp_gcp_j_step) else: # No tie-point variables tp_x_var = None tp_width = None tp_height = None tp_gcps = None mem_driver = gdal.GetDriverByName("MEM") dst_x2 = dst_x1 + dst_res * dst_width dst_y1 = dst_y2 - dst_res * dst_height dst_dataset = _new_dst_dataset(dst_width, dst_height, dst_res, dst_x1, dst_y1, dst_x2, dst_y2) if dst_resampling is None: dst_resampling = {} if isinstance(dst_resampling, str): dst_resampling = { var_name: dst_resampling for var_name in src_dataset.variables } for var_name in src_dataset.variables: src_var = src_dataset[var_name] if src_var.dims == x_var.dims: is_tp_var = False if var_name == x_name or var_name == y_name: if not include_xy_vars: # Don't store lat and lon 2D vars in destination continue dst_var_name = 'src_' + var_name else: dst_var_name = var_name # PERF: collect variables of same type and size and set band_count accordingly to speed up reprojection band_count = 1 data_type = numpy_to_gdal_dtype(src_var.dtype) src_var_dataset = mem_driver.Create(f'src_{var_name}', src_width, src_height, band_count, data_type, []) src_var_dataset.SetGCPs(gcps, src_xy_crs) elif tp_x_var is not None and src_var.dims == tp_x_var.dims: is_tp_var = True if var_name == tp_x_name or var_name == tp_y_name: if not include_xy_vars: # Don't store lat and lon 2D vars in destination continue dst_var_name = 'src_' + var_name else: dst_var_name = var_name # PERF: collect variables of same type and size and set band_count accordingly to speed up reprojection band_count = 1 data_type = numpy_to_gdal_dtype(src_var.dtype) src_var_dataset = mem_driver.Create(f'src_{var_name}', tp_width, tp_height, band_count, data_type, []) src_var_dataset.SetGCPs(tp_gcps, src_xy_crs) elif include_non_spatial_vars: # Store any variable as-is, that does not have the lat/lon 2D dims, then continue dst_dataset[var_name] = src_var continue else: continue # We use GDT_Float64 to introduce NaN as no-data-value dst_data_type = gdal.GDT_Float64 dst_var_dataset = mem_driver.Create(f'dst_{var_name}', dst_width, dst_height, band_count, dst_data_type, []) dst_var_dataset.SetProjection(CRS_WKT_EPSG_4326) dst_var_dataset.SetGeoTransform(dst_geo_transform) # TODO (forman): PERFORMANCE: stack multiple variables of same src_data_type # to perform the reprojection only once per stack # TODO (forman): CODE-DUPLICATION: refactor out common code block in reproject_crs_to_wgs84() for band_index in range(1, band_count + 1): src_var_dataset.GetRasterBand(band_index).SetNoDataValue( float('nan')) src_var_dataset.GetRasterBand(band_index).WriteArray( src_var.values) dst_var_dataset.GetRasterBand(band_index).SetNoDataValue( float('nan')) resample_alg, resample_alg_name = _get_resample_alg( dst_resampling, var_name, default=DEFAULT_TP_RESAMPLING if is_tp_var else DEFAULT_RESAMPLING) warp_mem_limit = 0 error_threshold = 0 # See http://www.gdal.org/structGDALWarpOptions.html options = ['INIT_DEST=NO_DATA'] gdal.ReprojectImage( src_var_dataset, dst_var_dataset, None, None, resample_alg, warp_mem_limit, error_threshold, None, # callback, None, # callback_data, options) # options dst_values = dst_var_dataset.GetRasterBand(1).ReadAsArray() # print(var_name, dst_values.shape, np.nanmin(dst_values), np.nanmax(dst_values)) dst_dataset[dst_var_name] = _new_dst_variable(src_var, dst_values, resample_alg_name) return dst_dataset
def vertical_averaging_weights(self, time_slice=slice(None), ztop=None, zbottom=None, dz=None, face_slice=slice(None)): """ reimplementation of sunreader.Sunreader::averaging_weights returns: weights as array [faces,Nk] to average over a cell-centered quantity for the range specified by ztop,zbottom, and dz. range is specified by 2 of the 3 of ztop, zbottom, dz, all non-negative. ztop: dimensional distance from freesurface, zbottom: dimensional distance from bed dz: thickness if the result would be an empty region, return nans. cell_select: an object which can be used to index into the cell dimension defaults to all cells. this thing is slow! - lots of time in adjusting all_dz order of dimensions has been altered to match local suntans netcdf code, i.e. face,level,time """ mesh = self.nc[self.mesh_name] face_dim = self.face_dim if self.face_eta_vname is None: self.face_eta_vname = self.find_var( standard_name='sea_surface_height_above_geoid') assert self.face_eta_vname is not None, "Failed to discern eta variable" surface = self.face_eta_vname face_select = {face_dim: face_slice} h = self.nc[self.face_eta_vname].isel({ self.time_dim: time_slice, face_dim: face_slice }) if self.face_depth_vname is None: self.face_depth_vname = self.find_var( standard_name=[ "sea_floor_depth_below_geoid", "sea_floor_depth" ], location='face') # ala 'Mesh_depth' if self.face_depth_vname is None: # c'mon people -- should be fixed in source now, self.face_depth_vname = self.find_var( stanford_name=[ "sea_floor_depth_below_geoid", "sea_floor_depth" ], location='face') # ala 'Mesh_depth' depth = self.face_depth_vname assert depth is not None, "Failed to find depth variable" bed = self.nc[depth].isel(**face_select) if self.nc[depth].attrs.get('positive') == 'down': log.debug("Cell depth is positive-down") bed = -bed else: log.debug( "Cell depth is positive-up, or at least that is the assumption" ) h, bed = xr.broadcast(h, bed) # for now, can only handle an array of cells - i.e. if you want # a single face, it's still going to process an array, just with # length 1. Ncells = len(bed) layers = self.nc[self.layer_var_name()] layer_vals = layers.values if layers.attrs.get('positive') == 'down': layer_vals = -layer_vals if 'bounds' in layers.attrs: layer_bounds = self.nc[layers.attrs['bounds']].values # hmm - some discrepancies over the dimensionality of layer_interfaces # assumption is probably that the dimensions are [layer,{top,bottom}] if layer_bounds.ndim == 2 and layer_bounds.shape[1] == 2: # same layer interfaces for all cells, all time. layer_interfaces = np.concatenate( (layer_bounds[:, 0], layer_bounds[-1:, 1])) if layers.attrs.get('positive') == 'down': layer_interfaces = -layer_interfaces else: raise Exception( "Not smart enough about layer_bounds to do this") else: dz_single = 0 - bed.values.min( ) # assumes typ eta of 0. only matters for 2D layer_interfaces = utils.center_to_edge(layer_vals, dx_single=dz_single) layer_bounds = np.concatenate( (layer_interfaces[:-1, None], layer_interfaces[1:, None]), axis=1) # used to retain layer_interfaces for the top of the top and the # bottom of the bottom. But that just makes for more cleanup # so now clip this to be interfaces between two layers. layer_interfaces = layer_interfaces[1:-1] # Calls to searchsorted below may need to negate both arguments # if increasing k maps to decreasing elevation. 
if np.all(np.diff(layer_interfaces) < 0): k_sign = -1 elif np.all(np.diff(layer_interfaces) > 0): k_sign = 1 else: raise Exception("Confused about the ordering of k") # this is a bit trickier, because there could be lumping. for now, it should work okay # with 2-d, but won't be good for 3-d HERE if k is increasing up, this is WRONG # this used to be called Nk, but that's misleading. it's the k index # of the bed layer, not the number of layers per water column. kbed = np.searchsorted(k_sign * layer_interfaces, k_sign * bed) one_dz = k_sign * (layer_bounds[:, 1] - layer_bounds[:, 0]) all_dz = np.ones(h.shape + one_dz.shape) * one_dz all_k = np.ones(h.shape + one_dz.shape, np.int32) * np.arange( len(one_dz)) # adjust bed and # 3 choices here.. # try to clip to reasonable values at the same time: if ztop is not None: if ztop != 0: h = h - ztop # don't modify h # don't allow h to go below the bed h[h < bed] = bed[h < bed] if dz is not None: # don't allow bed to be below the real bed. bed = np.maximum(h - dz, bed) if zbottom is not None: # no clipping checks for zbottom yet. if zbottom != 0: bed = bed + zbottom # don't modify bed! if dz is not None: h = bed + dz # so now h and bed are elevations bounding the integration region # with this min call it's only correct for k_sign==-1 ctops = np.searchsorted( k_sign * (layer_interfaces + self.surface_dzmin), k_sign * h) # default h_to_ctop will use the dzmin appropriate for the surface, # but at the bed, it goes the other way - safest just to say dzmin=0, # and also clamp to known Nk cbeds = np.searchsorted(k_sign * layer_interfaces, k_sign * bed) # dimension problems here - Nk has dimensions like face_slice or face_slice,time_slice # cbeds has dimensions like face_slice,time_slice # how to conditionally add dimensions to Nk? # for now, ASSUME that time is after face, and use shape of h to # figure out how to pad it while h.ndim > kbed.ndim: kbed = kbed[..., None] # also have to expand Nk so that the boolean indexing works # use to make cbeds exclusive indexing, but its cleaner to leave # ctops and cbeds both as inclusive, since it changes based on # k_sign if k_sign == -1: # keep cbed valid w.r.t. to deepest layer kbed, cbeds = np.minimum(cbeds, kbed) drymask = (all_k < ctops[..., None]) | (all_k > cbeds[..., None]) else: cbeds = np.maximum(cbeds, kbed) # maybe redundant now drymask = (all_k < cbeds[..., None]) | (all_k > ctops[..., None]) all_dz[drymask] = 0.0 ii = tuple(np.indices(h.shape)) z = layer_bounds.min(axis=1) # bottom of each cell all_dz[ii + (ctops[ii], )] = h - z[ctops] all_dz[ii + (cbeds[ii], )] -= bed - z[cbeds] # make those weighted averages # have to add extra axis to get broadcasting correct all_dz = all_dz / np.sum(all_dz, axis=-1)[..., None] if all_dz.ndim == 3: # we have both time and level # transpose to match the shape of velocity data - all_dz = all_dz.transpose([0, 2, 1]) return all_dz
def median_absolute_error(a, b, dim=None, skipna=False, keep_attrs=False):
    """Median Absolute Error.

    .. math:: \\mathrm{median}(\\vert a - b \\vert)

    Parameters
    ----------
    a : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
    b : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
    dim : str, list
        The dimension(s) to apply the median absolute error along.
        Note that this dimension will be reduced as a result.
        Defaults to None reducing all dimensions.
    skipna : bool
        If True, skip NaNs when computing function.
    keep_attrs : bool
        If True, the attributes (attrs) will be copied from the first input to
        the new one. If False (default), the new object will be returned
        without attributes.

    Returns
    -------
    xarray.Dataset or xarray.DataArray
        Median Absolute Error.

    See Also
    --------
    sklearn.metrics.median_absolute_error

    Examples
    --------
    >>> import numpy as np
    >>> import xarray as xr
    >>> from xskillscore import median_absolute_error
    >>> a = xr.DataArray(np.random.rand(5, 3, 3), dims=['time', 'x', 'y'])
    >>> b = xr.DataArray(np.random.rand(5, 3, 3), dims=['time', 'x', 'y'])
    >>> median_absolute_error(a, b, dim='time')
    """
    dim, axis = _preprocess_dims(dim, a)
    a, b = xr.broadcast(a, b, exclude=dim)
    return xr.apply_ufunc(
        _median_absolute_error,
        a,
        b,
        input_core_dims=[dim, dim],
        kwargs={"axis": axis, "skipna": skipna},
        dask="parallelized",
        output_dtypes=[float],
        keep_attrs=keep_attrs,
    )
dx = median_dx

if dz is None:
    all_dz = [np.abs(get_z_dz(tran).values.ravel()) for tran in trans]
    all_dz = np.concatenate(all_dz)
    # generally want to retain most of the vertical
    # resolution, but not minimum dz since there could be
    # some partial layers, near-field layers, etc.
    # even 10th percentile may be small.
    dz = np.percentile(all_dz, 10)

# Get the maximum range of valid vertical
z_bnds = []
for tran in trans:
    V, z_full, z_dz = xr.broadcast(tran.Ve, tran.z_ctr, get_z_dz(tran))
    valid = np.isfinite(V.values)
    z_valid = z_full.values[valid]
    z_low = z_full.values[valid] - z_dz.values[valid] / 2.0
    z_high = z_full.values[valid] + z_dz.values[valid] / 2.0
    z_bnds.append([z_low.min(), z_high.max()])

z_bnds = np.concatenate(z_bnds)
z_min = z_bnds.min()
z_max = z_bnds.max()

# Resample each transect in the vertical:
new_z = np.linspace(z_min, z_max, int(round((z_max - z_min) / dz)))

##
start = (37.0, -105.0)
end = (35.5, -65.0)

##############################
# Get the cross section, and convert lat/lon to supplementary coordinates:

cross = cross_section(data, start, end)
cross = cross.set_coords(('lat', 'lon'))
print(cross)

##############################
# For this example, we will be plotting potential temperature, relative humidity, and
# tangential/normal winds. And so, we need to calculate those, and add them to the dataset:

temperature, pressure, specific_humidity = xr.broadcast(cross['Temperature'],
                                                        cross['isobaric'],
                                                        cross['Specific_humidity'])

theta = mpcalc.potential_temperature(pressure, temperature)
rh = mpcalc.relative_humidity_from_specific_humidity(specific_humidity, temperature, pressure)

# These calculations return unit arrays, so put those back into DataArrays in our Dataset
cross['Potential_temperature'] = xr.DataArray(theta,
                                              coords=temperature.coords,
                                              dims=temperature.dims,
                                              attrs={'units': theta.units})
cross['Relative_humidity'] = xr.DataArray(rh,
                                          coords=specific_humidity.coords,
                                          dims=specific_humidity.dims,
                                          attrs={'units': rh.units})
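To illustrate why the broadcast above is needed (with made-up shapes rather than the actual model data): 'isobaric' is a 1-D pressure coordinate, while the cross-sectioned temperature is 2-D over (isobaric, index); broadcasting expands pressure to that 2-D shape so the thermodynamic functions operate point by point.

import numpy as np
import xarray as xr

temperature = xr.DataArray(np.full((3, 5), 280.0), dims=('isobaric', 'index'),
                           coords={'isobaric': [850., 700., 500.]})
pressure_2d, _ = xr.broadcast(temperature['isobaric'], temperature)
print(pressure_2d.shape)   # (3, 5): one pressure value per cross-section point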
def _apply_metric_at_given_lead(
    verif,
    verif_dates,
    lead,
    hind=None,
    hist=None,
    inits=None,
    reference=None,
    metric=None,
    comparison=None,
    dim=None,
    **metric_kwargs,
):
    """Applies a metric between two time series at a given lead.

    .. note::
        This will be moved to a method of the `Scoring()` class in the next PR.

    Args:
        verif (xr object): Verification data.
        verif_dates (dict): Lead-dependent verification dates for alignment.
        lead (int): Given lead to score.
        hind (xr object): Initialized hindcast. Not required in a persistence forecast.
        hist (xr object): Historical simulation. Required when ``reference='historical'``.
        inits (dict): Lead-dependent initialization dates for alignment.
        reference (str): If not ``None``, return score for this reference forecast.
            * 'persistence'
            * 'historical'
        metric (Metric): Metric class for scoring.
        comparison (Comparison): Comparison class.
        dim (str): Dimension to apply metric over.

    Returns:
        result (xr object): Metric results for the given lead for the initialized
            forecast or reference forecast.
    """
    if reference is None:
        # Use `.where()` instead of `.sel()` to account for resampled inits when
        # bootstrapping.
        a = (
            hind.sel(lead=lead)
            .where(hind['time'].isin(inits[lead]), drop=True)
            .drop_vars('lead')
        )
        b = verif.sel(time=verif_dates[lead])
    elif reference == 'persistence':
        a, b = persistence(verif, inits, verif_dates, lead)
    elif reference == 'historical':
        a, b = historical(hist, verif, verif_dates, lead)
    a['time'] = b['time']

    # broadcast dims when deterministic metric and apply over member
    if (a.dims != b.dims) and (dim == 'member') and not metric.probabilistic:
        a, b = xr.broadcast(a, b)
    result = metric.function(
        a,
        b,
        dim=dim,
        comparison=comparison,
        **metric_kwargs,
    )
    log_compute_hindcast_inits_and_verifs(dim, lead, inits, verif_dates)
    return result
def lateral_fill(da_in, isvalid_mask, ltripole=False, tol=1.0e-4, use_sor=False, rc=1.8, max_iter=1000): """Perform lateral fill on xarray.DataArray Parameters ---------- da_in : xarray.DataArray DataArray on which to fill NaNs. Fill is performed on the two rightmost dimenions. Grid is assumed periodic in `x` direction (last dimension). isvalid_mask : xarray.DataArray, boolean Valid values mask: `True` where data should be filled. Must have the same rightmost dimenions as `da_in`. ltripole : boolean, optional [default=False] Logical flag; if `True` then treat the top row of the grid as periodic in the sense of a tripole grid. tol : float, optional [default=1.0e-4] Convergence criteria: stop filling when values change is less or equal to `tol * var`; i.e. `delta <= tol * np.abs(var[j, i])`. use_sor: boolean, optional [default=False] switch to select SOR fill algorithm over progressive fill algorithm rc : float, optional [default=1.8, valid bounds=(1.0,2.0)] over-relaxation coefficient to use in SOR fill algorithm. Larger arrrays typically converge faster with larger coefficients. For 1 deg. grid (360x180) a coefficient in the range 1.85-1.9 is near optimal. max_iter : integer, optional, [default=1000] maximum number of iterations to do before giving up if tol is not reached. Returns ------- da_out : xarray.DataArray DataArray with NaNs filled by iterative smoothing. """ print("IN FOB version : lateral_fill") dims_in = da_in.dims non_lateral_dims = dims_in[:-2] attrs = da_in.attrs encoding = da_in.encoding coords = da_in.coords da_in, isvalid_mask = xr.broadcast(da_in, isvalid_mask) if len(non_lateral_dims) > 0: da_in_stack = da_in.stack(non_lateral_dims=non_lateral_dims) da_out_stack = xr.full_like(da_in_stack, fill_value=np.nan) isvalid_mask_stack = isvalid_mask.stack(non_lateral_dims=non_lateral_dims) for i in range(da_in_stack.shape[-1]): arr = da_in_stack.data[:, :, i] da_out_stack[:, :, i] = lateral_fill_np_array(arr, isvalid_mask_stack.data[:, :, i], ltripole,tol,use_sor,rc,max_iter) da_out = da_out_stack.unstack('non_lateral_dims').transpose(*dims_in) else: da_out = xr.full_like(da_in, fill_value=np.nan) da_out[:, :] = lateral_fill_np_array(da_in.data, isvalid_mask.data, ltripole,tol,use_sor,rc,max_iter) da_out.attrs = attrs da_out.encoding = encoding for k, da in coords.items(): da_out[k].attrs = da.attrs return da_out
def compute_perfect_model( init_pm, control, metric='pearson_r', comparison='m2e', dim=None, add_attrs=True, **metric_kwargs, ): """ Compute a predictability skill score for a perfect-model framework simulation dataset. Args: init_pm (xarray object): ensemble with dims ``lead``, ``init``, ``member``. control (xarray object): control with dimension ``time``. metric (str): `metric` name, see :py:func:`climpred.utils.get_metric_class` and (see :ref:`Metrics`). comparison (str): `comparison` name defines what to take as forecast and verification (see :py:func:`climpred.utils.get_comparison_class` and :ref:`Comparisons`). dim (str or list): dimension to apply metric over. default: ['member', 'init'] add_attrs (bool): write climpred compute args to attrs. default: True ** metric_kwargs (dict): additional keywords to be passed to metric. (see the arguments required for a given metric in metrics.py) Returns: skill (xarray object): skill score with dimensions as input `ds` without `dim`. """ # Check that init is int, cftime, or datetime; convert ints or cftime to datetime. init_pm = convert_time_index(init_pm, 'init', 'init_pm[init]', calendar=PM_CALENDAR_STR) # check args compatible with each other metric, comparison, dim = _get_metric_comparison_dim(metric, comparison, dim, kind='PM') forecast, verif = comparison.function(init_pm, metric=metric) # in case you want to compute deterministic skill over member dim if (forecast.dims != verif.dims) and not metric.probabilistic: forecast, verif = xr.broadcast(forecast, verif) skill = metric.function(forecast, verif, dim=dim, comparison=comparison, **metric_kwargs) if comparison.name == 'm2m': skill = skill.mean(M2M_MEMBER_DIM) # Attach climpred compute information to skill if add_attrs: skill = assign_attrs( skill, init_pm, function_name=inspect.stack()[0][3], metric=metric, comparison=comparison, dim=dim, metadata_dict=metric_kwargs, ) return skill
def truncate_dataarray(dataarray, quantile_dims, replace_with_mean=False,
                       mean_dims=None, weights=None, quantiles=None,
                       extra_dim=None):
    r"""Truncates the dataarray over the given dimensions, meaning that data
    outside the upper and lower quantiles, which are taken across the
    dimensions ``quantile_dims``, are replaced either with:

    1. the upper and lower quantiles themselves.
    2. or with the mean of the in-lier data, which is taken across the
       dimensions given by ``mean_dims``.

    **Note**: If weights are given, then weighted-quantiles and weighted-means
    are taken, otherwise the quantiles and means are unweighted.

    Args:
        dataarray (xarray.DataArray):
            dataarray that has at least the dimensions given by
            ``quantile_dims``, and if ``replace_with_mean`` is True, then also
            ``mean_dims``.
        replace_with_mean (bool, optional):
            If True, then replace values outside of the upper and lower
            quantiles with the mean across the dimensions given by
            ``mean_dims``; if False, then replace with the upper and lower
            bounds themselves.
        mean_dims (list[str], optional):
            dimensions to take the mean within the bounds over
        quantile_dims (list[str]):
            dimensions to take quantiles over -- the quantiles are used to
            make the bounds.
        weights (xarray.DataArray, optional):
            Must have one dimension and can have up to two dimensions.
        quantiles (tuple[float, float] | list[float, float], optional):
            The tuple of two floats representing the quantiles to take.
        extra_dim (str):
            Extra dimension that exists in ``weights`` and ``data``. It should
            not be in ``stat_dims``.

    Returns:
        (xarray.DataArray): Same shape as the original array, but with
            truncated values.

    Raises:
        (ValueError): If ``replace_with_mean`` is True, and ``mean_dims`` is
            not a list of strings.
    """
    LOGGER.debug("Entering the `truncate_dataarray` function")
    LOGGER.debug("quantile_dims:{}".format(quantile_dims))
    LOGGER.debug("replace_with_mean:{}".format(replace_with_mean))
    LOGGER.debug("mean_dims:{}".format(mean_dims))
    LOGGER.debug("weights:{}".format(weights))
    LOGGER.debug("quantiles:{}".format(quantiles))
    LOGGER.debug("extra_dim:{}".format(extra_dim))

    if replace_with_mean and not mean_dims:
        mean_dims_err_msg = (
            "If `replace_with_mean` is True, then `mean_dims` "
            "must be a list of strings")
        LOGGER.error(mean_dims_err_msg)
        raise ValueError(mean_dims_err_msg)
    else:
        pass  # `mean_dims` can be None

    quantiles = (Quantiles(*sorted(quantiles))
                 if quantiles else Quantiles(0.05, 0.95))

    if weights is not None:
        quantile_values = weighted_quantile_with_extra_dim(
            dataarray, quantiles, list(quantile_dims), weights, extra_dim)
    else:
        quantile_values = dataarray.quantile(quantiles,
                                             dim=list(quantile_dims))

    lower_da = quantile_values.sel(quantile=quantiles.lower)
    upper_da = quantile_values.sel(quantile=quantiles.upper)

    if replace_with_mean:
        good_indexes = (dataarray >= lower_da) & (dataarray <= upper_da)
        inside_da = dataarray.where(good_indexes)
        outside_da = dataarray.where(~good_indexes)
        if weights is not None:
            inside_mean_da = weighted_mean_with_extra_dim(
                inside_da, mean_dims, weights, extra_dim)
        else:
            inside_mean_da = inside_da.mean(mean_dims)
        truncated_da = (inside_da.combine_first(
            xr.ones_like(outside_da) * inside_mean_da))
    else:
        expanded_lower_da, _ = xr.broadcast(lower_da, dataarray)
        expanded_lower_da = expanded_lower_da.transpose(*dataarray.coords.dims)
        expanded_upper_da, _ = xr.broadcast(upper_da, dataarray)
        expanded_upper_da = expanded_upper_da.transpose(*dataarray.coords.dims)
        truncated_da = dataarray.clip(min=expanded_lower_da,
                                      max=expanded_upper_da)

    LOGGER.debug("Leaving the `truncate_dataarray` function")
    return truncated_da
) * 100

# assign back to the dataframe
df_window.loc[sinds, 'PNI'] = y[f'{var_col}{rolling_window}'].values

# -----------------------------------------------------------------------------
## calculate Percent of Normal Index (PNI)
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
## Trying to test the broadcast functionality
# -----------------------------------------------------------------------------

xr.broadcast(c_window, mthly_climatology)

# ASSIGN DIMENSION to xarray object
min_yr = 2010
max_yr = 2015
n_yrs = max_yr - min_yr + 1

da = xr.DataArray(
    c_window['time.month'].values,
    coords=[('month', np.tile(np.arange(1, 13), n_yrs))]
)
c_window, _ = xr.broadcast(c_window, da)

# apply to each month (easier to )
mth = 1
c_window.sel(month=mth)
import read_sontek

## six.moves.reload_module(read_sontek)

rivr_fn = '040518_7_BTref/20180405125420r.rivr'

ds = read_sontek.surveyor_to_xr(rivr_fn, proj='EPSG:26910')

##

# Transect of speed
plt.figure(1).clf()
fig, ax = plt.subplots(num=1)

x, z, speed = xr.broadcast(ds.track_dist, -ds.location, ds.water_speed)
scat = ax.scatter(x, z, 40, speed, cmap='jet')
plt.colorbar(scat)

##

# Plan view, scatter and quiver
plt.figure(2).clf()
fig, ax = plt.subplots(num=2)

scat = ax.scatter(ds.x_utm, ds.y_utm, 40, ds.mean_water_speed, cmap='jet')

avg_east = ds.Ve.mean(dim='cell')
avg_north = ds.Vn.mean(dim='cell')
quiv = ax.quiver(ds.x_utm.values, ds.y_utm.values,
                 avg_east.values, avg_north.values)
plt.colorbar(scat, label='Speed m/s')
def sinusoidal(self): moreval = 1 step = 0.1 # Horizontal Dimensions X = xr.DataArray( np.arange(self.NX*moreval), dims = 'X') * step Xp1 = xr.DataArray( np.arange(self.NX*moreval+1)-0.5, dims = 'Xp1')* step Y = xr.DataArray( np.arange(self.NY*moreval), dims = 'Y') * step Yp1 = xr.DataArray( np.arange(self.NY*moreval+1)-0.5, dims = 'Yp1')* step # Vertical Dimensions Z = xr.DataArray(-np.arange(self.NZ*moreval)-0.5, dims = 'Z') * step Zp1 = xr.DataArray(-np.arange(self.NZ*moreval+1), dims = 'Zp1')* step Zu = xr.DataArray(-np.arange(self.NZ*moreval)-1, dims = 'Zu') * step Zl = xr.DataArray(-np.arange(self.NZ*moreval), dims = 'Zl') * step # Space Coordinates YC, XC = xr.broadcast(Y, X) YG, XG = xr.broadcast(Yp1, Xp1) YU, XU = xr.broadcast(Y , Xp1) YV, XV = xr.broadcast(Yp1, X) # Spacing drC = xr.full_like(Zp1, step) drF = xr.full_like(Z , step) dxC = xr.full_like(XU, step) dyC = xr.full_like(XV, step) dxF = xr.full_like(XC, step) dyF = xr.full_like(XC, step) dxG = xr.full_like(XV, step) dyG = xr.full_like(XU, step) dxV = xr.full_like(XG, step) dyU = xr.full_like(XG, step) # Areas rA = dxF * dyF rAw = dxC * dyG rAs = dxG * dyC rAz = dxV * dyU # HFac HFacC, _ = xr.broadcast(xr.full_like(Z, 1), xr.full_like(XC, 1)) HFacW, _ = xr.broadcast(xr.full_like(Z, 1), xr.full_like(XU, 1)) HFacS, _ = xr.broadcast(xr.full_like(Z, 1), xr.full_like(XV, 1)) # Sin C points sinZ, sinY, sinX = xr.broadcast(np.sin(Z), np.sin(Y), np.sin(X)) # Sin vel points sinUZ, sinUY , sinUX = xr.broadcast(np.sin(Z) , np.sin(Y) , np.sin(Xp1)) sinVZ, sinVY , sinVX = xr.broadcast(np.sin(Z) , np.sin(Yp1), np.sin(X)) sinWZ, sinWY , sinWX = xr.broadcast(np.sin(Zl), np.sin(Y) , np.sin(X)) return xr.Dataset({'X' : X, 'Xp1' : Xp1, 'Y' : Y, 'Yp1' : Yp1, 'Z' : Z, 'Zp1' : Zp1, 'Zu': Zu, 'Zl': Zl, 'YC' : YC, 'XC' : XC, 'YG' : YG, 'XG' : XG, 'YU' : YU, 'XU' : XU, 'YV' : YV, 'XV' : XV, 'drC' : drC, 'drF' : drF, 'dxC' : dxC, 'dyC' : dyC, 'dxF' : dxF, 'dyF' : dyF, 'dxG' : dxG, 'dyG' : dyG, 'dxV' : dxV, 'dyU' : dyU, 'rA' : rA, 'rAw' : rAw, 'rAs' : rAs, 'rAz' : rAz, 'HFacC' : HFacC, 'HFacW' : HFacW, 'HFacS' : HFacS, 'sinZ' : sinZ, 'sinY' : sinY, 'sinX' : sinX, 'sinUZ' : sinUZ, 'sinUY' : sinUY, 'sinUX' : sinUX, 'sinVZ' : sinVZ, 'sinVY' : sinVY, 'sinVX' : sinVX, 'sinWZ' : sinWZ, 'sinWY' : sinWY, 'sinWX' : sinWX})
def getProfileAllBroadcasted(self, variables=None, sel={}):
    if variables is None:
        return xr.broadcast(self.profile.sel(**sel))[0]
    else:
        return xr.broadcast(self.profile.sel(**sel)[variables])[0]
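xr.broadcast always returns a tuple, even for a single argument, which is why the method above indexes [0]. A small stand-alone illustration with a generic dataset in place of self.profile:

import numpy as np
import xarray as xr

ds = xr.Dataset({'a': ('x', np.arange(3)), 'b': ('y', np.arange(2))})
(ds_full,) = xr.broadcast(ds)   # every data variable now has dims ('x', 'y')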
# Cross section along 69.3 latitude and between 1 and 25 longitude
# Andenes = 16 deg longitude
start = (69.3, 1)
end = (69.3, 25)

cross_data = data[[
    'cloud_area_fraction_pl', 'air_temperature_pl', 'relative_humidity_pl'
]]
cross = cross_section(cross_data, start, end).set_coords(
    ('latitude', 'longitude'))

# Invert the pressure axis (doesn't work as intended)
# cross = cross.reindex(pressure=list(reversed(cross.pressure)))

temperature, clouds, relative_humidity = xr.broadcast(
    cross['air_temperature_pl'], cross['cloud_area_fraction_pl'],
    cross['relative_humidity_pl'])

# Plot the cross section
fig, axs = plt.subplots(nrows=3, ncols=3, sharey=True, sharex=True,
                        figsize=(14, 10))
ax = axs.ravel().tolist()

j = 0
# Define the figure object and primary axes
for i in [0, 6, 12, 18, 24, 30, 36, 42, 48]:
    # Plot RH using contourf
    rh_contour = ax[j].contourf(cross['longitude'],
def calc_com_incline_and_orientation_angle(da_mask, return_centerline_pts=False):
    """
    Calculate approximate shear angle of object (theta) and xy-orientation
    angle (phi) from the change of xy-position of the center-of-mass computed
    separately at every height
    """
    if np.any(da_mask.isnull()):
        m = ~da_mask.isnull()
    else:
        m = da_mask

    # need to center coordinates on "center of mass" (assuming constant density)
    if len(da_mask.x.shape) == 3:
        x_3d = da_mask.x
        y_3d = da_mask.y
        z_3d = da_mask.z
    else:
        x_3d, y_3d, z_3d = xr.broadcast(da_mask.x, da_mask.y, da_mask.z)

    # compute mean xy-position at every height z, this is the effective
    # centre-of-mass
    kws = dict(dtype='float64', dim=('x', 'y'))
    x_c = x_3d.where(m).mean(**kws)  # other=nan so that these get excluded from mean calculation
    y_c = y_3d.where(m).mean(**kws)

    try:
        dx = np.gradient(x_c)
        dy = np.gradient(y_c)
        dx_mean = np.nanmean(dx)
        dy_mean = np.nanmean(dy)
        dl_mean = np.sqrt(dx_mean**2. + dy_mean**2.)
        dz_mean = np.nanmean(np.gradient(x_c.z))

        theta = np.arctan2(dl_mean, dz_mean)
        phi = np.arctan2(dy_mean, dx_mean)
    except ValueError:
        phi = theta = np.nan

    phi = np.rad2deg(phi)
    theta = np.rad2deg(theta)

    if phi < 0:
        phi += 360.

    ds = xr.merge([
        xr.DataArray(phi, name='phi',
                     attrs=dict(long_name='xy-plane angle', units='deg')),
        xr.DataArray(theta, name='theta',
                     attrs=dict(long_name='z-axis slope angle', units='deg')),
    ])

    if return_centerline_pts:
        return ds, [x_c, y_c, da_mask.z]
    else:
        return ds
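A sketch of the coordinate broadcast used above, with hypothetical 1-D coordinates: three 1-D axes are expanded to the full 3-D shape of the mask so that the masked mean over (x, y) yields a centre-of-mass position per height.

import numpy as np
import xarray as xr

mask = xr.DataArray(np.ones((2, 3, 4), dtype=bool), dims=('x', 'y', 'z'),
                    coords={'x': np.arange(2), 'y': np.arange(3), 'z': np.arange(4)})
x_3d, y_3d, z_3d = xr.broadcast(mask.x, mask.y, mask.z)
# each of x_3d, y_3d, z_3d now has shape (2, 3, 4), matching the mask
x_c = x_3d.where(mask).mean(dim=('x', 'y'))   # centre-of-mass x-position per height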
    z = -tran_dss[0].depth_bt
else:
    # for untrim output:
    z = -(tran_dss[0].z_surf - tran_dss[0].z_bed)

zmax = 0.0
zmin = z.min() - 0.2
xmin = x_lat.min() - 3.0
xmax = x_lat.max() + 3.0

for repeat, ds in enumerate(tran_dss):
    ds_lateral = ds_to_linear(ds)

    Vlong = ds.Ve * along_unit[0] + ds.Vn * along_unit[1]
    Vlat = ds.Ve * across_unit[0] + ds.Vn * across_unit[1]

    X, Z = xr.broadcast(ds_lateral, ds.z_ctr)

    fig = plt.figure(4)
    fig.clf()
    fig.set_size_inches((10, 6), forward=True)
    fig, (ax_lon, ax_lat) = plt.subplots(2, 1, num=4, sharex=True, sharey=True)

    scat_lon = ax_lon.scatter(X, Z, 30, Vlong, cmap='jet',
def bootstrap_compute( hind, verif, hist=None, alignment="same_verifs", metric="pearson_r", comparison="m2e", dim="init", reference=None, resample_dim="member", sig=95, iterations=500, pers_sig=None, compute=compute_hindcast, resample_uninit=bootstrap_uninitialized_ensemble, **metric_kwargs, ): """Bootstrap compute with replacement. Args: hind (xr.Dataset): prediction ensemble. verif (xr.Dataset): Verification data. hist (xr.Dataset): historical/uninitialized simulation. metric (str): `metric`. Defaults to 'pearson_r'. comparison (str): `comparison`. Defaults to 'm2e'. dim (str or list): dimension(s) to apply metric over. default: 'init'. reference (str, list of str): Type of reference forecasts with which to verify. One or more of ['persistence', 'uninitialized']. If None or empty, returns no p value. resample_dim (str): dimension to resample from. default: 'member':: - 'member': select a different set of members from hind - 'init': select a different set of initializations from hind sig (int): Significance level for uninitialized and initialized skill. Defaults to 95. pers_sig (int): Significance level for persistence skill confidence levels. Defaults to sig. iterations (int): number of resampling iterations (bootstrap with replacement). Defaults to 500. compute (func): function to compute skill. Choose from [:py:func:`climpred.prediction.compute_perfect_model`, :py:func:`climpred.prediction.compute_hindcast`]. resample_uninit (func): function to create an uninitialized ensemble from a control simulation or uninitialized large ensemble. Choose from: [:py:func:`bootstrap_uninitialized_ensemble`, :py:func:`bootstrap_uninit_pm_ensemble_from_control`]. ** metric_kwargs (dict): additional keywords to be passed to metric (see the arguments required for a given metric in :ref:`Metrics`). Returns: results: (xr.Dataset): bootstrapped results for the three different skills: - `initialized` for the initialized hindcast `hind` and describes skill due to initialization and external forcing - `uninitialized` for the uninitialized/historical and approximates skill from external forcing - `persistence` for the persistence forecast computed by `compute_persistence` the different results: - `verify skill`: skill values - `p`: p value - `low_ci` and `high_ci`: high and low ends of confidence intervals based on significance threshold `sig` Reference: * Goddard, L., A. Kumar, A. Solomon, D. Smith, G. Boer, P. Gonzalez, V. Kharin, et al. “A Verification Framework for Interannual-to-Decadal Predictions Experiments.” Climate Dynamics 40, no. 1–2 (January 1, 2013): 245–72. https://doi.org/10/f4jjvf. 
See also: * climpred.bootstrap.bootstrap_hindcast * climpred.bootstrap.bootstrap_perfect_model """ warn_if_chunking_would_increase_performance(hind, crit_size_in_MB=5) if pers_sig is None: pers_sig = sig if isinstance(dim, str): dim = [dim] if isinstance(reference, str): reference = [reference] if reference is None: reference = [] p = (100 - sig) / 100 ci_low = p / 2 ci_high = 1 - p / 2 p_pers = (100 - pers_sig) / 100 ci_low_pers = p_pers / 2 ci_high_pers = 1 - p_pers / 2 # get metric/comparison function name, not the alias metric = METRIC_ALIASES.get(metric, metric) comparison = COMPARISON_ALIASES.get(comparison, comparison) # get class Metric(metric) metric = get_metric_class(metric, ALL_METRICS) # get comparison function comparison = get_comparison_class(comparison, ALL_COMPARISONS) # Perfect Model requires `same_inits` setup isHindcast = True if comparison.name in HINDCAST_COMPARISONS else False reference_alignment = alignment if isHindcast else "same_inits" chunking_dims = [d for d in hind.dims if d not in CLIMPRED_DIMS] # carry alignment for compute_reference separately metric_kwargs_reference = metric_kwargs.copy() metric_kwargs_reference["alignment"] = reference_alignment # carry alignment in metric_kwargs if isHindcast: metric_kwargs["alignment"] = alignment if hist is None: # PM path, use verif = control hist = verif # slower path for hindcast and resample_dim init if resample_dim == "init" and isHindcast: warnings.warn("resample_dim=`init` will be slower than resample_dim=`member`.") ( bootstrapped_init_skill, bootstrapped_uninit_skill, bootstrapped_pers_skill, ) = _bootstrap_hindcast_over_init_dim( hind, hist, verif, dim, reference, resample_dim, iterations, metric, comparison, compute, resample_uninit, **metric_kwargs, ) else: # faster: first _resample_iterations_idx, then compute skill resample_func = _get_resample_func(hind) if not isHindcast: if "uninitialized" in reference: # create more members than needed in PM to make the uninitialized # distribution more robust members_to_sample_from = 50 repeat = members_to_sample_from // hind.member.size + 1 uninit_hind = xr.concat( [resample_uninit(hind, hist) for i in range(repeat)], dim="member", **CONCAT_KWARGS, ) uninit_hind["member"] = np.arange(1, 1 + uninit_hind.member.size) if dask.is_dask_collection(uninit_hind): # too minimize tasks: ensure uninit_hind get pre-computed # alternativly .chunk({'member':-1}) uninit_hind = uninit_hind.compute().chunk() # resample uninit always over member and select only hind.member.size bootstrapped_uninit = resample_func( uninit_hind, iterations, "member", replace=False, dim_max=hind["member"].size, ) bootstrapped_uninit["lead"] = hind["lead"] # effectively only when _resample_iteration_idx which doesnt use dim_max bootstrapped_uninit = bootstrapped_uninit.isel( member=slice(None, hind.member.size) ) bootstrapped_uninit["member"] = np.arange( 1, 1 + bootstrapped_uninit.member.size ) if dask.is_dask_collection(bootstrapped_uninit): bootstrapped_uninit = bootstrapped_uninit.chunk({"member": -1}) bootstrapped_uninit = _maybe_auto_chunk( bootstrapped_uninit, ["iteration"] + chunking_dims ) else: # hindcast if "uninitialized" in reference: uninit_hind = resample_uninit(hind, hist) if dask.is_dask_collection(uninit_hind): # too minimize tasks: ensure uninit_hind get pre-computed # maybe not needed uninit_hind = uninit_hind.compute().chunk() bootstrapped_uninit = resample_func( uninit_hind, iterations, resample_dim ) bootstrapped_uninit = bootstrapped_uninit.isel( member=slice(None, 
hind.member.size) ) bootstrapped_uninit["lead"] = hind["lead"] if dask.is_dask_collection(bootstrapped_uninit): bootstrapped_uninit = _maybe_auto_chunk( bootstrapped_uninit.chunk({"lead": 1}), ["iteration"] + chunking_dims, ) if "uninitialized" in reference: bootstrapped_uninit_skill = compute( bootstrapped_uninit, verif, metric=metric, comparison="m2o" if isHindcast else comparison, dim=dim, add_attrs=False, **metric_kwargs, ) # take mean if 'm2o' comparison forced before if isHindcast and comparison != __m2o: bootstrapped_uninit_skill = bootstrapped_uninit_skill.mean("member") bootstrapped_hind = resample_func(hind, iterations, resample_dim) if dask.is_dask_collection(bootstrapped_hind): bootstrapped_hind = bootstrapped_hind.chunk({"member": -1}) bootstrapped_init_skill = compute( bootstrapped_hind, verif, metric=metric, comparison=comparison, add_attrs=False, dim=dim, **metric_kwargs, ) if "persistence" in reference: pers_skill = compute_persistence( hind, verif, metric=metric, dim=dim, **metric_kwargs_reference, ) # bootstrap pers if resample_dim == "init": bootstrapped_pers_skill = compute_persistence( bootstrapped_hind, verif, metric=metric, **metric_kwargs_reference, ) else: # member no need to calculate all again bootstrapped_pers_skill, _ = xr.broadcast( pers_skill, bootstrapped_init_skill ) # calc mean skill without any resampling init_skill = compute( hind, verif, metric=metric, comparison=comparison, dim=dim, **metric_kwargs, ) if "uninitialized" in reference: # uninit skill as mean resampled uninit skill unin_skill = bootstrapped_uninit_skill.mean("iteration") # noqa: F841 if "persistence" in reference: pers_skill = compute_persistence( hind, verif, metric=metric, dim=dim, **metric_kwargs_reference ) if "climatology" in reference: clim_skill = compute_climatology( hind, verif, metric=metric, dim=dim, comparison=comparison, **metric_kwargs ) bootstrapped_clim_skill, _ = xr.broadcast(clim_skill, bootstrapped_init_skill) # get confidence intervals CI init_ci = _distribution_to_ci(bootstrapped_init_skill, ci_low, ci_high) if "uninitialized" in reference: unin_ci = _distribution_to_ci( # noqa: F841 bootstrapped_uninit_skill, ci_low, ci_high ) if "climatology" in reference: clim_ci = _distribution_to_ci( # noqa: F841 bootstrapped_clim_skill, ci_low, ci_high ) if "persistence" in reference: pers_ci = _distribution_to_ci( # noqa: F841 bootstrapped_pers_skill, ci_low_pers, ci_high_pers ) # pvalue whether uninit or pers better than init forecast if "uninitialized" in reference: p_unin_over_init = _pvalue_from_distributions( # noqa: F841 bootstrapped_uninit_skill, bootstrapped_init_skill, metric=metric ) if "climatology" in reference: p_clim_over_init = _pvalue_from_distributions( # noqa: F841 bootstrapped_clim_skill, bootstrapped_clim_skill, metric=metric ) if "persistence" in reference: p_pers_over_init = _pvalue_from_distributions( # noqa: F841 bootstrapped_pers_skill, bootstrapped_init_skill, metric=metric ) # gather return # p defined as probability that reference better than # initialized, therefore not defined for initialized skill # itself results = xr.concat( [ init_skill, init_skill.where(init_skill == -999), init_ci.isel(quantile=0, drop=True), init_ci.isel(quantile=1, drop=True), ], dim="results", coords="minimal", ).assign_coords( results=("results", ["verify skill", "p", "low_ci", "high_ci"]), skill="initialized", ) if reference != []: for r in reference: ref_skill = eval(f"{r[:4]}_skill") ref_p = eval(f"p_{r[:4]}_over_init") ref_ci_low = 
eval(f"{r[:4]}_ci").isel(quantile=0, drop=True) ref_ci_high = eval(f"{r[:4]}_ci").isel(quantile=1, drop=True) ref_results = xr.concat( [ref_skill, ref_p, ref_ci_low, ref_ci_high], dim="results", **CONCAT_KWARGS, ).assign_coords( skill=r, results=("results", ["verify skill", "p", "low_ci", "high_ci"]) ) if "member" in ref_results.dims: if not ref_results["member"].identical(results["member"]): ref_results["member"] = results[ "member" ] # fixes m2c different member names in reference forecasts results = xr.concat([results, ref_results], dim="skill", **CONCAT_KWARGS) results = results.assign_coords(skill=["initialized"] + reference).squeeze() else: results = results.drop_sel(results="p") results = results.squeeze() # Attach climpred compute information to skill # results.results metadata_dict = { "confidence_interval_levels": f"{ci_high}-{ci_low}", "bootstrap_iterations": iterations, } if reference is not None: metadata_dict[ "p" ] = "probability that reference performs better than initialized" metadata_dict.update(metric_kwargs) results = assign_attrs( results, hind, alignment=alignment, metric=metric, comparison=comparison, dim=dim, metadata_dict=metadata_dict, ) # Ensure that the lead units get carried along for the calculation. The attribute # tends to get dropped along the way due to ``xarray`` functionality. results["lead"] = hind["lead"] if "units" in hind["lead"].attrs and "units" not in results["lead"].attrs: results["lead"].attrs["units"] = hind["lead"].attrs["units"] return results
def smape(a, b, dim=None, weights=None, skipna=False, keep_attrs=False):
    """Symmetric Mean Absolute Percentage Error.

    .. math::
        \\mathrm{SMAPE} = \\frac{1}{n} \\sum_{i=1}^{n}
                          \\frac{\\vert a_{i} - b_{i} \\vert}
                               {\\vert a_{i} \\vert + \\vert b_{i} \\vert}

    .. note::
        Percent error is reported as decimal percent. I.e., a value of 1 is 100%.

    Parameters
    ----------
    a : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
        (Typically the truth or observations.)
    b : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
    dim : str, list
        The dimension(s) to apply the smape along. Note that this dimension will
        be reduced as a result. Defaults to None reducing all dimensions.
    weights : xarray.Dataset or xarray.DataArray or None
        Weights matching dimensions of ``dim`` to apply during the function.
    skipna : bool
        If True, skip NaNs when computing function.
    keep_attrs : bool
        If True, the attributes (attrs) will be copied from the first input to
        the new one. If False (default), the new object will be returned
        without attributes.

    Returns
    -------
    xarray.Dataset or xarray.DataArray
        Symmetric Mean Absolute Percentage Error.

    References
    ----------
    https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error

    Examples
    --------
    >>> import numpy as np
    >>> import xarray as xr
    >>> from xskillscore import smape
    >>> a = xr.DataArray(np.random.rand(5, 3, 3), dims=['time', 'x', 'y'])
    >>> b = xr.DataArray(np.random.rand(5, 3, 3), dims=['time', 'x', 'y'])
    >>> smape(a, b, dim='time')
    """
    dim, axis = _preprocess_dims(dim, a)
    a, b = xr.broadcast(a, b, exclude=dim)
    weights = _preprocess_weights(a, dim, dim, weights)
    input_core_dims = _determine_input_core_dims(dim, weights)

    return xr.apply_ufunc(
        _smape,
        a,
        b,
        weights,
        input_core_dims=input_core_dims,
        kwargs={"axis": axis, "skipna": skipna},
        dask="parallelized",
        output_dtypes=[float],
        keep_attrs=keep_attrs,
    )
def create_raster_polygons(ds, mask=None,subset_bbox=None, weights=None,weights_target='ds'): """ Create polygons for each pixel in a raster Keyword arguments: ds -- an xarray dataset with the variables 'lat_bnds' and 'lon_bnds', which are both lat/lon x 2 arrays giving the min and max values of lat and lon for each pixel given by lat/lon subset_bbox -- by default None; if a geopandas geodataframe is entered, the bounding box around the geometries in the gdf are used to mask the grid, to reduce the number of pixel polygons created mask -- ## THIS IS WHERE MASKS CAN BE ADDED - # I.E. AN OCEAN MASK. OR MAYBE EVEN ALLOW # SHAPEFILES TO BE ADDED AND CALCULATED # THE MASKED PIXELS ARE JUST IGNORED, AND NOT # ADDED. will make identifying them harder # in the first bit of aggregate, but could # make processing faster if you have a ton # of ocean pixels or something... Returns: a geopandas geodataframe containing a 'geometry' giving the pixel boundaries for each 'lat' / 'lon' pair Note: 'lat_bnds' and 'lon_bnds' can be created through the 'get_bnds' function if they are not already included in the input raster file. Note: Currently this code only supports regular rectangular grids (so where every pixel side is a straight line in lat/lon space). Future versions may include support for irregular grids. """ # Standardize inputs ds = fix_ds(ds) ds = get_bnds(ds) #breakpoint() # Subset by shapefile bounding box, if desired if subset_bbox is not None: if type(subset_bbox) is gpd.geodataframe.GeoDataFrame: # Using the biggest difference in lat/lon to make sure that the pixels are subset # in a way that the bounding box is fully filled out bbox_thresh = np.max([ds.lat.diff('lat').max(),ds.lon.diff('lon').max()])+0.1 ds = ds.sel(lon=slice(subset_bbox.total_bounds[0]-bbox_thresh,subset_bbox.total_bounds[2]+bbox_thresh), lat=slice(subset_bbox.total_bounds[1]-bbox_thresh,subset_bbox.total_bounds[3]+bbox_thresh)) else: warnings.warn('[subset_bbox] is not a geodataframe; no mask by polygon bounding box used.') # Process weights ds,winf = process_weights(ds,weights,target=weights_target) # Mask if mask is not None: warnings.warn('Masking by grid not yet supported. Stay tuned...') # Create dataset which has a lat/lon bound value for each individual pixel, # broadcasted out over each lat/lon pair (ds_bnds,) = (xr.broadcast(ds.isel({d:0 for d in [k for k in ds.dims.keys() if k not in ['lat','lon','bnds']]}). drop_vars([v for v in ds.keys() if v not in ['lat_bnds','lon_bnds']]))) # Stack so it's just pixels and bounds ds_bnds = ds_bnds.stack(loc=('lat','lon')) # In order: # (lon0,lat0),(lon0,lat1),(lon1,lat1),(lon1,lat1), but as a single array; to be # put in the right format for Polygon in the next step pix_poly_coords = np.transpose(np.vstack([ds_bnds.lon_bnds.isel(bnds=0).values,ds_bnds.lat_bnds.isel(bnds=0).values, ds_bnds.lon_bnds.isel(bnds=0).values,ds_bnds.lat_bnds.isel(bnds=1).values, ds_bnds.lon_bnds.isel(bnds=1).values,ds_bnds.lat_bnds.isel(bnds=1).values, ds_bnds.lon_bnds.isel(bnds=1).values,ds_bnds.lat_bnds.isel(bnds=0).values])) # Reshape so each location has a 4 x 2 (vertex vs coordinate) array, # and convert each of those vertices to tuples. 
This means every element # of pix_poly_coords is the input to shapely.geometry.Polygon of one pixel pix_poly_coords = tuple(map(tuple,np.reshape(pix_poly_coords,(np.shape(pix_poly_coords)[0],4,2)))) # Create empty geodataframe gdf_pixels = gpd.GeoDataFrame() gdf_pixels['lat'] = [None]*ds_bnds.dims['loc'] gdf_pixels['lon'] = [None]*ds_bnds.dims['loc'] gdf_pixels['geometry'] = [None]*ds_bnds.dims['loc'] if weights is not None: # Stack weights so they are linearly indexed like the ds (and fill # NAs with 0s) weights = ds.weights.stack(loc=('lat','lon')).fillna(0) # Preallocate weights column gdf_pixels['weights'] = [None]*ds_bnds.dims['loc'] # Now populate with a polygon for every pixel, and the lat/lon coordinates # of that pixel (Try if preallocating it with the right dimensions above # makes it faster, because it's pretty slow rn (NB: it doesn't really)) for loc_idx in np.arange(0,ds_bnds.dims['loc']): gdf_pixels.loc[loc_idx,'lat'] = ds_bnds.lat.isel(loc=loc_idx).values gdf_pixels.loc[loc_idx,'lon'] = ds_bnds.lon.isel(loc=loc_idx).values gdf_pixels.loc[loc_idx,'geometry'] = Polygon(pix_poly_coords[loc_idx]) if weights is not None: gdf_pixels.loc[loc_idx,'weights'] = weights.isel(loc=loc_idx).values # Add a "pixel idx" to make indexing better later gdf_pixels['pix_idx'] = gdf_pixels.index.values # Add crs (normal lat/lon onto WGS84) gdf_pixels = gdf_pixels.set_crs("EPSG:4326") # Save the source grid for further reference source_grid = {'lat':ds_bnds.lat,'lon':ds_bnds.lon} pix_agg = {'gdf_pixels':gdf_pixels,'source_grid':source_grid} # Return the created geodataframe return pix_agg
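# A hedged usage sketch (not part of the original): build the pixel polygons for a gridded
# dataset, optionally trimming to the bounding box of a (hypothetical) GeoDataFrame ``gdf``.
# Assumes ``ds`` follows the conventions above (1-D 'lat'/'lon' plus 'lat_bnds'/'lon_bnds',
# or bounds that ``get_bnds`` can create).
pix_agg = create_raster_polygons(ds, subset_bbox=gdf)
gdf_pixels = pix_agg['gdf_pixels']    # one Polygon per lat/lon pixel, with 'pix_idx'
source_grid = pix_agg['source_grid']  # the source grid's lat/lon, kept for reference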
def pearson_r_p_value(a, b, dim=None, weights=None, skipna=False, keep_attrs=False):
    """2-tailed p-value associated with Pearson's correlation coefficient.

    Parameters
    ----------
    a : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
    b : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
    dim : str, list
        The dimension(s) to apply the correlation along. Note that this dimension will
        be reduced as a result. Defaults to None reducing all dimensions.
    weights : xarray.Dataset or xarray.DataArray or None
        Weights matching dimensions of ``dim`` to apply during the function.
    skipna : bool
        If True, skip NaNs when computing function.
    keep_attrs : bool
        If True, the attributes (attrs) will be copied from the first input to
        the new one. If False (default), the new object will be returned
        without attributes.

    Returns
    -------
    xarray.Dataset or xarray.DataArray
        2-tailed p-value of Pearson's correlation coefficient.

    See Also
    --------
    scipy.stats.pearsonr

    Examples
    --------
    >>> import numpy as np
    >>> import xarray as xr
    >>> from xskillscore import pearson_r_p_value
    >>> a = xr.DataArray(np.random.rand(5, 3, 3), dims=['time', 'x', 'y'])
    >>> b = xr.DataArray(np.random.rand(5, 3, 3), dims=['time', 'x', 'y'])
    >>> pearson_r_p_value(a, b, dim='time')
    """
    _fail_if_dim_empty(dim)
    dim, _ = _preprocess_dims(dim, a)
    a, b = xr.broadcast(a, b, exclude=dim)
    a, b, new_dim, weights = _stack_input_if_needed(a, b, dim, weights)
    weights = _preprocess_weights(a, dim, new_dim, weights)
    input_core_dims = _determine_input_core_dims(new_dim, weights)

    return xr.apply_ufunc(
        _pearson_r_p_value,
        a,
        b,
        weights,
        input_core_dims=input_core_dims,
        kwargs={"axis": -1, "skipna": skipna},
        dask="parallelized",
        output_dtypes=[float],
        keep_attrs=keep_attrs,
    )
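# A hedged usage sketch (not part of the original): same setup as the docstring example,
# but additionally passing ``weights`` along the reduced 'time' dimension, which the
# signature above supports. Values are hypothetical.
import numpy as np
import xarray as xr
from xskillscore import pearson_r_p_value

a = xr.DataArray(np.random.rand(5, 3, 3), dims=['time', 'x', 'y'])
b = xr.DataArray(np.random.rand(5, 3, 3), dims=['time', 'x', 'y'])
w = xr.DataArray(np.linspace(0.5, 1.0, 5), dims=['time'])
p = pearson_r_p_value(a, b, dim='time', weights=w)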
def _plot_transect(self, remappedModelClimatology, remappedRefClimatology): # {{{ """ plotting the transect """ season = self.season config = self.config configSectionName = self.configSectionName mainRunName = config.get('runs', 'mainRunName') # broadcast x and z to have the same dimensions x, z = xr.broadcast(remappedModelClimatology.x, remappedModelClimatology.z) # set lat and lon in case we want to plot versus these quantities lat = remappedModelClimatology.lat lon = remappedModelClimatology.lon # convert x, z, lat, and lon to numpy arrays; make a copy because # they are sometimes read-only (not sure why) x = x.values.copy().transpose() z = z.values.copy().transpose() lat = lat.values.copy().transpose() lon = lon.values.copy().transpose() self.lat = lat self.lon = lon # z is masked out with NaNs in some locations (where there is land) but # this makes pcolormesh unhappy so we'll zero out those locations z[numpy.isnan(z)] = 0. modelOutput = nans_to_numpy_mask( remappedModelClimatology[self.mpasFieldName].values) modelOutput = modelOutput.transpose() if remappedRefClimatology is None: refOutput = None bias = None else: refOutput = remappedRefClimatology[self.refFieldName] dims = refOutput.dims refOutput = nans_to_numpy_mask(refOutput.values) if dims[1] != 'nPoints': assert (dims[0] == 'nPoints') refOutput = refOutput.transpose() bias = modelOutput - refOutput filePrefix = self.filePrefix outFileName = '{}/{}.png'.format(self.plotsDirectory, filePrefix) title = '{}\n({}, years {:04d}-{:04d})'.format(self.fieldNameInTitle, season, self.startYear, self.endYear) xLabel = 'Distance [km]' yLabel = 'Depth [m]' # define the axis labels and the data to use for the upper # x axis or axes, if such additional axes have been requested upperXAxes = config.get('transects', 'upperXAxes') numUpperTicks = config.getint('transects', 'numUpperTicks') upperXAxisTickLabelPrecision = config.getint( 'transects', 'upperXAxisTickLabelPrecision') self._set_third_x_axis_to_none() if upperXAxes == 'neither': self._set_second_x_axis_to_none() elif upperXAxes == 'lat': self._set_second_x_axis_to_latitude() elif upperXAxes == 'lon': self._set_second_x_axis_to_longitude() elif upperXAxes == 'both': self._set_second_x_axis_to_longitude() self._set_third_x_axis_to_latitude() elif upperXAxes == 'greatestExtent': if self._greatest_extent(lat, lon): self._set_second_x_axis_to_latitude() else: self._set_second_x_axis_to_longitude() elif upperXAxes == 'strictlyMonotonic': if self._strictly_monotonic(lat, lon): self._set_second_x_axis_to_latitude() else: self._set_second_x_axis_to_longitude() elif upperXAxes == 'mostMonotonic': if self._most_monotonic(lat, lon): self._set_second_x_axis_to_latitude() else: self._set_second_x_axis_to_longitude() elif upperXAxes == 'mostStepsInSameDirection': if self._most_steps_in_same_direction(lat, lon): self._set_second_x_axis_to_latitude() else: self._set_second_x_axis_to_longitude() elif upperXAxes == 'fewestDirectionChanges': if self._fewest_direction_changes(lat, lon): self._set_second_x_axis_to_latitude() else: self._set_second_x_axis_to_longitude() else: raise ValueError('invalid option for upperXAxes') # get the parameters determining what type of plot to use, # what line styles and line colors to use, and whether and how # to label contours compareAsContours = config.getboolean('transects', 'compareAsContoursOnSinglePlot') contourLineStyle = config.get('transects', 'contourLineStyle') contourLineColor = config.get('transects', 'contourLineColor') comparisonContourLineStyle = 
config.get('transects', 'comparisonContourLineStyle') comparisonContourLineColor = config.get('transects', 'comparisonContourLineColor') if compareAsContours: labelContours = config.getboolean( 'transects', 'labelContoursOnContourComparisonPlots') else: labelContours = config.getboolean('transects', 'labelContoursOnHeatmaps') contourLabelPrecision = config.getint('transects', 'contourLabelPrecision') # construct a three-panel comparison plot for the transect, or a # single-panel contour comparison plot if compareAsContours is True plot_vertical_section_comparison( config, x, z, modelOutput, refOutput, bias, outFileName, configSectionName, cbarLabel=self.unitsLabel, xlabel=xLabel, ylabel=yLabel, title=title, modelTitle='{}'.format(mainRunName), refTitle=self.refTitleLabel, diffTitle=self.diffTitleLabel, secondXAxisData=self.secondXAxisData, secondXAxisLabel=self.secondXAxisLabel, thirdXAxisData=self.thirdXAxisData, thirdXAxisLabel=self.thirdXAxisLabel, numUpperTicks=numUpperTicks, upperXAxisTickLabelPrecision=upperXAxisTickLabelPrecision, invertYAxis=False, backgroundColor='#918167', compareAsContours=compareAsContours, lineStyle=contourLineStyle, lineColor=contourLineColor, comparisonContourLineStyle=comparisonContourLineStyle, comparisonContourLineColor=comparisonContourLineColor, labelContours=labelContours, contourLabelPrecision=contourLabelPrecision) caption = '{} {}'.format(season, self.imageCaption) write_image_xml(config, filePrefix, componentName='Ocean', componentSubdirectory='ocean', galleryGroup=self.galleryGroup, groupSubtitle=self.groupSubtitle, groupLink=self.groupLink, gallery=self.galleryName, thumbnailDescription=self.thumbnailDescription, imageDescription=caption, imageCaption=caption)
def gen_ds():
    for val, d in ds.groupby(dim):
        del d[dim]  # delete grouped labels
        d[dim] = [val]
        d, = xr.broadcast(d)
        yield d
def apply_param_noise(ds, params, noise_types, shape=(0, ), noise_sds=[0], seed=0): """Apply noise to each timestep for each Monte Carlo draw of the outbreak simulations. Parameters ---------- ds : :class:`xarray.Dataset` A Dataset that has variables called ``[varname]_deterministic`` for each of the parameters in `params`. params : list of str The name of the params in this dataset (e.g. ``beta, gamma, sigma`` for SEIR and ``beta, gamma`` for SIR). noise_types : list of str Same length as `params`. Each element is one of ``["normal", "exponential", False]``. This defines the type of noise applied to each. False means no noise. shape : tuple of int, optional (n_samples, n_timesteps). Only needed if any `noise_types` are ``normal`` noise_sds : list of float Standard deviations to use for any parameters with ``noise_type=="normal"``. Must be same length as `params` but unused for any params with other `noise_type`. seed : int Random seed for generating noise Returns ------- out : :class:`xarray.Dataset` Same as `ds` but with ``[varname]_stoch`` stochastic variables added. """ np.random.seed(seed) for px, param in enumerate(params): noise_type = noise_types[px] noise_sd = noise_sds[px] param_stoch = param + "_stoch" param_det = param + "_deterministic" if noise_type is None: continue elif noise_type == "normal": ds[param_stoch] = ( ("sample", "t"), np.random.normal(0, noise_sd, shape), ) ds[param_stoch] = ds[param_det] + ds[param_stoch] elif noise_type == "exponential": (ds[param_det], _, _) = xr.broadcast(ds[param_det], ds.sample, ds.t) ds[param_stoch] = ( ds[param_det].dims, np.random.exponential(ds[param_det]), ) # commented out b/c inverse-exponential has undefined expected value # and empirically changes the mean parameter by order(s) of magnitude # elif noise_type == "inv_exponential": # (out[param_det],_,_) = xr.broadcast( # out[param_det], # out.sample, # out.t) # out[param_stoch] = ( # out[param_det].dims, # 1/np.random.exponential(1/out[param_det]) # ) elif not noise_type: ds[param_stoch] = ds[param_det].copy() else: raise ValueError(noise_type) neg = ds[param_stoch] < 0 n_bad = neg.sum().item() if n_bad > 0: n_tot = np.prod(ds[param_stoch].shape) dims = ["gamma"] if "sigma" in ds[param_stoch].dims: dims.append("sigma") to_sum = [d for d in ds[param_stoch].dims if d not in dims] cross_tab = (neg.sum(to_sum) / neg.count(to_sum)).to_dataframe() warnings.warn( f"Parameter {param} has {n_bad}/{n_tot} values <0 ({n_bad/n_tot:.2%}). " "These are non-physical params. If they are dropped in the simulation, " f"this will change the mean. Fraction of negative values: {cross_tab}" ) return ds
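# A hedged usage sketch (not part of the original): add additive normal noise to ``beta``
# while keeping ``gamma`` deterministic. The variable names, sizes, and values below are
# hypothetical; the dataset must already hold '*_deterministic' variables as documented.
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {
        "beta_deterministic": (("sample", "t"), np.full((10, 30), 0.25)),
        "gamma_deterministic": (("sample", "t"), np.full((10, 30), 0.1)),
    },
    coords={"sample": np.arange(10), "t": np.arange(30)},
)
ds = apply_param_noise(
    ds,
    params=["beta", "gamma"],
    noise_types=["normal", False],
    shape=(10, 30),
    noise_sds=[0.05, 0],
    seed=0,
)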
#########################################################################
# Calculations
# ------------
#
# Most of the calculations in `metpy.calc` will accept DataArrays by converting them
# into their corresponding unit arrays. While this may often work without any issues, we must
# keep in mind that because the calculations are working with unit arrays and not DataArrays:
#
# - The calculations will return unit arrays rather than DataArrays
# - Broadcasting must be taken care of outside of the calculation, as it would only recognize
#   dimensions by order, not name
#
# As an example, we calculate geostrophic wind at 500 hPa below:

lat, lon = xr.broadcast(y, x)
f = mpcalc.coriolis_parameter(lat)
dx, dy = mpcalc.lat_lon_grid_deltas(lon, lat, initstring=data_crs.proj4_init)
heights = data['height'].metpy.loc[{'time': time[0], 'vertical': 500. * units.hPa}]
u_geo, v_geo = mpcalc.geostrophic_wind(heights, f, dx, dy)
print(u_geo)
print(v_geo)

#########################################################################
# Also, a limited number of calculations directly support xarray DataArrays or Datasets (they
# can accept *and* return xarray objects). Right now, this includes
#
# - Derivative functions
#     - ``first_derivative``
#     - ``second_derivative``
#     - ``gradient``
def gen_xy(): for i, z in enumerate(self._z_vals): das = {} data = {} # multiple data variables rather than z coordinate if self._multi_var: das['x'] = self._ds[self.x_coo] das['y'] = self._ds[z] if (self.y_err is not None) or \ (self.x_err is not None) or \ (self.c_coo is not None): raise ValueError('Multi-var errors/c not implemented.') # z-coordinate to iterate over elif z is not None: try: # try positional indexing first, as much faster sub_ds = self._ds[{self.z_coo: i}] except ValueError: # but won't work e.g. on non-dimensions sub_ds = self._ds.loc[{self.z_coo: z}] das['x'] = sub_ds[self.x_coo] das['y'] = sub_ds[self.y_coo] if self.c_coo is not None: if mode == 'lineplot': self._c_cols.append(np.asscalar( sub_ds[self.c_coo].values.flatten())) elif mode == 'scatter': das['c'] = sub_ds[self.c_coo] if self.y_err is not None: das['ye'] = sub_ds[self.y_err] if self.x_err is not None: das['xe'] = sub_ds[self.x_err] # nothing to iterate over else: das['x'] = self._ds[self.x_coo] das['y'] = self._ds[self.y_coo] if self.c_coo is not None: if mode == 'lineplot': self._c_cols.append(np.asscalar( self._ds[self.c_coo].values.flatten())) elif mode == 'scatter': das['c'] = self._ds[self.c_coo] if self.y_err is not None: das['ye'] = self._ds[self.y_err] if self.x_err is not None: das['xe'] = self._ds[self.x_err] for k, da in zip(das, xr.broadcast(*das.values())): data[k] = da.values.flatten() # Trim out missing data not_null = np.isfinite(data['x']) not_null &= np.isfinite(data['y']) # TODO: if scatter, broadcast *then* ravel x, y, c? data['x'] = data['x'][not_null] data['y'] = data['y'][not_null] # implement jitter if self.xjitter: if self.xlog: data['x'] = data['x'] * np.random.normal( loc=1, scale=self.xjitter, size=data['x'].shape) else: data['x'] = data['x'] + np.random.normal( loc=0, scale=self.xjitter, size=data['x'].shape) if self.yjitter: if self.ylog: data['y'] = data['y'] * np.random.normal( loc=1, scale=self.yjitter, size=data['y'].shape) else: data['y'] = data['y'] + np.random.normal( loc=0, scale=self.yjitter, size=data['y'].shape) if 'c' in data: data['c'] = data['c'][not_null] if 'ye' in data: data['ye'] = data['ye'][not_null] if 'xe' in data: data['xe'] = data['xe'][not_null] yield data
# Calculations
# ------------
#
# Nearly all of the calculations in `metpy.calc` will accept DataArrays by converting them
# into their corresponding unit arrays. While this may often work without any issues, we must
# keep in mind that because the calculations are working with unit arrays and not DataArrays:
#
# - The calculations will return unit arrays rather than DataArrays
# - Broadcasting must be taken care of outside of the calculation, as it would only recognize
#   dimensions by order, not name
#
# Also, some of the units used in CF conventions (such as 'degrees_north') are not recognized
# by pint, so we must implement a workaround.
#
# As an example, we calculate geostrophic wind at 500 hPa below:

lat, lon = xr.broadcast(y, x)
f = mpcalc.coriolis_parameter(lat.values * units.degrees)
dx, dy = mpcalc.lat_lon_grid_deltas(lon.values, lat.values)
heights = data['height'].loc[time[0]].loc[{vertical.name: 500.}]
u_geo, v_geo = mpcalc.geostrophic_wind(heights, f, dx, dy, dim_order='yx')
print(u_geo)
print(v_geo)

#########################################################################
# Plotting
# --------
#
# Like most meteorological data, we want to be able to plot these data. DataArrays can be used
# like normal numpy arrays in plotting code, or we can use some of xarray's plotting
# functionality.
#
def weights_lonlat(a):
    weights = np.cos(np.deg2rad(a.lat))
    _, weights = xr.broadcast(a, weights)
    return weights.isel(time=0, drop=True)
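# A hedged usage sketch (not part of the original), assuming ``da`` has 'time', 'lat' and
# 'lon' dimensions: use the cosine-latitude weights for an area-weighted spatial mean.
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.random.rand(4, 3, 5),
    dims=["time", "lat", "lon"],
    coords={"lat": [-30.0, 0.0, 30.0]},
)
w = weights_lonlat(da)
weighted_mean = (da * w).sum(["lat", "lon"]) / w.sum(["lat", "lon"])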
def add_time_dimension(data, model_run): """ Once all constraints and costs have been loaded into the model dataset, any timeseries data is loaded from file and substituted into the model dataset Parameters: ----------- data : xarray Dataset A data structure which has already gone through `constraints_to_dataset`, `costs_to_dataset`, and `add_attributes` model_run : AttrDict Calliope model_run dictionary Returns: -------- data : xarray Dataset A data structure with an additional time dimension to the input dataset, with all relevant `file=` entries replaced with data from file. """ data['timesteps'] = pd.to_datetime(data.timesteps) # Search through every constraint/cost for use of '=' for variable in data.data_vars: # 1) If '=' in variable, it will give the variable a string data type if data[variable].dtype.kind != 'U': continue # 2) convert to a Pandas Series to do 'string contains' search data_series = data[variable].to_series() # 3) get a Series of all the uses of 'file=' for this variable filenames = data_series[data_series.str.contains('file=')] # 4) If no use of 'file=' then we can be on our way if filenames.empty: continue # 5) remove all before '=' and split filename and location column filenames = filenames.str.split('=').str[1].str.rsplit(':', 1) if isinstance(filenames.index, pd.MultiIndex): filenames.index = filenames.index.remove_unused_levels() # 6) Get all timeseries data from dataframes stored in model_run timeseries_data = [] key_errors = [] for loc_tech, (filename, column) in filenames.iteritems(): try: timeseries_data.append( model_run.timeseries_data[filename].loc[:, column].values) except KeyError: key_errors.append( 'column `{}` not found in file `{}`, but was requested by ' 'loc::tech `{}`.'.format(column, filename, loc_tech)) if key_errors: exceptions.print_warnings_and_raise_errors(errors=key_errors) timeseries_data_series = pd.DataFrame(index=filenames.index, columns=data.timesteps.values, data=timeseries_data).stack() timeseries_data_series.index.rename('timesteps', -1, inplace=True) # 7) Add time dimension to the relevent DataArray and update the '=' # dimensions with the time varying data (static data is just duplicated # at each timestep) timeseries_data_array = xr.broadcast(data[variable], data.timesteps)[0].copy() timeseries_data_array.loc[xr.DataArray.from_series( timeseries_data_series).coords] = xr.DataArray.from_series( timeseries_data_series).values # 8) assign correct dtype (might be string/object accidentally) # string 'nan' to NaN: array_to_check = timeseries_data_array.where( timeseries_data_array != 'nan', drop=True) timeseries_data_array = timeseries_data_array.where( timeseries_data_array != 'nan') if ((array_to_check == 'True') | (array_to_check == '1') | (array_to_check == 'False') | (array_to_check == '0')).all().item(): # Turn to bool timeseries_data_array = ((timeseries_data_array == 'True') | (timeseries_data_array == '1')).copy() else: try: timeseries_data_array = timeseries_data_array.astype( np.float, copy=False) except ValueError: None data[variable] = timeseries_data_array # Add timestep_resolution by looking at the time difference between timestep n # and timestep n + 1 for all timesteps time_delta = (data.timesteps.shift(timesteps=-1) - data.timesteps).to_series() # Last timestep has no n + 1, so will be NaT (not a time), # we duplicate the penultimate time_delta instead time_delta[-1] = time_delta[-2] time_delta.name = 'timestep_resolution' # Time resolution is saved in hours (i.e. 
seconds / 3600) data['timestep_resolution'] = (xr.DataArray.from_series( time_delta.dt.total_seconds() / 3600)) data['timestep_weights'] = xr.DataArray(np.ones(len(data.timesteps)), dims=['timesteps']) return data
def first_run(
    da: xr.DataArray,
    window: int,
    dim: str = "time",
    coord: Optional[Union[str, bool]] = False,
    ufunc_1dim: Union[str, bool] = "auto",
) -> xr.DataArray:
    """Return the index of the first item of the first run of at least a given length.

    Parameters
    ----------
    da : xr.DataArray
        Input N-dimensional DataArray (boolean).
    window : int
        Minimum duration of consecutive run to accumulate values.
    dim : str
        Dimension along which to calculate consecutive run (default: 'time').
    coord : Optional[str]
        If not False, the function returns values along `dim` instead of indexes.
        If `dim` has a datetime dtype, `coord` can also be a str of the name of the
        DateTimeAccessor object to use (ex: 'dayofyear').
    ufunc_1dim : Union[str, bool]
        Use the 1d 'ufunc' version of this function : default (auto) will attempt to select
        optimal usage based on number of data points. Using 1D_ufunc=True is typically more
        efficient for dataarray with a small number of gridpoints.

    Returns
    -------
    xr.DataArray
        Index (or coordinate if `coord` is not False) of first item in first valid run.
        Returns np.nan if there are no valid runs.
    """
    if ufunc_1dim == "auto":
        if isinstance(da.data, dsk.Array) and len(da.chunks[da.dims.index(dim)]) > 1:
            ufunc_1dim = False
        else:
            npts = get_npts(da)
            ufunc_1dim = npts <= npts_opt

    da = da.fillna(0)  # We expect a boolean array, but there could be NaNs nonetheless

    if ufunc_1dim:
        out = first_run_ufunc(x=da, window=window, dim=dim)
    else:
        da = da.astype("int")
        i = xr.DataArray(np.arange(da[dim].size), dims=dim)
        ind = xr.broadcast(i, da)[0].transpose(*da.dims)
        if isinstance(da.data, dsk.Array):
            ind = ind.chunk(da.chunks)
        wind_sum = da.rolling(time=window).sum(skipna=False)
        out = ind.where(wind_sum >= window).min(dim=dim) - (window - 1)
        # remove window - 1 as rolling result index is last element of the moving window

    if coord:
        crd = da[dim]
        if isinstance(coord, str):
            crd = getattr(crd.dt, coord)
        out = lazy_indexing(crd, out)

    if dim in out.coords:
        out = out.drop_vars(dim)

    return out
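# A hedged usage sketch (not part of the original): index (as day of year) of the start of
# the first run of at least five consecutive "wet" days. The data are hypothetical.
import numpy as np
import pandas as pd
import xarray as xr

wet = xr.DataArray(
    np.random.rand(365) > 0.5,
    dims="time",
    coords={"time": pd.date_range("2000-01-01", periods=365)},
)
start_doy = first_run(wet, window=5, dim="time", coord="dayofyear")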