def def_sponge_dampingtimescale_north(Y, sponge_width, idampval):
    '''Define a sponge grid at the north of the domain based on horizontal grid shape.

    Y is the latitude coordinate of the horizontal grid,
    sponge_width is the degrees of lat to damp over [must be a list, progressively decreasing in width],
    idampval is the inverse damping rate (in s-1) [must be a list]
    '''
    idamp = xr.zeros_like(Y)
    for i in range(len(sponge_width)):
        sponge_region = Y > Y.max(xr.ALL_DIMS) - sponge_width[i]
        idamp = idamp + xr.zeros_like(Y).where(~sponge_region, idampval[i])
    return idamp
def def_sponge_damping_linear_north(Y, sponge_width, idampval_max):
    '''Define a sponge grid at the north of the domain based on horizontal grid shape.

    Y is the latitude coordinate of the horizontal grid,
    sponge_width is the degrees of lat to damp over, and
    idampval_max is the maximum inverse damping rate (in s-1).

    The function prescribes a linear inverse damping rate that decays from its maximum
    at the northern boundary to 0 at the end of the sponge-width region.
    '''
    idamp = xr.zeros_like(Y)
    sponge_region = Y > Y.max(xr.ALL_DIMS) - sponge_width
    idamp = idamp + xr.zeros_like(Y).where(~sponge_region, idampval_max)
    idamp = idamp * (Y - Y.max(xr.ALL_DIMS) + sponge_width) / sponge_width
    return idamp
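# Hedged usage sketch (not from the source): build a 1-D latitude coordinate and damp the
# northernmost 10 degrees. Assumes an xarray version that still provides the deprecated
# xr.ALL_DIMS sentinel used above; in newer releases Y.max() over all dims is equivalent.
import numpy as np
import xarray as xr

Y = xr.DataArray(np.linspace(-70.0, 70.0, 141), dims="yh", name="lat")

# stepped sponge: the listed rates accumulate where the nested regions overlap,
# so the last 5 degrees receive both contributions
idamp_step = def_sponge_dampingtimescale_north(
    Y, sponge_width=[10.0, 5.0], idampval=[1.0 / (30 * 86400), 1.0 / (10 * 86400)]
)

# linear sponge: rate ramps from 0 at the sponge edge to 1/(10 days) at the northern boundary
idamp_lin = def_sponge_damping_linear_north(Y, sponge_width=10.0, idampval_max=1.0 / (10 * 86400))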
def reduce_chunked(self, xs, output):
    """Computes the skew across a chunk

    Parameters
    ----------
    xs : iterable
        Iterable of sources

    Returns
    -------
    UnitsDataArray
        Skew of the source data over dims
    """
    N = xr.zeros_like(output)
    M1 = xr.zeros_like(output)
    M2 = xr.zeros_like(output)
    M3 = xr.zeros_like(output)
    check_empty = True

    for x in xs:
        Nx = np.isfinite(x).sum(dim=self._dims)
        M1x = x.mean(dim=self._dims)
        Ex = x - M1x
        Ex2 = Ex ** 2
        Ex3 = Ex2 * Ex
        M2x = (Ex2).sum(dim=self._dims)
        M3x = (Ex3).sum(dim=self._dims)

        # premask to omit NaNs
        b = Nx.data > 0
        Nx = Nx.data[b]
        M1x = M1x.data[b]
        M2x = M2x.data[b]
        M3x = M3x.data[b]
        Nb = N.data[b]
        M1b = M1.data[b]
        M2b = M2.data[b]

        # merge
        d = M1x - M1b
        n = Nb + Nx
        NNx = Nb * Nx
        M3.data[b] += (M3x
                       + d ** 3 * NNx * (Nb - Nx) / n ** 2
                       + 3 * d * (Nb * M2x - Nx * M2b) / n)
        M2.data[b] += M2x + d ** 2 * NNx / n
        M1.data[b] += d * Nx / n
        N.data[b] = n

    # calculate skew
    skew = np.sqrt(N) * M3 / np.sqrt(M2 ** 3)
    return skew
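# Hedged aside (not from the source): the "merge" block above is the standard pairwise update
# for streaming central moments (as in Chan et al. / Pebay), after which the biased skewness
# is sqrt(N) * M3 / M2**1.5. A minimal numpy check of those formulas against scipy:
import numpy as np
from scipy.stats import skew as scipy_skew

rng = np.random.default_rng(0)
a, b = rng.normal(size=100), rng.normal(size=150)

def _moments(x):
    n, m1 = x.size, x.mean()
    e = x - m1
    return n, m1, (e ** 2).sum(), (e ** 3).sum()

(na, m1a, m2a, m3a), (nb, m1b, m2b, m3b) = _moments(a), _moments(b)
d, n = m1b - m1a, na + nb
m3 = m3a + m3b + d ** 3 * na * nb * (na - nb) / n ** 2 + 3 * d * (na * m2b - nb * m2a) / n
m2 = m2a + m2b + d ** 2 * na * nb / n
assert np.isclose(np.sqrt(n) * m3 / np.sqrt(m2 ** 3), scipy_skew(np.concatenate([a, b])))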
def _invert_from_model_any(inc, sigma0_co_db, sigma0_cr_db, dsig_cr, ancillary_wind):
    # wrapper to allow computation on any type (xarray, numpy)
    try:
        # if input is xarray, will return xarray
        da_ws_co = xr.zeros_like(sigma0_co_db, dtype=np.complex128)
        da_ws_co.name = 'windspeed_gmf'
        da_ws_co.attrs.clear()
        da_ws_cr = xr.zeros_like(sigma0_co_db, dtype=np.float64)
        da_ws_cr.name = 'windspeed_gmf'
        da_ws_cr.attrs.clear()

        try:
            # if dask array, use map_blocks
            # raise ImportError
            import dask.array as da
            if any(
                    [
                        isinstance(v.data, da.Array)
                        for v in [inc, sigma0_co_db, sigma0_cr_db, dsig_cr, ancillary_wind]
                    ]
            ):
                da_ws_co.data, da_ws_cr.data = da.apply_gufunc(
                    _invert_from_model_numpy,
                    '(n),(n),(n),(n),(n)->(n),(n)',
                    inc.data, sigma0_co_db.data, sigma0_cr_db.data,
                    dsig_cr.data, ancillary_wind.data
                )
                logger.debug('invert with map_blocks')
            else:
                raise TypeError

        except (ImportError, TypeError):
            # use numpy array, but store in xarray
            da_ws_co.data, da_ws_cr.data = _invert_from_model_numpy(
                np.asarray(inc),
                np.asarray(sigma0_co_db),
                np.asarray(sigma0_cr_db),
                np.asarray(dsig_cr),
                np.asarray(ancillary_wind),
            )
            logger.debug('invert with xarray.values. no chunks')

    except TypeError:
        # full numpy
        logger.debug('invert with numpy')
        da_ws_co, da_ws_cr = _invert_from_model_numpy(
            inc, sigma0_co_db, sigma0_cr_db, dsig_cr, ancillary_wind
        )

    return da_ws_co, da_ws_cr
def main(era_filesearch, cesm_base_filesearch, bias_output):
    print("opening data")
    era_data = xr.open_mfdataset(era_filesearch, concat_dim='time')
    base_cesm_data = xr.open_mfdataset(cesm_base_filesearch, concat_dim='time')

    print("loading data")
    era_data.load()
    base_cesm_data.load()

    print("computing standard deviations over time")
    emean = era_data.std(dim="time")
    cmean = base_cesm_data.std(dim="time")

    print("creating data")
    interpolated_era = xr.zeros_like(cmean)

    print("loading data")
    interpolated_era.load()

    z_interp_all_vars(emean, interpolated_era,
                      era_data["z"].mean(dim="time"),
                      base_cesm_data["z"].mean(dim="time"),
                      vars_to_correct)
    interpolated_era.to_netcdf("era_interpolated_std.nc")

    print("Computing Bias")
    bias = interpolated_era - cmean

    print("writing")
    bias.to_netcdf(bias_output)
def construct_knn_graph(
        cls, data, dist_mat, k: int,
        cell_properties: Union[bool, Sequence[str]] = False,
        cell_channel_properties: Union[bool, Sequence[str]] = False
) -> 'SpatialCellGraph':
    """Constructs a new k-nearest cell neighbor graph

    :param data: single-cell data (rows: cell IDs, columns: feature names)
    :type data: SingleCellData or DataFrame-like
    :param dist_mat: symmetric distance matrix, shape: ``(cells, cells)``
    :type dist_mat: DataArray-like
    :param k: number of nearest neighbors for the graph construction
    :param cell_properties: list of cell properties (e.g. regionprops) to include as node
        attributes; set to ``True`` to include all
    :param cell_channel_properties: list of cell channel properties (e.g. intensity values)
        to include as node attributes; set to ``True`` to include all
    :return: a directed k-nearest cell neighbor graph
    """
    data, dist_mat = cls._prepare_data(data, dist_mat, cell_properties, cell_channel_properties)
    adj_mat = xr.zeros_like(dist_mat, dtype='bool')
    knn_indices = np.argpartition(dist_mat.values, k + 1, axis=1)[:, :(k + 1)]
    for current_index, current_knn_indices in enumerate(knn_indices):
        adj_mat[current_index, current_knn_indices] = True
    np.fill_diagonal(adj_mat.values, False)
    return SpatialCellGraph(data, adj_mat, _skip_data_preparation=True)
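# Hedged standalone illustration (not from the source): the k-nearest-neighbour step above
# uses np.argpartition to pick the k+1 smallest distances per row; the +1 accounts for the
# zero self-distance, which is removed afterwards by clearing the diagonal. All names below
# are illustrative.
import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
pts = rng.random((6, 2))
dist = xr.DataArray(np.linalg.norm(pts[:, None] - pts[None, :], axis=-1),
                    dims=("cell", "neighbor"))
k = 2
adj = xr.zeros_like(dist, dtype=bool)
knn = np.argpartition(dist.values, k + 1, axis=1)[:, :(k + 1)]
for i, nbrs in enumerate(knn):
    adj[i, nbrs] = True
np.fill_diagonal(adj.values, False)
assert int(adj.sum()) == 6 * k  # each cell keeps exactly k outgoing edges (barring ties)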
def _regrid_given_delp(
    ds,
    delp_fine,
    delp_coarse,
    weights,
    x_dim: str = FV_CORE_X_CENTER,
    y_dim: str = FV_CORE_Y_CENTER,
    z_dim: str = RESTART_Z_CENTER,
):
    """Given a fine and coarse delp, do vertical regridding to coarse pressure levels
    and mask weights below fine surface pressure.
    """
    delp_coarse_on_fine = block_upsample_like(
        delp_coarse, delp_fine, x_dim=x_dim, y_dim=y_dim
    )
    phalf_coarse_on_fine = pressure_at_interface(
        delp_coarse_on_fine, dim_center=z_dim, dim_outer=RESTART_Z_OUTER
    )
    phalf_fine = pressure_at_interface(
        delp_fine, dim_center=z_dim, dim_outer=RESTART_Z_OUTER
    )

    ds_regrid = xr.zeros_like(ds)
    for var in ds:
        ds_regrid[var] = regrid_vertical(
            phalf_fine, ds[var], phalf_coarse_on_fine, z_dim_center=z_dim
        )

    masked_weights = _mask_weights(
        weights, phalf_coarse_on_fine, phalf_fine, dim_center=z_dim
    )

    return ds_regrid, masked_weights
def compute_climatology(ds, monthValues, calendar=None, maskVaries=True):  # {{{
    """
    Compute a monthly, seasonal or annual climatology data set from a data set.
    The mean is weighted by the number of days in each month of the data set,
    ignoring values masked out with NaNs.  If the month coordinate is not
    present, a data array ``month`` will be added based on ``Time`` and the
    provided calendar.

    Parameters
    ----------
    ds : ``xarray.Dataset`` or ``xarray.DataArray`` object
        A data set with a ``Time`` coordinate expressed as days since
        0001-01-01 or a ``month`` coordinate

    monthValues : int or array-like of ints
        A single month or an array of months to be averaged together

    calendar : ``{'gregorian', 'gregorian_noleap'}``, optional
        The name of one of the calendars supported by MPAS cores, used to
        determine ``month`` from the ``Time`` coordinate, so it must be
        supplied if ``ds`` does not already have a ``month`` coordinate or
        data array

    maskVaries : bool, optional
        If the mask (where variables in ``ds`` are ``NaN``) varies with time.
        If not, the weighted average does not need to make extra effort to
        account for the mask.  Most MPAS fields will have masks that don't
        vary in time, whereas observations may sometimes be present only at
        some times and not at others, requiring ``maskVaries = True``.

    Returns
    -------
    climatology : object of same type as ``ds``
        A data set without the ``'Time'`` coordinate containing the mean of
        ds over all months in monthValues, weighted by the number of days in
        each month.

    Authors
    -------
    Xylar Asay-Davis

    Last Modified
    -------------
    04/08/2017
    """

    ds = add_years_months_days_in_month(ds, calendar)

    mask = xr.zeros_like(ds.month, bool)
    for month in monthValues:
        mask = xr.ufuncs.logical_or(mask, ds.month == month)

    climatologyMonths = ds.where(mask, drop=True)
    climatology = _compute_masked_mean(climatologyMonths, maskVaries)

    return climatology  # }}}
def plot(G):
    # Don't plot first or last bin (expanded to capture full range)
    G = G.isel(sigma0=slice(1, -1))
    levs = G["sigma0"].values

    # Take annual mean and load
    G = G.mean("time").load()

    # Get terms in dataset
    terms = list(G.data_vars)

    fig, ax = plt.subplots()

    # Plot each term
    for term in terms:
        if term == "heat":
            color = "tab:red"
        elif term == "salt":
            color = "tab:blue"
        else:
            color = "k"
        ax.plot(levs, G[term], label=term, color=color)

    # If terms were not grouped then sum them up to get total
    if len(terms) > 1:
        total = xr.zeros_like(G[terms[0]])
        for term in terms:
            total += G[term]
        ax.plot(levs, total, label="total", color="k")

    ax.legend()
    ax.set_xlabel("SIGMA0")
    ax.set_ylabel("TRANSFORMATION ($m^3s^{-1}$)")
    ax.autoscale(enable=True, axis="x", tight=True)

    return fig
def day_number_to_date_mars_model(ls_in, calendar_type='none',
                                  units_in='days since 0000-00-0 00:00:00'):
    year_values = xar.zeros_like(ls_in)
    my_temp = 1
    ls_previous = 0.
    dodgy_ls_list = []

    for i in range(len(ls_in.squeeze().values) - 1):
        if ls_in[i] - ls_previous > 0. and ls_in[i + 1] - ls_in[i] > 0.:
            year_values[i] = my_temp
        elif ls_in[i] - ls_previous < 0. and ls_in[i + 1] - ls_in[i] > 0.:
            year_values[i] = my_temp
        elif ls_in[i] - ls_previous > 0. and ls_in[i + 1] - ls_in[i] < 0.:
            year_values[i] = my_temp
        elif ls_in[i] - ls_previous < 0. and ls_in[i + 1] - ls_in[i] < 0.:
            dodgy_ls_list.append(i)
            my_temp = my_temp + 1
            year_values[i] = my_temp
        ls_previous = ls_in[i]

    year_values[-1] = my_temp
    ls_in[dodgy_ls_list] = 0.

    dayofyear_values = np.floor(ls_in)
    month_values = np.mod(np.ceil((ls_in / 30.) - 0.5) + 3., 12)

    cdftime = cdftime_mars(dayofyear_values, month_values, year_values)

    return cdftime, ls_in
def test_merge_into_oceandataset():
    # da without name
    da = od_in.dataset['XC'] * od_in.dataset['YC']
    with pytest.raises(ValueError) as e:
        od_out = od_in.merge_into_oceandataset(da)
    assert str(e.value) == "xarray.DataArray doesn't have a name. Set it using da.rename()"

    # da different name
    da = da.rename('test')
    od_out = od_in.merge_into_oceandataset(da)
    assert od_out.dataset['test'].equals(da)

    # ds
    ds = xr.merge([da.rename('test1'), da.rename('test2')])
    od_out = od_in.merge_into_oceandataset(ds)
    assert set(['test1', 'test2']).issubset(od_out.dataset.variables)

    # da
    da = xr.zeros_like(od_in.dataset['XC'])
    with pytest.warns(UserWarning):
        od_out = od_in.merge_into_oceandataset(da)
    with pytest.warns(UserWarning):
        od_out = od_in.merge_into_oceandataset(da, overwrite=True)
def _column_dq1(ds: xr.Dataset) -> xr.DataArray:
    if "net_heating_due_to_machine_learning" in ds:
        warnings.warn(
            "'net_heating_due_to_machine_learning' is a deprecated variable name. "
            "It will not be supported in future versions of fv3net. Use "
            "'column_heating_due_to_machine_learning' instead.",
            DeprecationWarning,
        )
        # fix isochoric vs isobaric transition issue
        column_dq1 = 716.95 / 1004 * ds.net_heating_due_to_machine_learning
    elif "net_heating" in ds:
        warnings.warn(
            "'net_heating' is a deprecated variable name. "
            "It will not be supported in future versions of fv3net. Use "
            "'column_heating_due_to_machine_learning' instead.",
            DeprecationWarning,
        )
        # fix isochoric vs isobaric transition issue
        column_dq1 = 716.95 / 1004 * ds.net_heating
    elif "column_heating_due_to_machine_learning" in ds:
        column_dq1 = ds.column_heating_due_to_machine_learning
    elif "storage_of_internal_energy_path_due_to_machine_learning" in ds:
        column_dq1 = ds.storage_of_internal_energy_path_due_to_machine_learning
    else:
        # assume given dataset is for a baseline or verification run
        column_dq1 = xr.zeros_like(ds.PRATEsfc)
    column_dq1.attrs = {
        "long_name": "<dQ1> column integrated heating from ML",
        "units": "W/m^2",
    }
    return column_dq1.rename("column_integrated_dQ1")
def test__bin_stats(ds):
    from sciapy.level2.binning import _bin_stats
    _ds = ds.copy()
    _ds["latitude"] = xr.zeros_like(_ds.latitude)

    # binning result
    avg_aw = _bin_stats(
        _ds,
        binvar="latitude", tvar="time",
        area_weighted=True,
    )
    avg_nw = _bin_stats(
        _ds,
        binvar="latitude", tvar="time",
        area_weighted=False,
    )
    xr.testing.assert_allclose(avg_nw, avg_aw)

    # non-weighted mean using standard functions
    dims = ("latitude", "time")
    stacked = "__stacked__"
    _ds = _ds.stack(**{stacked: dims})
    ds_avg = _ds.mean(dim=stacked)
    ds_cnt = _ds.count(dim=stacked)
    ds_std = _ds.std(dim=stacked, ddof=1)
    ds_std = ds_std.rename({v: v + "_std" for v in ds_std.data_vars})
    ds_cnt = ds_cnt.rename({v: v + "_cnt" for v in ds_cnt.data_vars})
    avg_ds = xr.merge([ds_avg, ds_std, ds_cnt])

    # Re-create the sum of squared weights
    _ws = xr.ones_like(_ds.latitude, dtype=float)
    _ws /= _ws.sum(dim=stacked)
    avg_ds["wsqsum"] = (_ws ** 2).sum(dim=stacked)

    xr.testing.assert_allclose(avg_nw, avg_ds)
def _column_nq1(ds: xr.Dataset) -> xr.DataArray:
    if "column_heating_nudge" in ds:
        # name for column integrated temperature nudging in nudge-to-obs
        column_nq1 = ds.column_heating_nudge
    elif "int_t_dt_nudge" in ds:
        # name for column-integrated temperature nudging in X-SHiELD runs
        column_nq1 = ds.int_t_dt_nudge
    elif "net_heating_due_to_nudging" in ds:
        # old name for column integrated temperature nudging in nudge-to-fine
        warnings.warn(
            "'net_heating_due_to_nudging' is a deprecated variable name. "
            "It will not be supported in future versions of fv3net. Use "
            "'column_heating_due_to_nudging' instead.",
            DeprecationWarning,
        )
        # fix isochoric vs isobaric transition issue
        column_nq1 = 716.95 / 1004 * ds.net_heating_due_to_nudging
    elif "column_heating_due_to_nudging" in ds:
        column_nq1 = ds.column_heating_due_to_nudging
    else:
        # assume given dataset is for a run without temperature nudging
        column_nq1 = xr.zeros_like(ds.PRATEsfc)
    column_nq1.attrs = {
        "long_name": "<nQ1> column integrated heating from nudging",
        "units": "W/m^2",
    }
    return column_nq1.rename("column_integrated_nQ1")
def wrapper(*args, **kwargs):
    fn.utils.assert_isdarray(args[0])
    fn.utils.assert_isdarray(args[1])

    Ton, Tref = args[:2]
    Tout = xr.zeros_like(Ton)
    refids = np.unique(Tref.scanid)
    onids = np.unique(Ton.scanid)

    for onid in tqdm(onids):
        # _f denotes former REF (before ON)
        # _l denotes latter REF (after ON)
        index = np.searchsorted(refids, onid)

        if index == 0:
            index_f = index_l = 0
        elif index == len(refids):
            index_f = index_l = len(refids) - 1
        else:
            index_f, index_l = index - 1, index

        index_on = (Ton.scanid == onid)
        index_ref = ((Tref.scanid == refids[index_f])
                     | (Tref.scanid == refids[index_l]))

        Ton_ = Ton[index_on]
        Tref_ = Tref[index_ref]
        Tout_ = func(Ton_, Tref_, *args[2:], **kwargs)

        assert Tout_.shape == Ton_.shape
        Tout[index_on] = Tout_

    return Tout
def apply_binary_mask(times, dep_lat, dep_lon, mask, reverse=True):
    array_list = []
    origins = xr.zeros_like(mask)
    for i in numba.prange(times.shape[0]):
        time = times[i]
        dep_lat_, dep_lon_ = dep_lat.sel(time=time).copy(), dep_lon.sel(time=time).copy()
        dep_lat_nan = np.isnan(dep_lat_.values.flatten())
        dep_lon_nan = np.isnan(dep_lon_.values.flatten())
        assert (dep_lat_nan == dep_lon_nan).all(), "This should not happen!"
        dep_lat_no_nan = dep_lat_.values.flatten()[~dep_lat_nan]
        dep_lon_no_nan = dep_lon_.values.flatten()[~dep_lon_nan]
        points = [x for x in zip(dep_lat_no_nan, dep_lon_no_nan)]

        landsea = list()
        for point in points:
            landsea.append(
                mask.sel(latitude=point[0], longitude=point[1], method='nearest').values)
            origins.sel(latitude=point[0], longitude=point[1], method='nearest').values += 1

        vals = dep_lat_.values
        if reverse:
            vals[~np.isnan(vals)] = [0 if x == 1 else 1 for x in landsea]  # switching sea breeze to 1
        else:
            vals[~np.isnan(vals)] = [x for x in landsea]
        array_list.append(vals)
        print("Done time {}".format(time))

    return array_list, origins
def compute_by_block(dsx):
    """ """
    # determine index key for each chunk
    slices = []
    for chunks in dsx.chunks:
        L = [0, ] + list(np.cumsum(chunks))
        slices.append([slice(a, b) for a, b in zip(L[:-1], L[1:])])
    indexes = list(product(*slices))

    # allocate memory to receive result
    if isinstance(dsx, xr.DataArray):
        result = xr.zeros_like(dsx).load()
    else:
        result = np.zeros(dsx.shape)

    # evaluate each chunk one at a time
    for index in tqdm_notebook(indexes, leave=False):
        block = dsx.__getitem__(index).compute()
        result.__setitem__(index, block)

    return result
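# Hedged usage sketch (names are illustrative, not from the source): compute_by_block walks a
# chunked (dask-backed) array one chunk at a time instead of calling .compute() on the whole
# expression, which keeps peak memory close to a single chunk. It assumes tqdm's
# tqdm_notebook and itertools.product are importable, as the function above does.
import numpy as np
import xarray as xr

dsx = xr.DataArray(np.random.rand(100, 100), dims=("y", "x")).chunk({"y": 25, "x": 50})
result = compute_by_block(np.exp(dsx))      # lazy expression, evaluated chunk by chunk
assert np.allclose(result, np.exp(dsx).compute())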
def create_area_grid(da, res=0.1):
    da_area = xr.zeros_like(da)
    da_area.attrs = {'long_name': 'area', 'units': 'ha'}
    da_area.name = 'area'
    for lat in da_area.lat.values:
        da_area.loc[{'lat': lat}] = calc_area(lat, res)
    return da_area
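# Hedged sketch of what the calc_area helper used above might look like (the real helper is
# not shown in this snippet, so this is only a plausible stand-in): area in hectares of a
# res x res degree cell centred at a given latitude, assuming a spherical Earth.
import numpy as np

def calc_area_sketch(lat, res=0.1, r_earth=6_371_000.0):
    """Approximate area (ha) of a res-degree grid cell centred at latitude `lat`."""
    dlat = dlon = np.deg2rad(res)
    lat_rad = np.deg2rad(lat)
    area_m2 = r_earth ** 2 * dlon * (np.sin(lat_rad + dlat / 2) - np.sin(lat_rad - dlat / 2))
    return area_m2 / 1e4  # m^2 -> hectares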
def ismatch(dataarray: xr.DataArray, pattern: Union[Pattern, str],
            flags: re.RegexFlag = 0) -> xr.DataArray:
    """Test whether each string in a DataArray matches a regex pattern.

    Args:
        dataarray: String DataArray to be compared.
        pattern: String or compiled regex pattern.
        flags: Regex flags to control the matching behavior.

    Returns:
        Boolean DataArray each value of which is ``True``
        where it matches the pattern and ``False`` otherwise.

    Raises:
        TypeError: Raised if ``dataarray.dtype`` is not string-like.

    """
    if not np.issubdtype(dataarray.dtype, np.str_):
        raise TypeError("Can only be used for string DataArray.")

    pattern = re.compile(pattern, flags)
    search = np.vectorize(lambda string: pattern.search(string))

    result = xr.zeros_like(dataarray, bool)
    result.values = search(dataarray.values).astype(bool)
    return result
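# Hedged usage sketch of ismatch (values are illustrative):
import re
import numpy as np
import xarray as xr

names = xr.DataArray(np.array(["GBT19A_045", "GBT19B_092", "EGS98017"]), dims="scan")
print(ismatch(names, r"^GBT19[AB]"))         # -> [ True,  True, False]
print(ismatch(names, "gbt", re.IGNORECASE))  # -> [ True,  True, False]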
def process_per_timestep(dset, flexdust_ds, x0, x1, y0, y1, height=None):
    dset = dset.sel(lon=slice(x0, x1), lat=slice(y0, y1))

    # interpolate flexdust to match flexpart coordinates
    flexdust_ds = flexdust_ds.interp({'lon': dset.lon, 'lat': dset.lat})

    if height is None:
        height = dset.height.values

    scale_factor = (1 / height) * 1000

    print('creating output array')
    out_data = xr.zeros_like(dset['spec001_mr'])
    for i in range(len(out_data.time)):
        temp_data = dset['spec001_mr'].isel(time=i)
        time_steps = temp_data.time + temp_data.btime
        emission_field = flexdust_ds['Emission'].sel(time=time_steps)
        out_data[i] = temp_data.values * emission_field.values * scale_factor
    print('finished emission field * sensitivity')

    surface_sensitivity = dset['spec001_mr']

    iedate_stamp = dset.time[-1]
    ibdate_stamp = dset.time[0]

    dset.attrs['iedate'] = str(iedate_stamp.dt.strftime('%Y%m%d').values)
    dset.attrs['ietime'] = str(iedate_stamp.dt.strftime('%H%M%S').values)
    dset.attrs['ibdate'] = str(ibdate_stamp.dt.strftime('%Y%m%d').values)
    dset.attrs['ibtime'] = str(ibdate_stamp.dt.strftime('%H%M%S').values)

    return dset, out_data, surface_sensitivity
def distance(
    self,
    direction: str,
    x1: LabeledArray,
    x2: LabeledArray,
    t: LabeledArray,
) -> LabeledArray:
    """Implementation of calculation of physical distances between points
    in this coordinate system. This accounts for potential toroidal skew of
    lines.
    """
    c = np.ceil(x1).astype(int)
    f = np.floor(x1).astype(int)
    x_s = (self.x_start[c] - self.x_start[f]) * (x1 - f) + self.x_start[f]
    x_e = (self.x_end[c] - self.x_end[f]) * (x1 - f) + self.x_end[f]
    z_s = (self.z_start[c] - self.z_start[f]) * (x1 - f) + self.z_start[f]
    z_e = (self.z_end[c] - self.z_end[f]) * (x1 - f) + self.z_end[f]
    y_s = (self.y_start[c] - self.y_start[f]) * (x1 - f) + self.y_start[f]
    y_e = (self.y_end[c] - self.y_end[f]) * (x1 - f) + self.y_end[f]
    x = x_s + (x_e - x_s) * x2
    y = y_s + (y_e - y_s) * x2
    z = z_s + (z_e - z_s) * x2
    spacings = np.sqrt(
        x.diff(direction) ** 2 + z.diff(direction) ** 2 + y.diff(direction) ** 2
    )
    result = zeros_like(x)
    result[{direction: slice(1, None)}] = spacings.cumsum(direction)
    return result
def load_turbines(decommissioned=True, replace_nan_values="mean"):
    """Load list of all turbines from CSV file. Includes location, capacity, etc.

    Missing values are replaced with NaN values. The file uswtdb_v1_2_20181001.xml
    contains more information about the fields.

    Parameters
    ----------
    decommissioned : bool
        if True merge datasets from official CSV with Excel sheet received via e-mail
    replace_nan_values : str
        use data imputation to set missing values for turbine diameters and hub heights,
        set to "" to disable

    Returns
    -------
    xr.Dataset

    """
    turbines_dataframe = pd.read_csv(
        INPUT_DIR / "wind_turbines_usa" / "uswtdb_v3_0_1_20200514.csv"
    )

    # TODO is this really how it is supposed to be done?
    turbines_dataframe.index = turbines_dataframe.index.rename("turbines")

    turbines = xr.Dataset.from_dataframe(turbines_dataframe)

    # Let's not use the turbine on Guam (avoids a huge bounding box for the USA)
    neglected_capacity_kw = turbines.sel(turbines=turbines.xlong >= 0).t_cap.sum()
    assert (
        neglected_capacity_kw == 275
    ), f"unexpected total capacity filtered: {neglected_capacity_kw}"
    turbines = turbines.sel(turbines=turbines.xlong < 0)

    turbines = turbines.set_index(turbines="case_id")

    turbines["is_decomissioned"] = xr.zeros_like(turbines.p_year, dtype=np.bool)

    if not decommissioned:
        return turbines

    turbines_decomissioned = pd.read_excel(
        INPUT_DIR / "wind_turbines_usa" / "decom_clean_032520.xlsx",
        engine="openpyxl",
    )
    turbines_decomissioned = xr.Dataset(turbines_decomissioned).rename(dim_0="turbines")
    turbines_decomissioned = turbines_decomissioned.set_index(turbines="case_id")
    turbines = xr.merge((turbines, turbines_decomissioned))

    turbines["is_decomissioned"] = turbines.decommiss == "yes"
    turbines = turbines.drop_vars("decommiss")

    if replace_nan_values:
        turbines = estimate_missing(turbines, method=replace_nan_values)

    turbines = turbines.chunk(CHUNK_SIZE_TURBINES)

    return turbines
def estimate_baseline(T_cal, order=1, weight=None):
    """Estimate polynomial baseline of each sample."""
    freq = T_cal.ch - T_cal.ch.mean()
    n_freq, n_poly = len(freq), order + 1

    # make design matrix
    X = np.zeros([n_freq, n_poly])
    for i in range(n_poly):
        poly = freq ** i
        X[:, i] = poly / np.linalg.norm(poly)

    y = T_cal.values.T

    # estimate coeffs by solving linear regression problem
    if weight is None:
        weight = 1.0

    model = LinearRegression(fit_intercept=False)
    model.fit(X, y, sample_weight=weight)

    # estimate baseline
    T_base = xr.zeros_like(T_cal) + model.coef_ @ X.T

    for i in range(n_poly):
        T_base.coords[f"basis_{i}"] = "ch", X[:, i]
        T_base.coords[f"coeff_{i}"] = "t", model.coef_[:, i]

    return T_base
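# Hedged usage sketch (synthetic data, names are illustrative): fit a linear baseline per
# time sample of a (t, ch) calibrated-spectra DataArray and subtract it. Assumes sklearn's
# LinearRegression and numpy/xarray are imported as the function above requires.
import numpy as np
import xarray as xr

n_t, n_ch = 50, 128
ch = np.arange(n_ch, dtype=float)
true_baseline = 2.0 + 0.01 * (ch - ch.mean())
T_cal = xr.DataArray(true_baseline + np.random.normal(0, 0.1, (n_t, n_ch)),
                     dims=("t", "ch"), coords={"ch": ch})

T_base = estimate_baseline(T_cal, order=1)
T_clean = T_cal - T_base    # baseline-subtracted spectra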
def get_D_KL_from_xarray(da_P_X_Y, da_P_X, da_P_Y):
    """
    base 10 : Mutual information of
        I_matrix = xr.apply_ufunc(func_D_KL, P_X_Y, P_X, P_Y)
        return I_matrix.sum()
    """
    da_log2 = xr.zeros_like(da_P_X_Y)

    import itertools
    str_dim_x = da_P_X.dims[0]
    str_dim_y = da_P_Y.dims[0]
    for realiz_id_x, realiz_id_y in itertools.product(
            da_P_X_Y[str_dim_x].values, da_P_X_Y[str_dim_y].values):
        p_xy = da_P_X_Y.loc[{str_dim_x: realiz_id_x, str_dim_y: realiz_id_y}]
        p_x = da_P_X.loc[{str_dim_x: realiz_id_x}]
        p_y = da_P_Y.loc[{str_dim_y: realiz_id_y}]
        log_p_xy_over_p_x_p_y = ufunc_log_pxy_over_px_py(p_xy, p_x, p_y)
        da_log2.loc[{
            str_dim_x: realiz_id_x,
            str_dim_y: realiz_id_y
        }] = log_p_xy_over_p_x_p_y
        # da_log2.loc[{str_dim_x:realiz_id_x, str_dim_y:realiz_id_y}] =

    # print("da_log2: ", da_log2)
    # print("da_P_X_Y: ", da_P_X_Y)
    mutual_information = xr.dot(da_P_X_Y, da_log2)
    print("mutual_information (", str_dim_x, ", ", str_dim_y, "): ",
          mutual_information.values)
    return mutual_information
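# Hedged aside (not from the source): the loop above evaluates
#     I(X; Y) = sum_{x,y} P(x, y) * log( P(x, y) / (P(x) * P(y)) )
# entry by entry. A vectorised sketch of the same quantity, assuming log base 2 (the base
# actually used depends on the ufunc_log_pxy_over_px_py helper, which is not shown here):
import numpy as np
import xarray as xr

def mutual_information_sketch(da_P_X_Y, da_P_X, da_P_Y):
    ratio = da_P_X_Y / (da_P_X * da_P_Y)
    # zero-probability cells contribute nothing to the sum
    log_term = xr.where(da_P_X_Y > 0, np.log2(xr.where(ratio > 0, ratio, 1.0)), 0.0)
    return (da_P_X_Y * log_term).sum()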
def calc_correlation_field(xda, mask, dimlist=['Z', 'YC'], n_shift=15, mask_in_betweens=False):
    """calculate the correlation field for each shifted distance

    Parameters
    ----------
    xda : xarray.DataArray
        The field to compute correlations on, over the 'sample' dimension
    mask : xarray.DataArray
        True/False inside/outside of domain
    dimlist : list of str
        denoting dimensions to compute shifted correlations
    n_shift : int
        number of shifts to do
    mask_in_betweens : bool, optional
        if True, then if there is a portion of the domain such that for a particular
        dimension, there is a gap between two points, ignore all points with larger
        correlation length than where the gap occurs; doesn't affect results much
    """
    xds = xr.Dataset()
    shifty = np.arange(-n_shift, n_shift + 1)
    shifty = xr.DataArray(shifty, coords={'shifty': shifty}, dims=('shifty',))
    xds['shifty'] = shifty

    for dim in dimlist:
        corrfld = f'corr_{dim.lower()}'
        template = xda.isel(sample=0).drop('sample')
        xds[corrfld] = xr.zeros_like(shifty * template)

        x_deviation = (xda - xda.mean('sample')).where(mask)
        x_ssr = np.sqrt((x_deviation ** 2).sum('sample'))

        for s in shifty.values:
            y_deviation = x_deviation.shift({dim: s})
            numerator = (x_deviation * y_deviation).sum('sample')
            y_ssr = np.sqrt((y_deviation ** 2).sum('sample'))
            denominator = x_ssr * y_ssr
            xds[corrfld].loc[{'shifty': s}] = numerator / denominator

    if mask_in_betweens:
        for dim in dimlist:
            corrfld = f'corr_{dim.lower()}'
            for s in shifty.values:
                if s < 0:
                    bigger_than = shifty < s
                else:
                    bigger_than = shifty > s
                imnan = np.isnan(xds[corrfld].sel(shifty=s))
                xds[corrfld] = xr.where(bigger_than * imnan, np.nan, xds[corrfld])

    return xds
def test_zero_risk_error(self):
    participants, incidence_scenarios = (
        sim_test_util.participants_and_forecast())
    c = sim_test_util.c_to_test_events()
    c['incidence_scaler'] = xr.zeros_like(c.incidence_scaler)
    with self.assertRaisesRegex(ValueError,
                                'impossible to account for incidence!'):
        sim.control_arm_events(c, participants, incidence_scenarios)
def filter_months(data_array, month_list):
    # To define in GeoDataArray !!!!and GeoDS!!!!
    if month_list is not None:
        condition = xr.zeros_like(data_array.t)
        for i in range(len(data_array.t)):
            condition[i] = data_array.t[i].values[()].month in util.months_to_number(month_list)
        data_array = data_array.where(condition, drop=True)
    return data_array
def crop_months(self, new_month_list):
    condition = xr.zeros_like(self.data.t)
    for i in range(len(self.data.t)):
        condition[i] = self.data.t[i].values[()].month in util.months_to_number(new_month_list)
    self.data = self.data.where(condition, drop=True)
    self.months = new_month_list
    print("____ Data cropped to the new month list.")
    return self
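# Hedged alternative sketch (assumes the `t` coordinate is datetime-like and understood by
# xarray's .dt accessor, and that util.months_to_number returns integer months 1-12): the
# element-wise loops in filter_months / crop_months can usually be vectorised like this.
def filter_months_vectorized(data_array, month_list):
    if month_list is None:
        return data_array
    months = util.months_to_number(month_list)
    return data_array.where(data_array.t.dt.month.isin(months), drop=True)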
def strategy(data):
    close = data['futures'].sel(field="close")
    commodity = data['commodity']
    if commodity.isel(time=-1) > commodity.isel(time=-2) and close.isel(time=-1) > close.isel(time=-20):
        return xr.ones_like(close.isel(time=-1))
    else:
        return xr.zeros_like(close.isel(time=-1))
def gdf_to_model_dataset(model_ds, gdf, modelgrid, name, gridtype='structured'):
    """create 3 data-arrays from a geodataframe with surface water:
    - area: with the area of the geodataframe in the cell
    - cond: with the conductance based on the area and the bweerstand column in gdf
    - peil: with the surface water lvl based on the peil column in the gdf

    Parameters
    ----------
    model_ds : xr.Dataset
        xarray with model data
    gdf : geopandas.GeoDataFrame
        polygon shapes with surface water.
    modelgrid : flopy grid
        model grid.
    name : str
        name of the polygon shapes, name is used to store data arrays in model_ds

    Returns
    -------
    model_ds : xarray.Dataset
        dataset with modelgrid data.
    """
    area = xr.zeros_like(model_ds['top'])
    cond = xr.zeros_like(model_ds['top'])
    peil = xr.zeros_like(model_ds['top'])
    for i, row in gdf.iterrows():
        area_pol = mgrid.polygon_to_area(modelgrid, row['geometry'],
                                         xr.ones_like(model_ds['top']),
                                         gridtype)
        cond = xr.where(area_pol > area, area_pol / row['bweerstand'], cond)
        peil = xr.where(area_pol > area, row['peil'], peil)
        area = xr.where(area_pol > area, area_pol, area)

    model_ds_out = util.get_model_ds_empty(model_ds)
    model_ds_out[f'{name}_area'] = area
    model_ds_out[f'{name}_cond'] = cond
    model_ds_out[f'{name}_peil'] = peil

    return model_ds_out
def test_binarize():
    binarize_spec = Preprocessing(name="binarize", kwargs={"threshold": 14})
    data = xr.DataArray(np.arange(30).reshape(2, 3, 5), dims=("x", "y", "c"))
    expected = xr.zeros_like(data)
    expected[{"x": slice(1, None)}] = 1
    preprocessing = make_preprocessing([binarize_spec])
    result = preprocessing(data)
    xr.testing.assert_allclose(expected, result)
def test_inversion(self):

    # Download the RGI file for the run
    # Make a new dataframe of those
    rgidf = gpd.read_file(get_demo_file('SouthGlacier.shp'))

    # Go - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf)

    # Preprocessing tasks
    task_list = [
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.process_cru_data,
        tasks.local_t_star,
        tasks.mu_star_calibration,
    ]
    for task in task_list:
        execute_entity_task(task, gdirs)

    # Inversion tasks
    execute_entity_task(tasks.prepare_for_inversion, gdirs)
    # We use the default parameters for this run
    execute_entity_task(tasks.mass_conservation_inversion, gdirs)
    execute_entity_task(tasks.distribute_thickness_per_altitude, gdirs,
                        varname_suffix='_alt')
    execute_entity_task(tasks.distribute_thickness_interp, gdirs,
                        varname_suffix='_int')

    # Reference data
    gdir = gdirs[0]
    df = self.get_ref_data(gdir)

    with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds:
        v = ds.distributed_thickness_alt
        df['oggm_alt'] = v.isel(x=('z', df['i']), y=('z', df['j']))
        v = ds.distributed_thickness_int
        df['oggm_int'] = v.isel(x=('z', df['i']), y=('z', df['j']))

        ds['ref'] = xr.zeros_like(ds.distributed_thickness_int) * np.NaN
        ds['ref'].data[df['j'], df['i']] = df['thick']

    rmsd_int = ((df.oggm_int - df.thick) ** 2).mean() ** .5
    rmsd_alt = ((df.oggm_alt - df.thick) ** 2).mean() ** .5
    assert rmsd_int < 80
    assert rmsd_alt < 80

    dfm = df.mean()
    np.testing.assert_allclose(dfm.thick, dfm.oggm_int, 50)
    np.testing.assert_allclose(dfm.thick, dfm.oggm_alt, 50)

    if do_plot:
        import matplotlib.pyplot as plt
        df.plot(kind='scatter', x='oggm_int', y='thick')
        plt.axis('equal')
        df.plot(kind='scatter', x='oggm_alt', y='thick')
        plt.axis('equal')
        f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 3))
        ds.ref.plot(ax=ax1)
        ds.distributed_thickness_int.plot(ax=ax2)
        ds.distributed_thickness_alt.plot(ax=ax3)
        plt.tight_layout()
        plt.show()
def test_optimize_inversion(self):

    # Download the RGI file for the run
    # Make a new dataframe of those
    rgidf = gpd.read_file(get_demo_file('SouthGlacier.shp'))

    # Go - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf)

    # Preprocessing tasks
    task_list = [
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.process_cru_data,
        tasks.local_t_star,
        tasks.mu_star_calibration,
    ]
    for task in task_list:
        execute_entity_task(task, gdirs)

    # Reference data
    gdir = gdirs[0]
    df = self.get_ref_data(gdir)

    # Inversion tasks
    execute_entity_task(tasks.prepare_for_inversion, gdirs)

    glen_a = cfg.PARAMS['inversion_glen_a']
    fs = cfg.PARAMS['inversion_fs']

    def to_optimize(x):
        tasks.mass_conservation_inversion(gdir,
                                          glen_a=glen_a * x[0],
                                          fs=fs * x[1])
        tasks.distribute_thickness_per_altitude(gdir)
        with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds:
            thick = ds.distributed_thickness.isel(x=('z', df['i']),
                                                  y=('z', df['j']))
            out = (np.abs(thick - df.thick)).mean()
        return out

    opti = optimization.minimize(to_optimize, [1., 1.],
                                 bounds=((0.01, 10), (0.01, 10)),
                                 tol=0.1)

    # Check results and save.
    execute_entity_task(tasks.mass_conservation_inversion, gdirs,
                        glen_a=glen_a * opti['x'][0], fs=0)
    execute_entity_task(tasks.distribute_thickness_per_altitude, gdirs)

    with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds:
        df['oggm'] = ds.distributed_thickness.isel(x=('z', df['i']),
                                                   y=('z', df['j']))
        ds['ref'] = xr.zeros_like(ds.distributed_thickness) * np.NaN
        ds['ref'].data[df['j'], df['i']] = df['thick']

    rmsd = ((df.oggm - df.thick) ** 2).mean() ** .5
    assert rmsd < 60

    dfm = df.mean()
    np.testing.assert_allclose(dfm.thick, dfm.oggm, 10)

    if do_plot:
        import matplotlib.pyplot as plt
        df.plot(kind='scatter', x='oggm', y='thick')
        plt.axis('equal')
        f, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 3))
        ds.ref.plot(ax=ax1)
        ds.distributed_thickness.plot(ax=ax2)
        plt.tight_layout()
        plt.show()