def _get_section(self, stp: Tuple[float, float], enp: Tuple[float, float],
                 spacing: int) -> Dataset:
    r_sec = list()
    h_sec = list()
    for x, y, h, r in zip(self.x, self.y, self.h, self.r):
        d_x = DataArray(r, [("lat", y), ("lon", x)])
        d_h = DataArray(h, [("lat", y), ("lon", x)])
        x_new = DataArray(np.linspace(stp[0], enp[0], spacing), dims="z")
        y_new = DataArray(np.linspace(stp[1], enp[1], spacing), dims="z")
        r_section = d_x.interp(lon=x_new, lat=y_new)
        h_section = d_h.interp(lon=x_new, lat=y_new)
        r_sec.append(r_section)
        h_sec.append(h_section)
    r = np.asarray(r_sec)
    h = np.asarray(h_sec)
    x = np.linspace(0, 1, spacing) * np.ones(r.shape[0])[:, np.newaxis]
    ret = Dataset(
        {
            self.dtype: DataArray(r, dims=["tilt", "distance"]),
            "y_cor": DataArray(h, dims=["tilt", "distance"]),
            "x_cor": DataArray(x, dims=["tilt", "distance"]),
        }
    )
    r_attr = self.attrs.copy()
    del r_attr["elevation"], r_attr["tangential_reso"], r_attr["range"]
    r_attr["start_lon"] = stp[0]
    r_attr["start_lat"] = stp[1]
    r_attr["end_lon"] = enp[0]
    r_attr["end_lat"] = enp[1]
    ret.attrs = r_attr
    return ret
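# --- Hypothetical sketch (not from the original source) of the interpolation
# pattern _get_section relies on: passing DataArray indexers that share a dim
# ("z") makes interp() sample pointwise along a transect instead of building
# an outer-product grid.
import numpy as np
import xarray as xr

field = xr.DataArray(
    np.random.rand(4, 5),
    coords=[("lat", np.linspace(20.0, 23.0, 4)),
            ("lon", np.linspace(110.0, 114.0, 5))],
)
x_new = xr.DataArray(np.linspace(110.5, 113.5, 10), dims="z")
y_new = xr.DataArray(np.linspace(20.5, 22.5, 10), dims="z")
section = field.interp(lon=x_new, lat=y_new)  # dims ("z",): one value per transect point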
def quick_cr(r_list: Volume_T, resolution: tuple = (1000, 1000)) -> Dataset:
    r"""
    Calculate composite reflectivity

    Parameters
    ----------
    r_list: list of xarray.Dataset

    Returns
    -------
    ret: xarray.Dataset
        composite reflectivity
    """
    r_data = list()
    for i in r_list:
        r, x, y = grid_2d(
            i["REF"].values,
            i["longitude"].values,
            i["latitude"].values,
            resolution=resolution,
        )
        r_data.append(r)
    cr = np.nanmax(r_data, axis=0)
    ret = Dataset(
        {"CR": DataArray(cr, coords=[x, y], dims=["longitude", "latitude"])}
    )
    ret.attrs = i.attrs
    ret.attrs["elevation"] = 0
    return ret
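# --- Hypothetical usage sketch for the composite step above: once every sweep
# is regridded (grid_2d assumed), composite reflectivity is just a nanmax over
# the stacked 2-D grids; NaN gaps in individual sweeps are ignored.
import numpy as np

sweeps = [np.random.rand(100, 100) for _ in range(9)]  # one grid per elevation
sweeps[0][10, 10] = np.nan
cr = np.nanmax(sweeps, axis=0)  # column maximum over all elevations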
def quick_et(r_list: Volume_T) -> Dataset:
    r"""
    Calculate echo tops

    Parameters
    ----------
    r_list: list of xarray.Dataset

    Returns
    -------
    ret: xarray.Dataset
        echo tops
    """
    r_data, d, a, elev = _extract(r_list, "REF")
    i = r_list[0]
    et = echo_top(
        r_data.astype(np.double), d.astype(np.double), elev.astype(np.double), 0.0
    )
    azimuth = a[:, 0]
    distance = d[0]
    ret = Dataset(
        {
            "ET": DataArray(
                np.ma.masked_less(et, 2),
                coords=[azimuth, distance],
                dims=["azimuth", "distance"],
            )
        }
    )
    ret.attrs = i.attrs
    ret.attrs["elevation"] = 0
    lon, lat = get_coordinate(distance, azimuth, 0, i.site_longitude, i.site_latitude)
    ret["longitude"] = (["azimuth", "distance"], lon)
    ret["latitude"] = (["azimuth", "distance"], lat)
    return ret
def quick_vil(r_list: Volume_T) -> Dataset:
    r"""
    Calculate vertically integrated liquid

    Parameters
    ----------
    r_list: list of xarray.Dataset

    Returns
    -------
    ret: xarray.Dataset
        vertically integrated liquid
    """
    r_data, d, a, elev = _extract(r_list, "REF")
    i = r_list[0]
    vil = vert_integrated_liquid(
        r_data.astype(np.double), d.astype(np.double), elev.astype(np.double)
    )
    azimuth = a[:, 0]
    distance = d[0]
    ret = Dataset(
        {
            "VIL": DataArray(
                np.ma.masked_less(vil, 0.1),
                coords=[azimuth, distance],
                dims=["azimuth", "distance"],
            )
        }
    )
    ret.attrs = i.attrs
    ret.attrs["elevation"] = 0
    lon, lat = get_coordinate(distance, azimuth, 0, i.site_longitude, i.site_latitude)
    ret["longitude"] = (["azimuth", "distance"], lon)
    ret["latitude"] = (["azimuth", "distance"], lat)
    return ret
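# --- Minimal sketch (illustrative only) of the georeferencing pattern shared
# by quick_et and quick_vil: assigning a (dims, values) tuple attaches 2-D
# longitude/latitude fields to the polar (azimuth, distance) product. The toy
# lon/lat below stand in for get_coordinate's output.
import numpy as np
import xarray as xr

azimuth = np.deg2rad(np.arange(0.0, 360.0, 90.0))
distance = np.arange(1.0, 4.0)
field = xr.Dataset(
    {"VIL": (("azimuth", "distance"), np.ones((azimuth.size, distance.size)))},
    coords={"azimuth": azimuth, "distance": distance},
)
lon = 110.0 + np.sin(azimuth)[:, None] * distance / 111.0
lat = 30.0 + np.cos(azimuth)[:, None] * distance / 111.0
field["longitude"] = (["azimuth", "distance"], lon)
field["latitude"] = (["azimuth", "distance"], lat)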
def xarr_to_netcdf(xarr: xr.Dataset, pth: str, fname: str, attrs: dict = None,
                   idx: int = None):
    """
    Takes in an xarray Dataset and pushes it to netcdf.
    For use with the output from combine_xarrs and/or _sequential_to_xarray

    Parameters
    ----------
    xarr
        Dataset to save
    pth
        Path to the folder to contain the written netcdf file
    fname
        base file name for the netcdf file
    attrs
        optional attribution to store in the netcdf store
    idx
        optional file name index

    Returns
    -------
    str
        path to the netcdf file
    """
    if idx is not None:
        finalpth = os.path.join(pth, os.path.splitext(fname)[0] + '_{}.nc'.format(idx))
    else:
        finalpth = os.path.join(pth, fname)
    if attrs is not None:
        xarr.attrs = attrs
    xarr.to_netcdf(path=finalpth, format='NETCDF4', engine='netcdf4')
    return finalpth
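# --- Hypothetical usage of xarr_to_netcdf (assumes the module-level
# `import os` / `import xarray as xr` of the snippet above and an installed
# netCDF4 backend):
import tempfile
import xarray as xr

ds = xr.Dataset({"depth": (("beam",), [10.0, 11.5, 12.2])})
outdir = tempfile.mkdtemp()
written = xarr_to_netcdf(ds, outdir, "soundings.nc", attrs={"sensor": "demo"}, idx=0)
# written -> "<outdir>/soundings_0.nc", carrying attrs {"sensor": "demo"}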
def _readrasterfile(filename, substring=None):
    # first try opening with xarray's open_dataset
    try:
        ds = open_dataset(filename)
        return ds
    # if that fails, try various VSI type files
    # this is done by first trying to get some information on the file
    except Exception:
        infos = get_info(filename, substring, format='json')
        files = []
        for info in infos:
            for f in info['files']:
                files.append(f)
        if len(files) > 1:
            _files = '\n'.join(files)
            warn('Found more than one raster file in container:\n'
                 f'{_files}\n'
                 'Try providing a `substring` to refine your selection...')
        ds = Dataset()
        for j, f in enumerate(files):
            for i, band in enumerate(open_rasterio(f), 1):
                ds[f'band{j}_{i}'] = band
        ds.attrs = band.attrs
        return ds
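# --- Illustrative sketch of the band-flattening loop above: iterating a
# DataArray yields slices along its first dim, so each band lands in its own
# Dataset variable (open_rasterio's real output has a leading "band" dim;
# a plain DataArray stands in for it here).
import numpy as np
import xarray as xr

raster = xr.DataArray(np.zeros((2, 4, 4)), dims=("band", "y", "x"))
ds = xr.Dataset()
for i, band in enumerate(raster, 1):
    ds[f"band0_{i}"] = band  # band0_1, band0_2, ... each a 2-D (y, x) variable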
def xarr_to_zarr(xarr: xr.Dataset, outputpth: str, attrs: dict = None):
    """
    Takes in an xarray Dataset and pushes it to zarr store.

    Must be run once to generate new store.  Successive runs append, see mode flag

    Parameters
    ----------
    xarr
        xarray Dataset to write to zarr
    outputpth
        path to the zarr rootgroup folder to write
    attrs
        optional attribution to write to zarr

    Returns
    -------
    str
        path to the zarr group
    """
    # grpname = str(datetime.now().strftime('%H%M%S%f'))
    if attrs is not None:
        xarr.attrs = attrs
    if not os.path.exists(outputpth):
        xarr.to_zarr(outputpth, mode='w-', compute=False)
    else:
        sync = zarr.ProcessSynchronizer(outputpth + '.sync')
        xarr.to_zarr(outputpth, mode='a', synchronizer=sync, compute=False,
                     append_dim='time')
    return outputpth
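# --- Self-contained sketch of the two write modes xarr_to_zarr wraps: the
# first to_zarr call creates the store, later calls append along "time"
# (assumes the zarr package; the store path here is illustrative).
import numpy as np
import pandas as pd
import xarray as xr

first = xr.Dataset(
    {"beam": (("time",), np.arange(3.0))},
    coords={"time": pd.date_range("2021-01-01", periods=3)},
)
first.to_zarr("demo.zarr", mode="w-")  # fails if the store already exists
nxt = first.assign_coords(time=first.time + pd.Timedelta("3D"))
nxt.to_zarr("demo.zarr", mode="a", append_dim="time")  # grows the time dim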
def __call__(self, step: Number_T) -> Dataset:
    r"""
    Args:
        step (int, float): Output grid spacing.

    Returns:
        xarray.Dataset: Merged grid data.
    """
    x, y = self._process_grid(step, step)
    grid = self._map_points(x, y)
    grid = np.ma.masked_outside(grid, 0.1, 100)
    ret = Dataset(
        {
            self.dtype: DataArray(
                grid, coords=[y[:, 0], x[0]], dims=["latitude", "longitude"]
            )
        }
    )
    r_attr = self.attr
    # Keep this attribute temporarily, waiting for future fix
    r_attr["tangential_reso"] = np.nan
    r_attr["elevation"] = 0
    r_attr["site_name"] = "RADMAP"
    r_attr["site_code"] = "RADMAP"
    del (
        r_attr["site_longitude"],
        r_attr["site_latitude"],
        r_attr["nyquist_vel"],
    )
    ret.attrs = r_attr
    return ret
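# --- Tiny illustrative check of the masking step above: masked_outside hides
# everything outside the physically plausible reflectivity range.
import numpy as np

grid = np.array([[-5.0, 30.0], [150.0, 42.5]])
masked = np.ma.masked_outside(grid, 0.1, 100)  # masks -5.0 and 150.0, keeps the rest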
def quick_vil(r_list: Volume_T) -> Dataset:
    r"""Calculate vertically integrated liquid.

    This algorithm processes data in polar coordinates, which avoids loss of
    data. By default, this function calls the low-level function
    `vert_integrated_liquid` in the C-extension. If the C-extension is not
    available, the python version will be used instead, but at much slower
    speed.

    Args:
        r_list (list(xarray.Dataset)): Reflectivity data.

    Returns:
        xarray.Dataset: vertically integrated liquid
    """
    r_data, d, a, elev = _extract(r_list, "REF")
    i = r_list[0]
    vil = vert_integrated_liquid(
        r_data.astype(np.double), d.astype(np.double), elev.astype(np.double)
    )
    azimuth = a[:, 0]
    distance = d[0]
    ret = Dataset(
        {
            "VIL": DataArray(
                np.ma.masked_less(vil, 0.1),
                coords=[azimuth, distance],
                dims=["azimuth", "distance"],
            )
        }
    )
    ret.attrs = i.attrs
    ret.attrs["elevation"] = 0
    lon, lat = get_coordinate(distance, azimuth, 0, i.site_longitude, i.site_latitude)
    ret["longitude"] = (["azimuth", "distance"], lon)
    ret["latitude"] = (["azimuth", "distance"], lat)
    return ret
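# --- Tiny check of the masking threshold above: masked_less hides VIL values
# below 0.1 so near-zero columns are excluded from plots and statistics.
import numpy as np

vil = np.array([0.0, 0.05, 1.2, 3.4])
np.ma.masked_less(vil, 0.1)  # masks the first two entries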
def from_tree(cls, tree, ctx):
    """
    Converts basic types representing YAML trees into an 'xarray.Dataset'.

    Parameters
    ----------
    tree :
        An instance of a basic Python type (possibly nested) that
        corresponds to a YAML subtree.
    ctx :
        An instance of the 'AsdfFile' object that is being constructed.

    Returns
    -------
    xarray.Dataset :
        An instance of the 'xarray.Dataset' type.
    """
    data_vars = {}
    for variable in tree["variables"]:
        data_vars[variable.name] = (variable.dimensions, variable.data)

    coords = {}
    for coordinate in tree["coordinates"]:
        coords[coordinate.name] = (coordinate.dimensions, coordinate.data)

    obj = Dataset(data_vars=data_vars, coords=coords)
    obj.attrs = tree["attributes"]
    return obj
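# --- Minimal sketch of the (dimensions, data) tuple form from_tree builds:
# plain dicts of such tuples are all the Dataset constructor needs, and plain
# dicts round-trip as attrs.
import numpy as np
import xarray as xr

obj = xr.Dataset(
    data_vars={"temperature": (("x", "y"), np.zeros((2, 3)))},
    coords={"x": (("x",), [0, 1]), "y": (("y",), [10, 20, 30])},
)
obj.attrs = {"source": "yaml-tree"}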
def _post_process(
    self,
    data: xr.Dataset,
    scale: FoI_t = 1,
    cval: FoI_t = 1,
    mask_circle: bool = False,
    preserve_dtypes: bool = True,
    **_: Any,
) -> xr.Dataset:
    if scale != 1:
        attrs = data.attrs
        data = data.map(
            lambda arr: xr.DataArray(
                rescale(arr, scale=scale, preserve_range=True, order=1,
                        multichannel=True).astype(arr.dtype),
                dims=arr.dims,
            )
        )
        data.attrs = {**attrs, Key.img.scale: scale}

    if mask_circle:
        if data.dims["y"] != data.dims["x"]:
            raise ValueError(
                f"Masking circle is only available for square crops, "
                f"found crop of shape `{(data.dims['y'], data.dims['x'])}`."
            )
        c = data.x.shape[0] // 2
        data = data.where((data.x - c) ** 2 + (data.y - c) ** 2 <= c ** 2, other=cval)
        data.attrs[Key.img.mask_circle] = True

    if preserve_dtypes:
        for key, arr in self.data.items():
            data[key] = data[key].astype(arr.dtype, copy=False)

    return data
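# --- Self-contained sketch of the circular-mask branch above: on a square
# crop, Dataset.where keeps the inscribed circle and fills the corners with
# the fill value (cval in the real function).
import numpy as np
import xarray as xr

crop = xr.Dataset(
    {"image": (("y", "x"), np.ones((64, 64)))},
    coords={"y": np.arange(64), "x": np.arange(64)},
)
c = crop.x.shape[0] // 2
masked = crop.where((crop.x - c) ** 2 + (crop.y - c) ** 2 <= c ** 2, other=0.0)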
def initialize_manual_song_events(
    ds: xr.Dataset,
    from_segmentation: bool = False,
    force_overwrite: bool = False,
    new_manual_event_types=[
        'sine_manual', 'pulse_manual', 'vibration_manual', 'aggression_manual'
    ],
    new_manual_event_categories=['segment', 'event', 'event', 'event']
) -> xr.Dataset:
    """[summary]

    Args:
        ds (xarray.Dataset): [description]
        from_segmentation (bool, optional): Initialize manual events from the
            automatic events with the same name; otherwise initialize them as
            empty. If force_overwrite: existing manual events are *OVERWRITTEN*
            with the auto events; otherwise the auto events are *ADDED* to the
            existing manual events. Defaults to False.
        force_overwrite (bool, optional): Overwrite existing manual events.
            Defaults to False.
        new_manual_event_types = ['sine_manual', 'pulse_manual',
            'vibration_manual', 'aggression_manual']
        new_manual_event_categories = ['segment', 'event', 'event', 'event']

    Returns:
        xarray.Dataset: [description]
    """
    # only add new ones
    if 'song_events' in ds:
        new_manual_event_categories = [
            cat for evt, cat in zip(new_manual_event_types, new_manual_event_categories)
            if evt not in ds.song_events.event_types
        ]
        new_manual_event_types = [
            evt for evt in new_manual_event_types
            if evt not in ds.song_events.event_types
        ]

    song_events_manual = None
    if 'song_events' not in ds or new_manual_event_types:
        new_manual_events = np.zeros(
            (ds.time.shape[0], len(new_manual_event_types)), dtype=bool)
        song_events_manual = xr.DataArray(
            data=new_manual_events,
            dims=['time', 'event_types'],
            coords={
                'time': ds.time,
                'event_types': new_manual_event_types,
                'event_categories': (('event_types'), new_manual_event_categories),
                'nearest_frame': (('time'), ds.nearest_frame),
            },
            attrs={
                'description': 'Event times as boolean arrays.',
                'sampling_rate_Hz': ds.attrs['target_sampling_rate_Hz'],
                'time_units': 'seconds',
            })

    if song_events_manual is not None:
        if not force_overwrite and 'song_events' in ds:
            combined = xr.concat([ds.song_events, song_events_manual],
                                 dim='event_types')
        else:
            combined = song_events_manual

        if 'song_events' in ds:
            ds = ds.drop_vars('song_events')

        new_ds = combined.to_dataset(name='song_events')
        attrs = ds.attrs  # for some reason, attrs are not preserved during merge...
        ds = xr.merge((ds, new_ds))
        ds.attrs = attrs

    if from_segmentation:
        for evt in new_manual_event_types:
            auto_key = evt.replace('_manual', '')  # remove the '_manual' suffix
            if auto_key in ds.song_events.event_types:
                if force_overwrite:
                    # overwrite manual events with the corresponding auto event
                    ds.song_events.loc[:, evt] = ds.song_events.loc[:, auto_key]
                else:
                    # add auto events to the corresponding manual event
                    ds.song_events.loc[:, evt] = np.logical_or(
                        ds.song_events.loc[:, evt],
                        ds.song_events.loc[:, auto_key])

    return ds
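# --- Small demonstration of the pitfall the function works around: in the
# xarray versions this snippet targets, xr.merge dropped Dataset attrs, so
# they are stashed before the merge and restored afterwards. Passing
# combine_attrs="drop" makes the behavior explicit across versions.
import xarray as xr

a = xr.Dataset({"u": ("t", [1, 2])}, attrs={"who": "keep me"})
b = xr.Dataset({"v": ("t", [3, 4])})
merged = xr.merge((a, b), combine_attrs="drop")  # merged.attrs == {}
merged.attrs = a.attrs                           # restored by hand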
def load_subset(tstart, tend, bbox=None, path='', concatenate=False,
                interpolated=False, adjusted=True, qc=True, mask_qcflags=[9],
                which_vars=['PRES', 'JULD', 'TEMP', 'PSAL']):
    """
    Loads a time-latitude-longitude subset of the full MEOP dataset
    by searching for all tags that fall within the specified
    [tstart, tend] and [lon_west, lon_east, lat_south, lat_north] limits.
    """
    ts, te = tstart, tend
    tstart = Timestamp(tstart).to_pydatetime()
    tend = Timestamp(tend).to_pydatetime()
    kwload = dict(varnames=which_vars, adjusted=adjusted, qc=qc,
                  mask_qcflags=mask_qcflags)
    if interpolated:
        intrp = '_interp'
    else:
        intrp = ""
    DS = None
    cdirs = [d.rstrip('/') for d in glob(path + '/*/')]  # Get all country data directories.
    ntags = 0
    for cdir in cdirs:
        fglob = cdir + '/DATA_ncARGO%s/*.nc' % intrp
        fnames = glob(fglob)
        fnames.sort()
        for fname in fnames:
            ds = open_dataset(fname)
            try:
                t = np.array([
                    Timestamp(tn).to_pydatetime() for tn in ds['JULD'].values
                ])
            except TypeError:
                t = np.array([
                    Timestamp(tn.year, tn.month, tn.day, tn.hour, tn.minute,
                              tn.second).to_pydatetime()
                    for tn in ds['JULD'].values
                ])
            in_time = np.logical_and(t > tstart, t < tend)
            if in_time.any():  # Subset of tag is in desired time.
                if bbox is not None:  # Subset of tag is in desired lat/lon bounding box.
                    lon = ds['LONGITUDE'].values
                    lat = ds['LATITUDE'].values
                    in_lon = np.logical_and(lon >= bbox[0], lon <= bbox[1])
                    in_lat = np.logical_and(lat >= bbox[2], lat <= bbox[3])
                    in_bbox = np.logical_and(in_lon, in_lat)
                else:
                    in_bbox = np.array([True] * in_time.size)
                if in_bbox.any():
                    ntags += 1
                    c1 = fname.split('/')
                    c1, c2 = c1[-1], c1[-3]
                    print("Loading tag " + c1 + ' (' + c2 + ')')
                    ds = strip_profile(ds, **kwload)
                    # Subset data points in the tag that fall within the wanted time and bbox.
                    dsattrs = ds.attrs
                    inxyt = np.logical_and(in_time, in_bbox)
                    dsvars = dict()
                    for wvar in ds.data_vars.keys():
                        if ds[wvar].values.ndim == 2:
                            dsnew = ds[wvar].values[inxyt, :]
                            dsnew = Variable(('t', 'z'), dsnew)
                        elif ds[wvar].values.ndim == 1:
                            dsnew = ds[wvar].values[inxyt]
                            dsnew = Variable(('t'), dsnew)
                        dsvars.update({wvar: dsnew})
                    try:
                        pp = ds['PRES_ADJUSTED'].values[inxyt, :]
                    except KeyError:
                        pp = ds['PRES'].values[inxyt, :]
                    # if interpolated:
                    coords = dict(t=t[inxyt], p=(('t', 'z'), pp))
                    # else:
                    #     coords = dict(t=t[inxyt])
                    #     dsvars.update({'p': pp})
                    ds = Dataset(data_vars=dsvars, coords=coords, attrs=dsattrs)
                    if concatenate:  # Concatenate all matching tags in a single section.
                        if DS is None:
                            DS = ds
                        else:
                            if interpolated:
                                DS = concat((DS, ds), dim='t')
                            else:
                                # FIXME: Implement concatenation for non-interpolated
                                # data. But maybe it is not a good strategy.
                                raise NotImplementedError
                    else:  # Add tag as a dictionary entry.
                        tag = fname.split('/')[-1].split('_')[0]
                        ds.attrs = dsattrs
                        if DS is None:
                            DS = {tag: ds}
                        else:
                            DS.update({tag: ds})
    print("")
    if bbox is None:
        print("Found %d tags between %s and %s." % (ntags, ts, te))
    else:
        print("Found %d tags between %s and %s in bbox [%.1f, %.1f, %.1f, %.1f]."
              % (ntags, ts, te, bbox[0], bbox[1], bbox[2], bbox[3]))

    return DS
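# --- Minimal sketch of the subsetting logic inside the loop above: the time
# and bounding-box masks are ANDed into one row selector (inxyt) that is then
# applied along the profile axis.
import numpy as np
import pandas as pd

t = pd.date_range("2010-01-01", periods=6).to_pydatetime()
lon = np.linspace(-60.0, -50.0, 6)
in_time = np.logical_and(t > t[1], t < t[4])
in_lon = np.logical_and(lon >= -58.0, lon <= -52.0)
inxyt = np.logical_and(in_time, in_lon)  # boolean row selector, as above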
def pet_bygrid(clm_ds: xr.Dataset) -> xr.Dataset:
    """Compute Potential EvapoTranspiration using Daymet dataset.

    The method is based on
    `FAO 56 paper <http://www.fao.org/docrep/X0490E/X0490E00.htm>`__.
    The following variables are required:
    tmin (deg c), tmax (deg c), lat, lon, vp (Pa), srad (W/m2), dayl (s/day)
    The computed PET's unit is mm/day.

    Parameters
    ----------
    clm_ds : xarray.Dataset
        The dataset should include the following variables:
        ``tmin``, ``tmax``, ``lat``, ``lon``, ``vp``, ``srad``, ``dayl``

    Returns
    -------
    xarray.Dataset
        The input dataset with an additional variable called ``pet``.
    """
    keys = list(clm_ds.keys())
    reqs = ["tmin", "tmax", "lat", "lon", "vp", "srad", "dayl"]
    _check_requirements(reqs, keys)

    dtype = clm_ds.tmin.dtype
    dates = clm_ds["time"]
    clm_ds["tmean"] = 0.5 * (clm_ds["tmax"] + clm_ds["tmin"])
    clm_ds["tmean"].attrs["units"] = "degree C"
    clm_ds["delta_r"] = (
        4098
        * (0.6108 * np.exp(17.27 * clm_ds["tmean"] / (clm_ds["tmean"] + 237.3)))
        / ((clm_ds["tmean"] + 237.3) ** 2)
    )

    gridxy = (clm_ds.x.values, clm_ds.y.values)
    res = clm_ds.res[0] * 1000
    elev = py3dep.elevation_bygrid(gridxy, clm_ds.crs, res)
    attrs = clm_ds.attrs
    clm_ds = xr.merge([clm_ds, elev])
    clm_ds.attrs = attrs
    clm_ds["elevation"] = clm_ds.elevation.where(
        ~np.isnan(clm_ds.isel(time=0)[keys[0]]), drop=True
    )

    pa = 101.3 * ((293.0 - 0.0065 * clm_ds["elevation"]) / 293.0) ** 5.26
    clm_ds["gamma"] = pa * 0.665e-3

    rho_s = 0.0  # recommended for daily data
    clm_ds["vp"] *= 1e-3
    e_max = 0.6108 * np.exp(17.27 * clm_ds["tmax"] / (clm_ds["tmax"] + 237.3))
    e_min = 0.6108 * np.exp(17.27 * clm_ds["tmin"] / (clm_ds["tmin"] + 237.3))
    e_s = (e_max + e_min) * 0.5
    clm_ds["e_def"] = e_s - clm_ds["vp"]

    u_2m = 2.0  # recommended when no wind data is available

    lat = clm_ds.isel(time=0).lat
    clm_ds["time"] = pd.to_datetime(clm_ds.time.values).dayofyear.astype(dtype)
    r_surf = clm_ds["srad"] * clm_ds["dayl"] * 1e-6

    alb = 0.23
    jp = 2.0 * np.pi * clm_ds["time"] / 365.0
    d_r = 1.0 + 0.033 * np.cos(jp)
    delta_r = 0.409 * np.sin(jp - 1.39)
    phi = lat * np.pi / 180.0
    w_s = np.arccos(-np.tan(phi) * np.tan(delta_r))
    r_aero = (
        24.0 * 60.0 / np.pi
        * 0.082
        * d_r
        * (w_s * np.sin(phi) * np.sin(delta_r)
           + np.cos(phi) * np.cos(delta_r) * np.sin(w_s))
    )
    rad_s = (0.75 + 2e-5 * clm_ds["elevation"]) * r_aero
    rad_ns = (1.0 - alb) * r_surf
    rad_nl = (
        4.903e-9
        * (((clm_ds["tmax"] + 273.16) ** 4 + (clm_ds["tmin"] + 273.16) ** 4) * 0.5)
        * (0.34 - 0.14 * np.sqrt(clm_ds["vp"]))
        * ((1.35 * r_surf / rad_s) - 0.35)
    )
    clm_ds["rad_n"] = rad_ns - rad_nl

    clm_ds["pet"] = (
        0.408 * clm_ds["delta_r"] * (clm_ds["rad_n"] - rho_s)
        + clm_ds["gamma"] * 900.0 / (clm_ds["tmean"] + 273.0) * u_2m * clm_ds["e_def"]
    ) / (clm_ds["delta_r"] + clm_ds["gamma"] * (1 + 0.34 * u_2m))
    clm_ds["pet"].attrs["units"] = "mm/day"

    clm_ds["time"] = dates
    clm_ds["vp"] *= 1.0e3

    clm_ds = clm_ds.drop_vars(["delta_r", "gamma", "e_def", "rad_n", "tmean"])

    return clm_ds
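# --- Worked check (illustrative) of the Tetens saturation-vapour-pressure
# term used repeatedly above, FAO-56 Eq. 11: e0(T) = 0.6108*exp(17.27*T/(T+237.3)).
import numpy as np

def e0(temp_c):
    """Saturation vapour pressure in kPa at air temperature temp_c (deg C)."""
    return 0.6108 * np.exp(17.27 * temp_c / (temp_c + 237.3))

print(round(e0(20.0), 3))  # 2.338 kPa, matching the FAO-56 tabulated value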
def plot_bootstrapped_skill_over_leadyear(
    bootstrapped: xr.Dataset,
    ax: Optional["plt.Axes"] = None,
    color_initialized: str = "indianred",
    color_uninitialized: str = "steelblue",
    color_persistence: str = "gray",
    color_climatology: str = "tan",
    capsize: Union[int, float] = 4,
    fontsize: Union[int, float] = 8,
    figsize: Tuple = (10, 4),
    fmt: str = "--o",
) -> "plt.Axes":
    """
    Plot ensemble prediction skill as in Li et al. 2016 Fig. 3a-c.

    Args:
        bootstrapped (xr.DataArray or xr.Dataset with one variable):
            from PredictionEnsemble.bootstrap() or HindcastEnsemble.bootstrap()
        ax ("plt.Axes"): plot on ax. Defaults to None.

    Returns:
        ax

    Reference:
        * Li, Hongmei, Tatiana Ilyina, Wolfgang A. Müller, and Frank Sienz.
          "Decadal Predictions of the North Atlantic CO2 Uptake." Nature
          Communications 7 (March 30, 2016): 11076. https://doi.org/10/f8wkrs.
    """
    if isinstance(bootstrapped, xr.Dataset):
        var = list(bootstrapped.data_vars)
        if len(var) > 1:
            raise ValueError(
                "Please provide only xr.Dataset with one variable or xr.DataArray."
            )
        # copy attributes to xr.DataArray
        elif len(var) == 1:
            var = var[0]
            attrs = bootstrapped.attrs
            bootstrapped = bootstrapped[var]
            bootstrapped.attrs = attrs

    assert isinstance(bootstrapped, xr.DataArray)

    reference = list(bootstrapped.drop_sel(skill="initialized").coords["skill"].values)

    sig = bootstrapped.attrs["confidence_interval_levels"].split("-")
    sig = int(100 * (float(sig[0]) - float(sig[1])))
    pers_sig = sig

    init_skill = bootstrapped.sel(skill="initialized", results="verify skill")
    init_ci = bootstrapped.sel(
        skill="initialized", results=["low_ci", "high_ci"]
    ).rename({"results": "quantile"})

    if pers_sig != sig:
        raise NotImplementedError("pers_sig != sig not implemented yet.")

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    # plot init
    ax.errorbar(
        init_skill.lead,
        init_skill,
        yerr=[
            init_skill - init_ci.isel(quantile=0),
            init_ci.isel(quantile=1) - init_skill,
        ],
        fmt=fmt,
        capsize=capsize,
        c=color_initialized,
        label="initialized",
    )

    # plot references
    for r in reference:
        r_skill = bootstrapped.sel(skill=r, results="verify skill")
        if r_skill.isnull().all():
            warnings.warn(f"Found only NaNs in {r} verify skill and skipped.")
            continue
        p_r_over_init = bootstrapped.sel(skill=r, results="p")
        r_ci = bootstrapped.sel(skill=r, results=["low_ci", "high_ci"]).rename(
            {"results": "quantile"}
        )
        c = eval(f"color_{r}")
        # add p values over all reference skills
        for t in init_skill.lead.values:
            ax.text(
                r_skill.lead.sel(lead=t),
                r_ci.isel(quantile=0).sel(lead=t).values,
                "%.2f" % float(p_r_over_init.sel(lead=t).values),
                horizontalalignment="center",
                verticalalignment="bottom",
                fontsize=fontsize,
                color=c,
            )
        yerr = [
            r_skill - r_ci.isel(quantile=0),
            r_ci.isel(quantile=1) - r_skill,
        ]
        x = r_skill.lead
        ax.errorbar(
            x,
            r_skill,
            yerr=yerr,
            fmt=fmt,
            capsize=capsize,
            c=c,
            label=r,
        )

    ax.xaxis.set_ticks(bootstrapped.lead.values)
    ax.legend(frameon=False, title=f"skill with {sig}% confidence interval:")
    ax.set_xlabel(f"Lead time [{bootstrapped.lead.attrs['units']}]")
    ax.set_ylabel(get_metric_class(bootstrapped.attrs["metric"], ALL_METRICS).long_name)
    return ax
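# --- Minimal sketch of the asymmetric error-bar call used for every skill
# line above: matplotlib's yerr takes [offsets below, offsets above] the data.
import matplotlib.pyplot as plt
import numpy as np

lead = np.arange(1, 6)
skill = np.array([0.8, 0.7, 0.6, 0.5, 0.4])
low_ci, high_ci = skill - 0.05, skill + 0.07
fig, ax = plt.subplots()
ax.errorbar(lead, skill, yerr=[skill - low_ci, high_ci - skill],
            fmt="--o", capsize=4, label="initialized")
ax.legend(frameon=False)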