def consolidate_results(data_hpo, data_replicates, rep_types):
    new_data = dict()
    for hpo, hpo_datas in data_replicates.items():
        ideal_datas = data_hpo[hpo]
        hpo_namespaces = sorted(ideal_datas.keys())
        ideal_data = [ideal_datas[namespace] for namespace in hpo_namespaces]
        new_data[hpo] = dict(ideal=xarray.combine_by_coords(ideal_data))
        new_data[hpo]['ideal'].coords['namespace'] = ('seed', hpo_namespaces)
        for replication_type in rep_types:
            hpo_namespaces = sorted(hpo_datas.keys())
            replicates_data = [
                hpo_datas[hpo_namespace][replication_type]
                for hpo_namespace in hpo_namespaces
            ]
            new_data[hpo][replication_type] = xarray.combine_by_coords(
                replicates_data)
            replicate_namespaces = [
                env(hpo_namespace, replication_type)
                for hpo_namespace in hpo_namespaces
            ]
            new_data[hpo][replication_type].coords['namespace'] = (
                'seed', replicate_namespaces)
    return new_data
def test_combine_by_coords_combine_attrs_variables(
    self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception
):
    """check that combine_attrs is used on data variables and coords"""
    data1 = Dataset(
        {"x": ("a", [0], attrs1), "y": ("a", [0], attrs1), "a": ("a", [0], attrs1)}
    )
    data2 = Dataset(
        {"x": ("a", [1], attrs2), "y": ("a", [1], attrs2), "a": ("a", [1], attrs2)}
    )

    if expect_exception:
        with pytest.raises(MergeError, match="combine_attrs"):
            combine_by_coords([data1, data2], combine_attrs=combine_attrs)
    else:
        actual = combine_by_coords([data1, data2], combine_attrs=combine_attrs)
        expected = Dataset(
            {
                "x": ("a", [0, 1], expected_attrs),
                "y": ("a", [0, 1], expected_attrs),
                "a": ("a", [0, 1], expected_attrs),
            }
        )
        assert_identical(actual, expected)
def test_check_for_impossible_ordering(self):
    ds0 = Dataset({"x": [0, 1, 5]})
    ds1 = Dataset({"x": [2, 3]})
    with raises_regex(
            ValueError,
            "does not have monotonic global indexes along dimension x"):
        combine_by_coords([ds1, ds0])
def test_combine_by_coords_no_concat(self):
    objs = [Dataset({'x': 0}), Dataset({'y': 1})]
    actual = combine_by_coords(objs)
    expected = Dataset({'x': 0, 'y': 1})
    assert_identical(expected, actual)

    objs = [Dataset({'x': 0, 'y': 1}), Dataset({'y': np.nan, 'z': 2})]
    actual = combine_by_coords(objs)
    expected = Dataset({'x': 0, 'y': 1, 'z': 2})
    assert_identical(expected, actual)
def test_combine_by_coords_raises_for_differing_types():
    # str and byte cannot be compared
    da_1 = DataArray([0], dims=["time"], coords=[["a"]], name="a").to_dataset()
    da_2 = DataArray([1], dims=["time"], coords=[[b"b"]], name="a").to_dataset()

    with pytest.raises(
        TypeError, match=r"Cannot combine along dimension 'time' with mixed types."
    ):
        combine_by_coords([da_1, da_2])
def test_combine_coords_mixed_datasets_arrays(self):
    objs = [
        DataArray([0, 1], dims="x", coords={"x": [0, 1]}),
        Dataset({"x": [2, 3]}),
    ]
    with pytest.raises(
        ValueError,
        match=r"Can't automatically combine datasets with unnamed arrays.",
    ):
        combine_by_coords(objs)
def test_combine_by_coords_no_concat(self):
    objs = [Dataset({"x": 0}), Dataset({"y": 1})]
    actual = combine_by_coords(objs)
    expected = Dataset({"x": 0, "y": 1})
    assert_identical(expected, actual)

    objs = [Dataset({"x": 0, "y": 1}), Dataset({"y": np.nan, "z": 2})]
    actual = combine_by_coords(objs)
    expected = Dataset({"x": 0, "y": 1, "z": 2})
    assert_identical(expected, actual)
def test_combine_by_coords_still_fails(self):
    # concat can't handle new variables (yet):
    # https://github.com/pydata/xarray/issues/508
    datasets = [
        Dataset({"x": 0}, {"y": 0}),
        Dataset({"x": 1}, {"y": 1, "z": 1}),
    ]
    with pytest.raises(ValueError):
        combine_by_coords(datasets, "y")
def _compute_threshold_grid(percentile, yearrange_ref, input_dir, gh_model,
                            cl_model, scenario, soc, fn_str_var, bbox,
                            yearchunks, mask_threshold=None,
                            keep_dis_data=False):
    """Given a model run and year range specification, return the x-th
    percentile for every pixel over a given time horizon (based on daily
    data) [all-year-round percentiles!], as well as the mean at each grid
    cell.

    Parameters:
        c.f. parameters in LowFlow.set_from_nc()

    Optional parameters:
        mask_threshold (tuple or list): threshold(s) below which the grid is
            masked out, e.g. ('mean', 1.)

    Returns:
        p_grid (xarray): grid with dis of given percentile (1 timestep)
        mean_grid (xarray): grid with mean(dis)
    """
    LOGGER.info('Computing threshold value per grid cell for Q%i, %i-%i',
                percentile, yearrange_ref[0], yearrange_ref[1])
    if isinstance(mask_threshold, tuple):
        mask_threshold = [mask_threshold]
    bbox = _split_bbox(bbox)
    p_grid = []
    mean_grid = []
    # loop over coordinate bounding boxes to save memory:
    for box in bbox:
        dis_xarray = _read_and_combine_nc(yearrange_ref, input_dir, gh_model,
                                          cl_model, scenario, soc, fn_str_var,
                                          box, yearchunks)
        if dis_xarray.dis.data.size:  # only if data is not empty
            p_grid += [_xarray_reduce(dis_xarray, fun='p',
                                      percentile=percentile)]
            # only compute mean_grid if required by user or mask_threshold:
            if keep_dis_data or (mask_threshold and
                                 True in ['mean' in x for x in mask_threshold]):
                mean_grid += [_xarray_reduce(dis_xarray, fun='mean')]
        del dis_xarray
    p_grid = xr.combine_by_coords(p_grid)
    if mean_grid:
        mean_grid = xr.combine_by_coords(mean_grid)

    if isinstance(mask_threshold, list):
        for crit in mask_threshold:
            if 'mean' in crit[0]:
                p_grid.dis.values[mean_grid.dis.values < crit[1]] = 0
                mean_grid.dis.values[mean_grid.dis.values < crit[1]] = 0
            if 'percentile' in crit[0]:
                p_grid.dis.values[p_grid.dis.values < crit[1]] = 0
                mean_grid.dis.values[p_grid.dis.values < crit[1]] = 0
    if keep_dis_data:
        return p_grid, mean_grid
    return p_grid, None
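# Illustration (not part of the module above, names are hypothetical): a
# minimal sketch of how xr.combine_by_coords stitches per-bounding-box
# results back into one grid, assuming each chunk shares the 'lon' axis and
# covers a distinct, monotonic 'lat' range.
import numpy as np
import xarray as xr

south = xr.Dataset({'dis': (('lat', 'lon'), np.zeros((2, 3)))},
                   coords={'lat': [-20.0, -10.0], 'lon': [0.0, 1.0, 2.0]})
north = xr.Dataset({'dis': (('lat', 'lon'), np.ones((2, 3)))},
                   coords={'lat': [10.0, 20.0], 'lon': [0.0, 1.0, 2.0]})

# input order does not matter: the result is ordered by coordinate values
combined = xr.combine_by_coords([north, south])
assert combined.dis.shape == (4, 3)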
def test_combine_by_coords_incomplete_hypercube(self):
    # test that this succeeds with default fill_value
    x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]})
    x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]})
    x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]})
    actual = combine_by_coords([x1, x2, x3])
    expected = Dataset(
        {"a": (("y", "x"), [[1, 1], [1, np.nan]])},
        coords={"y": [0, 1], "x": [0, 1]},
    )
    assert_identical(expected, actual)

    # test that this fails if fill_value is None
    with pytest.raises(ValueError):
        combine_by_coords([x1, x2, x3], fill_value=None)
def test_combine_by_coords_all_unnamed_dataarrays(self):
    unnamed_array = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")

    actual = combine_by_coords([unnamed_array])
    expected = unnamed_array
    assert_identical(expected, actual)

    unnamed_array1 = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
    unnamed_array2 = DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")

    actual = combine_by_coords([unnamed_array1, unnamed_array2])
    expected = DataArray(
        data=[1.0, 2.0, 3.0, 4.0], coords={"x": [0, 1, 2, 3]}, dims="x"
    )
    assert_identical(expected, actual)
def fetch_results(client, namespace, configs, medians, params, defaults):
    variables = list(sorted(configs.keys()))
    metrics = fetch_all_metrics(client, namespace, variables)
    epoch = defaults.get('epoch', 1)
    arrays = []
    trial_stats = fetch_vars_stats(client, namespace)
    if remaining(trial_stats):
        raise RuntimeError('Not all trials are completed')
    for variable in variables:
        trials = create_trials(configs[variable], params, metrics)
        variables_except_reference = [v for v in variables if v != 'reference']
        arrays.append(
            create_valid_curves_xarray(trials, metrics,
                                       variables_except_reference, epoch,
                                       list(sorted(params.keys())), variable))

    data = xarray.combine_by_coords(arrays)
    data.attrs['medians'] = medians
    data.coords['namespaces'] = (('seed', ), [
        env(namespace, v) for v in sorted(configs.keys())
    ])

    return data
def test_combine_by_coords_mixed_unnamed_dataarrays(self):
    named_da = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
    unnamed_da = DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")

    with pytest.raises(
        ValueError, match="Can't automatically combine unnamed DataArrays with"
    ):
        combine_by_coords([named_da, unnamed_da])

    da = DataArray([0, 1], dims="x", coords={"x": [0, 1]})
    ds = Dataset({"x": [2, 3]})
    with pytest.raises(
        ValueError,
        match="Can't automatically combine unnamed DataArrays with",
    ):
        combine_by_coords([da, ds])
def _record(self):
    """Get data from ANNarchy.Monitor instances, then merge and store them
    to the _data buffer of xarray.DataArray type."""
    for monitor, population in self.monitors.items():
        data = monitor.get()
        variables = list(data.keys())
        data = np.array(list(data.values()))
        if data.size > 0:
            data = data.transpose((1, 0, 2))
            data = DataArray(
                data,
                dims=["Time", "Variable", "Neuron"],
                coords={
                    "Time": self._compute_times(monitor.times(),
                                                data.shape[0]),
                    "Variable": variables,
                    "Neuron": self._get_senders(population, population.ranks)
                },
                name=self.label)
            if self._data.size:
                self._data = combine_by_coords([self._data, data],
                                               fill_value=np.nan)
            else:
                self._data = data
def open_and_combine_lat_lon_data(folder, tiles=None):
    """
    Load lat/lon data stored as 10x10 degree tiles in folder.
    If tiles is None, load all data available.
    If no file is available, return None.
    """
    fs = GCSFileSystem(cache_timeout=0)
    if not tiles:
        tiles = [
            os.path.splitext(os.path.split(path)[-1])[0]
            for path in fs.ls(folder)
            if not path.endswith('/')
        ]

    uris = [f'{folder}{tile}.zarr' for tile in tiles]
    ds_list = []
    for uri in uris:
        if fs.exists(uri):
            da = open_zarr_file(uri)
            # ensure both coordinate axes are ascending before combining
            if da.lat[0] > da.lat[-1]:
                da = da.reindex(lat=da.lat[::-1])
            if da.lon[0] > da.lon[-1]:
                da = da.reindex(lon=da.lon[::-1])
            ds_list.append(da)

    if len(ds_list) > 0:
        ds = xr.combine_by_coords(
            ds_list, combine_attrs="drop_conflicts").chunk({
                'lat': 2000,
                'lon': 2000
            })
        return ds

    # print(f'No data available at {folder} for tiles {tiles}')
    return None
def try_to_open_grib_file(path: str) -> xr.Dataset:
    """Try a few different ways to open up a grib file.

    Parameters
    ----------
    path : str
        Path pointing to location of grib file

    Returns
    -------
    ds : xr.Dataset
        The xarray Dataset that contains information from the grib file.
    """
    try:
        ds = xr.open_dataset(path, engine="cfgrib")
    except Exception as e:
        try:
            import cfgrib
            # cfgrib.open_datasets returns a list of datasets (one per
            # hypercube); combine them into a single dataset
            ds = cfgrib.open_datasets(path)
            ds = xr.combine_by_coords(ds)
        except Exception:
            logger.error(f"Oh no! There was a problem opening up {path}: {e}")
            return None
    return ds
def test_combine_by_coords_raises_for_differing_calendars():
    # previously failed with uninformative StopIteration instead of TypeError
    # https://github.com/pydata/xarray/issues/4495
    import cftime

    time_1 = [cftime.DatetimeGregorian(2000, 1, 1)]
    time_2 = [cftime.DatetimeProlepticGregorian(2001, 1, 1)]
    da_1 = DataArray([0], dims=["time"], coords=[time_1], name="a").to_dataset()
    da_2 = DataArray([1], dims=["time"], coords=[time_2], name="a").to_dataset()

    with raises_regex(TypeError, r"cannot compare .* \(different calendars\)"):
        combine_by_coords([da_1, da_2])
def merge_datacubes(ds_merge):
    '''
    Merges datacubes by coordinates

    Parameters:
        ds_merge (xArray Dataset[]): Array of datasets to be merged

    Returns:
        ds1 (xArray Dataset): A single datacube with all merged datacubes
    '''
    start = datetime.now()
    if len(ds_merge) == 0:
        print("Error: No datacubes to merge")
        return
    if len(ds_merge) == 1:
        return ds_merge[0]
    else:
        print('Start merging')
        ds1 = ds_merge[0]
        count = 1
        while count < len(ds_merge):
            start1 = datetime.now()
            ds1 = xr.combine_by_coords([ds1, ds_merge[count]])
            count += 1
            diff = datetime.now() - start1
            print("Successfully merged cube nr " + str(count) +
                  " to the base cube in " + str(diff.seconds) + 's')
        diff = datetime.now() - start
        print('All cubes merged in ' + str(diff.seconds) + 's')
        return ds1
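# Usage note (a sketch, not part of the function above): xr.combine_by_coords
# also accepts the whole list in one call, assuming the datasets carry
# disjoint, monotonic coordinates. That avoids re-combining the growing base
# cube on every iteration of the pairwise loop above.
import xarray as xr

def merge_datacubes_oneshot(ds_merge):
    # combine_by_coords infers the concatenation order from coordinate
    # values, so no manual pairwise merging is needed
    return xr.combine_by_coords(ds_merge)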
def _setup(self):
    """
    1. Concatenate a "modern" time-series for 1950-2100
    """
    print("Setting up the analysis...")

    # Modern time-series
    self.modern = xr.combine_by_coords([
        self.hist.to_dataset(name=self.variable_id),
        self.fut.to_dataset(name=self.variable_id)
    ])[self.variable_id]

    # Sub-select data for the "modern" timeseries for 1950-2100
    # TODO: user configuration for this parameter
    self.modern = self.modern.sel(year=slice(1950, 2100))

    # Center the pi data around its mean
    self.pi = self.pi - self.pi.mean('year')

    # Fit anomaly calculator and then compute anomalies
    self.baseline_anomalizer = BaselineAnomalizer('year', (1980, 2010))
    self.baseline_anomalizer.fit(self.modern)
    self.modern_anom = self.baseline_anomalizer.transform(self.modern)

    # Global averages
    x = self.pi.isel(year=0)
    _area = area_grid(x['lon'].data, x['lat'].data, asarray=False)
    # We eagerly did the area grid calculation in memory, so let's
    # turn it into a dask array now (inside a DataArray)
    _area = _area.chunk()
    self.area = _area
    self.modern_anom_gavg = global_avg(self.modern_anom, weights=self.area)
def batch_load(obj, factor=2):
    """
    Load xarray object values by calling compute on block subsets (that are
    an integral multiple of chunks along each chunked dimension)

    Parameters
    ----------
    obj: xarray object
    factor: int
        multiple of chunksize to load at a single time.
        Passed on to split_blocks
    """
    if isinstance(obj, xr.DataArray):
        dataset = obj._to_temp_dataset()
    else:
        dataset = obj

    # result = xr.full_like(obj, np.nan).load()
    computed = []
    for label, chunk in split_blocks(dataset, factor=factor):
        print(f"computing {label}")
        computed.append(chunk.compute())
    result = xr.combine_by_coords(computed)

    if isinstance(obj, xr.DataArray):
        result = obj._from_temp_dataset(result)
    return result
def combine(self, cleanup=False):
    """Create volume data (excluding surface data) by combining lat/lon
    coordinates across all datasets. Tested for data on a regular grid.

    Notes:
    - This has a _very_ large memory overhead, i.e., you need enough memory
      to store and manipulate all of the tower data simultaneously,
      otherwise it may hang.
    - xarray.combine_by_coords fails with a cryptic "the supplied objects do
      not form a hypercube" message if the lat/lon values do not form a
      regular grid
    """
    datalist = [data for key, data in self.data.items()]
    self.ds = xr.combine_by_coords(datalist)
    if cleanup is True:
        import gc  # garbage collector
        try:
            del self.data
        except AttributeError:
            pass
        else:
            if self.verbose:
                print('Cleared data dict from memory')
        finally:
            gc.collect()
    return self.ds
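# Sketch of the regular-grid requirement mentioned in the docstring above
# (standalone illustration, not part of the class): four single-point tiles
# on a regular 2x2 lat/lon grid combine cleanly. With a tile missing, recent
# xarray fills the hole with NaN by default and raises only if
# fill_value=None; older versions may raise the "hypercube" error instead.
import xarray as xr

tiles = [
    xr.Dataset({'u': (('lat', 'lon'), [[float(i + j)]])},
               coords={'lat': [float(i)], 'lon': [float(j)]})
    for i in (0, 1) for j in (0, 1)
]
volume = xr.combine_by_coords(tiles)       # full 2x2 hypercube
partial = xr.combine_by_coords(tiles[:3])  # missing corner filled with NaN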
def test_combine_by_coords_all_dataarrays_with_the_same_name(self):
    named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
    named_da2 = DataArray(name="a", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")

    actual = combine_by_coords([named_da1, named_da2])
    expected = merge([named_da1, named_da2])
    assert_identical(expected, actual)
def test_combine_by_coords(self):
    objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
    actual = combine_by_coords(objs)
    expected = Dataset({"x": [0, 1]})
    assert_identical(expected, actual)

    actual = combine_by_coords([actual])
    assert_identical(expected, actual)

    objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
    actual = combine_by_coords(objs)
    expected = Dataset({"x": [0, 1, 2]})
    assert_identical(expected, actual)

    # ensure auto_combine handles non-sorted variables
    objs = [
        Dataset({"x": ("a", [0]), "y": ("a", [0]), "a": [0]}),
        Dataset({"x": ("a", [1]), "y": ("a", [1]), "a": [1]}),
    ]
    actual = combine_by_coords(objs)
    expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1]), "a": [0, 1]})
    assert_identical(expected, actual)

    objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
    actual = combine_by_coords(objs)
    expected = Dataset({"x": [0, 1], "y": [0, 1]})
    assert_equal(actual, expected)

    objs = [Dataset({"x": 0}), Dataset({"x": 1})]
    with pytest.raises(ValueError,
                       match=r"Could not find any dimension coordinates"):
        combine_by_coords(objs)

    objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
    with pytest.raises(ValueError,
                       match=r"Every dimension needs a coordinate"):
        combine_by_coords(objs)

def test_empty_input(self):
    assert_identical(Dataset(), combine_by_coords([]))
def get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points,
               lat_points, lon_points):
    logger.debug(
        'obtaining GFS 0.50 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]'
        % (str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
           str(lon_hi)))
    base_url = 'https://www.ncei.noaa.gov/thredds/model-gfs-g4-anl-files-old/'
    CheckConnection.set_url('ncei.noaa.gov')

    x_arr_list = []
    start_date = datetime(date_lo.year, date_lo.month,
                          date_lo.day) - timedelta(days=1)
    for day in range((date_hi - start_date).days + 1):
        dt = datetime(start_date.year, start_date.month,
                      start_date.day) + timedelta(days=day)
        catalog = TDSCatalog(
            '%s%s%.2d/%s%.2d%.2d/catalog.xml' %
            (base_url, dt.year, dt.month, dt.year, dt.month, dt.day))
        for hour in [3, 6]:
            for cycle in [0, 6, 12, 18]:
                attempts = 0
                while True:
                    try:
                        attempts += 1
                        name = 'gfsanl_4_%s%.2d%.2d_%.2d00_00%s.grb2' % (
                            dt.year, dt.month, dt.day, cycle, hour)
                        if name in list(catalog.datasets):
                            ds_subset = catalog.datasets[name].subset()
                            query = ds_subset.query().lonlat_box(
                                north=lat_hi, south=lat_lo, east=lon_hi,
                                west=lon_lo).variables(*GFS_50_VAR_LIST)
                            CheckConnection.is_online()
                            data = ds_subset.get_data(query)
                            x_arr = xr.open_dataset(NetCDF4DataStore(data))
                            if 'time1' in list(x_arr.coords):
                                x_arr = x_arr.rename({'time1': 'time'})
                            x_arr_list.append(x_arr)
                        else:
                            logger.warning('dataset %s is not found' % name)
                        break
                    except Exception as e:
                        logger.error(traceback.format_exc())
                        CheckConnection.is_online()
                        logger.error(e)
                        logger.error(
                            'Filename %s - Failed connecting to GFS Server - number of attempts: %d'
                            % (name, attempts))
                        time.sleep(2)

    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    lon_points = ((lon_points + 180) % 360) + 180
    res = dataset.interp(lon=lon_points, lat=lat_points,
                         time=time_points).to_dataframe()[GFS_50_VAR_LIST]
    res[['Wind_speed_gust_surface',
         'Dewpoint_temperature_height_above_ground']] = [[np.nan, np.nan]
                                                         ] * len(res)
    return res
def forward_propagation(filenames, total_data, nscans, min_peak, nlevel,
                        avg_area, cgridx, cgridy):
    if filenames[-1] == filenames[nscans[-1]]:
        return total_data, 0, cgridx, cgridy, nscans

    future_cgridy = []
    future_cgridx = []
    peak_ref = []
    cgridxf = cgridx[-1]
    cgridyf = cgridy[-1]
    for future_i in np.arange(len(filenames) - nscans[-1]):
        data = xr.open_dataset(filenames[nscans[-1] + future_i])
        peak = np.nanmax(
            data['reflectivity'].values[0, nlevel,
                                        cgridyf - avg_area:cgridyf + avg_area,
                                        cgridxf - avg_area:cgridxf + avg_area])
        if peak < min_peak or math.isnan(peak):
            future_scan = future_i
            break
        peak_ref.append(peak)
        future_scan = future_i + 1
        future_cgridy.append(
            np.where(data['reflectivity'].values[0, nlevel, :, :] ==
                     peak_ref[future_i])[0][0])
        future_cgridx.append(
            np.where(data['reflectivity'].values[0, nlevel, :, :] ==
                     peak_ref[future_i])[1][0])
        # update center
        cgridxf = future_cgridx[future_i]
        cgridyf = future_cgridy[future_i]
        del data

    fw_nscans = np.zeros(len(nscans) + future_scan)
    fw_cgridy, fw_cgridx = np.zeros(len(nscans) + future_scan), np.zeros(
        len(nscans) + future_scan)
    for idx in np.arange(future_scan):
        fw_nscans[len(nscans) + idx] = nscans[-1] + idx + 1
        fw_cgridy[len(nscans) + idx] = future_cgridy[idx]
        fw_cgridx[len(nscans) + idx] = future_cgridx[idx]
    fw_nscans[:len(nscans)] = nscans
    fw_cgridy[:len(nscans)] = cgridy
    fw_cgridx[:len(nscans)] = cgridx
    fw_nscans = fw_nscans.astype(int)
    fw_cgridx = fw_cgridx.astype(int)
    fw_cgridy = fw_cgridy.astype(int)

    if future_cgridx:
        for i in np.arange(1, future_scan):
            if len(filenames) >= nscans[-1] + i:
                data = xr.open_dataset(filenames[nscans[-1] + i])
                total_data = xr.combine_by_coords([total_data, data])
                del data
    return total_data, future_i, fw_cgridx, fw_cgridy, fw_nscans
def open_fastoutput(datapath="BOUT.fast.*.nc"):
    """
    Opens fast output data and combines it into a single dataset.
    """
    # Get list of all files
    filepaths, filetype = _expand_filepaths(datapath)

    # Iterate over all files, extracting DataArrays ready for combining
    fo_data = []
    for i, filepath in enumerate(filepaths):
        fo = xr.open_dataset(filepath)

        if i == 0:
            # Get time coordinate from first file
            time = fo["time"]

        # Time is global, and we already extracted it
        fo = fo.drop_vars("time", errors="ignore")

        # There might be no virtual probe in this region
        if len(fo.data_vars) > 0:
            for name, da in fo.items():
                # Save the physical position (in index units)
                da = da.expand_dims(x=1, y=1, z=1)
                da = da.assign_coords(
                    x=xr.DataArray([da.attrs["ix"]], dims=["x"]),
                    y=xr.DataArray([da.attrs["iy"]], dims=["y"]),
                    z=xr.DataArray([da.attrs["iz"]], dims=["z"]),
                )
                # Re-attach the time coordinate
                da = da.assign_coords(time=time)

                # We saved the position, so we don't care what number the
                # variable was; we only need its name (i.e. n, T, etc.)
                regex = re.compile(r"(\D+)([0-9]+)")
                match = regex.match(name)
                if match is None:
                    raise ValueError(
                        f"Regex could not parse the variable named {name}")
                var, num = match.groups()
                da.name = var

                # Must promote DataArrays to Datasets until we require
                # xarray-0.19.0, where xarray GH #3248 is fixed
                ds = xr.Dataset({var: da})
                fo_data.append(ds)

        fo.close()

    # This will merge different variables, and arrange by physical position
    full_fo = xr.combine_by_coords(fo_data, combine_attrs="drop_conflicts")

    return full_fo
def test_combine_coords_mixed_datasets_named_dataarrays(self):
    da = DataArray(name="a", data=[4, 5], dims="x", coords={"x": [0, 1]})
    ds = Dataset({"b": ("x", [2, 3])})
    actual = combine_by_coords([da, ds])
    expected = Dataset(
        {"a": ("x", [4, 5]), "b": ("x", [2, 3])}, coords={"x": ("x", [0, 1])}
    )
    assert_identical(expected, actual)
def test_combine_by_coords(self):
    objs = [Dataset({'x': [0]}), Dataset({'x': [1]})]
    actual = combine_by_coords(objs)
    expected = Dataset({'x': [0, 1]})
    assert_identical(expected, actual)

    actual = combine_by_coords([actual])
    assert_identical(expected, actual)

    objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2]})]
    actual = combine_by_coords(objs)
    expected = Dataset({'x': [0, 1, 2]})
    assert_identical(expected, actual)

    # ensure auto_combine handles non-sorted variables
    objs = [
        Dataset({'x': ('a', [0]), 'y': ('a', [0]), 'a': [0]}),
        Dataset({'x': ('a', [1]), 'y': ('a', [1]), 'a': [1]}),
    ]
    actual = combine_by_coords(objs)
    expected = Dataset({'x': ('a', [0, 1]), 'y': ('a', [0, 1]), 'a': [0, 1]})
    assert_identical(expected, actual)

    objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'y': [1], 'x': [1]})]
    actual = combine_by_coords(objs)
    expected = Dataset({'x': [0, 1], 'y': [0, 1]})
    assert_equal(actual, expected)

    objs = [Dataset({'x': 0}), Dataset({'x': 1})]
    with raises_regex(ValueError,
                      'Could not find any dimension coordinates'):
        combine_by_coords(objs)

    objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [0]})]
    with raises_regex(ValueError, 'Every dimension needs a coordinate'):
        combine_by_coords(objs)

def test_empty_input(self):
    assert_identical(Dataset(), combine_by_coords([]))
def Generate_HIST_Covariates(self):
    '''
    Load, fix, and resample (hourly) all historical covariates:
    AWTs, DWTs, MJO, MMSL, AT
    '''
    # load data
    AWT = self.Load_SST_KMA()          # bmus + 1
    MSL = self.Load_TIDE_hist_mmsl()   # mmsl (mm)
    MJO = self.Load_MJO_hist()
    DWT = self.Load_ESTELA_KMA()       # bmus + 1
    ATD_h = self.Load_TIDE_hist_astro()

    # fix WTs id format
    AWT = xr.Dataset({'bmus': AWT.bmus + 1}, coords={'time': AWT.time})
    DWT = xr.Dataset({'bmus': (('time',), DWT.sorted_bmus_storms + 1)},
                     coords={'time': DWT.time.values[:]})

    # get MJO categories
    mjo_cs, _ = MJO_Categories(MJO['rmm1'], MJO['rmm2'], MJO['phase'])
    MJO['bmus'] = (('time',), mjo_cs)

    # reindex data to hourly (pad)
    AWT_h = fast_reindex_hourly(AWT)
    MSL_h = MSL.resample(time='1h').pad()
    MJO_h = fast_reindex_hourly(MJO)
    DWT_h = fast_reindex_hourly(DWT)

    # generate time envelope for output
    d1, d2 = xds_further_dates([AWT_h, ATD_h, MSL_h, MJO_h, DWT_h, ATD_h])
    ten = pd.date_range(d1, d2, freq='H')

    # generate empty output dataset
    OUT_h = xr.Dataset(coords={'time': ten})

    # prepare data
    AWT_h = AWT_h.rename({'bmus': 'AWT'})
    MJO_h = MJO_h.drop_vars(['mjo', 'rmm1', 'rmm2', 'phase']).rename(
        {'bmus': 'MJO'})
    MSL_h = MSL_h.drop_vars(['mmsl_median']).rename({'mmsl': 'MMSL'})
    MSL_h['MMSL'] = MSL_h['MMSL'] / 1000.0  # mm to m
    DWT_h = DWT_h.rename({'bmus': 'DWT'})

    # TODO: revisit this
    ATD_h = ATD_h.drop_vars(['WaterLevels', 'Residual']).rename(
        {'Predicted': 'AT'})
    # ATD_h = ATD_h.drop_vars(['observed', 'ntr', 'sigma']).rename(
    #     {'predicted': 'AT'})

    # combine data
    xds = xr.combine_by_coords(
        [OUT_h, AWT_h, MJO_h, MSL_h, DWT_h, ATD_h],
        fill_value=np.nan,
    )

    # repair times: round to hour and remove duplicates (if any)
    xds = repair_times_hourly(xds)

    return xds
def test_combine_by_coords_all_named_dataarrays(self):
    named_da = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")

    actual = combine_by_coords([named_da])
    expected = named_da.to_dataset()
    assert_identical(expected, actual)

    named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
    named_da2 = DataArray(name="b", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")

    actual = combine_by_coords([named_da1, named_da2])
    expected = Dataset(
        {
            "a": DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x"),
            "b": DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x"),
        }
    )
    assert_identical(expected, actual)