def if_then_else(condition, val_if_true, val_if_false):
    # checking if parameters are xarrays
    con_xr = isinstance(condition, xr.DataArray)
    true_xr = isinstance(val_if_true, xr.DataArray)
    false_xr = isinstance(val_if_false, xr.DataArray)
    # short circuit if a scalar condition
    # made to avoid computation of other part
    # if not con_xr:
    #     return val_if_true if condition else val_if_false
    # Todo improve this!!!
    if true_xr and false_xr and con_xr:
        val_if_true, val_if_false, condition = xr.align(
            val_if_true, val_if_false, condition)
        return val_if_true.where(condition, val_if_false)
    elif not true_xr and false_xr and con_xr:
        val_if_false, condition = xr.align(val_if_false, condition)
        return val_if_false.where(np.logical_not(condition), val_if_true)
    elif true_xr and not false_xr and con_xr:
        val_if_true, condition = xr.align(val_if_true, condition)
        return val_if_true.where(condition, val_if_false)
    elif true_xr and false_xr:
        val_if_true, val_if_false = xr.align(val_if_true, val_if_false)
        return val_if_true.where(condition, val_if_false)
    elif not true_xr and false_xr:
        # use logical_not so non-scalar (e.g. numpy array) conditions also work
        return val_if_false.where(np.logical_not(condition), val_if_true)
    elif true_xr and not false_xr:
        return val_if_true.where(condition, val_if_false)
    elif con_xr:
        return (condition * 0 + val_if_true).where(condition, val_if_false)
    else:
        return np.where(condition, val_if_true, val_if_false)
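# A minimal usage sketch for if_then_else above (assuming numpy/xarray are
# available and the function is importable); the three-element condition and
# the hi/lo arrays below are made-up illustration data.
import numpy as np
import xarray as xr

cond = xr.DataArray([True, False, True], dims="time")
# scalar branches broadcast against the DataArray condition
print(if_then_else(cond, 1.0, -1.0).values)    # [ 1. -1.  1.]
# DataArray branches are aligned before the where()
hi = xr.DataArray([10.0, 20.0, 30.0], dims="time")
lo = xr.DataArray([-10.0, -20.0, -30.0], dims="time")
print(if_then_else(cond, hi, lo).values)       # [ 10. -20.  30.]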
def lag_linregress_3D_range(x, y, lagx=0, lagy=0):
    x, y = xr.align(x, y)
    if lagx != 0:
        # Shift x by lagx steps along time (the sign is passed straight to
        # shift): e.g. lag = -1 shifts x 1 step backwards, lag = 2 shifts x
        # 2 steps forwards. The introduced NaNs are dropped and the arrays
        # are re-aligned.
        x = x.shift(time=lagx).dropna(dim='time')
        x, y = xr.align(x, y)
    if lagy != 0:
        y = y.shift(time=lagy).dropna(dim='time')
        x, y = xr.align(x, y)

    # 3. Compute data length, mean and standard deviation along time axis for further use:
    n = x.shape[0]
    xmean = x.mean(axis=0, skipna=True)
    ymean = y.mean(axis=0, skipna=True)
    xstd = x.std(axis=0, skipna=True)
    ystd = y.std(axis=0, skipna=True)

    # 4. Compute covariance along time axis
    cov = np.nansum((x - xmean) * (y - ymean), axis=0) / (n - 1)

    # 5. Compute correlation along time axis
    cor = cov / (xstd * ystd)

    return cor
def lag_linregress_3D(x, y, lagx=0, lagy=0):
    """
    Input: Two xr.DataArrays of any dimensions with the first dim being time.
    Thus the input data could be a 1D time series, or for example, have three
    dimensions (time,lat,lon). Datasets can be provided in any order, but note
    that the regression slope and intercept will be calculated for y with
    respect to x.

    Output: Covariance, correlation, regression slope and intercept, p-value,
    and standard error on regression between the two datasets along their
    aligned time dimension. Lag values can be assigned to either of the data,
    with lagx shifting x, and lagy shifting y, with the specified lag amount.
    """
    #1. Ensure that the data are properly aligned to each other.
    x, y = xr.align(x, y)

    #2. Add lag information if any, and shift the data accordingly
    if lagx != 0:
        # If x lags y by 1, x must be shifted 1 step backwards.
        # But as the 'zero-th' value is nonexistent, xr assigns it as invalid
        # (nan). Hence it needs to be dropped
        x = x.shift(time=-lagx).dropna(dim='time')
        # Next important step is to re-align the two datasets so that y adjusts
        # to the changed coordinates of x
        x, y = xr.align(x, y)
    if lagy != 0:
        y = y.shift(time=-lagy).dropna(dim='time')
        x, y = xr.align(x, y)

    #3. Compute data length, mean and standard deviation along time axis:
    n = y.notnull().sum(dim='time')
    xmean = x.mean(axis=0)
    ymean = y.mean(axis=0)
    xstd = x.std(axis=0)
    ystd = y.std(axis=0)

    #4. Compute covariance along time axis
    cov = np.sum((x - xmean) * (y - ymean), axis=0) / (n)

    #5. Compute correlation along time axis
    cor = cov / (xstd * ystd)

    #6. Compute regression slope and intercept:
    slope = cov / (xstd**2)
    intercept = ymean - xmean * slope

    #7. Compute P-value and standard error
    #Compute t-statistics
    tstats = cor * np.sqrt(n - 2) / np.sqrt(1 - cor**2)
    stderr = slope / tstats

    from scipy.stats import t
    # two-sided p-value; use |t| so negative correlations are handled correctly
    pval = t.sf(np.abs(tstats), n - 2) * 2
    pval = xr.DataArray(pval, dims=cor.dims, coords=cor.coords)

    return cov, cor, slope, intercept, pval, stderr
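# A quick sanity-check sketch for lag_linregress_3D above (assuming the
# function and numpy/xarray/scipy are importable); the synthetic series is
# illustration data, not from the original source.
import numpy as np
import xarray as xr

time = np.arange(100)
x = xr.DataArray(np.random.randn(100), dims='time', coords={'time': time})
noise = xr.DataArray(0.1 * np.random.randn(100), dims='time',
                     coords={'time': time})
y = 2.0 * x + noise
cov, cor, slope, intercept, pval, stderr = lag_linregress_3D(x, y)
print(float(slope), float(cor))   # slope should be close to 2, cor close to 1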
def lag_linregress(x, y, lagx=0, lagy=0):
    """
    Calculate the lead-lag linear regression.

    Parameters
    ----------
    x : xarray.DataArray
        Independent variable, with 'time' as one dimension.
    y : xarray.DataArray
        Dependent variable, with 'time' as one dimension.
    lagx : int, optional
        Lag (in time steps) applied to `x` before the regression.
    lagy : int, optional
        Lag (in time steps) applied to `y` before the regression.

    Returns
    ----------
    re : tuple of xarray.DataArray
        Regression slope, intercept, correlation coefficient, p-value, and
        standard error on the regression between the two datasets along their
        aligned time dimension. Lag values can be assigned to either of the
        data, with lagx shifting x, and lagy shifting y, by the specified lag
        amount.

    Input: Two xr.DataArrays of any dimensions with the first dim being time.
    Thus the input data could be a 1D time series, or for example, have three
    dimensions (time,lat,lon). Datasets can be provided in any order, but note
    that the regression slope and intercept will be calculated for y with
    respect to x.
    """
    #1. Ensure that the data are properly aligned to each other.
    x, y = xr.align(x, y)

    #2. Add lag information if any, and shift the data accordingly
    if lagx != 0:
        #If x lags y by 1, x must be shifted 1 step backwards.
        #But as the 'zero-th' value is nonexistent, xr assigns it as invalid (nan). Hence it needs to be dropped
        x = x.shift(time=-lagx).dropna(dim='time')
        #Next important step is to re-align the two datasets so that y adjusts to the changed coordinates of x
        x, y = xr.align(x, y)
    if lagy != 0:
        y = y.shift(time=-lagy).dropna(dim='time')
        x, y = xr.align(x, y)

    # slope, intercept, r_value, p_value, std_err = linregress(x, y)
    slp, itc, r, p, std = xr.apply_ufunc(
        linregress, x, y,
        dask='allowed',
        input_core_dims=[['time'], ['time']],
        output_core_dims=[[], [], [], [], []],
        # exclude_dims=set(('contour',)),
        # output_dtypes=[theta.dtype],
        vectorize=True)
    return slp, itc, r, p, std
def multi_linregress_3D(x1, x2, y, lagx=0, lagy=0):
    """
    Input: Three xr.DataArrays of any dimensions with the first dim being
    time: two predictors (x1, x2) and the predictand (y). Thus the input data
    could be 1D time series, or for example, have three dimensions
    (time,lat,lon). Datasets can be provided in any order, but note that the
    regression slopes and intercept will be calculated for y with respect to
    x1 and x2.

    Output: Regression slopes for x1 and x2, the intercept, the predicted
    values, and the coefficient of determination (R^2) of the fit along the
    aligned time dimension.
    """
    #1. Ensure that the data are properly aligned to each other.
    x1, y = xr.align(x1, y)
    x2, y = xr.align(x2, y)

    #3. Compute data length, mean and standard deviation along time axis for further use:
    n = y.notnull().sum(dim='time')
    x1mean = np.nanmean(x1, axis=0)
    x2mean = np.nanmean(x2, axis=0)
    ymean = np.nanmean(y, axis=0)
    x1std = np.nanstd(x1, axis=0)
    x2std = np.nanstd(x2, axis=0)
    ystd = np.nanstd(y, axis=0)

    #4. Compute covariance along time axis
    #cov = np.sum((x1 - x1mean)*(x2 - x2mean)*(y - ymean), axis=0) / (n)

    #5. Compute correlation along time axis
    #cor = cov/(x1std*x2std*ystd)

    #6. Compute regression slope and intercept:
    slopex1 = ((np.sum(x2**2, axis=0) * np.sum(x1 * y, axis=0) -
                np.sum(x1 * x2, axis=0) * np.sum(x2 * y, axis=0)) /
               (np.sum(x1 * x1, axis=0) * np.sum(x2 * x2, axis=0) -
                np.sum(x1 * x2, axis=0)**2))
    slopex2 = ((np.sum(x1**2, axis=0) * np.sum(x2 * y, axis=0) -
                np.sum(x1 * x2, axis=0) * np.sum(x1 * y, axis=0)) /
               (np.sum(x1 * x1, axis=0) * np.sum(x2 * x2, axis=0) -
                np.sum(x1 * x2, axis=0)**2))
    intercept = ymean - x1mean * slopex1 - x2mean * slopex2
    y_pred = intercept + x1 * slopex1 + x2 * slopex2

    rss = np.nansum((y - y_pred)**2, axis=0)
    ss_tot = np.nansum((y - ymean)**2, axis=0)
    r_2 = 1 - (rss / ss_tot)
    r_2 = xr.DataArray(r_2, dims=intercept.dims, coords=intercept.coords)

    #7. Compute P-value and standard error
    #Compute t-statistics
    # tstats = cor*np.sqrt(n-2)/np.sqrt(1-cor**2)
    # stderr = slope/tstats
    # from scipy.stats import t
    # pval = t.sf(tstats, n-2)*2
    # pval = xr.DataArray(pval, dims=cor.dims, coords=cor.coords)

    return slopex1, slopex2, intercept, y_pred, r_2  # ,pval,stderr
def project_w(self, data, z=False, sel=None, align=True): """ projection on w-mode (varphi = -c**2/N2 * dphidz) for reconstructing, use w = wn*varphi (see reconstruct_w) interpolation uses linear interpolation, but midpoints should be OK (since it gives something equivalent to trapezoidal integration upon integration) Parameters: ___________ data: xarray.DataArray Returns: ________ xarray.Datarray See also: _________ project, reconstruct_w """ if sel is None: dm = self.ds else: dm = self.ds.sel(sel) if align: data, dm = xr.align(data, dm, join="inner") _check_hdim_mismatch(data, dm) if not (z is None or z is False): if z is True: z, = gop.get_z_coord(data) if isinstance(z, str): z = data.coords[z] elif align: data, z = xr.align(data, z, join="inner") data = gop.interp2z(dm[self._znames['zc']], z, data) zf, zc = self._znames['zf'], self._znames["zc"] prov = (data * self._w2rho(-dm.dphidz, zc=dm[zc], zf=dm[zf]) * dm.dz).sum(self._zdims['zc']) if self.free_surf: prov += self.g * (-dm.dphidz / dm.N2 * self.xgrid.interp( data, self._xgrid_z, boundary="extrapolate")).isel({ self._zdims["zf"]: -1 }).drop(self._znames["zf"]) return prov / dm.norm
def align_debug(): v2_base_path = "/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1" nc_files = sorted( glob.glob(os.path.join(v2_base_path, "*/*/*.nc"), recursive=True)) gd_arrays = [] nonzeros_raw = [] for f in (nc_files[0], nc_files[5]): print(f) gd_array = xr.open_dataarray(f) # gd_array = gd_array.T.rename({"image_file_name": "presentation"}) # gd_array.coords["presentation_id"] = ("presentation", range(gd_array.shape[1])) # gd_array = gd_array.rename({"image_file_name": "presentation"}) # gd_array.coords["presentation_id"] = ("presentation", range(gd_array.shape[0])) gd_array.coords["presentation_id"] = ("image_file_name", range(gd_array.shape[0])) # gd_array.coords["neuroid_id"] = ("neuroid", gd_array["neuroid"].values) # df_massage = pd.DataFrame(list(map(massage_file_name, gd_array["presentation"].values))) # for column in df_massage.columns: # gd_array.coords[column] = ("presentation", df_massage[column]) # gd_array.reset_index(["neuroid", "presentation"], drop=True, inplace=True) gd_array.reset_index("category_name", drop=True, inplace=True) mkgu.assemblies.gather_indexes(gd_array) gd_arrays.append(gd_array) nonzeros_raw.append(np.nonzero(~np.isnan(gd_array))) print("nonzeros_raw: ") print(nonzeros_raw) align_test = xr.align(*gd_arrays, join="outer") nonzeros_aligned = [np.nonzero(~np.isnan(da)) for da in align_test] print("nonzeros_aligned: ") print(nonzeros_aligned) assert nonzeros_raw[0].shape == nonzeros_aligned[0].shape
def river_3d( geo, sea_level, coastline_rho, ): assert type(sea_level) == xr.core.dataarray.DataArray assert type(geo) == xr.core.dataarray.Dataset top_active_layer = xr.where(geo["IBOUND"]==1, geo.layer, np.nan).min(dim="z") h_grid, dhdx, outer_ridge = river_grid(geo, sea_level, coastline_rho) h_grid = xr.Dataset({"h_grid" : h_grid}) z_bins = _mid_to_binedges(geo["z"].values) h_grid = h_grid.groupby_bins("h_grid", z_bins, labels=geo.layer).apply(_dumb).rename({"h_grid_bins" : "h_l"}) h_grid = h_grid.sortby("x").sortby("y") #Needed for xarray > 0.15 h_grid, top_active_layer = xr.align(h_grid, top_active_layer, join="outer") #Ensure river layer does not exceed IBOUND. h_grid["h_l"] = xr.where(h_grid["h_l"] < top_active_layer, top_active_layer, h_grid["h_l"]) riv = h_grid * geo["IBOUND"].where((geo["IBOUND"] == 1) & (geo.layer == h_grid["h_l"])) return(riv, z_bins, dhdx, outer_ridge)
def align_cdump(cdump1, cdump2, dd, tag1, tag2): cdump1 = align_subA(cdump1) cdump2 = align_subA(cdump2) minlat = np.min([np.min(cdump1.latitude.values), np.min(cdump2.latitude.values)]) maxlat = np.max([np.max(cdump1.latitude.values), np.max(cdump2.latitude.values)]) maxlon = np.max([np.max(cdump1.longitude.values), np.max(cdump2.longitude.values)]) minlon = np.min([np.min(cdump1.longitude.values), np.min(cdump2.longitude.values)]) # round to nearest hundred minlat = np.round(minlat * 100) / 100 maxlat = np.round(maxlat * 100) / 100 minlon = np.round(minlon * 100) / 100 maxlon = np.round(maxlon * 100) / 100 print(minlon, maxlon) print(minlat, maxlat) nlat = np.abs(np.ceil((maxlat - minlat) / dd)) + 1 nlon = np.abs(np.ceil((maxlon - minlon) / dd)) + 1 conc1 = par2conc.reindex(cdump1, minlat, minlon, nlat, nlon, dd, dd) conc2 = par2conc.reindex(cdump2, minlat, minlon, nlat, nlon, dd, dd) conc1 = conc1.drop("latitude") conc2 = conc2.drop("latitude") conc1 = conc1.drop("longitude") conc2 = conc2.drop("longitude") new1, new2 = xr.align(conc1, conc2, join="outer") new1.expand_dims("run") new1["run"] = tag1 new2.expand_dims("run") new2["run"] = tag2 return xr.concat([new1, new2], dim="run")
def align(*objects, **kwargs):
    """Given any number of Dataset objects, returns new objects with aligned
    indexes.

    Arrays from the aligned objects are suitable as input to mathematical
    operators, because along each dimension they have the same indexes.

    Missing values (if ``join != 'inner'``) are filled with NaN.

    Parameters
    ----------
    *objects : Dataset
        Objects to align.
    join : {'outer', 'inner', 'left', 'right'}, optional
        Method for joining the indexes of the passed objects along each
        dimension:
        - 'outer': use the union of object indexes
        - 'inner': use the intersection of object indexes
        - 'left': use indexes from the first object with each dimension
        - 'right': use indexes from the last object with each dimension
    copy : bool, optional
        If ``copy=True``, the returned objects contain all new variables. If
        ``copy=False`` and no reindexing is required then the aligned objects
        will include original variables.

    Returns
    -------
    aligned : same as *objects
        Tuple of objects with aligned coordinates.
    """
    xarray_datasets = [obj.xarray for obj in objects]
    # unpack the list so each dataset is passed to xarray.align as a
    # separate positional argument
    aligned_datasets = xarray.align(*xarray_datasets, **kwargs)
    return [Dataset(ds) for ds in aligned_datasets]
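# Hedged illustration of the join behaviour this wrapper forwards to
# xarray.align, using plain xarray objects (the project-specific Dataset
# wrapper class is not assumed here).
import xarray as xr

a = xr.Dataset({"v": ("x", [1.0, 2.0, 3.0])}, coords={"x": [0, 1, 2]})
b = xr.Dataset({"v": ("x", [10.0, 20.0])}, coords={"x": [1, 2]})
inner_a, inner_b = xr.align(a, b, join="inner")   # x = [1, 2]
outer_a, outer_b = xr.align(a, b, join="outer")   # x = [0, 1, 2], gaps -> NaN
print(inner_a.x.values, outer_b.v.values)         # [1 2] [nan 10. 20.]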
def read_all_ensembles_extremes(): """ This function reads all the extremes into a dataset, concatenating in time and along a new ensemble dimension. The resulting dimensions of the dataset should be (ensemble member, year, lat, lon). Returns a dataset, with four arrays according to each type of extreme (hot/cold djf/jja). """ extremes_pth = "$somepath/extreme_counts/" member_ids = [i for i in chain(range(2, 36), range(101, 106))] dsets = [] for mem in member_ids: glob_res = sorted(glob(f'{extremes_pth}/*.{mem:03d}*.nc')) dsets.append( xr.open_mfdataset(glob_res, concat_dim='year', combine='nested')) dsets_aligned = xr.align(*dsets, join='inner') first = dsets_aligned[0] rest = [ds.reset_coords(drop=True) for ds in dsets_aligned[1:]] objs_to_concat = [first] + rest # concatenate ensemble_dim = xr.DataArray(member_ids, dims='member_id', name='member_id') ds = xr.concat(objs_to_concat, dim=ensemble_dim, coords='minimal') # restore non_dim_coords to variables non_dim_coords_reset = set(ds.coords) - set(ds.dims) ds = ds.reset_coords(non_dim_coords_reset) return ds
def scatter_xarray(x, y, hue="location", time="time", ax=None, window=1, xlim=None, ylim=None, **kwargs): if ax is None: _, ax = subplots() if window != 1: x = x.rolling({time: window}, center=True).mean().dropna(time) y = y.rolling({time: window}, center=True).mean().dropna(time) x, y = xr.align(x, y) for h, color in zip(x[hue].values, itertools.cycle(sns.color_palette())): xx = x.sel(**{hue: h}).values yy = y.sel(**{hue: h}).values ax.plot(xx, yy, "-", color=color, alpha=0.3, linewidth=2) ax.plot(xx[-1:], yy[-1:], "o", color=color, label=h, **kwargs) xp = xx[-1:] * 1.05 yp = yy[-1:] if (xlim is None or xlim[0] < xp < xlim[1]) and \ (ylim is None or ylim[0] < yp < ylim[1]): ax.annotate(h, (xp, yp), color=color) return ax
def test_align_and_fillna_complex(self): ds1 = case_runner_to_ds(foo2_zarray1_zarray2, fn_args=['a', 'b'], cases=[(1j, 10), (2j, 20)], var_names=['x', 'y'], var_dims=[['time']], var_coords={'time': ['a', 'b', 'c', 'd', 'e']}) ds2 = case_runner_to_ds(foo2_zarray1_zarray2, fn_args=['a', 'b'], cases=[(2j, 10), (1j, 20)], var_names=['x', 'y'], var_dims=[['time']], var_coords={'time': ['a', 'b', 'c', 'd', 'e']}) assert not np.logical_not(np.isnan(ds1['x'].data)).all() assert not np.logical_not(np.isnan(ds1['y'].data)).all() assert not np.logical_not(np.isnan(ds2['x'].data)).all() assert not np.logical_not(np.isnan(ds2['y'].data)).all() assert all(t == complex for t in (ds1.x.dtype, ds2.x.dtype, ds1.y.dtype, ds2.y.dtype)) assert ds1.y.dtype == complex assert ds2.y.dtype == complex ds1, ds2 = xr.align(ds1, ds2, join='outer') fds = ds1.fillna(ds2) assert np.logical_not(np.isnan(fds['x'].data)).all() assert np.logical_not(np.isnan(fds['y'].data)).all()
def mix_weights(primary, secondary, max_weight=0.049): primary, secondary = xr.align(primary, secondary, join='outer') primary = primary.fillna(0) secondary = secondary.fillna(0) primary_exposure = qnstats.calc_exposure(primary) primary_max_exposure = primary_exposure.max('asset') primary_abs_sum = abs(primary).sum('asset') secondary_exposure = qnstats.calc_exposure(secondary) secondary_max_exposure = secondary_exposure.max('asset') secondary_abs_sum = abs(secondary).sum('asset') # formula k = primary_abs_sum * (primary_max_exposure - max_weight) / \ (secondary_abs_sum * ( max_weight - secondary_max_exposure) ) k = k.where(k > 0, 0) # k > 0 mix = primary + secondary * k # normalization sum = abs(mix).sum('asset') sum = sum.where(sum > 1, 1) mix = mix / sum return mix
def group(self, datasets: VirtualDatasetBag, **group_settings: Dict[str, Any]) -> VirtualDatasetBox: self._assert( 'juxtapose' in datasets.bag and len(datasets.bag['juxtapose']) == len(self._children), "invalid dataset bag") groups = [ product.group( VirtualDatasetBag(dataset_bag, datasets.geopolygon, datasets.product_definitions), **group_settings) for product, dataset_bag in zip( self._children, datasets.bag['juxtapose']) ] aligned_boxes = xarray.align(*[grouped.box for grouped in groups]) def tuplify(indexes, _): return { 'juxtapose': [box.sel(**indexes).item() for box in aligned_boxes] } return VirtualDatasetBox( xr_apply(aligned_boxes[0], tuplify), select_unique([grouped.geobox for grouped in groups]), select_unique([grouped.load_natively for grouped in groups]), merge_dicts([grouped.product_definitions for grouped in groups]), geopolygon=select_unique( [grouped.geopolygon for grouped in groups]))
def read_ds_anew(): ds = [] for i, task_name in enumerate(params.tasks): if not task_name: task_name = params.matrix_directory name = params.matrix_labels else: name = params.matrix_labels[i] if params.verbose: print( f'\nReading in {task_name} data from directory {params.matrix_directory}...' ) partial, subjects = read_mat_data( f'{input_dir}/{params.matrix_directory}/{task_name}') nodes = [f'node_{x}' for x in range(partial.shape[-1])] partial = xr.DataArray(partial.squeeze(), coords=[subjects, nodes, nodes], dims=['subject', 'dim1', 'dim2'], name=name) ds.append(partial) ds = xr.align(*ds, join='inner') # 'inner' takes intersection of ds objects ds = xr.merge(ds, compat='override', join='exact') return ds
def test_align_and_fillna_complex(self): ds1 = case_runner_to_ds(foo2_zarray1_zarray2, fn_args=['a', 'b'], cases=[(1j, 10), (2j, 20)], var_names=['x', 'y'], var_dims={('x', 'y'): 'time'}, var_coords={'time': ['a', 'b', 'c', 'd', 'e']}) ds2 = case_runner_to_ds(foo2_zarray1_zarray2, fn_args=['a', 'b'], cases=[(2j, 10), (1j, 20)], var_names=['x', 'y'], var_dims={('x', 'y'): 'time'}, var_coords={'time': ['a', 'b', 'c', 'd', 'e']}) assert not np.logical_not(np.isnan(ds1['x'].data)).all() assert not np.logical_not(np.isnan(ds1['y'].data)).all() assert not np.logical_not(np.isnan(ds2['x'].data)).all() assert not np.logical_not(np.isnan(ds2['y'].data)).all() assert all(t == complex for t in (ds1.x.dtype, ds2.x.dtype, ds1.y.dtype, ds2.y.dtype)) assert ds1.y.dtype == complex assert ds2.y.dtype == complex ds1, ds2 = xr.align(ds1, ds2, join='outer') fds = ds1.fillna(ds2) assert np.logical_not(np.isnan(fds['x'].data)).all() assert np.logical_not(np.isnan(fds['y'].data)).all()
def cor(x, y, time_axis=0, lagx=0, lagy=0):
    """
    Computes the Pearson correlation coefficient between x and y along the
    time dimension, accounting for given lags (if any).

    Input: Two single- or multi-dimensional xarray DataArray objects (x and y)
    which have 'time' as the first dimension. The default time axis is 0, but
    it can be changed using the 'time_axis' argument. Lag values (lagx for
    input data x, and lagy for input data y) can also be prescribed. Default
    lag values are zero.

    Output: An xarray DataArray object showing the Pearson correlation
    coefficient between x and y along the 'time' dimension. If lag values are
    provided, the returned object will show the lagged correlation.
    """
    #1. Add lag information if any, and shift the data accordingly
    if lagx != 0:
        #If x lags y by 1, x must be shifted 1 step backwards.
        #But as the 'zero-th' value is nonexistent, xr assigns it as invalid (nan). Hence it needs to be dropped
        x = x.shift(time=-lagx).dropna(dim='time', how='all')
    if lagy != 0:
        y = y.shift(time=-lagy).dropna(dim='time', how='all')

    #2. Ensure that the data are properly aligned to each other.
    x, y = xr.align(x, y)

    #3. Compute data length, mean and standard deviation along time dimension for further use:
    n = x.time.shape[0]
    xmean = x.mean(dim='time')
    ymean = y.mean(dim='time')
    xstd = x.std(dim='time')
    ystd = y.std(dim='time')

    #4. Compute covariance along time dimension
    cov = np.sum((x - xmean) * (y - ymean), axis=time_axis) / (n)

    #5. Compute correlation along time dimension
    cor = cov / (xstd * ystd)

    return cor
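# A lagged-correlation sketch for cor() above (assuming numpy/xarray and the
# function are importable); the sine series is made-up illustration data.
import numpy as np
import xarray as xr

t = np.arange(200)
x = xr.DataArray(np.sin(0.3 * t) + 0.1 * np.random.randn(200),
                 dims='time', coords={'time': t})
y = x.shift(time=2)                # y lags x by two time steps
print(float(cor(x, y)))            # plain correlation at lag 0
print(float(cor(x, y, lagy=2)))    # shifting y back recovers a correlation of ~1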
def getGridInformation(self): """ """ atm = ocn = lnd = None try: atm = self._getVariableChild("areacella", synonyms=["area"]).convert("m2") atm = atm.ds[atm.varname] except Exception as e: pass try: ocn = self._getVariableChild("areacello").convert("m2") ocn = ocn.ds[ocn.varname] except Exception as e: pass try: lnd = self._getVariableChild("sftlf", synonyms=["landfrac"]).convert("1") lnd = lnd.ds[lnd.varname] except Exception as e: pass if atm is not None and lnd is not None: atm, lnd = xr.align(atm, lnd, join='override', copy=False) if atm is not None: self.area_atm = atm if ocn is not None: self.area_ocn = ocn if lnd is not None: self.frac_lnd = lnd for child in self.children: self.children[child].getGridInformation()
def align_bug_reproduce(): dims = ("x", "y") shape = (10, 5) das = [] for j in (0, 1): data = np.full(shape, np.nan, dtype="float64") for i in range(shape[0]): data[i, i % shape[1]] = float(i) coords_d = { "ints": ("x", range(j * shape[0], (j + 1) * shape[0])), "nans": ("x", np.array([np.nan] * shape[0], dtype="float64")), "lower": ("y", list(string.ascii_lowercase[:shape[1]])) } da = xr.DataArray(data=data, dims=dims, coords=coords_d) da.set_index(append=True, inplace=True, x=["ints", "nans"], y=["lower"]) das.append(da) nonzeros_raw = [np.nonzero(~np.isnan(da)) for da in das] print("nonzeros_raw: ") print(nonzeros_raw) aligned = xr.align(*das, join="outer") nonzeros_aligned = [np.nonzero(~np.isnan(da)) for da in aligned] print("nonzeros_aligned: ") print(nonzeros_aligned) assert nonzeros_raw[0].shape == nonzeros_aligned[0].shape
def xrmerge(das, accept_new=True):
    """
    Merges xarrays with different dimension sets.

    Parameters
    ----------
    das : list of DataArrays
        Arrays to merge.
    accept_new : bool, optional
        If True (default), values from later arrays overwrite earlier ones
        where both are defined; if False, the earlier values are kept.

    Returns
    -------
    da : an xarray that is the merge of das

    References
    ----------
    Thanks to @jcmgray
    https://github.com/pydata/xarray/issues/742#issue-130753818

    In the future, we may not need this as xarray may provide the merge for
    us.
    """
    da = das[0]
    for new_da in das[1:]:
        # Expand both to have same dimensions, padding with NaN
        da, new_da = xr.align(da, new_da, join='outer')
        # Fill NaNs one way or the other re. accept_new
        da = new_da.fillna(da) if accept_new else da.fillna(new_da)
    return da
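# A small sketch of xrmerge above on two DataArrays with partially
# overlapping coordinates (assuming xarray and xrmerge are importable).
import xarray as xr

a = xr.DataArray([1.0, 2.0], dims='x', coords={'x': [0, 1]}, name='v')
b = xr.DataArray([20.0, 30.0], dims='x', coords={'x': [1, 2]}, name='v')
print(xrmerge([a, b]).values)                     # [ 1. 20. 30.]  (new wins)
print(xrmerge([a, b], accept_new=False).values)   # [ 1.  2. 30.]  (old wins)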
def setUp(self): self.relief_map = xr.open_rasterio(test_relief, parse_coordinates=True)[0] self.slope_map = xr.open_rasterio(test_slope, parse_coordinates=True)[0] self.pga = xr.open_dataset(test_pga) self.saturation = xr.open_rasterio(test_saturation, parse_coordinates=True)[0] self.friction = xr.open_rasterio(test_friction, parse_coordinates=True)[0] self.cohesion = xr.open_rasterio(test_cohesion, parse_coordinates=True)[0] self.Dn_single = xr.open_dataset(test_Dn_single)["Dn"] self.Dn_set = xr.open_dataset(test_Dn_set) ( self.relief_map, self.slope_map, self.pga, self.saturation, self.friction, self.cohesion, ) = xr.align( self.relief_map, self.slope_map, self.pga, self.saturation, self.friction, self.cohesion, join="override", )
def check_forward_looking(cropped_output, whole_output): cropped_output = sort_and_crop_output(cropped_output) whole_output = sort_and_crop_output(whole_output) max_time = min(cropped_output.coords[ds.TIME].values.max(), whole_output.coords[ds.TIME].values.max()) cropped_output = cropped_output.loc[:max_time] whole_output = whole_output.loc[:max_time] cropped_output, whole_output = xr.align(cropped_output, whole_output, join='outer') cropped_output = cropped_output.fillna(0) whole_output = whole_output.fillna(0) diff = whole_output - cropped_output # print(diff.where(diff!=0).dropna('time', 'all').dropna('asset','all')) delta = abs(diff).max().values if delta > FORWARD_LOOKING_TEST_DELTA: print('WARNING: This strategy uses forward looking! Delta = ' + str(delta)) return True else: print('Ok. There is no forward looking.') return False
def set_dataset(self, array_list, align_type="outer"):
    """
    :param array_list: list of xarrays
    :type array_list: list of :class:`mth5.timeseries.ChannelTS` objects
    :param align_type: how the different times will be aligned

        * 'outer': use the union of object indexes
        * 'inner': use the intersection of object indexes
        * 'left': use indexes from the first object with each dimension
        * 'right': use indexes from the last object with each dimension
        * 'exact': instead of aligning, raise ValueError when indexes to be
          aligned are not equal
        * 'override': if indexes are of same size, rewrite indexes to be
          those of the first object with that dimension. Indexes for the
          same dimension must have the same size in all objects.

    :type align_type: string
    """
    if isinstance(array_list, (list, tuple)):
        x_array_list = self._validate_array_list(array_list)

        # first need to align the time series.
        x_array_list = xr.align(*x_array_list, join=align_type)

        # input as a dictionary
        xdict = dict([(x.component.lower(), x) for x in x_array_list])
        self._dataset = xr.Dataset(xdict)
    elif isinstance(array_list, xr.Dataset):
        self._dataset = array_list

    self.validate_metadata()
    self._dataset.attrs.update(self.run_metadata.to_dict(single=True))
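# Hedged illustration of two of the align_type options documented above,
# using plain xarray objects (no ChannelTS/RunTS machinery is assumed).
import xarray as xr

ex = xr.DataArray([1.0, 2.0, 3.0], dims='time', coords={'time': [0, 1, 2]})
hy = xr.DataArray([4.0, 5.0, 6.0], dims='time', coords={'time': [1, 2, 3]})
inner = xr.align(ex, hy, join='inner')        # keeps only the shared times 1, 2
override = xr.align(ex, hy, join='override')  # same sizes, so ex's times are reused
print(inner[0].time.values, override[1].time.values)   # [1 2] [0 1 2]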
def group(self, datasets: VirtualDatasetBag, **search_terms: Dict[str, Any]) -> VirtualDatasetBox: self._assert( 'juxtapose' in datasets.pile and len(datasets.pile['juxtapose']) == len(self._children), "invalid dataset pile") groups = [ product.group( VirtualDatasetBag(dataset_pile, datasets.geopolygon, datasets.product_definitions), **search_terms) for product, dataset_pile in zip( self._children, datasets.pile['juxtapose']) ] aligned_piles = xarray.align(*[grouped.pile for grouped in groups]) def tuplify(indexes, _): return { 'juxtapose': [pile.sel(**indexes).item() for pile in aligned_piles] } return VirtualDatasetBox( xr_apply(aligned_piles[0], tuplify), select_unique([grouped.geobox for grouped in groups]), merge_dicts([grouped.product_definitions for grouped in groups]))
def _process(self, overlay, key=None): if not isinstance(overlay, CompositeOverlay): return overlay elif len(overlay) == 1: return overlay.last if isinstance(overlay, NdOverlay) else overlay.get(0) imgs = [] for rgb in overlay: if not isinstance(rgb, RGB): raise TypeError('stack operation expect RGB type elements, ' 'not %s name.' % type(rgb).__name__) rgb = rgb.rgb dims = [kd.name for kd in rgb.kdims][::-1] coords = {kd.name: rgb.dimension_values(kd, False) for kd in rgb.kdims} imgs.append(tf.Image(self.uint8_to_uint32(rgb), coords=coords, dims=dims)) try: imgs = xr.align(*imgs, join='exact') except ValueError: raise ValueError('RGB inputs to stack operation could not be aligned, ' 'ensure they share the same grid sampling.') stacked = tf.stack(*imgs, how=self.p.compositor) arr = shade.uint32_to_uint8(stacked.data)[::-1] data = (coords[dims[1]], coords[dims[0]], arr[:, :, 0], arr[:, :, 1], arr[:, :, 2]) if arr.shape[-1] == 4: data = data + (arr[:, :, 3],) return rgb.clone(data, datatype=[rgb.interface.datatype]+rgb.datatype)
def divide_sessions_old(temp_comp, div_dict): temp_comp_div_list = [] len_frame = len(temp_comp.coords['frame']) for s_orig, s_group in temp_comp.groupby('session_id'): if s_orig in div_dict.keys(): div = div_dict[s_orig] s_new_array = np.empty(len_frame, dtype='U10') sd_new_array = np.empty(len_frame) nan_mask = np.full(len_frame, False) for s_new, sd_new in div.items(): b, e = sd_new s_new_array[b:e] = s_new sd_new_array[b:e] = np.arange(e - b) nan_mask[b:e] = True s_new_array[nan_mask] = 'trivial' sd_new_array[nan_mask] = np.arange(np.sum(nan_mask)) idx_new = pd.MultiIndex.from_arrays( [s_new_array, sd_new_array], names=['segment_id', 'frame_split']) s_group.coords['frame'] = idx_new else: s_new_array = np.full(len_frame, 'all') idx_new = pd.MultiIndex.from_arrays( [s_new_array, s_group.coords['frame']], names=['segment_id', 'frame_split']) s_group.coords['frame'] = idx_new s_group.unstack('frame').rename({'frame_split': 'frame'}) print("finished unstacking for" + s_orig) temp_comp_div_list.append(s_group) temp_comp_div = xr.align( *temp_comp_div_list, copy=False, join='outer', exclude=('animal', 'mapping_id')) return xr.concat(temp_comp_div, dim='session_id')
def fix_grid_continuity(dset):
    # if grid is already continuous, don't do anything.
    if check_grid_continuity(dset):
        return dset
    xvv = dset.x.values
    yvv = dset.y.values
    xlim = [xvv[0], xvv[-1]]
    ylim = [yvv[0], yvv[-1]]
    xindx = np.arange(xlim[0], xlim[1] + 1)
    yindx = np.arange(ylim[0], ylim[1] + 1)
    mgrid = get_latlongrid(dset, xindx, yindx)
    # mgrid = get_even_latlongrid(dset, xlim, ylim)
    conc = np.zeros_like(mgrid[0])
    dummy = xr.DataArray(conc, dims=["y", "x"])
    dummy = dummy.assign_coords(latitude=(("y", "x"), mgrid[1]))
    dummy = dummy.assign_coords(longitude=(("y", "x"), mgrid[0]))
    dummy = dummy.assign_coords(x=(("x"), xindx))
    dummy = dummy.assign_coords(y=(("y"), yindx))
    cdset, dummy2 = xr.align(dset, dummy, join="outer")
    cdset = cdset.assign_coords(latitude=(("y", "x"), mgrid[1]))
    cdset = cdset.assign_coords(longitude=(("y", "x"), mgrid[0]))
    return cdset.fillna(0)
def _integrate_space(self, region=None, mean=False): assert "cell_measure" in self.ds ds = self.ds if region is None else ilamb_regions.maskedDataset( region, self) da = ds[self.varname] cm = ds['cell_measure'] v, dx = xr.align(da, xr.where(cm < 1, np.nan, cm), join='override', copy=False) out = (v * dx).sum(dx.dims) units = Unit(self.ds[self.varname].attrs['units']) out.attrs = { key: a for key, a in v.attrs.items() if "cell_" not in key } if 'ilamb' not in out.attrs: out.attrs['ilamb'] = '' out.attrs['ilamb'] += "integrate(dim='space',mean=%s%s); " % ( mean, "" if region is None else ",region='%s'" % region) if mean: mask = da.isnull() dims = set(mask.dims).difference(set(dx.dims)) if dims: mask = mask.all(dims) out /= (dx * (mask == False)).sum() else: if 'm-2' in str(units): units = str(units).replace("m-2", "") else: units *= Unit('m2') out.attrs['units'] = str(units) tm = self.ds.time_measure if "time_measure" in self.ds else None v = Variable(da=out, varname=str(da.name) + "_sint", time_measure=tm) return v
def stack(*imgs, **kwargs): """Combine images together, overlaying later images onto earlier ones. Parameters ---------- imgs : iterable of Image The images to combine. how : str, optional The compositing operator to combine pixels. Default is `'over'`. """ if not imgs: raise ValueError("No images passed in") shapes = [] for i in imgs: if not isinstance(i, Image): raise TypeError("Expected `Image`, got: `{0}`".format(type(i))) elif not shapes: shapes.append(i.shape) elif shapes and i.shape not in shapes: raise ValueError("The stacked images must have the same shape.") name = kwargs.get('name', None) op = composite_op_lookup[kwargs.get('how', 'over')] if len(imgs) == 1: return imgs[0] imgs = xr.align(*imgs, copy=False, join='outer') with np.errstate(divide='ignore', invalid='ignore'): out = tz.reduce(tz.flip(op), [i.data for i in imgs]) return Image(out, coords=imgs[0].coords, dims=imgs[0].dims, name=name)
def create_and_train_models(data): asset_name_all = data.coords['asset'].values data = data.sel(time=slice( '2013-05-01', None)) # cut the head before 2013-05-01 (a lot of noise) features_all = get_features(data) target_all = get_target_classes(data) models = dict() for asset_name in asset_name_all: target_cur = target_all.sel(asset=asset_name).dropna('time', 'any') features_cur = features_all.sel(asset=asset_name).dropna('time', 'any') # align features and targets target_for_learn_df, feature_for_learn_df = xr.align(target_cur, features_cur, join='inner') if len(features_cur.time) < 10: # not enough points for training continue model = create_model() try: model.fit(feature_for_learn_df.values, target_for_learn_df) models[asset_name] = model except KeyboardInterrupt as e: raise e except: logging.exception("model training failed") return models
def get_decoupling_dataset(time_range=None, window='3H'): ceilometer = get_dataset('ceilometer', time_range=time_range) first_cbh = ceilometer['first_cbh'] first_cbh = first_cbh[first_cbh.values < 3000.].resample( window, dim='time', how=lambda x, axis=None: np.percentile(x, q=95, axis=axis)) cf_dataset = get_dataset('cloud_fraction', time_range=time_range, window=window, label='left') cloud_fraction, first_cbh = xarray.align(cf_dataset['low_cloud_fraction'], first_cbh) first_cbh[cloud_fraction < 0.5] = np.nan surface = get_dataset('marmet', time_range=time_range).xarray.resample( window, dim='time', how='mean') surface, first_cbh = xarray.align(surface, first_cbh, join='outer') zlcl = zlcl_from_T_RH(surface['air_temperature'], surface['relative_humidity']) data_vars = { 'LCL': (['time'], zlcl, {'units': 'm'}), 'cbh': (['time'], first_cbh, {'units': 'm'}), } coords = {'time': (['time'], surface['time'])} return Dataset(xarray.Dataset(data_vars, coords))
def run(params): start_time = datetime.now() bin_width, filter_bandwidth, theta, shift, signal_field = params # Get file paths signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) output_dir = '/scratch/pkittiwi/fg1p/stats_semi/signal/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc'\ .format(signal_dir, bin_width, filter_bandwidth, theta, shift, signal_field) output_file = '{:s}/stats_semi_signal_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc' \ .format(output_dir, bin_width, filter_bandwidth, theta, shift, signal_field) mask_file = '/scratch/pkittiwi/fg1p/hera331_fov_mask.nc' # Load data to memory and align coordinates with xr.open_dataarray(signal_file) as da: signal = da.load() with xr.open_dataarray(mask_file) as da: mask = da.load() # Load one noise file to get coordinates. noise = xr.open_dataarray( '/scratch/pkittiwi/fg1p/noise_map/bin0.08/fbw8.00/theta90.0/shift0/' 'noise_map_bin0.08_fbw8.00_theta90.0_shift0_333.nc' ) for key, values in noise.coords.items(): signal.coords[key] = values mask.coords[key] = values signal, noise, mask = xr.align(signal, noise, mask) # Mask observation signal = signal.where(mask == 1) # Calculate statistic out = get_stats(signal) out.attrs = {'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth, 'theta': theta, 'shift': shift} os.makedirs(output_dir, exist_ok=True) out.to_netcdf(output_file) out.close() print('Finish. signal_file = {:s}. output_file = {:s}. ' 'Time spent {:.5f} sec.' .format(signal_file, output_file, (datetime.now() - start_time).total_seconds()))
def stack(*imgs):
    """Merge a number of images together, overlapping earlier images with
    later ones."""
    _validate_images(imgs)
    if len(imgs) == 1:
        return imgs[0]
    imgs = xr.align(*imgs, copy=False, join='outer')
    out = imgs[0].data.copy()
    for img in imgs[1:]:
        out = np.where(_to_channels(img.data)['a'] == 0, out, img.data)
    return Image(out, coords=imgs[0].coords, dims=imgs[0].dims)
def merge(*imgs):
    """Merge a number of images together, averaging the channels"""
    _validate_images(imgs)
    if len(imgs) == 1:
        return imgs[0]
    imgs = xr.align(*imgs, copy=False, join='outer')
    coords, dims = imgs[0].coords, imgs[0].dims
    imgs = _to_channels(np.stack([i.data for i in imgs]))
    r = imgs['r'].mean(axis=0, dtype='f8').astype('uint8')
    g = imgs['g'].mean(axis=0, dtype='f8').astype('uint8')
    b = imgs['b'].mean(axis=0, dtype='f8').astype('uint8')
    a = imgs['a'].mean(axis=0, dtype='f8').astype('uint8')
    out = np.dstack([r, g, b, a]).view(np.uint32).reshape(a.shape)
    return Image(out, coords=coords, dims=dims)
def stack(*imgs, **kwargs):
    """Combine images together, overlaying later images onto earlier ones.

    Parameters
    ----------
    imgs : iterable of Image
        The images to combine.
    how : str, optional
        The compositing operator to combine pixels. Default is `'over'`.
    """
    if not imgs:
        raise ValueError("No images passed in")
    for i in imgs:
        if not isinstance(i, Image):
            raise TypeError("Expected `Image`, got: `{0}`".format(type(i)))
    op = composite_op_lookup[kwargs.get('how', 'over')]
    if len(imgs) == 1:
        return imgs[0]
    imgs = xr.align(*imgs, copy=False, join='outer')
    out = tz.reduce(tz.flip(op), [i.data for i in imgs])
    return Image(out, coords=imgs[0].coords, dims=imgs[0].dims)
def aggregate(*dss, accept_new=False): """ Aggregates xarray Datasets and DataArrays """ # TODO: overwrite option, rather than accept_new, raise error if not # TODO: rename --> aggregate, look into, part_align -> concat. # TODO: check if result var is all non-nan and could be all same dtype if accept_new: dss = tuple(reversed(dss)) ds = dss[0] for new_ds in dss[1:]: # First make sure both datasets have the same variables for data_var in new_ds.data_vars: if data_var not in ds.data_vars: ds[data_var] = np.nan # Expand both to have same dimensions, padding with NaN ds, _ = xr.align(ds, new_ds, join="outer") # assert all(ds.loc[new_ds.coords].isnull()) # Fill NaNs one way or the other w.r.t. accept_new ds = ds.fillna(new_ds) return ds
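# A usage sketch for aggregate() above (assuming numpy/xarray and the function
# are importable); ds_a/ds_b are made-up Datasets with partially overlapping
# coordinates and variable sets.
import numpy as np
import xarray as xr

ds_a = xr.Dataset({'u': ('t', [1.0, 2.0])}, coords={'t': [0, 1]})
ds_b = xr.Dataset({'u': ('t', [99.0, 3.0]), 'w': ('t', [7.0, 8.0])},
                  coords={'t': [1, 2]})
agg = aggregate(ds_a, ds_b)   # accept_new=False: ds_a values win where both exist
print(agg.u.values)           # [1. 2. 3.]
print(agg.w.values)           # [nan 7. 8.]  ('w' only comes from ds_b)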
def test_align_and_fillna_int(self): ds1 = case_runner_to_ds(foo2_array_array, fn_args=['a', 'b'], cases=[(1, 10), (2, 20)], var_names=['x', 'y'], var_dims=[['time']], var_coords={'time': ['a', 'b', 'c', 'd', 'e']}) ds2 = case_runner_to_ds(foo2_array_array, fn_args=['a', 'b'], cases=[(2, 10), (1, 20)], var_names=['x', 'y'], var_dims=[['time']], var_coords={'time': ['a', 'b', 'c', 'd', 'e']}) assert not np.logical_not(ds1['x'].isnull()).all() assert not np.logical_not(ds1['y'].isnull()).all() assert not np.logical_not(ds2['x'].isnull()).all() assert not np.logical_not(ds2['y'].isnull()).all() ds1, ds2 = xr.align(ds1, ds2, join='outer') fds = ds1.fillna(ds2) assert np.logical_not(fds['x'].isnull()).all() assert np.logical_not(fds['y'].isnull()).all()
try: netwtsd = pickle.load(f, encoding='latin1') except: netwtsd = pickle.load(f) #%% wt_av_cov = spatial_weight_normcov(netwtsd) resp_av_cov = spatial_resp_normcov(da) k_pos, k_stim = kurtosis_da(da) #%% pwr = tot_var(da) #non_k_var = (k_pos<42) * (k_pos>2) * (pwr>0) *(k_stim<42) * (k_stim>2) #resp_av_cov = resp_av_cov[non_k_var] #%% wt_av_cov, resp_av_cov = xr.align(wt_av_cov, resp_av_cov, join='inner') layer_labels_ind = np.array(map(str, wt_av_cov.coords['layer_label'].values)) n_plots = len(np.unique(layer_labels_ind)) plt.figure(figsize=(12,3)) layer_labels = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'fc6'] for i, layer in enumerate(layer_labels[1:]): plt.subplot(1, n_plots, i+1) x = wt_av_cov[layer_labels_ind==layer].values y = resp_av_cov[layer_labels_ind==layer].values if i<4: s=4 else: s=1 plt.scatter(x, y, s=s, color='k', edgecolors='none')
rx = np.double(np.squeeze(mat['data'][0][0][0])) ry = np.double(np.squeeze(mat['data'][0][0][1])) #print ry rfDiameter.append(np.sqrt( rx**2 + ry**2 )*0.625 + 40) transPos.append(np.squeeze(mat['data'][0][0][2])) resps.append(np.squeeze(mat['data'][0][0][3])) #lets get svd measurements over cells #originally: resps cellXposXrotXshape --> converted to cell X pos X unique_shape cell_resps = [np.dstack(cell).T.reshape(cell.shape[0], np.prod(cell[0].shape)) for cell in resps] ## putting yasmin data into data_array lsxr = [xr.DataArray(aresp, dims=['x','shapes']) for aresp in cell_resps] resp= xr.concat(xr.align(*lsxr, join='outer'), dim='cells') resp.to_dataset('resp').to_netcdf(top_dir + 'data/an_results/v4_ti_resp.nc') print([cell[2].shape==cell[0].shape for cell in resps]) #acell = cell_resps[0] #acell = acell - np.mean(acell, 1, keepdims=True) #u, s, v = np.linalg.svd(acell, full_matrices=False) ##use first princomp #recell = np.dot(np.expand_dims(u[:,0],1), np.expand_dims(v[0,:]*s[0],0)) ##convince myself these are all the same #np.corrcoef(acell.ravel(), recell.ravel()) #np.dot(acell.ravel(), recell.ravel()) / (np.linalg.norm(acell.ravel())*np.linalg.norm(recell.ravel())) #(s[0]**2/sum(s**2))**0.5
rx = np.double(np.squeeze(mat['data'][0][0][0])) ry = np.double(np.squeeze(mat['data'][0][0][1])) #print ry rfDiameter.append(np.sqrt( rx**2 + ry**2 )*0.625 + 40) transPos.append(np.squeeze(mat['data'][0][0][2])) resps.append(np.squeeze(mat['data'][0][0][3])) #lets get svd measurements over cells #originally: resps cellXposXrotXshape --> converted to cell X pos X unique_shape cell_resps = [np.dstack(cell).T.reshape(cell.shape[0], np.prod(cell[0].shape)) for cell in resps] # putting yasmin data into data_array lsxr = [xr.DataArray(aresp, dims=['x','shapes']) for aresp in cell_resps] resp = xr.concat(xr.align(*lsxr, join='outer'), dim='unit') resp = resp #convert to spk/s 300 ms averaging window resp.to_dataset('resp').to_netcdf(top_dir + 'data/an_results/v4_ti_resp.nc') #apc 109 m = l.loadmat(top_dir + 'data/responses/V4_370PC2001.mat') v4=m['resp'][0][0] v4_da = xr.DataArray(v4.T, dims=['shapes', 'unit']).chunk() #adjustment for repeats [ 14, 15, 16,17, 318, 319, 320, 321] #a = np.hstack((range(14), range(18,318))) #a = np.hstack((a, range(322, 370))) #v4_da = v4_da[a, :] v4_da = v4_da.to_dataset('resp') v4_da.to_netcdf(top_dir + 'data/responses/V4_370PC2001.nc')
def run(params): print("Calculating bin_width={:.2f} MHz, filter_bandwidth={:.2f} MHz," "theta={:.1f}, shift={:d}, signal_field={:d}, noise_multiplier={:.3f}" .format(*params)) start_time = datetime.now() bin_width, filter_bandwidth, theta, shift, \ signal_field, noise_multiplier = params nnoise = 500 # Get file path signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) noise_dir = '/scratch/pkittiwi/fg1p/noise_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) output_dir = '/scratch/pkittiwi/fg1p/stats_mc/obsn{:.1f}/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(noise_multiplier, bin_width, filter_bandwidth, theta, shift) signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc' \ .format(signal_dir, bin_width, filter_bandwidth, theta, shift, signal_field) noise_file = [ '{:s}/noise_map_bin{:.2f}_fbw{:.2f}_theta{:.1f}_shift{:d}_{:03d}.nc' .format(noise_dir, bin_width, filter_bandwidth, theta, shift, noise_field) for noise_field in range(nnoise) ] output_file = '{:s}/stats_mc_obsn{:.1f}_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}_all.nc' \ .format(output_dir, noise_multiplier, bin_width, filter_bandwidth, theta, shift, signal_field) # Load data signal = xr.open_dataarray(signal_file) noise = xr.open_mfdataset( noise_file, concat_dim=pd.Index(range(nnoise), name='noise_field'), autoclose=True ) # open as dask.array, chunk over noise field mask = xr.open_dataarray('/scratch/pkittiwi/fg1p/hera331_fov_mask.nc') # Align coordinates - they must match for XArray broadcasting for key in ['x', 'y', 'f']: signal.coords[key] = noise.coords[key].values mask.coords[key] = noise.coords[key].values signal, noise, mask = xr.align(signal, noise, mask) # Make observation signal = signal.where(mask == 1).stack(s=('x', 'y')) noise = noise.where(mask == 1).stack(s=('x', 'y')) * noise_multiplier obs = signal + noise del signal # Calculate noise variance noise_var = noise.var(dim='s') del noise # Calculate biased moments m2_biased = xr_moment(obs, 's', order=2) m3_biased = xr_moment(obs, 's', order=3) m4_biased = xr_moment(obs, 's', order=4) del obs # Calculate unbiased moments m2_unbiased = m2_biased - noise_var m3_unbiased = m3_biased m4_unbiased = m4_biased - (6 * m2_unbiased * noise_var) - \ (3 * noise_var ** 2) # Note: the second term in m4_unbiased is actually "m2_true", which we # estimate here with m2_unbiased # Calculate biased vsk v_biased = m2_biased s_biased = m3_biased / m2_biased ** (3 / 2) k_biased = (m4_biased / m2_biased ** 2) - 3 # Calculate unbiased vsk v_unbiased = m2_unbiased s_unbiased = m3_unbiased / m2_unbiased ** (3 / 2) k_unbiased = (m4_unbiased / m2_unbiased ** 2) - 3 # # Distribute computation on the cluster # m2_biased, m3_biased, m4_biased, \ # m2_unbiased, m3_unbiased, m4_unbiased,\ # v_biased, s_biased, k_biased, \ # v_unbiased, s_unbiased, k_unbiased = dask.compute( # m2_biased, m3_biased, m4_biased, # m2_unbiased, m3_unbiased, m4_unbiased, # v_biased, s_biased, k_biased, # v_unbiased, s_unbiased, k_unbiased # ) # Rename variables and merge m2_biased = m2_biased.rename({'__xarray_dataarray_variable__': 'm2_biased'}) m3_biased = m3_biased.rename({'__xarray_dataarray_variable__': 'm3_biased'}) m4_biased = m4_biased.rename({'__xarray_dataarray_variable__': 'm4_biased'}) m2_unbiased = m2_unbiased.rename( {'__xarray_dataarray_variable__': 'm2_unbiased'}) m3_unbiased = m3_unbiased.rename( 
{'__xarray_dataarray_variable__': 'm3_unbiased'}) m4_unbiased = m4_unbiased.rename( {'__xarray_dataarray_variable__': 'm4_unbiased'}) v_biased = v_biased.rename({'__xarray_dataarray_variable__': 'v_biased'}) s_biased = s_biased.rename({'__xarray_dataarray_variable__': 's_biased'}) k_biased = k_biased.rename({'__xarray_dataarray_variable__': 'k_biased'}) v_unbiased = v_unbiased.rename( {'__xarray_dataarray_variable__': 'v_unbiased'}) s_unbiased = s_unbiased.rename( {'__xarray_dataarray_variable__': 's_unbiased'}) k_unbiased = k_unbiased.rename( {'__xarray_dataarray_variable__': 'k_unbiased'}) out = xr.merge([m2_biased, m3_biased, m4_biased, m2_unbiased, m3_unbiased, m4_unbiased, v_biased, s_biased, k_biased, v_unbiased, s_unbiased, k_unbiased]) # Distribute computation on the cluster out = out.compute() # Save output out.attrs = {'signal_field': signal_field, 'noise_multiplier': noise_multiplier, 'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth, 'theta': theta, 'shift': shift} os.makedirs(output_dir, exist_ok=True) out.to_netcdf(output_file) print( 'Finish {:s}. Time spent: {:.5f} minutes' .format(signal_file, (datetime.now() - start_time).total_seconds() / 60) )
def run(params): bin_width, filter_bandwidth, theta, shift, \ signal_field, noise_field, noise_multiplier = params # Get file path signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) noise_dir = '/scratch/pkittiwi/fg1p/noise_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) output_dir = '/scratch/pkittiwi/fg1p/stats_mc/obsn{:.1f}/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}/s{:03d}' \ .format(noise_multiplier, bin_width, filter_bandwidth, theta, shift, signal_field) signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc' \ .format(signal_dir, bin_width, filter_bandwidth, theta, shift, signal_field) noise_file = '{:s}/noise_map_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc' \ .format(noise_dir, bin_width, filter_bandwidth, theta, shift, noise_field) output_file = '{:s}/stats_mc_obsn{:.1f}_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}_{:03d}.nc' \ .format(output_dir, noise_multiplier, bin_width, filter_bandwidth, theta, shift, signal_field, noise_field) # Load data signal = xr.open_dataarray(signal_file) noise = xr.open_dataarray(noise_file) mask = xr.open_dataarray('/scratch/pkittiwi/fg1p/hera331_fov_mask.nc') for key, values in noise.coords.items(): signal.coords[key] = values mask.coords[key] = values signal, noise, mask = xr.align(signal, noise, mask) # Make observation signal = signal.where(mask == 1).stack(s=('x', 'y')) noise = noise.where(mask == 1).stack(s=('x', 'y')) * noise_multiplier obs = signal + noise # Get noise variance noise_var = moment(noise.values, moment=2, axis=-1, nan_policy='omit') # Get biased moments m2_biased = moment(obs.values, moment=2, axis=-1, nan_policy='omit') m3_biased = moment(obs.values, moment=3, axis=-1, nan_policy='omit') m4_biased = moment(obs.values, moment=4, axis=-1, nan_policy='omit') # Get unbiased moments m2_unbiased = m2_biased - noise_var m3_unbiased = m3_biased m4_unbiased = m4_biased - (6 * m2_unbiased * noise_var) - \ (3 * noise_var ** 2) # Get biased vsk v_biased = m2_biased s_biased = m3_biased / m2_biased ** (3 / 2) k_biased = (m4_biased / m2_biased ** 2) - 3 # Get unbiased vsk v_unbiased = m2_unbiased s_unbiased = m3_unbiased / m2_unbiased ** (3 / 2) k_unbiased = (m4_unbiased / m2_unbiased ** 2) - 3 # Save output out = xr.Dataset( {'m2_biased': (['f'], m2_biased), 'm3_biased': (['f'], m3_biased), 'm4_biased': (['f'], m4_biased), 'm2_unbiased': (['f'], m2_unbiased), 'm3_unbiased': (['f'], m3_unbiased), 'm4_unbiased': (['f'], m4_unbiased), 'v_biased': (['f'], v_biased), 's_biased': (['f'], s_biased), 'k_biased': (['f'], k_biased), 'v_unbiased': (['f'], v_unbiased), 's_unbiased': (['f'], s_unbiased), 'k_unbiased': (['f'], k_unbiased)}, coords={'f': noise.coords['f']}, attrs={ 'signal_field': signal_field, 'noise_field': noise_field, 'noise_multiplier': noise_multiplier, 'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth, 'theta': theta, 'shift': shift } ) os.makedirs(output_dir, exist_ok=True) out.to_netcdf(output_file) print( 'Finish. signal_file = {:s}. noise_file = {:s}. output_file = {:s}.' .format(signal_file, noise_file, output_file) )
def run(params, chunks=(1, 256, 256)): print("Calculating bin_width={:.2f} MHz, filter_bandwidth={:.2f} MHz," "theta={:.1f}, shift={:d}, signal_field={:d}, noise_field={:d}, " "noise_multiplier={:.3f}" .format(*params)) start_time = datetime.now() bin_width, filter_bandwidth, theta, shift, \ signal_field, noise_field, noise_multiplier = params # Get file path signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) noise_dir = '/scratch/pkittiwi/fg1p/noise_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) output_dir = '/scratch/pkittiwi/fg1p/stats_mc/obsn{:.1f}/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}/s{:03d}' \ .format(noise_multiplier, bin_width, filter_bandwidth, theta, shift, signal_field) signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc' \ .format(signal_dir, bin_width, filter_bandwidth, theta, shift, signal_field) noise_file = '{:s}/noise_map_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc' \ .format(noise_dir, bin_width, filter_bandwidth, theta, shift, noise_field) output_file = '{:s}/stats_mc_obsn{:.1f}_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}_{:03d}.nc' \ .format(output_dir, noise_multiplier, bin_width, filter_bandwidth, theta, shift, signal_field, noise_field) # Load data chunks_dict = {'f': chunks[0], 'y': chunks[1], 'x': chunks[2]} signal = xr.open_dataarray(signal_file, chunks=chunks_dict) noise = xr.open_dataarray(noise_file, chunks=chunks_dict) mask = xr.open_dataarray('/scratch/pkittiwi/fg1p/hera331_fov_mask.nc', chunks=chunks_dict) # Align coordinates - they must match for XArray broadcasting for key in ['x', 'y', 'f']: signal.coords[key] = noise.coords[key].values mask.coords[key] = noise.coords[key].values signal, noise, mask = xr.align(signal, noise, mask) # Make observation signal = signal.where(mask == 1).stack(s=('x', 'y')) noise = noise.where(mask == 1).stack(s=('x', 'y')) * noise_multiplier obs = signal + noise # Calculate noise variance noise_var = noise.var(dim='s') # Calculate biased moments m2_biased = xr_moment(obs, 's', order=2) m3_biased = xr_moment(obs, 's', order=3) m4_biased = xr_moment(obs, 's', order=4) # Calculate unbiased moments m2_unbiased = m2_biased - noise_var m3_unbiased = m3_biased m4_unbiased = m4_biased - (6 * m2_unbiased * noise_var) - \ (3 * noise_var ** 2) # Note: the second term in m4_unbiased is actually "m2_true", which we # estimate here with m2_unbiased # Calculate biased vsk v_biased = m2_biased s_biased = m3_biased / m2_biased ** (3 / 2) k_biased = (m4_biased / m2_biased ** 2) - 3 # Calculate unbiased vsk v_unbiased = m2_unbiased s_unbiased = m3_unbiased / m2_unbiased ** (3 / 2) k_unbiased = (m4_unbiased / m2_unbiased ** 2) - 3 # Rename variables and merge m2_biased.name = 'm2_biased' m3_biased.name = 'm3_biased' m4_biased.name = 'm4_biased' m2_unbiased.name = 'm2_unbiased' m3_unbiased.name = 'm3_unbiased' m4_unbiased.name = 'm4_unbiased' v_biased.name = 'v_biased' s_biased.name = 's_biased' k_biased.name = 'k_biased' v_unbiased.name = 'v_unbiased' s_unbiased.name = 's_unbiased' k_unbiased.name = 'k_unbiased' out = xr.merge([m2_biased, m3_biased, m4_biased, m2_unbiased, m3_unbiased, m4_unbiased, v_biased, s_biased, k_biased, v_unbiased, s_unbiased, k_unbiased]) # Distribute computation on the cluster out = out.compute() # Save output out.attrs = {'signal_field': signal_field, 'noise_multiplier': noise_multiplier, 
'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth, 'theta': theta, 'shift': shift} os.makedirs(output_dir, exist_ok=True) out.to_netcdf(output_file) print( 'Finish {:s}. Time spent: {:.5f} minutes' .format(signal_file, (datetime.now() - start_time).total_seconds() / 60) )
def run(params): print(params) start_time = datetime.now() bin_width, filter_bandwidth, theta, shift, \ signal_field, noise_field, noise_multiplier = params # Get file path signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) noise_dir = '/scratch/pkittiwi/fg1p/noise_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) output_dir = '/scratch/pkittiwi/fg1p/obs_map/obsn{:.1f}/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}/s{:03d}' \ .format(noise_multiplier, bin_width, filter_bandwidth, theta, shift, signal_field) signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc' \ .format(signal_dir, bin_width, filter_bandwidth, theta, shift, signal_field) noise_file = '{:s}/noise_map_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc' \ .format(noise_dir, bin_width, filter_bandwidth, theta, shift, noise_field) output_file = '{:s}/obs_map_obsn{:.1f}_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}_{:03d}.nc' \ .format(output_dir, noise_multiplier, bin_width, filter_bandwidth, theta, shift, signal_field, noise_field) # Load data with xr.open_dataarray(signal_file) as ds: signal = ds.load() with xr.open_dataarray(noise_file) as ds: noise = ds.load() with xr.open_dataarray('/scratch/pkittiwi/fg1p/hera331_fov_mask.nc') as ds: mask = ds.load() # Align coordinates - they must match for XArray broadcasting for key in ['x', 'y', 'f']: signal.coords[key] = noise.coords[key].values mask.coords[key] = noise.coords[key].values signal, noise, mask = xr.align(signal, noise, mask) # Make observation signal = signal.where(mask == 1) noise = noise.where(mask == 1) * noise_multiplier obs = signal + noise obs.name = 'obs' obs.attrs = {'signal_field': signal_field, 'noise_field': noise_field, 'noise_multiplier': noise_multiplier, 'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth, 'theta': theta, 'shift': shift} # Calculate noise variance noise_var = noise.var(dim=['y', 'x']) noise_var.name = 'noise_var' noise_var.attrs = { 'noise_field': noise_field, 'noise_multiplier': noise_multiplier, 'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth, 'theta': theta, 'shift': shift } # Save output out = xr.merge([obs, noise_var]) os.makedirs(output_dir, exist_ok=True) out.to_netcdf(output_file) print('Finish {:s}. Time spent: {:.5f} minutes' .format(output_file, (datetime.now() - start_time).total_seconds() / 60)) return 0
def run(params): start_time = datetime.now() bin_width, filter_bandwidth, theta, shift, \ signal_field, noise_field, noise_multiplier = params # Get file paths signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) noise_dir = '/scratch/pkittiwi/fg1p/noise_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) output_dir = '/scratch/pkittiwi/fg1p/stats_mc/obsn{:.1f}/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}/s{:03d}' \ .format(noise_multiplier, bin_width, filter_bandwidth, theta, shift, signal_field) signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc'\ .format(signal_dir, bin_width, filter_bandwidth, theta, shift, signal_field) noise_file = '{:s}/noise_map_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc'\ .format(noise_dir, bin_width, filter_bandwidth, theta, shift, noise_field) output_file = '{:s}/stats_mc_obsn{:.1f}_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}_{:03d}.nc' \ .format(output_dir, noise_multiplier, bin_width, filter_bandwidth, theta, shift, signal_field, noise_field) mask_file = '/scratch/pkittiwi/fg1p/hera331_fov_mask.nc' obs_dir = '/scratch/pkittiwi/fg1p/obs_map/obsn{:.1f}/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}/s{:03d}' \ .format(noise_multiplier, bin_width, filter_bandwidth, theta, shift, signal_field) obs_file = '{:s}/obs_map_obsn{:.1f}_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}_{:03d}.nc' \ .format(obs_dir, noise_multiplier, bin_width, filter_bandwidth, theta, shift, signal_field, noise_field) # Load data to memory and align coordinates with xr.open_dataarray(signal_file) as da: signal = da.load() with xr.open_dataarray(noise_file) as da: noise = da.load() with xr.open_dataarray(mask_file) as da: mask = da.load() for key, values in noise.coords.items(): signal.coords[key] = values mask.coords[key] = values signal, noise, mask = xr.align(signal, noise, mask) # Make observation signal = signal.where(mask == 1) noise = noise.where(mask == 1) * noise_multiplier obs = signal + noise obs.name = 'obs' obs.attrs = {'signal_field': signal_field, 'noise_field': noise_field, 'noise_multiplier': noise_multiplier, 'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth, 'theta': theta, 'shift': shift} # Calculate noise variance noise_var = noise.var(dim=['y', 'x']) noise_var.name = 'noise_var' noise_var.attrs = { 'noise_field': noise_field, 'noise_multiplier': noise_multiplier, 'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth, 'theta': theta, 'shift': shift } # Save observation and noise_variance os.makedirs(obs_dir, exist_ok=True) obs = xr.merge([obs, noise_var]) obs.to_netcdf(obs_file) del signal del noise del mask # Calculate statistic out = get_stats(obs) out.attrs = {'signal_field': signal_field, 'noise_field': noise_field, 'noise_multiplier': noise_multiplier, 'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth, 'theta': theta, 'shift': shift} os.makedirs(output_dir, exist_ok=True) out.to_netcdf(output_file) out.close() print( 'Finish. signal_file = {:s}. noise_file = {:s}. output_file = {:s}.' 'Time spent {:.5f} sec.' .format(signal_file, noise_file, output_file, (datetime.now() - start_time).total_seconds()) )