def __init__(self, dataset_name, img_res=(512, 512), downsize_factor=(4, 4), local_path=None):
    self.dataset_name = dataset_name
    self.img_res = img_res
    self.downsize_factor = downsize_factor
    self.use_local_data = True

    if self.use_local_data:
        # load from test dataset
        assert local_path is not None
        # sst_dataset_path = local_path + "/{}.npz".format(self.dataset_name)
        self.sst_dataset_path = local_path
    else:
        # sst_dataset_path = os.path.join(SST_DATASETS_PATH, "{}.npz".format(dataset_name))
        self.sst_dataset_path = SST_DATASETS_PATH
        if not os.path.exists(SST_DATASETS_PATH):
            try:
                os.makedirs(SST_DATASETS_PATH)
            except OSError:
                print("failed to create " + SST_DATASETS_PATH)

        uris = [
            'gcs://pangeo-ocean-ml/LLC4320/SST.{id:010d}.zarr'.format(id=tstep)
            for tstep in range(0, 4088 + 1, 73)
        ][:]
        # uris = [f'gcs://pangeo-ocean-ml/LLC4320/SST.{tstep:010d}.zarr' for tstep in range(0, 4088+1, 73)][:10]
        dsets = [xr.open_zarr(fsspec.get_mapper(uri), consolidated=True) for uri in uris]
        ds = xr.combine_nested(dsets, 'timestep')
        print(ds)

        # use ds.SST[0] to count NaN values across all timesteps
        num_nans = ds.SST[0].isnull().sum(dim=['x', 'y']).load()
        sst_valid = ds.SST.where(num_nans == 0, drop=True)
        print(sst_valid)
        sst_coarse = sst_valid.coarsen(x=self.downsize_factor[0], y=self.downsize_factor[1]).mean()
        print(sst_coarse)

        temp = sst_valid[0][0].load().values
        length = img_res[0] * img_res[1] + img_res[0] // downsize_factor[0] * img_res[1] // downsize_factor[1]
        print(length)

        hdf5_path = SST_DATASETS_PATH + "/output.hdf5"
        # as with ordinary file handling, 'w', 'r' and 'a' mean write, read and append
        hdf5_file = tables.open_file(hdf5_path, mode='w')
        # set the compression level and compression library
        filters = tables.Filters(complevel=5, complib='blosc')
        earray = hdf5_file.create_earray(
            hdf5_file.root,
            'data',  # node name, used to access the data later
            tables.Atom.from_dtype(temp.dtype),  # data format (same dtype as the sample array)
            shape=(0, length),  # 0 in the first dimension means the array is extendable along rows
            filters=filters,
            expectedrows=15000  # approximate total number of rows, helps PyTables optimise storage
        )

        for timestep in range(sst_valid.shape[0]):
            for region in range(sst_valid.shape[1]):
                # hr.append(sst_valid[timestep, region].load().values)
                # lr.append(sst_coarse[timestep, region].load().values)
                hr = sst_valid[timestep, region].load().values
                lr = sst_coarse[timestep, region].load().values
                hr = hr.flatten()
                lr = lr.flatten()
                temp = np.append(hr, lr).reshape((1, -1))
                print(temp.shape)
                earray.append(temp)
        print("got values!")
        hdf5_file.close()
        # np.savez(sst_dataset_path, name1=np.array(hr), name2=np.array(lr))
        print("hdf5 successfully saved")
def readwrf(filein):
    """
    This function reads wrfout files, grabs the variables of interest and outputs them as an xarray

    Parameters
    ----------
    filein: str
        path to the directory containing the wrfout netCDF files

    Returns
    -------
    ds_wrf: xarray Dataset of wind speed (km h-1), temp (degC) & rh (%)
    """
    ds_list = []
    pathlist = sorted(Path(filein).glob('wrfout_d01*'))
    # print(pathlist)
    for path in pathlist:
        path_in_str = str(path)
        wrf_file = Dataset(path_in_str, 'r')

        slp = getvar(wrf_file, "slp")
        rh = getvar(wrf_file, "rh2")
        temp = getvar(wrf_file, "T2")
        wsp_wdir = g_uvmet.get_uvmet10_wspd_wdir(wrf_file, units='m s-1')
        rain_c = getvar(wrf_file, "RAINC")
        rain_sh = getvar(wrf_file, "RAINSH")
        rain_nc = getvar(wrf_file, "RAINNC")
        # cord = get_cartopy(rh)
        # lat,lon = latlon_coords(rh)

        var_list = [slp, rh, temp, wsp_wdir, rain_c, rain_sh, rain_nc]
        # var_list = [rh]
        ds = xr.merge(var_list)
        # cord_list.append(cord)
        # lat_list.append(lat)
        # lon_list.append(lon)
        ds_list.append(ds)

    ds_wrf = xr.combine_nested(ds_list, 'time')

    # cord = cord_list[0]
    # lat, lon = lat_list[0], lon_list[0]
    # out_dir = str(context.data_dir)
    # out_dir = Path(str(context.data_dir)+str('/xr/') + str('/') + str(ds_name) + str(f".zarr"))
    # out_dir.mkdir(parents=True, exist_ok=True)
    # # now = datetime.now()  # current date and time
    # # folder_date = now.strftime("%Y%m%d")
    # # file_date = now.strftime("%Y%m%d_%H")
    # # print("date and time:", file_date)
    # ## Write and save DataArray (.zarr) file
    # # full_dir = str(out_dir) + str('/') + str(ds_name) + str(f".zarr")
    # ds_wrf.compute()
    # ds_wrf.to_zarr(out_dir, "w")
    # print(f"wrote {out_dir}")

    return ds_wrf
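# Hedged usage sketch for readwrf(): the directory below is hypothetical and only
# illustrates the calling pattern; it must contain wrfout_d01* netCDF files.
wrf_dir = "/data/wrf/run01"      # hypothetical path to a WRF output directory
ds_wrf = readwrf(wrf_dir)        # per-file variables are merged, then combined along 'time'
print(ds_wrf)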
def test_nested_concat_too_many_dims_at_once(self):
    objs = [Dataset({"x": [0], "y": [1]}), Dataset({"y": [0], "x": [1]})]
    with pytest.raises(ValueError, match="not equal across datasets"):
        combine_nested(objs, concat_dim="x", coords="minimal")
def test_combine_coords_join_exact(self):
    objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
    with pytest.raises(ValueError, match=r"indexes along dimension"):
        combine_nested(objs, concat_dim="x", join="exact")
def open_mf_wrf_dataset(paths, chunks=None, compat='no_conflicts', lock=None, preprocess=None): """Open multiple WRF files as a single WRF dataset. Requires dask to be installed. Note that if your files are sliced by time, certain diagnostic variable computed out of accumulated variables (e.g. PRCP) won't be available, because not computable lazily. This code is adapted from xarray's open_mfdataset function. The xarray license is reproduced in the salem/licenses directory. Parameters ---------- paths : str or sequence Either a string glob in the form `path/to/my/files/*.nc` or an explicit list of files to open. chunks : int or dict, optional Dictionary with keys given by dimension names and values given by chunk sizes. In general, these should divide the dimensions of each dataset. If int, chunk each dimension by ``chunks`` . By default, chunks will be chosen to load entire input files into memory at once. This has a major impact on performance: please see xarray's full documentation for more details. compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional String indicating how to compare variables of the same name for potential conflicts when merging: - 'broadcast_equals': all values must be equal when variables are broadcast against each other to ensure common dimensions. - 'equals': all values and dimensions must be the same. - 'identical': all values, dimensions and attributes must be the same. - 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. preprocess : callable, optional If provided, call this function on each dataset prior to concatenation. lock : False, True or threading.Lock, optional This argument is passed on to :py:func:`dask.array.from_array`. By default, a per-variable lock is used when reading data from netCDF files with the netcdf4 and h5netcdf engines to avoid issues with concurrent access when using dask's multithreaded backend. Returns ------- xarray.Dataset """ if isinstance(paths, basestring): paths = sorted(glob(paths)) if not paths: raise IOError('no files to open') # TODO: current workaround to dask thread problems import dask dask.config.set(scheduler='single-threaded') if lock is None: lock = NETCDF4_PYTHON_LOCK datasets = [ open_wrf_dataset(p, chunks=chunks or {}, lock=lock) for p in paths ] file_objs = [ds._file_obj for ds in datasets] if preprocess is not None: datasets = [preprocess(ds) for ds in datasets] try: combined = xr.combine_nested(datasets, concat_dim='time', compat=compat) except AttributeError: combined = xr.auto_combine(datasets, concat_dim='time', compat=compat) combined._file_obj = _MultiFileCloser(file_objs) combined.attrs = datasets[0].attrs # drop accumulated vars if needed (TODO: make this not hard coded) vns = ['PRCP', 'PRCP_C', 'PRCP_NC'] vns = [vn for vn in vns if vn in combined.variables] try: combined = combined.drop_vars(vns) except AttributeError: combined = combined.drop(vns) return combined
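# Minimal usage sketch for open_mf_wrf_dataset(), assuming a directory of hourly wrfout
# files; the glob pattern and chunk size are illustrative, not prescriptive.
ds = open_mf_wrf_dataset(
    'path/to/my/files/wrfout_d01_*.nc',   # string glob, expanded and sorted internally
    chunks={'time': 1},                   # one time step per dask chunk
)
print(ds.dims)                            # files are concatenated along 'time' via combine_nested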
def time_combine_nested(self):
    datasets = [[self.dsA0, self.dsA1], [self.dsB0, self.dsB1]]
    xr.combine_nested(datasets, concat_dim=[None, "T"])
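# Self-contained sketch of what the 2-D nested combine above does: with
# concat_dim=[None, "T"], each inner list is concatenated along "T" while the outer
# level (None) is merged. The tiny datasets below are illustrative stand-ins for the
# benchmark's dsA0/dsA1/dsB0/dsB1.
import xarray as xr

dsA0 = xr.Dataset({"A": ("T", [0, 1])}, coords={"T": [0, 1]})
dsA1 = xr.Dataset({"A": ("T", [2, 3])}, coords={"T": [2, 3]})
dsB0 = xr.Dataset({"B": ("T", [10, 11])}, coords={"T": [0, 1]})
dsB1 = xr.Dataset({"B": ("T", [12, 13])}, coords={"T": [2, 3]})

combined = xr.combine_nested([[dsA0, dsA1], [dsB0, dsB1]], concat_dim=[None, "T"])
# combined now holds both variables A and B on the concatenated index T = [0, 1, 2, 3]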
def test_combine_echodata(raw_datasets): ( files, sonar_model, xml_file, concat_dims, concat_data_vars, ) = raw_datasets eds = [echopype.open_raw(file, sonar_model, xml_file) for file in files] combined = echopype.combine_echodata(eds, "overwrite_conflicts") # type: ignore for group_name in combined.group_map: if group_name in ("top", "sonar", "provenance"): continue combined_group: xr.Dataset = getattr(combined, group_name) eds_groups = [ getattr(ed, group_name) for ed in eds if getattr(ed, group_name) is not None ] def union_attrs(datasets: List[xr.Dataset]) -> Dict[str, Any]: """ Merges attrs from a list of datasets. Prioritizes keys from later datasets. """ total_attrs = {} for ds in datasets: total_attrs.update(ds.attrs) return total_attrs test_ds = xr.combine_nested( eds_groups, [concat_dims.get(group_name, concat_dims["default"])], data_vars=concat_data_vars.get(group_name, concat_data_vars["default"]), coords="minimal", combine_attrs="drop", ) test_ds.attrs.update(union_attrs(eds_groups)) test_ds = test_ds.drop_dims( [ "concat_dim", "old_ping_time", "ping_time", "old_time1", "time1", "old_time2", "time2", ], errors="ignore", ).drop_dims([f"{group}_attrs" for group in combined.group_map], errors="ignore") assert combined_group is None or test_ds.identical( combined_group.drop_dims( [ "old_ping_time", "ping_time", "old_time1", "time1", "old_time2", "time2", ], errors="ignore", ))
def make_cherab_image(self): """ run cherab to generate the synthetic spectral cube :return: """ if self.radiance is not NotImplemented: self.radiance.close() if self.spectral_radiance is not NotImplemented: self.spectral_radiance.close() import_mastu_mesh(self.world, ) # first, define camera, calculate view vectors and calculate ray lengths pipeline_spectral = SpectralPowerPipeline2D() pipeline_spectral_rad = SpectralRadiancePipeline2D() pipelines = [pipeline_spectral, pipeline_spectral_rad, ] camera = PinholeCamera(self.sensor_format_ds, fov=self.fov, pipelines=pipelines, parent=self.world) # orient and position the camera init_view_vector, init_up_vector = Vector3D(0, 0, 1), Vector3D(0, 1, 0) axle_1 = init_view_vector.cross(self.view_vector) angle = init_view_vector.angle(self.view_vector) t_1 = rotate_vector(angle, axle_1) final_up_vector = rotate_vector(-90, axle_1) * self.view_vector intermediate_up_vector = t_1 * init_up_vector angle_between = intermediate_up_vector.angle(final_up_vector) t_2 = rotate_vector(-angle_between, self.view_vector) camera.transform = translate(self.pupil_point[0], self.pupil_point[1], self.pupil_point[2], ) * t_2 * t_1 vector_xyz = np.arange(3) vector_xyz = xr.DataArray(vector_xyz, coords=(vector_xyz, ), dims=('vector_xyz',), name='vector_xyz', ) # calculating the pixel view directions view_vectors = xr.combine_nested( [xr.zeros_like(self.x_pixel_ds + self.y_pixel_ds) + self.view_vector[i] for i in [0, 1, 2, ]], concat_dim=(vector_xyz,), ) view_vectors = view_vectors.rename('view_vectors') def v3d2da(v3d): """ raysect Vector3D to xarray DataArray :param v3d: :return: """ da = np.array([v3d.x, v3d.y, v3d.z, ]) da = xr.DataArray(da, coords=(np.arange(3),), dims=('vector_xyz',), ) return da # basis unit vectors defining camera view -- v_z is forward and v_y is up v_y = final_up_vector.normalise() v_x = self.view_vector.cross(v_y).normalise() v_z = self.view_vector.normalise() v_x, v_y, v_z = [v3d2da(i) for i in [v_x, v_y, v_z, ]] # FOV defines the widest view, with pixels defined as square. sensor_aspect = self.sensor_format[1] / self.sensor_format[0] if sensor_aspect > 1: fov_v = self.fov fov_h = self.fov / sensor_aspect elif sensor_aspect == 1: fov_v = fov_h = self.fov elif sensor_aspect < 1: fov_h = self.fov fov_v = self.fov * sensor_aspect else: raise Exception() pixel_projection = 2 * np.tan(fov_h * np.pi / 360) / self.sensor_format[0] view_vectors = view_vectors + (v_x * (self.x_pixel_ds - self.sensor_format[0] / 2 + 0.5) * pixel_projection) + \ (v_y * (self.y_pixel_ds - self.sensor_format[1] / 2 + 0.5) * pixel_projection) if self.verbose: print('--status: calculating ray lengths') # TODO there has to be a better way of doing this?! 
ray_lengths = xr.DataArray(np.zeros(self.sensor_format_ds), dims=('x', 'y', ), coords=(self.x_ds, self.y_ds, )) for idx_x, x_pixel in enumerate(self.x_pixel_ds.values): if self.verbose and idx_x % 10 == 0: print('x =', str(x_pixel)) for idx_y, y_pixel in enumerate(self.y_pixel_ds.values): direction = Vector3D(*list(view_vectors.isel(x=idx_x, y=idx_y, ).values)) intersections = [] for p in self.world.primitives: intersection = p.hit(CoreRay(self.pupil_point, direction, )) if intersection is not None: intersections.append(intersection) # find the intersection corresponding to the shortest ray length no_intersections = len(intersections) if no_intersections == 0: ray_lengths.values[idx_x, idx_y] = 3 else: ray_lengths.values[idx_x, idx_y] = min([i.ray_distance for i in intersections if i.primitive.name != 'Plasma Geometry']) camera.spectral_bins = 40 camera.pixel_samples = 10 camera.min_wavelength = self.wl_min_nm camera.max_wavelength = self.wl_max_nm camera.quiet = not self.verbose camera.observe() # output to netCDF via xarray wl = pipeline_spectral.wavelengths wl = xr.DataArray(wl, coords=(wl, ), dims=('wavelength', )) * 1e-9 # ( m ) spec_power_ds = pipeline_spectral.frame.mean * 1e9 # converting units from (W/nm) --> (W/m) spec_radiance_ds = pipeline_spectral_rad.frame.mean * 1e9 coords = (self.x_ds, self.y_ds, wl, ) dims = ('x', 'y', 'wavelength', ) name = 'spec_power' attrs = {'units': 'W/m^2/str/m'} spec_power_ds = xr.DataArray(np.flip(spec_power_ds, axis=1), coords=coords, dims=dims, name=name, attrs=attrs, ) spec_radiance_ds = xr.DataArray(np.flip(spec_radiance_ds, axis=1, ), coords=coords, dims=dims, name=name, attrs=attrs, ) # calculate the centre-of-mass wavelength radiance_ds = spec_power_ds.integrate(dim='wavelength').assign_attrs({'units': 'W/m^2/str', }) ds_ds = xr.Dataset({'spectral_radiance_ds': spec_radiance_ds, 'radiance_ds': radiance_ds, 'view_vectors_ds': view_vectors, 'ray_lengths_ds': ray_lengths }) x_p_y = self.x + self.y spec_power = spec_power_ds.interp_like(x_p_y) / self.cherab_down_sample # to conserve power ds = xr.Dataset({'spectral_radiance': spec_power, }) ds_ds.to_netcdf(self.fpath_ds, mode='w', ) ds.to_netcdf(self.fpath, mode='w', )
def mfpad(dataIn, thres = 1e-2, inds = {'Type':'L','it':1}, res = 50, R = None, p = 0):
    """
    Parameters
    ----------
    dataIn : Xarray
        Contains set(s) of matrix elements to use, as output by epsproc.readMatEle().

    thres : float, optional, default 1e-2
        Threshold value for matrix elements to use in calculation.

    inds : dictionary, optional.
        Used for sub-selection of matrix elements from Xarrays.
        Default set for length gauge, single it component only, inds = {'Type':'L','it':'1'}.

    res : int, optional, default 50
        Resolution for output (theta,phi) grids.

    R : list of Euler angles or quaternions, optional.
        Define LF > MF polarization geometry/rotations.
        For default case (R = None), 3 geometries are calculated, corresponding to z-pol, x-pol and y-pol cases.
        Defined by Euler angles (p,t,c) = [0 0 0] for z-pol, [0 pi/2 0] for x-pol, [pi/2 pi/2 0] for y-pol.

    p : int, optional.
        Defines LF polarization state, p = -1...1, default p = 0 (linearly pol light along z-axis).
        TODO: add summation over p for multiple pol states in LF.

    Returns
    -------
    Ta
        Xarray (theta, phi, E, Sym) of MFPADs, summed over (l,m)

    Tlm
        Xarray (theta, phi, E, Sym, lm) of MFPAD components, expanded over all (l,m)
    """

    # Define reduced data from selection over all data
    daRed = matEleSelector(dataIn, thres = 1e-2, inds = inds)

    # Generate spherical harmonics
    Lmax = daRed.l.max()
    YlmX = sphCalc(Lmax, res = res)

    # Reindex to match data (should happen automagically, but not always!)
    # YlmXre = YlmX.reindex_like(daRed)

    # Set rotation angles for LF > MF
    if R is None:
        # Set (x,y,z) projection terms only
        # Nangs = 10
        # pRot = np.linspace(0,180,Nangs)
        # tRot = np.linspace(0,90,Nangs)
        # cRot = np.linspace(0,180,Nangs)
        # eAngs = np.array([pRot, tRot, cRot,])*np.pi/180
        # Convert to quaternions
        # R = quaternion.from_euler_angles(pRot*np.pi/180, tRot*np.pi/180, cRot*np.pi/180)

        # Euler angles for rotation of LF->MF, set as [0 0 0] for z-pol, [0 pi/2 0] for x-pol, [pi/2 pi/2 0] for y-pol
        pRot = [0, 0, np.pi/2]
        tRot = [0, np.pi/2, np.pi/2]
        cRot = [0, 0, 0]
        eAngs = np.array([pRot, tRot, cRot])  # List form to use later
        Euler = pd.MultiIndex.from_arrays(eAngs, names = ['P','T','C'])

        # Convert to quaternions
        R = quaternion.from_euler_angles(pRot, tRot, cRot)

    #**************** Calculate MFPADs

    Tlm = []
    Ta = []

    # Loop over pol geoms R
    for n, Rcalc in enumerate(R):
        T = []
        # Loop over mu terms and multiply
        for mu in np.arange(-1,2):
            # Set by element replacement (preserves whole structure)
            # daTemp = daRed.copy()  # Set explicit copy for rotation.
            # daTemp.loc[{'mu':mu}].values = daTemp.loc[{'mu':mu}].values * sf.Wigner_D_element(Rcalc, 1, mu, 0).conj()

            # Issues with reindexing to extra coords at the moment, so reindex and multiply for specific mu only
            # daTemp = daTemp.sel({'mu':mu})
            # YlmXre = YlmX.reindex_like(daTemp)
            # T.append(YlmXre.conj() * daTemp)  # Output full (l,m,mu) expansion

            # Set by looping and selection
            daTemp = daRed.sel({'mu':mu}) * sf.Wigner_D_element(Rcalc, 1, mu, 0).conj()
            YlmXre = YlmX.reindex_like(daTemp)
            T.append(YlmXre.conj() * daTemp)  # Output full (l,m,mu) expansion

        # Concat & sum over symmetries
        Ts = xr.combine_nested([T[0], T[1], T[2]], concat_dim=['LM'])

        # Add dims - currently set for Euler angles only.
        # Can't seem to add multiindex as a single element, so set dummy coord here and replace below.
        Ts = Ts.expand_dims({'Euler':[n]})  # Set as index
        # Ts = Ts.expand_dims({'p':[eAngs[0,n]], 't':[eAngs[1,n]], 'c':[eAngs[2,n]]})

        Tlm.append(Ts)
        Ta.append(Ts.sum(dim = 'LM'))

    TlmX = xr.combine_nested(Tlm, concat_dim=['Euler'])
    TaX = xr.combine_nested(Ta, concat_dim=['Euler'])

    # Assign Euler angles to dummy dim
    TlmX = TlmX.assign_coords(Euler = Euler)
    TaX = TaX.assign_coords(Euler = Euler)

    return TaX, TlmX  # , Ta, Tlm  # For debug also return lists
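# Minimal, self-contained sketch (numpy/pandas/xarray only) of the indexing pattern used
# above: stack per-geometry results with combine_nested along a dummy 'Euler' dimension,
# then replace that index with a (P, T, C) MultiIndex of Euler angles. The toy data and
# angle values are illustrative only; recent xarray versions may warn when assigning a
# pandas MultiIndex directly and prefer an explicit MultiIndex coordinate.
import numpy as np
import pandas as pd
import xarray as xr

eAngs = np.array([[0, 0, np.pi/2], [0, np.pi/2, np.pi/2], [0, 0, 0]])  # rows: P, T, C
euler = pd.MultiIndex.from_arrays(eAngs, names=['P', 'T', 'C'])

parts = [xr.DataArray(np.random.rand(4), dims=('LM',)).expand_dims({'Euler': [n]})
         for n in range(3)]
stacked = xr.combine_nested(parts, concat_dim=['Euler'])   # dummy integer index 0..2
stacked = stacked.assign_coords(Euler=euler)               # swap in the Euler-angle MultiIndex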
# # open each zarr file as an xarray dataset # correcting the timestamp # for ds_zarr in zarr_in: ds = xr.open_zarr(str(ds_zarr)) # # wrote incorrect times in original files, fix here # ds['time'] = the_time zarr_list.append(ds) the_time += 60 # # make a virtual dataset with time as the outer dimension # zarr_time_ds = xr.combine_nested(zarr_list, 'time') # # compute the mean and perturbation for timestep 0 and # write out as a new zarr file # time_step = 0 print(f"finding perturbation for {zarr_in[time_step]}") temp = zarr_time_ds['TABS'] wvel = zarr_time_ds['W'] tr01 = zarr_time_ds['TR01'] mean_temp = temp[time_step, :, :, :].mean(dim=('x', 'y')) mean_w = wvel[time_step, :, :, :].mean(dim=('x', 'y')) mean_tr = tr01[time_step, :, :, :].mean(dim=('x', 'y')) w_prime = wvel - mean_w T_prime = temp - mean_temp tr_prime = tr01 - mean_tr
def test_combine_nested_but_need_auto_combine(self):
    objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2], "wall": [0]})]
    with raises_regex(ValueError, "cannot be combined"):
        combine_nested(objs, concat_dim="x")
def main(src_dir): src_dir = os.path.abspath(src_dir) tif_file_list = glob.glob(os.path.join(src_dir, "*.tif")) logger.info(f"found {len(tif_file_list)} file(s) to process") # test read array = imageio.volread(tif_file_list[0]) shape, dtype = array.shape, array.dtype del array logger.info(f"array info {shape}, {dtype}") # coordinate list csv_file_list = glob.glob(os.path.join(src_dir, "*.csv")) columns = { "grid_x": int, "grid_y": int, "grid_z": int, "coord_x": float, "coord_y": float, "coord_z": float, } coords = pd.read_csv( csv_file_list[0], skiprows=6, usecols=list(range(3, 9)), names=columns.keys(), dtype=columns, ) @delayed def volread_np(uri): return np.array(imageio.volread(uri)) def volread_da(uri): return da.from_delayed(volread_np(uri), shape, dtype) subsets = [] for src_path, coord in zip(tif_file_list, coords.itertuples(index=False)): array = volread_da(src_path) coord = coord._asdict() array = xr.DataArray( array, name="raw", dims=["z", "y", "x"], coords={k: v for k, v in coord.items()}, ) # attach tile coordinate array = array.expand_dims("tile") array = array.assign_coords( {k: ("tile", [v]) for k, v in coord.items()}) # convert to datasets subset = array.to_dataset() subsets.append(subset) dataset = xr.combine_nested(subsets, concat_dim="tile") print(dataset) """ compressor = zarr.Blosc(cname="lz4", clevel=5, shuffle=zarr.blosc.SHUFFLE) dataset.to_zarr( "_demo_converted.zarr", encoding={"raw": {"compressor": compressor}} ) """ # generate pyramids r = 1 for _ in range(3): r *= 2 sampler = (slice(None, None, r), ) * 2 + (slice(None), ) sampler = {k: s for k, s in zip("xyz", sampler)} dataset[f"bin{r}"] = dataset["raw"][sampler] dataset["mip_xy"] = dataset["raw"].max("z") mip_dataset = dataset["mip_xy"][dataset.grid_z == 0] tasks = [] counter = 1 for iy, image_xy in mip_dataset.groupby("grid_y"): for ix, image in image_xy.groupby("grid_x"): image = image.squeeze() fname = f"tile{counter:03d}_x{ix:03d}_y{iy:03d}.tif" counter += 1 tasks.append((fname, image)) def imwrite(uri, image): imageio.imwrite(uri, image) print(uri) fname, image = zip(*tasks) batch_submit(imwrite, fname, image)
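# Hedged usage sketch for main(): the path is hypothetical and must contain the *.tif
# tile stacks and the stage-coordinate *.csv file that the function reads above.
main("/path/to/tile_acquisition")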
print (Species_1) #ammonia sufrace layer GC_surface_ammonia = [data['SpeciesConc_NH3'].isel(time=0,lev=0) for data in Species_1] print (GC_surface_ammonia) os.chdir("/data/uptrop/Projects/DEFRA-NH3/GC/geosfp_eu_naei_iccw/StateMet/2016/") StateMet = sorted(glob.glob("GEOSChem.StateMet*.nc4")) print (len(StateMet)) StateMet = StateMet[:] StateMet_1 = [xr.open_dataset(file) for file in StateMet] #print ((StateMet_1[0])) combined = xr.combine_nested(StateMet_1, concat_dim=("time")) print (combined.indexes) #monthly mean StateMet_2=combined.groupby('time.month').mean() #print (len(StateMet_2)) print (StateMet_2) #StateMet_3 = list(StateMet_2.groupby('time')) StateMet_3 = list(StateMet_2.groupby("month", squeeze=False)) print (StateMet_3) # convert unit for ammonia (dry mol/mol to ug/m3) surface_AIRDEN = [data['Met_AIRDEN'].isel(time=0,lev=0) for data in StateMet_3] #kg/m3 surface_AIRNUMDEN_a = np.asarray(surface_AIRDEN)/MW_AIR #mol/m3 surface_AIRNUMDEN_b = surface_AIRNUMDEN_a*AVOGADRO # unit molec air/m3 surface_AIRNUMDEN = surface_AIRNUMDEN_b/1e6 #unit molec air/cm3
def test_manual_combine_but_need_auto_combine(self):
    objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2], 'wall': [0]})]
    with raises_regex(ValueError, 'cannot be combined'):
        combine_nested(objs, concat_dim='x')
if session.algorithm == 'NDVI': V_INDEX_d = [dask.delayed(f.NDVI) for f in AOI_d] elif session.algorithm == 'EVI': V_INDEX_d = [dask.delayed(EVI)(f) for f in AOI_d] combined = zip(V_INDEX_d, HSV_d) GVI_index = [ dask.delayed(GVI)(i, **{ 'limits': session.limits, 'algorithm': session.algorithm }) for i in combined ] datasets = dask.compute(GVI_index) # get a list of xarray.Datasets da_vegetation = xr.combine_nested(datasets[0], concat_dim=['time']) da_vegetation = da_vegetation.chunk({ 'time': -1, 'latitude': 1000, 'longitude': 1000 }) GVDM = xr.apply_ufunc( decades, da_vegetation, input_core_dims=[['time']], exclude_dims={ 'time', }, dask='parallelized', dask_gufunc_kwargs={'allow_rechunk': True},
def test_combine_coords_join_exact(self):
    objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
    with pytest.raises(ValueError, match=r"cannot align.*join.*exact.*"):
        combine_nested(objs, concat_dim="x", join="exact")
def _load(self, filenames=None): ''' Load the set of files into a single XArray structure. ''' all_leads = [] if filenames is None else [self.contents] filenames = self.filenames if filenames is None else filenames # 0h and 1h accumulated forecast variables are named differently than # the rest of the forecast hours. Rename those accumulated variables if # needed. for fcst_type in ['01fcst', 'free_fcst']: if filenames.get(fcst_type): for filename in filenames.get(fcst_type): print(f'Loading grib2 file: {fcst_type}, {filename}') # Rename variables to match free forecast variables dataset = xr.open_mfdataset( filenames[fcst_type], **self.open_kwargs, ) renaming = self.free_fcst_names(dataset, fcst_type) if renaming and self.model not in ['hrrre', 'rrfse']: print(f'RENAMING VARIABLES:') for old_name, new_name in renaming.items(): print(f' {old_name:>30s} -> {new_name}') dataset = dataset.rename_vars(renaming) if len(all_leads) == 1: # Check that specific variables exist in the xarray that is # already loaded (presumably 0hr), and add them if they # don't. This implementation is relying on pointers to # update "in place" og_ds = all_leads[0] bad_vars = [ 'APCP_P8_L1_{grid}_acc', 'ACPCP_P8_L1_{grid}_acc', 'FROZR_P8_L1_{grid}_acc', 'NCPCP_P8_L1_{grid}_acc', 'WEASD_P8_L1_{grid}_acc', ] bad_vars = [v.format(grid=self.grid_suffix) for v in \ bad_vars] for bad_var in bad_vars: # Check to see if the bad variable is in the current # dataset and NOT in the original dataset. if bad_var not in og_ds.variables and \ dataset.get(bad_var) is not None: print(f'Adding {bad_var} to og ds') # Duplicate the accumulated variable with the # required name og_ds[bad_var] = og_ds.get(f'{bad_var}1h') all_leads.append(dataset) ret = xr.combine_nested( all_leads, compat='override', concat_dim=list(self.coord_dims.keys())[0], coords='minimal', data_vars='all', ) return ret
def truncate(ds: xr.Dataset, max_sizes: Mapping[Hashable, int]) -> xr.Dataset: """Truncate a dataset along two dimensions into a form suitable for display. Truncation involves taking four rectangles from each corner of the dataset array (or arrays) and combining them into a smaller dataset array (or arrays). Parameters ---------- ds The dataset to be truncated. max_sizes : Mapping[Hashable, int] A dict with keys matching dimensions and integer values indicating the maximum size of the dimension after truncation. Returns ------- Dataset A truncated dataset. Warnings -------- A maximum size of `n` may result in the array having size `n + 2` (and not `n`). The reason for this is so that pandas can be used to display the array as a table, and correctly truncate rows or columns (shown as ellipses ...). """ if len(max_sizes) != 2: raise ValueError("Truncation is only supported for two dimensions") dims = list(max_sizes.keys()) max_dim = max_sizes[dims[0]], max_sizes[dims[1]] n_dim = ds.sizes[dims[0]], ds.sizes[dims[1]] if n_dim[0] <= max_dim[0] + 2 and n_dim[1] <= max_dim[1] + 2: # No truncation required return ds if n_dim[0] <= max_dim[0] + 1: # Truncate dim1 only m_dim = n_dim[0], max_dim[1] // 2 + 1 rows = [[(0, 0), (0, m_dim[1])]] elif n_dim[1] <= max_dim[1] + 1: # Truncate dim0 only m_dim = max_dim[0] // 2 + 1, n_dim[1] rows = [[(0, 0)], [(m_dim[0], 0)]] else: # Truncate both dimensions m_dim = max_dim[0] // 2 + 1, max_dim[1] // 2 + 1 rows = [[(0, 0), (0, m_dim[1])], [(m_dim[0], 0), (m_dim[0], m_dim[1])]] limits = {dims[0]: m_dim[0], dims[1]: m_dim[1]} slices = {k: slice(v) for k, v in limits.items()} ds_abbr: xr.Dataset = xr.combine_nested( # type: ignore[no-untyped-call] [ [ # Roll all of these simultaneously along with any indexes/coords # and then clip them using the same slice for each corner ds.roll(dict(zip(limits, roll)), roll_coords=True).isel(**slices) for roll in row ] for row in rows ], concat_dim=limits.keys(), ) assert ds_abbr.sizes[dims[0]] <= max_dim[0] + 2 assert ds_abbr.sizes[dims[1]] <= max_dim[1] + 2 return ds_abbr
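# Hedged usage sketch for truncate(): the dimension names and sizes are invented for
# illustration; any dataset with exactly these two dims would do.
import numpy as np
import xarray as xr

ds = xr.Dataset({"calls": (("variants", "samples"), np.arange(100 * 60).reshape(100, 60))})
small = truncate(ds, {"variants": 10, "samples": 10})
print(small.sizes)  # each dim ends up no larger than max_size + 2, per the docstring warning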
def combine_echodata(echodatas: List[EchoData], combine_attrs="override") -> EchoData: """ Combines multiple `EchoData` objects into a single `EchoData` object. Parameters ---------- echodatas: List[EchoData] The list of `EchoData` objects to be combined. combine_attrs: { "override", "drop", "identical", "no_conflicts", "overwrite_conflicts" } String indicating how to combine attrs of the `EchoData` objects being merged. This parameter matches the identically named xarray parameter (see https://xarray.pydata.org/en/latest/generated/xarray.combine_nested.html) with the exception of the "overwrite_conflicts" value. * "override": Default. skip comparing and copy attrs from the first `EchoData` object to the result. * "drop": empty attrs on returned `EchoData` object. * "identical": all attrs must be the same on every object. * "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. * "overwrite_conflicts": attrs from all `EchoData` objects are combined, attrs with conflicting keys will be overwritten by later `EchoData` objects. Returns ------- EchoData An `EchoData` object with all of the data from the input `EchoData` objects combined. Raises ------ ValueError If `echodatas` contains `EchoData` objects with different or `None` `sonar_model` values (i.e., all `EchoData` objects must have the same non-None `sonar_model` value). ValueError If EchoData objects have conflicting source file names. Warns ----- UserWarning If the `sonar_model` of the input `EchoData` objects is `"EK60"` and any `EchoData` objects have non-monotonically increasing `ping_time`, `time1` or `time2` values, the corresponding values in the output `EchoData` object will be increased starting at the timestamp where the reversal occurs such that all values in the output are monotonically increasing. Additionally, the original `ping_time`, `time1` or `time2` values will be stored in the `Provenance` group, although this behavior may change in future versions. Warnings -------- Changes in parameters between `EchoData` objects are not currently checked; however, they may raise an error in future versions. Notes ----- * `EchoData` objects are combined by combining their groups individually. * Attributes from all groups before the combination will be stored in the provenance group, although this behavior may change in future versions. * The `source_file` and `converted_raw_path` attributes will be copied from the first `EchoData` object in the given list, but this may change in future versions. 
Examples -------- >>> ed1 = echopype.open_converted("file1.nc") >>> ed2 = echopype.open_converted("file2.zarr") >>> combined = echopype.combine_echodata([ed1, ed2]) """ tree_dict = {} result = EchoData() if len(echodatas) == 0: return result result.source_file = echodatas[0].source_file result.converted_raw_path = echodatas[0].converted_raw_path sonar_model = None for echodata in echodatas: if echodata.sonar_model is None: raise ValueError( "all EchoData objects must have non-None sonar_model values") elif sonar_model is None: sonar_model = echodata.sonar_model elif echodata.sonar_model != sonar_model: raise ValueError( "all EchoData objects must have the same sonar_model value") # ping time before reversal correction old_ping_time = None # ping time after reversal correction new_ping_time = None # location time before reversal correction old_time1 = None # location time after reversal correction new_time1 = None # mru time before reversal correction old_time2 = None # mru time after reversal correction new_time2 = None # time3 before reversal correction old_time3 = None # time3 after reversal correction new_time3 = None # all attributes before combination # { group1: [echodata1 attrs, echodata2 attrs, ...], ... } old_attrs: Dict[str, List[Dict[str, Any]]] = dict() for group, value in EchoData.group_map.items(): group_datasets = [ getattr(echodata, group) for echodata in echodatas if getattr(echodata, group) is not None ] if group in ("top", "sonar"): combined_group = getattr(echodatas[0], group) elif group == "provenance": combined_group = assemble_combined_provenance([ echodata.source_file if echodata.source_file is not None else echodata.converted_raw_path for echodata in echodatas ]) else: if len(group_datasets) == 0: setattr(result, group, None) continue concat_dim = SONAR_MODELS[sonar_model]["concat_dims"].get( group, SONAR_MODELS[sonar_model]["concat_dims"]["default"]) concat_data_vars = SONAR_MODELS[sonar_model][ "concat_data_vars"].get( group, SONAR_MODELS[sonar_model]["concat_data_vars"]["default"]) combined_group = xr.combine_nested( group_datasets, [concat_dim], data_vars=concat_data_vars, coords="minimal", combine_attrs="drop" if combine_attrs == "overwrite_conflicts" else combine_attrs, ) if combine_attrs == "overwrite_conflicts": combined_group.attrs.update(union_attrs(group_datasets)) if group == "beam": if sonar_model == "EK80": combined_group[ "transceiver_software_version"] = combined_group[ "transceiver_software_version"].astype("<U10") combined_group["channel"] = combined_group[ "channel"].astype("<U50") elif sonar_model == "EK60": combined_group["gpt_software_version"] = combined_group[ "gpt_software_version"].astype("<U10") # TODO: investigate further why we need to do .astype("<U50") combined_group["channel"] = combined_group[ "channel"].astype("<U50") if sonar_model != "AD2CP": combined_group, old_ping_time, new_ping_time = check_and_correct_reversed_time( combined_group, old_ping_time, new_ping_time, "ping_time", sonar_model) if group != "nmea": combined_group, old_time1, new_time1 = check_and_correct_reversed_time( combined_group, old_time1, new_time1, "time1", sonar_model) combined_group, old_time2, new_time2 = check_and_correct_reversed_time( combined_group, old_time2, new_time2, "time2", sonar_model) combined_group, old_time3, new_time3 = check_and_correct_reversed_time( combined_group, old_time3, new_time3, "time3", sonar_model) if len(group_datasets) > 1: old_attrs[group] = [ group_dataset.attrs for group_dataset in group_datasets ] if combined_group is not 
None: # xarray inserts this dimension when concatenating along multiple dimensions combined_group = combined_group.drop_dims("concat_dim", errors="ignore") combined_group = set_encodings(combined_group) if value["ep_group"] is None: tree_dict["root"] = combined_group else: tree_dict[value["ep_group"]] = combined_group # Set tree into echodata object result._set_tree(tree=DataTree.from_dict(tree_dict)) result._load_tree() # save ping time before reversal correction if old_ping_time is not None: result.provenance["old_ping_time"] = old_ping_time result.provenance.attrs["reversed_ping_times"] = 1 # save location time before reversal correction if old_time1 is not None: result.provenance["old_time1"] = old_time1 result.provenance.attrs["reversed_ping_times"] = 1 # save mru time before reversal correction if old_time2 is not None: result.provenance["old_time2"] = old_time2 result.provenance.attrs["reversed_ping_times"] = 1 # save time3 before reversal correction if old_time3 is not None: result.provenance["old_time3"] = old_time3 result.provenance.attrs["reversed_ping_times"] = 1 # TODO: possible parameter to disable original attributes and original ping_time storage # in provenance group? # save attrs from before combination for group in old_attrs: all_group_attrs = set() for group_attrs in old_attrs[group]: for attr in group_attrs: all_group_attrs.add(attr) echodata_filenames = [] for ed in echodatas: if ed.source_file is not None: filepath = ed.source_file elif ed.converted_raw_path is not None: filepath = ed.converted_raw_path else: # unreachable raise ValueError("EchoData object does not have a file path") filename = Path(filepath).name if filename in echodata_filenames: raise ValueError("EchoData objects have conflicting filenames") echodata_filenames.append(filename) attrs = xr.DataArray( [[group_attrs.get(attr) for attr in all_group_attrs] for group_attrs in old_attrs[group]], coords={ "echodata_filename": echodata_filenames, f"{group}_attr_key": list(all_group_attrs), }, dims=["echodata_filename", f"{group}_attr_key"], ) result.provenance = result.provenance.assign({f"{group}_attrs": attrs}) # Add back sonar model result.sonar_model = sonar_model return result
def get_averaged_ms(ms_name, tbin=None, cbin=None, chunks=None, taql_where='',
                    columns=None, chan=None, corr=None, data_col=None,
                    group_cols=None, iter_axis=None):
    """
    Performs MS averaging.

    Before averaging is performed, data selection has already been performed during the
    MS acquisition process. TAQL (if available) is used to perform selections for FIELD,
    SPW/DDID & SCAN. This is the first round of selection. The second round involves
    selections over channels and correlations, done via a slicer. With the exception of
    corr selection, all the other selections are done before averaging, because the
    averager requires 3-dimensional data.

    The MS is then grouped by DDID, FIELD_ID & SCAN_NUMBER and fed into
    :meth:`average_main`, which actually performs the averaging. This function returns
    to :meth:`ragavi.visibilities.get_ms`, so its output is grouped and column-selected
    in the same way.

    Parameters
    ----------
    ms_name : :obj:`str`
        Name of the input MS
    tbin : :obj:`float`
        Time bin in seconds
    cbin : :obj:`int`
        Number of channels to bin together
    chunks : :obj:`dict`
        Size of resulting MS chunks.
    taql_where: :obj:`str`
        TAQL clause to pass to xarrayms
    columns: :obj:`list`
        Columns to be present in the data
    chan : :obj:`slicer`
        A slicer to select the channels to be present in the dataset
    corr : :obj:`slicer` or :obj:`int`
        Correlation index or slice to be present in the dataset
    data_col : :obj:`str`
        Column containing data to be used
    group_cols: :obj:`list`
        List containing columns by which to group the data
    iter_axis: :obj:`str`
        Axis over which iteration is done

    Returns
    -------
    x_dataset: :obj:`list`
        List of :obj:`xarray.Dataset` containing the averaged MS. The MSs are split by
        spectral window and grouped depending on the type of plots.
    """
    if chunks is None:
        chunks = dict(row=10000)

    # these are the defaults in the averager
    if tbin is None:
        tbin = 1
    if cbin is None:
        cbin = 1

    # ensure that these are in the selected columns
    for _ in ["TIME", "ANTENNA1", "ANTENNA2", "INTERVAL", "FLAG", "FLAG_ROW", data_col]:
        if _ not in columns:
            columns.append(_)

    # must be grouped this way because of time averaging
    ms_obj = xm.xds_from_ms(
        ms_name,
        group_cols=["DATA_DESC_ID", "FIELD_ID", "SCAN_NUMBER"],
        columns=columns,
        taql_where=taql_where)

    # some channels have been selected
    # corr selection is performed after averaging!!
if chan is not None: ms_obj = [_.sel(chan=chan) for _ in ms_obj] logger.info("Averaging MAIN table") # perform averaging to the MS avg_mss = average_main(main_ds=ms_obj, time_bin_secs=tbin, chan_bin_size=cbin, group_row_chunks=100000, respect_flag_row=False, sel_cols=columns, viscolumn=data_col) n_ams = len(avg_mss) # writes_ms = xm.xds_to_table(avg_mss, "tesxt", "ALL") logger.info("Creating averaged xarray Dataset") x_datasets = [] for _a, ams in enumerate(avg_mss): ams = ams.compute() logger.info(f"Averaging {_a+1} / {n_ams}") datas = { k: (v.dims, v.data, v.attrs) for k, v in ams.data_vars.items() if k != "FLAG_CATEGORY" } new_ds = xr.Dataset(datas, attrs=ams.attrs, coords=ams.coords) new_ds = new_ds.chunk(chunks) x_datasets.append(new_ds) # data will always be grouped by SPW unless iterating over antenna # the most amount of grouping that will occur will be between to columns all_grps = [] if len(group_cols) == 0: # return a single dataset subs = xr.combine_nested(x_datasets, concat_dim="row", compat="no_conflicts", data_vars="all", coords="different", join="outer") subs.attrs = {} subs = subs.chunk(chunks) all_grps.append(subs) elif (set(group_cols) <= {"DATA_DESC_ID", "ANTENNA1", "ANTENNA2"} or iter_axis == "antenna"): uniques = np.unique([_.attrs["DATA_DESC_ID"] for _ in x_datasets]) uants = np.arange(vu.get_antennas(ms_name).size) for _d in uniques: subs = [] for _ in x_datasets: if _.attrs["DATA_DESC_ID"] == _d: subs.append(_) subs = xr.combine_nested(subs, concat_dim="row", compat="no_conflicts", data_vars="all", coords="different", join="outer") subs.attrs = {"DATA_DESC_ID": _d} subs = subs.chunk(chunks) if {"ANTENNA1", "ANTENNA2"} <= set(group_cols): u_bl = combinations(uants, 2) for p, q in u_bl: n_subs = subs.where( (subs.ANTENNA1 == p) & (subs.ANTENNA2 == q), drop=True) n_subs.attrs = { "DATA_DESC_ID": _d, "ANTENNA1": p, "ANTENNA2": q } all_grps.append(n_subs) elif "ANTENNA1" in group_cols: for p in uants[:-1]: n_subs = subs.where((subs.ANTENNA1 == p), drop=True) n_subs.attrs = {"DATA_DESC_ID": _d, "ANTENNA1": p} all_grps.append(n_subs) elif "ANTENNA2" in group_cols: for q in uants[:-1] + 1: n_subs = subs.where((subs.ANTENNA2 == q), drop=True) n_subs.attrs = {"DATA_DESC_ID": _d, "ANTENNA2": q} all_grps.append(n_subs) elif iter_axis == "antenna": for p in uants: n_subs = subs.where( (subs.ANTENNA1 == p) | (subs.ANTENNA2 == p), drop=True) n_subs.attrs = {"DATA_DESC_ID": _d, "ANTENNA": p} all_grps.append(n_subs) else: all_grps.append(subs) elif set(group_cols) <= {"DATA_DESC_ID", "FIELD_ID", "SCAN_NUMBER"}: grps = {} # must be ddid + something else # if it is something other than fid and scan e.g # by default group by ddid for grp in group_cols: uniques = np.unique([_.attrs[grp] for _ in x_datasets]) grps[grp] = uniques # grps.append(uniques) for com in product(*grps.values()): subs = [] natt = {k: v for k, v in zip(group_cols, com)} for _ in x_datasets: if set(natt.items()) <= set(_.attrs.items()): subs.append(_) subs = xr.combine_nested(subs, concat_dim="row", compat="no_conflicts", data_vars="all", coords="different", join="outer") subs.attrs = natt subs = subs.chunk(chunks) all_grps.append(subs) # select a corr if corr is not None: all_grps = [_.sel(corr=corr) for _ in all_grps] logger.info("Averaging completed.") return all_grps
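# Hedged usage sketch for get_averaged_ms(); the MS path and the column/grouping choices
# are hypothetical, and xarrayms (xm) plus the averager must be importable for this to run.
avg_grps = get_averaged_ms(
    "my_observation.ms",          # hypothetical Measurement Set
    tbin=8.0,                     # average every 8 seconds
    cbin=4,                       # bin 4 channels together
    columns=["DATA"],             # required columns are appended internally
    data_col="DATA",
    group_cols=["DATA_DESC_ID"],  # one averaged dataset per spectral window
)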
projection = ccrs.PlateCarree() gs1 = gridspec.GridSpec(3, 1) gs1.update(wspace=0, hspace=0, left=0.01, right=0.99, top=0.99, bottom=0.01) STRETCH_FACTOR = 1.0001 TARGET_LAT = 0.0 TARGET_LON = 350.0 grid = sg.grids.StretchedGrid(48, STRETCH_FACTOR, TARGET_LAT, TARGET_LON) with suppress_stdout(): # Load data da = xr.combine_nested([ xr.open_dataset(path)['SpeciesConc_Rn222'] for path in [ f'OutputDir/GCHP.SpeciesConc.20160101_0030z.nc4', f'OutputDir/GCHP.SpeciesConc.20160101_1230z.nc4', f'OutputDir/GCHP.SpeciesConc.20160102_0030z.nc4', ] ], concat_dim='time') ds_wind = [ xr.open_dataset(path) for path in [ f'OutputDir/GCHP.StateMet_avg.20160101_0030z.nc4', f'OutputDir/GCHP.StateMet_avg.20160101_1230z.nc4', f'OutputDir/GCHP.StateMet_avg.20160102_0030z.nc4', ] ] u_wind = xr.combine_nested([ds['Met_U'] for ds in ds_wind], concat_dim='time') v_wind = xr.combine_nested([ds['Met_V'] for ds in ds_wind], concat_dim='time') for time in range(3):
def test_empty_input(self):
    assert_identical(Dataset(), combine_nested([], concat_dim="x"))
def test_combine_nested_join_exact(self):
    objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
    with raises_regex(ValueError, "indexes along dimension"):
        combine_nested(objs, concat_dim="x", join="exact")
def test_combine_coords_join(self, join, expected):
    objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
    actual = combine_nested(objs, concat_dim="x", join=join)
    assert_identical(expected, actual)
edate = '20171203' files1 = glob.glob(path_ccn) files1.sort() files1 = [ f for f in files1 if f.split('.')[-3] >= sdate and f.split('.')[-3] <= edate ] #file = files+files1 ccn_colavg1 = arm.read_netcdf(files1) ccn_colavg1 = ccn_colavg1.resample(time='1h').nearest() con1 = ccn_colavg1['N_CCN'] qc1 = ccn_colavg1['qc_N_CCN'] #con_full = xr.concat([con,con1],dim =index ) con_full = xr.combine_nested( [con, con1], concat_dim=['time'])[:, 3] # supersaturation_setpoint float32 0.5 ccn_qc_full = xr.combine_nested( [qc, qc1], concat_dim=['time'])[:, 3] # supersaturation_setpoint float32 0.5 time_ccn = con_full.time #%% #qing_exhau = netCDF4.Dataset('/Users/qingn/20171030201180324qing_flag_00.cdf') #qing_exhaust = qing_exhau['exhaust_flag'] #environ_path = '/Users/qingn/Downloads/drive-download-20191125T073459Z-001/MARCUS_V1_VAP_20171029.cdf' # ============================================================================= # environ_path1 = '/Users/qingn/Downloads/MARCUS VAP/MARCUS_*.cdf'\ # # files_env_July = glob.glob(environ_path1) # files_env_July.sort()
def open_mfdataset( paths, chunks=None, concat_dim="time", compat="no_conflicts", preprocess=None, engine=None, lock=None, data_vars="all", coords="different", combine="nested", autoclose=None, parallel=False, join="outer", attrs_file=None, **kwargs, ): """Helper function for opening multiple files as an xarray_ dataset. Adapted from upstream implementation_. See docs_ for usage. .. todo:: To be removed when a backend entrypoint_ is implementated. .. _implementation: https://github.com/pydata/xarray/blob/484d1ce5ff8969b6ca6fa942b344379725f33b9c/xarray/backends/api.py#L726 .. _docs: https://xarray.pydata.org/en/v0.15.1/generated/xarray.open_mfdataset.html .. _entrypoint: https://github.com/pydata/xarray/pull/3166 """ if isinstance(paths, str): paths = sorted(glob(paths)) else: paths = [str(p) if isinstance(p, Path) else p for p in paths] if not paths: raise OSError("no files to open") # If combine='by_coords' then this is unnecessary, but quick. # If combine='nested' then this creates a flat list which is easier to # iterate over, while saving the originally-supplied structure as "ids" if combine == "nested": if isinstance(concat_dim, (str, xr.DataArray)) or concat_dim is None: concat_dim = [concat_dim] open_kwargs = dict() if parallel: import dask # wrap the open_dataset, getattr, and preprocess with delayed open_ = dask.delayed(open_dataset) if preprocess is not None: preprocess = dask.delayed(preprocess) else: open_ = open_dataset datasets = [open_(p, **open_kwargs) for p in paths] if preprocess is not None: datasets = [preprocess(ds) for ds in datasets] if parallel: # calling compute here will return the datasets # the underlying datasets will still be stored as dask arrays (datasets,) = dask.compute(datasets) # Combine all datasets, closing them in case of a ValueError try: if combine == "nested": # Combined nested list by successive concat and merge operations # along each dimension, using structure given by "ids" combined = xr.combine_nested( datasets, concat_dim=concat_dim, compat=compat, data_vars=data_vars, coords=coords, join=join, ) elif combine == "by_coords": # Redo ordering from coordinates, ignoring how they were ordered # previously combined = xr.combine_by_coords( datasets, compat=compat, data_vars=data_vars, coords=coords, join=join ) else: raise ValueError( "{} is an invalid option for the keyword argument" " ``combine``".format(combine) ) except ValueError: for ds in datasets: ds.close() raise # read global attributes from the attrs_file or from the first dataset if attrs_file is not None: if isinstance(attrs_file, Path): attrs_file = str(attrs_file) combined.attrs = datasets[paths.index(attrs_file)].attrs else: combined.attrs = datasets[0].attrs return combined
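# Minimal usage sketch for the open_mfdataset helper above; the glob pattern is
# illustrative, and combine="nested" concatenates the files along "time" in sorted
# glob order.
combined = open_mfdataset(
    "data/model_output_*.nc",
    concat_dim="time",
    combine="nested",
)
print(combined)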
def test_manual_concat_too_many_dims_at_once(self):
    objs = [Dataset({'x': [0], 'y': [1]}), Dataset({'y': [0], 'x': [1]})]
    with pytest.raises(ValueError, match="not equal across datasets"):
        combine_nested(objs, concat_dim='x', coords='minimal')
an integer and returning that number """ the_match = find_hour.match(str(the_file)) return int(the_match.group(1)) if __name__ == "__main__": all_files.sort(key=sort_hour) xarray_files = [] for item in all_files: with Dataset(str(item)) as nc_file: the_time = nc_file.variables['time'][...] print(datetime.fromtimestamp(the_time, tz=utc)) ds = xr.open_dataset(item) xarray_files.append(ds) ds_big = xr.combine_nested(xarray_files, 'time') time_average = ds_big.mean('time') # # time_average.data_vars # time_average.coords varnames = list(ds_big.variables.keys()) # # # create an xarray out of these files # vel_vals = [ 'VVEL_200mb', 'VVEL_250mb', 'VVEL_500mb', 'VVEL_700mb', 'VVEL_925mb', 'VVEL_1000mb' ] vel_dict = {} for key in vel_vals:
def read_clear_allsky_pairs_MWI(files_clearsky): dict_ici = {"ici_channels": "channels", "ici_stokes_dim": "stokes_dim"} dict_mwi = {"mwi_channels": "channels", "mwi_stokes_dim": "stokes_dim"} first_iteration = True for file_clearsky in files_clearsky[:]: file_allsky = file_clearsky.replace('_clearsky.nc', '.nc') file_clearsky_mwi = file_clearsky.replace('ICI', 'MWI') file_allsky_mwi = file_allsky.replace('ICI', 'MWI') files = [file_allsky, file_clearsky_mwi, file_allsky_mwi] f_exist = [f for f in files if os.path.isfile(f)] if len(f_exist) == 3: # if os.path.isfile(file_allsky): # check if both files exist # print (file_allsky) y = xarray.open_dataset(file_allsky) y_ici_allsky = y.y_ici y = xarray.open_dataset(file_clearsky) # print(file_clearsky) y_ici_clearsky = y.y_ici y = xarray.open_dataset(file_allsky_mwi) y_mwi_allsky = y.y_mwi y = xarray.open_dataset(file_clearsky_mwi) # print(file_clearsky) y_mwi_clearsky = y.y_mwi allsky = y_ici_allsky.shape[0] clearsky = y_ici_clearsky.shape[0] allsky_mwi = y_mwi_allsky.shape[0] clearsky_mwi = y_mwi_clearsky.shape[0] cases = min(allsky, clearsky, allsky_mwi, clearsky_mwi) y_ici_allsky = y_ici_allsky[:cases, :] y_ici_clearsky = y_ici_clearsky[:cases, :] y_mwi_allsky = y_mwi_allsky[:cases, :] y_mwi_clearsky = y_mwi_clearsky[:cases, :] y_ici_allsky = y_ici_allsky.rename(dict_ici) y_ici_clearsky = y_ici_clearsky.rename(dict_ici) y_mwi_allsky = y_mwi_allsky.rename(dict_mwi) y_mwi_clearsky = y_mwi_clearsky.rename(dict_mwi) y_ici_allsky = xarray.combine_nested([y_ici_allsky, y_mwi_allsky], concat_dim=["channels"]) y_ici_clearsky = xarray.combine_nested( [y_ici_clearsky, y_mwi_clearsky], concat_dim=["channels"]) if first_iteration: # initialise the xarray DataArray y_ici_cs = y_ici_clearsky y_ici_as = y_ici_allsky first_iteration = False else: y_ici_cs = xarray.concat([y_ici_cs, y_ici_clearsky], dim='cases') y_ici_as = xarray.concat([y_ici_as, y_ici_allsky], dim='cases') print(y_ici_cs.shape) print(y_ici_as.shape) return y_ici_cs, y_ici_as
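# Hedged usage sketch: the glob pattern for the ICI clear-sky files is hypothetical;
# the matching MWI and all-sky files are derived inside the function by filename
# substitution, so only the clear-sky ICI list is passed in.
import glob

files_clearsky = sorted(glob.glob("simulations/ICI/*_clearsky.nc"))
y_cs, y_as = read_clear_allsky_pairs_MWI(files_clearsky)
print(y_cs.shape, y_as.shape)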