def test_subset_variables(self):
    fileName = str(self.datadir.join('example_jan.nc'))
    timestr = ['xtime_start', 'xtime_end']
    varList = ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']

    # first, test loading the whole data set and then calling
    # subset_variables explicitly
    ds = xr.open_mfdataset(
        fileName,
        preprocess=lambda x: mpas_xarray.preprocess_mpas(x,
                                                         timestr=timestr,
                                                         yearoffset=1850))
    ds = mpas_xarray.subset_variables(ds, varList)
    self.assertEqual(sorted(ds.data_vars.keys()), sorted(varList))
    self.assertEqual(pd.Timestamp(ds.Time.values[0]),
                     pd.Timestamp('1855-01-16 12:22:30'))

    # next, test the same with the onlyvars argument
    ds = xr.open_mfdataset(
        fileName,
        preprocess=lambda x: mpas_xarray.preprocess_mpas(x,
                                                         timestr=timestr,
                                                         onlyvars=varList,
                                                         yearoffset=1850))
    # compare sorted lists so the check does not depend on the dict-view type
    self.assertEqual(sorted(ds.data_vars.keys()), sorted(varList))
def scaleVSpower():
    power = xr.open_mfdataset('/users/global/cornkle/data/OBS/modis_LST/modis_netcdf/power_maps/'
                              'lsta_daily_power*.nc')
    scale = xr.open_mfdataset('/users/global/cornkle/data/OBS/modis_LST/modis_netcdf/scale_maps/'
                              'lsta_daily_scale*.nc')

    scales = np.unique(scale['LSTA'].values[0:300, :, :])
    scales = scales[np.isfinite(scales)]

    power_arr = power['LSTA'][0:300]
    scale_arr = scale['LSTA'][0:300]

    mlist = []
    for s in scales:
        print('Doing ' + str(s))
        mean = np.nanmean(power_arr.where(scale_arr.values == s).values)
        mlist.append(mean)

    f = plt.figure()
    plt.scatter(scales, mlist)
def main(era_filesearch, cesm_base_filesearch, bias_output):
    print("opening data")
    era_data = xr.open_mfdataset(era_filesearch, concat_dim='time')
    base_cesm_data = xr.open_mfdataset(cesm_base_filesearch, concat_dim='time')

    print("loading data")
    era_data.load()
    base_cesm_data.load()

    # note: despite the variable names, these are standard deviations over time
    print("compute standard deviations")
    emean = era_data.std(dim="time")
    cmean = base_cesm_data.std(dim="time")

    print("creating data")
    interpolated_era = xr.zeros_like(cmean)

    print("loading data")
    interpolated_era.load()

    z_interp_all_vars(emean, interpolated_era,
                      era_data["z"].mean(dim="time"),
                      base_cesm_data["z"].mean(dim="time"),
                      vars_to_correct)
    interpolated_era.to_netcdf("era_interpolated_std.nc")

    print("Computing Bias")
    bias = interpolated_era - cmean

    print("writing")
    bias.to_netcdf(bias_output)
def file_loop():
    lsta = xr.open_mfdataset('/users/global/cornkle/data/OBS/modis_LST/modis_netcdf/scale_maps/'
                             'lsta_daily_scale_*.nc')
    lsta_check = xr.open_mfdataset('/users/global/cornkle/data/OBS/modis_LST/modis_netcdf/'
                                   'lsta_daily_*.nc')

    lsta_check = lsta_check.sel(lat=slice(lsta['lat'].values.min(), lsta['lat'].values.max()),
                                lon=slice(lsta['lon'].values.min(), lsta['lon'].values.max()))

    lsta_checks = lsta_check['LSTA'].where(lsta_check['LSTA'] > -800)
    lsta_checks = lsta_checks.where(lsta.time == lsta_checks.time)

    bins = np.arange(-20, 20, 2)
    f = plt.figure()
    plt.hist(lsta_checks.values[np.isfinite(lsta_checks.values)], bins=bins, edgecolor='k')

    bins = np.arange(-140, 141, 10)
    ll = []
    for i, b in enumerate(bins[0:-1]):
        b1 = bins[i + 1]
        # use nanpercentile: the values masked out by .where() are NaN
        lmean = np.nanpercentile(lsta_checks.where((lsta['LSTA'].values >= b) &
                                                   (lsta['LSTA'].values < b1)), 90)
        ll.append(lmean)

    # pdb.set_trace()  # debugging breakpoint, disabled

    f = plt.figure()
    plt.scatter(bins[1:], ll)
def test_deterministic_names(self):
    with create_tmp_file() as tmp:
        data = create_test_data()
        data.to_netcdf(tmp)
        with open_mfdataset(tmp) as ds:
            original_names = dict((k, v.data.name) for k, v in ds.items())
        with open_mfdataset(tmp) as ds:
            repeat_names = dict((k, v.data.name) for k, v in ds.items())
        for var_name, dask_name in original_names.items():
            self.assertIn(var_name, dask_name)
            self.assertIn(tmp, dask_name)
        self.assertEqual(original_names, repeat_names)
def read_nc_files(dir, bounds=None):

    def rmheight(d):
        # currently unused; kept for reference
        # del d["height"]
        return d

    files = get_reanalysis_file_paths(dir)
    if len(files) >= 1:
        # open_mfdataset handles both the single- and the multi-file case
        data = xarray.open_mfdataset(files, preprocess=lambda d: assert_bounds(d, bounds))
    else:
        raise IOError("There are no .nc files in that directory.")
    return data
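# Minimal usage sketch for read_nc_files, assuming it lives in a module
# together with get_reanalysis_file_paths and assert_bounds; the directory
# path below is an illustrative placeholder, not a value from the source:
reanalysis = read_nc_files("/path/to/reanalysis_netcdf_dir", bounds=None)
print(reanalysis)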
def test_lock(self):
    original = Dataset({'foo': ('x', np.random.randn(10))})
    with create_tmp_file() as tmp:
        original.to_netcdf(tmp, format='NETCDF3_CLASSIC')
        with open_dataset(tmp, chunks=10) as ds:
            task = ds.foo.data.dask[ds.foo.data.name, 0]
            self.assertIsInstance(task[-1], type(Lock()))
        with open_mfdataset(tmp) as ds:
            task = ds.foo.data.dask[ds.foo.data.name, 0]
            self.assertIsInstance(task[-1], type(Lock()))
        with open_mfdataset(tmp, engine='scipy') as ds:
            task = ds.foo.data.dask[ds.foo.data.name, 0]
            self.assertNotIsInstance(task[-1], type(Lock()))
def saveMonthly18():
    msg_folder = '/users/global/cornkle/data/OBS/gridsat/gridsat_netcdf/z18_panAfrica/'
    da = xr.open_mfdataset(msg_folder + 'gridsat_WA_*18UTC.nc')
    da = da.where((da <= -40) & (da >= -110))
    da = da.resample('m', dim='time', how='mean')
    da.to_netcdf(msg_folder + 'gridsat_monthly_18UTC.nc')
def data(self):
    try:
        if self.path:
            return open_mfdataset(self.path / "data*.nc")
        return self._concat_fields(self._cached_data)
    except OSError:
        return
def retrieve(path, isel='all', lazy=True):
    path = Path(path)
    try:
        data = open_dataset(path / "data.nc")
        lazy = True
    except FileNotFoundError:
        data = open_mfdataset(path / "data*.nc", concat_dim="t").sortby("t")

    try:
        with open(Path(path) / 'metadata.yml', 'r') as yaml_file:
            metadata = yaml.safe_load(yaml_file)
    except FileNotFoundError:
        # Ensure retro-compatibility with older versions: Path.glob returns a
        # generator, so take its first match instead of indexing it.
        with open(next(path.glob("Treant.*.json"))) as f:
            metadata = json.load(f)["categories"]

    if isel == 'last':
        data = data.isel(t=-1)
    elif isel == 'all':
        pass
    elif isinstance(isel, dict):
        data = data.isel(**isel)
    else:
        data = data.isel(t=isel)

    if not lazy:
        return FieldsData(data=data.load(), metadata=AttrDict(**metadata))

    return FieldsData(data=data, metadata=AttrDict(**metadata))
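# Usage sketch for retrieve, assuming a simulation directory that contains
# either data.nc or data*.nc plus metadata.yml; the path below is an
# illustrative placeholder:
fields = retrieve("/path/to/simulation_dir", isel="last", lazy=False)
print(fields.metadata)
print(fields.data)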
def month_count_concat():
    msg_folder = cnst.GRIDSAT
    fname = 'aggs/gridsat_WA_-65_monthly_count_-40base_15-21UTC_1000km2.nc'
    da = xr.open_mfdataset(cnst.GRIDSAT + 'gridsat_WA_-40_1000km2_15-21UTC*_monthSum.nc')
    enc = {'tir': {'complevel': 5, 'zlib': True}}
    da.to_netcdf(msg_folder + fname, encoding=enc)
def test_open_and_do_math(self):
    original = Dataset({'foo': ('x', np.random.randn(10))})
    with create_tmp_file() as tmp:
        original.to_netcdf(tmp)
        with open_mfdataset(tmp) as ds:
            actual = 1.0 * ds
            self.assertDatasetAllClose(original, actual)
def test_variable_map(self):
    fileName = str(self.datadir.join('example_jan.nc'))
    varMap = {
        'avgSurfaceTemperature':
            ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature',
             'other_string',
             'yet_another_string'],
        'daysSinceStartOfSim':
            ['time_avg_daysSinceStartOfSim',
             'xtime',
             'something_else'],
        'avgLayerTemperature':
            ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature',
             'test1',
             'test2'],
        'Time':
            [['xtime_start', 'xtime_end'],
             'time_avg_daysSinceStartOfSim']}

    varList = ['avgSurfaceTemperature', 'avgLayerTemperature',
               'refBottomDepth', 'daysSinceStartOfSim']

    # preprocess_mpas will use varMap to map the variable names from their
    # values in the file to the desired values in varList
    ds = xr.open_mfdataset(
        fileName,
        preprocess=lambda x: mpas_xarray.preprocess_mpas(
            x, timestr='Time', onlyvars=varList, yearoffset=1850,
            varmap=varMap))

    # make sure the remapping happened as expected
    self.assertEqual(sorted(ds.data_vars.keys()), sorted(varList))
def open_cchdo_as_mfdataset(paths, target_pressure,
                            pressure_coord='pressure', concat_dim='time'):
    """Open cchdo hydrographic data in netCDF format, interpolate to
    specified pressures, and combine as an xarray dataset

    Parameters
    ----------
    paths : str or sequence
        Either a string glob in the form "path/to/my/files/*.nc" or an
        explicit list of files to open.
    target_pressure : arraylike
        Target pressure to which all casts are interpolated
    pressure_coord : str
        Name of the coordinate variable for pressure
    concat_dim : str
        Name of the dimension along which to concatenate casts

    Returns
    -------
    ds : xarray Dataset
    """

    # add time if missing
    timefun = _maybe_add_time_coord
    # create interpolation function for pressure
    interpfun = functools.partial(interp_coordinate,
                                  interp_coord=pressure_coord,
                                  interp_data=target_pressure)
    # create renaming function for concatenation
    renamefun = functools.partial(rename_0d_coords, new_dim=concat_dim)
    # compose together
    ppfun = compose(interpfun, renamefun, timefun)

    # paths = os.path.join(ddir, match_pattern)
    return xr.open_mfdataset(paths, concat_dim=concat_dim, preprocess=ppfun)
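# Usage sketch for open_cchdo_as_mfdataset; the glob pattern and pressure
# levels below are illustrative placeholders, not values from the source:
target_p = np.arange(0, 5000, 10.0)
casts = open_cchdo_as_mfdataset("/path/to/cchdo/*.nc", target_pressure=target_p)
print(casts)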
def _load_data_from_disk(file_set, preprocess_func=lambda ds: ds,
                         data_vars='minimal', coords='minimal',
                         grid_attrs=None, **kwargs):
    """Load a Dataset from a list or glob-string of files.

    Datasets from files are concatenated along time, and all grid attributes
    are renamed to their aospy internal names.

    Parameters
    ----------
    file_set : list or str
        List of paths to files or glob-string
    preprocess_func : function (optional)
        Custom function to call before applying any aospy logic
        to the loaded dataset
    data_vars : str (default 'minimal')
        Mode for concatenating data variables in call to ``xr.open_mfdataset``
    coords : str (default 'minimal')
        Mode for concatenating coordinate variables in call to
        ``xr.open_mfdataset``.
    grid_attrs : dict
        Overriding dictionary of grid attributes mapping aospy internal
        names to names of grid attributes used in a particular model.

    Returns
    -------
    Dataset
    """
    apply_preload_user_commands(file_set)
    func = _preprocess_and_rename_grid_attrs(preprocess_func, grid_attrs,
                                             **kwargs)
    return xr.open_mfdataset(file_set, preprocess=func, concat_dim=TIME_STR,
                             decode_times=False, decode_coords=False,
                             mask_and_scale=True, data_vars=data_vars,
                             coords=coords)
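# Usage sketch for _load_data_from_disk; the glob below is an illustrative
# placeholder, and the preprocess function simply drops an assumed bounds
# variable before concatenation:
ds = _load_data_from_disk(
    "/path/to/model/output/*.nc",
    preprocess_func=lambda d: d.drop_vars("time_bounds", errors="ignore"),
)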
def main(files, out):
    """
    files: url to an .nc/.ncml file or the path to a text file containing
           .nc/.ncml links. A # at the front will skip links in the text file.
    out: Directory to save plots
    """
    fname, ext = os.path.splitext(files)
    if ext == '.nc':
        list_files = [files]
    elif ext == '.ncml':
        list_files = [files]
    else:
        list_files = read_file(files)

    stream_vars = pf.load_variable_dict(var='eng')  # load engineering variables

    # for nc in list_files:
    #     print nc
    # the engine that xarray uses can be changed as specified here
    # http://xarray.pydata.org/en/stable/generated/xarray.open_dataset.html#xarray.open_dataset
    with xr.open_mfdataset(list_files, engine='netcdf4') as ds_disk:
        # change dimensions from 'obs' to 'time'
        ds_disk = ds_disk.swap_dims({'obs': 'time'})
        ds_variables = ds_disk.data_vars.keys()  # List of dataset variables
        stream = ds_disk.stream  # List stream name associated with the data
        title_pre = mk_str(ds_disk.attrs, 't')  # , var, tt0, tt1, 't')
        save_pre = mk_str(ds_disk.attrs, 's')  # , var, tt0, tt1, 's')
        save_dir = os.path.join(out, ds_disk.subsite, ds_disk.node, ds_disk.stream, 'pcolor')
        cf.create_dir(save_dir)

        # t0, t1 = cf.get_rounded_start_and_end_times(ds_disk['time'].data)
        # tI = t0 + t1 - (t0 / 2)
        # time_list = [[t0, t1], [t0, tI], [tI, t1]]
        # time_list = [[t0, t1]]
        # for period in time_list:
        #     tt0 = period[0]
        #     tt1 = period[1]
        #     sub_ds = ds_disk.sel(time=slice(str(tt0), str(tt1)))

        bins = ds_disk['bin_depths']
        north = ds_disk['northward_seawater_velocity']
        east = ds_disk['eastward_seawater_velocity']
        # up = ds_disk['upward_seawater_velocity']
        # error = ds_disk['error_velocity']

        time = dict(data=ds_disk['time'].data,
                    info=dict(label=ds_disk['time'].standard_name, units='GMT'))
        bins = dict(data=bins.data.T, info=dict(label=bins.long_name, units=bins.units))
        north = dict(data=north.data.T, info=dict(label=north.long_name, units=north.units))
        east = dict(data=east.data.T, info=dict(label=east.long_name, units=east.units))
        # up = dict(data=up.data.T, info=dict(label=up.long_name, units=up.units))
        # error = dict(data=error.data.T, info=dict(label=error.long_name, units=error.units))

        sname = save_pre + 'ADCP'
        title = title_pre

        fig, axs = pf.adcp(time, bins, north, east, title)
        pf.resize(width=12, height=8.5)  # Resize figure
        pf.save_fig(save_dir, sname, res=250)  # Save figure
        plt.close('all')
def read_var_in_memory(dir, common_suffix="daily.nc", varname="lake_ice_fraction"):
    """
    Read a variable from all matching files in a directory into memory.

    :param dir: directory containing the netCDF files
    :param common_suffix: filename suffix shared by the files to read
    :param varname: name of the variable to load
    """
    with xarray.open_mfdataset(f"{dir}/*{common_suffix}") as ds:
        d_arr = ds[varname].load()
        return d_arr
def test_open_mfdataset(self):
    original = Dataset({'foo': ('x', np.random.randn(10))})
    with create_tmp_file() as tmp1:
        with create_tmp_file() as tmp2:
            original.isel(x=slice(5)).to_netcdf(tmp1)
            original.isel(x=slice(5, 10)).to_netcdf(tmp2)
            with open_mfdataset([tmp1, tmp2]) as actual:
                self.assertIsInstance(actual.foo.variable.data, da.Array)
                self.assertEqual(actual.foo.variable.data.chunks, ((5, 5),))
                self.assertDatasetAllClose(original, actual)
            with open_mfdataset([tmp1, tmp2], chunks={'x': 3}) as actual:
                self.assertEqual(actual.foo.variable.data.chunks, ((3, 2, 3, 2),))
    with self.assertRaisesRegexp(IOError, 'no files to open'):
        open_mfdataset('foo-bar-baz-*.nc')
def test_load_mpas_xarray_timeSeriesStats_datasets(path):  # {{{
    ds = xr.open_mfdataset(path, preprocess=preprocess_mpas_timeSeriesStats)
    ds = remove_repeated_time_index(ds)

    ds2 = xr.open_mfdataset(path, preprocess=preprocess_mpas)
    ds2 = remove_repeated_time_index(ds2)

    # make a simple plot from the data
    def plot_data(ds):
        var = ds["timeSeriesStatsMonthly_avg_iceAreaCell_1"]
        return var.where(var > 0).mean('nCells').plot()

    plot_data(ds)
    plot_data(ds2)
    plt.title("Curve centered around right times (b) \n "
              "Curve shifted towards end of avg period (g)")
    plt.show()
    return  # }}}
def test_load_mpas_xarray_datasets(path):  # {{{
    ds = xr.open_mfdataset(path, preprocess=preprocess_mpas)
    ds = remove_repeated_time_index(ds)

    # make a simple plot from the data
    ds.Time.plot()
    plt.show()
    return  # }}}
def test_save_mfdataset_roundtrip(self):
    original = Dataset({'foo': ('x', np.random.randn(10))})
    datasets = [original.isel(x=slice(5)),
                original.isel(x=slice(5, 10))]
    with create_tmp_file() as tmp1:
        with create_tmp_file() as tmp2:
            save_mfdataset(datasets, [tmp1, tmp2])
            with open_mfdataset([tmp1, tmp2]) as actual:
                self.assertDatasetIdentical(actual, original)
def size_trend():
    msg_folder = '/users/global/cornkle/data/OBS/gridsat/gridsat_netcdf/yearly_files/'
    data = xr.open_mfdataset(msg_folder + 'gridsat*.nc')

    cut = data.sel(lat=slice(10, 17), lon=slice(-17, -10))
    cut = cut.isel(time=((cut['time.year'] > 1984) & (cut['time.month'] == 8)))
    cut = cut['t']

    dic = {}
    for p in np.arange(1985, 2017, 1):
        dic[p] = []

    def mcs_find(image, thresh=None):
        if not thresh:
            print('Give threshold')
            return

        image[image > thresh] = 0
        image[image <= thresh] = 1
        image[np.isnan(image)] = 0

        if np.sum(image) < 10:  # too few cold pixels to contain a blob
            return []

        labels, numL = label(image)

        ret = []
        for l in np.unique(labels):
            if l == 0:
                continue
            blob = np.where(labels == l)
            if len(blob[0]) < 100:  # below minimum blob size
                continue
            ret.append(len(blob[0]) * 49)  # pixel count times ~49 km2 per pixel
        return ret

    for i in np.arange(cut.shape[0]):
        ret = mcs_find(cut[i, :, :].values, thresh=-40)
        if ret == []:
            continue
        # pdb.set_trace()  # debugging breakpoint, disabled
        dic[int(cut['time.year'].values[i])].append(ret)

    # pdb.set_trace()  # debugging breakpoint, disabled
    for year in dic:
        dic[year] = [item for sublist in dic[year] for item in sublist]
def read_mixed(paths, variable: str, **args):
    """Reads variable from multiple files and mixed locations"""
    ds = xr.open_mfdataset(paths, concat_dim='time')
    v = ds[variable]
    a = v[args]
    a.load()
    return a
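# Usage sketch for read_mixed; the extra keyword arguments are passed as a
# dict of positional (integer) indexers keyed by dimension name, and the
# glob and variable name below are illustrative placeholders:
tas_slice = read_mixed("/path/to/files/*.nc", "tas", time=0)
print(tas_slice.shape)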
def plot():
    lsta_all = xr.open_mfdataset('/users/global/cornkle/data/OBS/modis_LST/modis_netcdf/scale_maps_smallPref/*.nc')
    temp_all = xr.open_mfdataset('/users/global/cornkle/data/OBS/modis_LST/modis_netcdf/lsta_daily_*.nc')

    temp_all = temp_all.sel(lat=slice(10.5, 17.5), lon=slice(-9.5, 9.5))
    # subset the scale maps themselves (not temp_all again) to the same box
    lsta_all = lsta_all.sel(lat=slice(10.5, 17.5), lon=slice(-9.5, 9.5))

    temp_all = temp_all.where(temp_all['time'] == lsta_all['time'])

    lsta_all = lsta_all.where(temp_all > -800)
    temp_all = temp_all.where(temp_all > -800)

    lsta_all = lsta_all.where(np.abs(temp_all['LSTA'].values) > 0.2)
    temp_all = temp_all.where(np.abs(temp_all['LSTA'].values) > 0.2)

    dic = pkl.load(open("/users/global/cornkle/figs/LSTA-bullshit/scales/new/scalesVSblob.p", "rb"))

    blob = np.squeeze(np.concatenate(dic['blob']))
    scale = np.squeeze(np.concatenate(dic['scale']))
    temp = np.squeeze(np.concatenate(dic['temp']))

    scalei = scale[np.isfinite(scale) & np.isfinite(temp)]
    blobi = blob[np.isfinite(scale) & np.isfinite(temp)]
    tempi = temp[np.isfinite(scale) & np.isfinite(temp)]

    H, xbins, ybins = np.histogram2d(tempi, np.abs(scalei),
                                     bins=[np.arange(-10, 11, 2), np.arange(0, 151, 15)])
    H = H.transpose()  # / np.sum(H)

    H2, xbins, ybins = np.histogram2d(temp_all['LSTA'].values.flatten(),
                                      np.abs(lsta_all['LSTA'].values.flatten()),
                                      bins=[np.arange(-10, 11, 2), np.arange(0, 151, 15)])
    # H2 = H2.transpose() / np.sum(H2)

    X, Y = np.meshgrid(xbins, ybins)

    f = plt.figure()
    plt.pcolormesh(X, Y, H, cmap='viridis')
    plt.colorbar()
def test_plot_area_avg(target_nc_folder="", source_nc_path=""): # target_nc_folder = "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_daily_Obs_monthly_icefix_1980-2009" # target_nc_folder = "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_icefix_Obs_1980-1981_test" #target_nc_folder = "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_daily_Obs_monthly_icefix_test2_1980-1981_test1" ice_fr = xarray.open_dataset(source_nc_path)["LC"] assert isinstance(ice_fr, xarray.DataArray) ice_fr = ice_fr.where((ice_fr >= 0) & (ice_fr <= 1)) # t, x, y source_data = ice_fr.to_masked_array(copy=False) source_time = ice_fr.coords["time"] source_time = pd.to_datetime(source_time.values.tolist()) s_source = pd.Series(data=[ (field[~field.mask].mean() if not np.all(field.mask) else np.nan) for field in source_data ], index=source_time) ice_fr_lkeff = xarray.open_mfdataset(target_nc_folder + "/*daily.nc")["lake_ice_fraction"] lkeff_data = ice_fr_lkeff.to_masked_array(copy=False) lkeff_time = pd.to_datetime(ice_fr_lkeff.coords["t"].values.tolist()) s_lkeff = pd.Series([ (field[~field.mask].mean() if not np.all(field.mask) else np.nan) for field in lkeff_data ], index=lkeff_time) s_source = s_source[(s_source.index <= lkeff_time[-1]) & (s_source.index >= lkeff_time[0])] assert isinstance(s_source, pd.Series) # print(f"Source: len={len(s_source)}") print(f"Lkeff: len={len(s_lkeff)}") # do the plotting fig = plt.figure() gs = GridSpec(2, 1) # plot initial lake fractions ax = fig.add_subplot(gs[0, 0]) s_source.plot(ax=ax, marker=".", linestyle="None", label="original") ax.legend() # plot lake fractions outputed by hles algorithm ax = fig.add_subplot(gs[1, 0], sharex=ax) s_lkeff.plot(ax=ax, marker=".", linestyle="None", label="lkeff") ax.legend()
def test_preprocess_mfdataset(self):
    original = Dataset({'foo': ('x', np.random.randn(10))})
    with create_tmp_file() as tmp:
        original.to_netcdf(tmp)

        def preprocess(ds):
            return ds.assign_coords(z=0)

        expected = preprocess(original)
        with open_mfdataset(tmp, preprocess=preprocess) as actual:
            self.assertDatasetIdentical(expected, actual)
def mergeCMORPH():
    sm_folder = '/users/global/cornkle/data/OBS/CMORPH/CMORPH_nc/'

    for y in range(2006, 2011):
        files = sm_folder + str(y) + '/' + '*.nc'
        ds = xr.open_mfdataset(files)

        enc = {'pr': {'complevel': 5, 'zlib': True}}
        ds.to_netcdf(sm_folder + 'CMORPH_WA_' + str(y) + '.nc', encoding=enc, format='NETCDF4')
        print('Wrote ' + sm_folder + 'CMORPH_WA_' + str(y) + '.nc')
def read_data(path):
    """
    Read in multiple netCDF files and combine them into an xarray dataset.

    :rtype: xr.Dataset
    :param path: Path to the folder
    :return: The resulting dataset
    """
    path = path + os.sep + '*.nc'
    print(path)
    dataset = xr.open_mfdataset(path, concat_dim='time')
    return dataset
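# Usage sketch for read_data; the folder below is an illustrative placeholder
# and is expected to contain netCDF files sharing a 'time' dimension:
combined = read_data("/path/to/netcdf_folder")
print(combined.dims)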
def __init__(self, path="", vname=""):
    self.ds = xarray.open_mfdataset(path)
    print(self.ds)
    self.data = self.ds[vname]
    self.vname = vname

    # Create the caching directory for a variable
    self.cache_dir = Path("Daymet_cache") / vname
    self.cache_dir.mkdir(parents=True, exist_ok=True)
def tamsat(y):
    path = '/users/global/cornkle/data/OBS/TAMSATv3/'
    print('Doing ' + str(y))

    area = np.array([[-17, -10, 10, 17],
                     [-10, -2, 10, 17],
                     [-2, 9, 10, 17],
                     [-3, 3, 4, 12],
                     [-17, -15, 13.5, 15.5]])  # [-17,-10,13,17]

    coord = area[4]  # regions: west, central, east, Ghana, Sahel

    data = xr.open_mfdataset(path + 'rfe' + str(y) + '*.nc')
    data = data.sel(lon=slice(coord[0], coord[1]), lat=slice(coord[3], coord[2]))
    print('Opened data')
    data = data['rfe']

    tstart = []
    tend = []

    # for yy in np.arange(data.shape[0]):
    #     for xx in np.arange(data.shape[1]):
    mean = data.mean(dim=['lat', 'lon'])
    # mean = data.isel(lat=yy, lon=xx)
    md = mean.to_pandas()
    md = md.reindex(pd.date_range(str(y) + '-01-01', str(y) + '-12-31', freq='D'))

    diff = RainySeason(md)
    dstart = np.argmin(diff)
    dend = np.argmax(diff)

    # f = plt.figure()
    # ax = f.add_subplot(111)
    # plt.plot(diff.index, diff)
    # plt.axvline(dstart, color='k')
    # plt.axvline(dend, color='k')
    # plt.text(dstart-1, -50, str(dstart))
    # plt.text(dend-1, -50, str(dend))
    # plt.minorticks_on()
    # tstart.append(dstart)
    # tend.append(dend)
    # start = np.median(tstart)
    # end = np.median(tend)
    #
    # print('Done ' + str(y))

    return dstart, dend
def read(dictArgs):
    """read data from model and obs files, process data and return it"""

    dsmodel = xr.open_mfdataset(dictArgs["infile"], combine="by_coords", decode_times=False)

    if dictArgs["obsfile"] is not None:
        # priority to user-provided obs file
        dsobs = xr.open_mfdataset(dictArgs["obsfile"], combine="by_coords", decode_times=False)
    else:
        # use dataset from catalog, either from command line or default
        cat_platform = "catalogs/obs_catalog_" + dictArgs["platform"] + ".yml"
        catfile = pkgr.resource_filename("om4labs", cat_platform)
        cat = intake.open_catalog(catfile)
        dsobs = cat[dictArgs["dataset"]].to_dask()

    # read in model and obs data
    datamodel = read_data(dsmodel, dictArgs["possible_variable_names"])
    dataobs = read_data(dsobs, dictArgs["possible_variable_names"])

    # subset data
    if dictArgs["depth"] is None:
        dictArgs["depth"] = dictArgs["surface_default_depth"]

    if dictArgs["depth"] is not None:
        datamodel = subset_data(datamodel, "assigned_depth", dictArgs["depth"])
        dataobs = subset_data(dataobs, "assigned_depth", dictArgs["depth"])

    # reduce data along depth (not yet implemented)
    if "depth_reduce" in dictArgs:
        if dictArgs["depth_reduce"] == "mean":
            # do mean
            pass
        elif dictArgs["depth_reduce"] == "sum":
            # do sum
            pass

    # reduce data along time, here mandatory
    if ("assigned_time" in datamodel.dims) and (len(datamodel["assigned_time"]) > 1):
        warnings.warn("input dataset has more than one time record, "
                      "performing non-weighted average")
        datamodel = simple_average(datamodel, "assigned_time")
    if ("assigned_time" in dataobs.dims) and len(dataobs["assigned_time"]) > 1:
        warnings.warn("reference dataset has more than one time record, "
                      "performing non-weighted average")
        dataobs = simple_average(dataobs, "assigned_time")

    datamodel = datamodel.squeeze()
    dataobs = dataobs.squeeze()

    # check final data is 2d
    assert len(datamodel.dims) == 2
    assert len(dataobs.dims) == 2

    # check consistency of coordinates
    assert np.allclose(datamodel["assigned_lon"], dataobs["assigned_lon"])
    assert np.allclose(datamodel["assigned_lat"], dataobs["assigned_lat"])

    # homogenize coords
    dataobs = copy_coordinates(datamodel, dataobs, ["assigned_lon", "assigned_lat"])

    # restrict model to where obs exists
    datamodel = datamodel.where(dataobs)

    # dump values
    model = datamodel.to_masked_array()
    obs = dataobs.to_masked_array()
    x = datamodel["assigned_lon"].values
    y = datamodel["assigned_lat"].values

    # compute area
    if "areacello" in dsmodel.variables:
        area = dsmodel["areacello"].values
    else:
        if model.shape == (180, 360):
            area = compute_area_regular_grid(dsmodel)
        else:
            raise IOError("no cell area provided")

    return x, y, area, model, obs
def exec(self): log.info('[START] {}'.format("exec")) try: # bash RunShell-Python.sh "TalentPlatform-LSH0291-Analy.py" "2018-01-01" "2021-01-01" # nohup bash RunShell-Python.sh "TalentPlatform-LSH0291-Analy.py" "2018-01-01" "2021-01-01" & # /home/dxinyu/TEST/OUTPUT # /home/dxinyu/TEST/OUTPUT # python3 "/home/dxinyu/TEST/TalentPlatform-LSH0291-DataMerge.py" --inpPath "/home/dxinyu/TEST/OUTPUT" --outPath "/home/dxinyu/TEST/OUTPUT" # /data/dxinyu/CM_EDGAR/v5.0_FT2019_carbon_monitor # /home/dxinyu/TEST/EDGAR if (platform.system() == 'Windows'): # 옵션 설정 sysOpt = { # 시작/종료 시간 'srtDate': '2019-01-01', 'endDate': '2019-12-31' } else: # 옵션 설정 sysOpt = { # 시작/종료 시간 # 'srtDate': globalVar['srtDate'] # , 'endDate': globalVar['endDate'] } keyList = [ 'Oil_Power_Plants', 'Coal_Power_Plants', 'Gas_Power_Plants' ] dsDataL2 = xr.Dataset() for i, keyInfo in enumerate(keyList): log.info("[CHECK] keyInfo : {}".format(keyInfo)) dtSrtDate = pd.to_datetime(sysOpt['srtDate'], format='%Y-%m-%d') dtEndDate = pd.to_datetime(sysOpt['endDate'], format='%Y-%m-%d') dtIncDateList = pd.date_range(start=dtSrtDate, end=dtEndDate, freq='1M') # dtIncDateInfo = dtIncDateList[0] searchFileList = [] for j, dtIncDateInfo in enumerate(dtIncDateList): log.info( "[CHECK] dtIncDateInfo : {}".format(dtIncDateInfo)) dtYear = dtIncDateInfo.strftime('%Y') dtMonth = dtIncDateInfo.strftime('%m').replace('0', '') # inpFilePattern = '{}/CarbonMonitor_*{}*_y{}_m{}.nc'.format(serviceName, keyInfo, dtYear, dtMonth) # inpFilePattern = 'projects_v5.0_FT2019_carbon_monitor_*{}_{}_{}.txt'.format(keyInfo, dtYear, dtMonth) inpFilePattern = '{}_{}_{}_{}.nc'.format( serviceName, keyInfo, dtYear, dtMonth) inpFile = '{}/{}/EDGAR/{}'.format(globalVar['inpPath'], serviceName, inpFilePattern) fileList = sorted(glob.glob(inpFile)) if (len(fileList) < 1): continue fileInfo = fileList[0] searchFileList.append(fileList[0]) dsData = xr.open_mfdataset(searchFileList) dsDataL1 = dsData.rename({'ems': keyInfo}) dsDataL2 = dsDataL2.merge(dsDataL1) dsDataL2[[ 'Oil_Power_Plants', 'Coal_Power_Plants', 'Gas_Power_Plants' ]].to_array() dsDataL2['Gas_Power_Plants'][:, :, 4].plot() plt.show() # 변수별로 합계 dsDataL3 = dsDataL2.copy().to_array().sum("variable") dsDataL3 = dsDataL2.copy().assign( ems=dsDataL2['Oil_Power_Plants'] + dsDataL2['Coal_Power_Plants'] + dsDataL2['Gas_Power_Plants']) np.nansum(dsDataL3['ems'].values) np.nansum(dsDataL3.values) # dsDataL3 = xr.where((dsDataL3 == 0), np.nan, dsDataL3) cnt2D = dsDataL3.count(['date']) mean2D = dsDataL3.mean(['date']) sd2D = dsDataL3.std(['date']) sum2D = dsDataL3.sum(['date']) time1D = dsDataL3['date'].values lon1D = dsDataL3['lon'].values lat1D = dsDataL3['lat'].values lon2D, lat2D = np.meshgrid(lon1D, lat1D) dsDataL4 = dsDataL3.sel(date=time1D[0]) dsDataL4.values dsDataL4.plot() plt.show() sd2D.plot() plt.show() d = cnt2D.values d.shape cnt2D.values.shape() plt.contourf(lat1D, lon1D, cnt2D.values) np.nanmean(cnt2D.values) np.nanmax(cnt2D.values) np.nanmin(cnt2D.values) # plt.scatter(lon2D, lat2D, c=cnt2D.values) plt.colorbar() plt.close() plt.show() # data = pd.read_csv(fileInfo, skiprows=[0, 1], sep=';') # # dtIncDatePattern = '{}-{}'.format(dtYear, dtMonth) # dtIncDate = pd.to_datetime(dtIncDatePattern, format='%Y-%m') # data['date'] = dtIncDate # # dataL1 = data.set_index(['lon', 'lat', 'date']) # dsData = dataL1.to_xarray() # # saveFile = '{}/{}_{}_{}_{}.nc'.format(globalVar['outPath'], serviceName, keyInfo, dtYear, dtMonth) # os.makedirs(os.path.dirname(saveFile), exist_ok=True) # 
xr.Dataset(dsData).to_netcdf(saveFile) # log.info('[CHECK] saveFile : {}'.format(saveFile)) # # NetCDF 생산 # dsDataL2 = xr.Dataset( # { # 'mean': (('lat', 'lon'), (mean2D['emission'].values).reshape(len(lat1D), len(lon1D))) # , 'count': (('lat', 'lon'), (cnt2D['emission'].values).reshape(len(lat1D), len(lon1D))) # , 'sd': (('lat', 'lon'), (sd2D['emission'].values).reshape(len(lat1D), len(lon1D))) # , 'sum': (('lat', 'lon'), (sum2D['emission'].values).reshape(len(lat1D), len(lon1D))) # , 'extndUncrt': (('lat', 'lon'), (extndUncrt['emission'].values).reshape(len(lat1D), len(lon1D))) # , 'rltvUncrt': (('lat', 'lon'), (rltvUncrt['emission'].values).reshape(len(lat1D), len(lon1D))) # } # , coords={ # 'lat': lat1D # , 'lon': lon1D # } # ) # # # # # dsData = xr.open_mfdataset(fileList) # dsData = xr.where((dsData == 0), np.nan, dsData) # # cnt2D = dsData.count(['month']) # mean2D = dsData.mean(['month']) # sd2D = dsData.std(['month']) # sum2D = dsData.sum(['month']) # # time1D = dsData['month'].values # lon1D = dsData['lon'].values # lat1D = dsData['lat'].values # lon2D, lat2D = np.meshgrid(lon1D, lat1D) # # ***************************************************************************** # 확장/상대 불확도 계산 # ***************************************************************************** # 자유도 df = len(time1D) # t값 tVal = t(df) # 신뢰구간 95%에 대한 t값 t025 = tVal.ppf(0.975) # 신뢰구간 95% 불확실성 범위 leftConf = mean2D - t025 * (sd2D / np.sqrt(df)) rightConf = mean2D + t025 * (sd2D / np.sqrt(df)) # 확장 불확도 extndUncrt = t025 * (sd2D / np.sqrt(df)) # 상대 불확도 (%) rltvUncrt = (extndUncrt * 100) / mean2D # 총 불확도 totalUncrt = (rltvUncrt * extndUncrt) / np.abs(extndUncrt) dtYear = 2019 keyInfo = 'land' # # meanTotalUncrt = np.nanmean(totalUncrt[keyInfo].values) # mainTitle = '[{}] {} {} ({:.2f})'.format(dtYear, keyInfo, 'total uncertainty', meanTotalUncrt) # saveImg = '{}/{}_{}_{}-{}.png'.format(globalVar['figPath'], serviceName, keyInfo, 'totalUncertainty', dtYear) # rtnInfo = makeMapPlot(lon2D, lat2D, totalUncrt[keyInfo].values, mainTitle, saveImg, None) # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo)) # # keyInfo = 'intl_bunker' # meanTotalUncrt = np.nanmean(totalUncrt[keyInfo].values) # mainTitle = '[{}] {} {} ({:.2f})'.format(dtYear, keyInfo, 'total uncertainty', meanTotalUncrt) # saveImg = '{}/{}_{}_{}-{}.png'.format(globalVar['figPath'], serviceName, keyInfo, 'totalUncertainty', dtYear) # rtnInfo = makeMapPlot(lon2D, lat2D, totalUncrt[keyInfo].values, mainTitle, saveImg, None) # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo)) # # totalUncrt['sum'] = totalUncrt['land'] + totalUncrt['intl_bunker'] # # keyInfo = 'sum' # meanTotalUncrt = np.nanmean(totalUncrt[keyInfo].values) # mainTitle = '[{}] {} {} ({:.2f})'.format(dtYear, keyInfo, 'total uncertainty', meanTotalUncrt) # saveImg = '{}/{}_{}_{}-{}.png'.format(globalVar['figPath'], serviceName, keyInfo, 'totalUncertainty', dtYear) # rtnInfo = makeMapPlot(lon2D, lat2D, totalUncrt[keyInfo].values, mainTitle, saveImg, None) # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo)) # # # dsDataL3 = totalUncrt.to_dataframe().reset_index() # dsDataL4 = dsDataL3.merge(posDataL1, how='left', left_on=['lat', 'lon'], right_on=['lat', 'lon']) # # dsDataL3.describe() # posDataL1.describe() # # # from global_land_mask import globe # # is_on_land = globe.is_land(dsDataL3['lon'], dsDataL3['lat']) # # # # try: # # totalUncrtTotal = dsDataL4.mean() # # totalUncrtLandSea = dsDataL4.groupby(by=['landSea']).mean() # # totalUncrtCont = dsDataL4.groupby(by=['cont']).mean() # # # # emissionTotal = 
dsDataL4.mean()['mean'] # # emissionLandSea = dsDataL4.groupby(by=['landSea']).mean()['mean'] # # emissionCont = dsDataL4.groupby(by=['cont']).mean()['mean'] # # # # dict = { # # 'year': [dtYear] # # , 'key': [keyInfo] # # , 'rltvUncrt total': [rltvUncrtTotal] # # , 'rltvUncrt land': [rltvUncrtLandSea['land']] # # , 'rltvUncrt sea': [rltvUncrtLandSea['sea']] # # , 'rltvUncrt Africa': [rltvUncrtCont['Africa']] # # , 'rltvUncrt Antarctica': [rltvUncrtCont['Antarctica']] # # , 'rltvUncrt Asia': [rltvUncrtCont['Asia']] # # , 'rltvUncrt Australia': [rltvUncrtCont['Australia']] # # , 'rltvUncrt Europe': [rltvUncrtCont['Europe']] # # , 'rltvUncrt NorthAmerica': [rltvUncrtCont['NorthAmerica']] # # , 'rltvUncrt SouthAmerica': [rltvUncrtCont['SouthAmerica']] # # } # # # # statData = statData.append(pd.DataFrame.from_dict(dict)) # # # # # # mainTitle = '[{}] {} {}'.format(dtYear, keyInfo, 'emission') # # saveImg = '{}/{}_{}_{}-{}.png'.format(globalVar['figPath'], serviceName, keyInfo, 'emission', dtYear) # # rtnInfo = makeMapPlot(lon2D, lat2D, mean2D['emission'].values, mainTitle, saveImg, True) # # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo)) # # # ******************************************************* # # 육/해상, 대륙별 배출량 # # ******************************************************* # keyList = ['total', 'Power', 'Industry', 'Residential', 'GroundTransportation', 'InternationalAviation', 'InternationalShipping', 'DomesticAviation'] # # statData = pd.DataFrame() # for i, keyInfo in enumerate(keyList): # # # dtYear = 2019 # for dtYear in range(2018, 2022): # log.info("[CHECK] keyInfo : {}".format(keyInfo)) # log.info("[CHECK] dtYear : {}".format(dtYear)) # # inpFilePattern = '{}_{}_{}*.nc'.format(serviceName, keyInfo, dtYear) # inpFile = '{}/{}'.format(globalVar['outPath'], inpFilePattern) # log.info("[CHECK] inpFile : {}".format(inpFile)) # # fileList = sorted(glob.glob(inpFile)) # log.info('[CHECK] fileList : {}'.format(fileList)) # if (len(fileList) < 1): continue # # dsData = xr.open_mfdataset(fileList) # # log.info('[CHECK] dsData : {}'.format(dsData)) # # time1D = dsData['time'].values # lon1D = dsData['lon'].values # lat1D = dsData['lat'].values # lon2D, lat2D = np.meshgrid(lon1D, lat1D) # # # 결측값 처리 # dsData = xr.where((dsData == 0), np.nan, dsData) # # # ***************************************************************************** # # 위/경도에 따른 통계 계산 # # ***************************************************************************** # cnt2D = dsData.count(['time']) # mean2D = dsData.mean(['time']) # sd2D = dsData.std(['time']) # sum2D = dsData.sum(['time']) # # # cntVal = np.nanmean(cnt2D['emission']) # # log.info('[CHECK] cntVal : {}'.format(cntVal)) # # # # sumVal = np.nanmean(sum2D['emission']) # # log.info('[CHECK] sumVal : {}'.format(sumVal)) # # # # meanVal = np.nanmean(mean2D['emission']) # # log.info('[CHECK] meanVal : {}'.format(meanVal)) # # # # sdVal = np.nanmean(sd2D['emission']) # # log.info('[CHECK] sdVal : {}'.format(sdVal)) # # # # ***************************************************************************** # # 확장/상대 불확도 계산 # # ***************************************************************************** # # 자유도 # df = len(time1D) # # # t값 # tVal = t(df) # # # 신뢰구간 95%에 대한 t값 # t025 = tVal.ppf(0.975) # # # 신뢰구간 95% 불확실성 범위 # # leftConf = mean2D - t025 * (sd2D / np.sqrt(df)) # # rightConf = mean2D + t025 * (sd2D / np.sqrt(df)) # # # 확장 불확도 # extndUncrt = t025 * (sd2D / np.sqrt(df)) # # # 상대 불확도 (%) # rltvUncrt = (extndUncrt * 100) / mean2D # # # NetCDF 생산 # dsDataL2 = 
xr.Dataset( # { # 'mean': (('lat', 'lon'), (mean2D['emission'].values).reshape(len(lat1D), len(lon1D))) # , 'count': (('lat', 'lon'), (cnt2D['emission'].values).reshape(len(lat1D), len(lon1D))) # , 'sd': (('lat', 'lon'), (sd2D['emission'].values).reshape(len(lat1D), len(lon1D))) # , 'sum': (('lat', 'lon'), (sum2D['emission'].values).reshape(len(lat1D), len(lon1D))) # , 'extndUncrt': (('lat', 'lon'), (extndUncrt['emission'].values).reshape(len(lat1D), len(lon1D))) # , 'rltvUncrt': (('lat', 'lon'), (rltvUncrt['emission'].values).reshape(len(lat1D), len(lon1D))) # } # , coords={ # 'lat': lat1D # , 'lon': lon1D # } # ) # # saveFile = '{}/{}_{}_{}_{}.nc'.format(globalVar['outPath'], serviceName, keyInfo, 'statData', dtYear) # os.makedirs(os.path.dirname(saveFile), exist_ok=True) # dsDataL2.to_netcdf(saveFile) # log.info('[CHECK] saveFile : {}'.format(saveFile)) # # dsDataL3 = dsDataL2.to_dataframe().reset_index() # dsDataL4 = dsDataL3.merge(posDataL1, how='left', left_on=['lat', 'lon'], right_on=['lat', 'lon']) # # try: # rltvUncrtTotal = dsDataL4.mean()['rltvUncrt'] # rltvUncrtLandSea = dsDataL4.groupby(by=['landSea']).mean()['rltvUncrt'] # rltvUncrtCont = dsDataL4.groupby(by=['cont']).mean()['rltvUncrt'] # # emissionTotal = dsDataL4.mean()['mean'] # emissionLandSea = dsDataL4.groupby(by=['landSea']).mean()['mean'] # emissionCont = dsDataL4.groupby(by=['cont']).mean()['mean'] # # dict = { # 'year': [dtYear] # , 'key': [keyInfo] # , 'rltvUncrt total': [rltvUncrtTotal] # , 'rltvUncrt land': [rltvUncrtLandSea['land']] # , 'rltvUncrt sea': [rltvUncrtLandSea['sea']] # , 'rltvUncrt Africa': [rltvUncrtCont['Africa']] # , 'rltvUncrt Antarctica': [rltvUncrtCont['Antarctica']] # , 'rltvUncrt Asia': [rltvUncrtCont['Asia']] # , 'rltvUncrt Australia': [rltvUncrtCont['Australia']] # , 'rltvUncrt Europe': [rltvUncrtCont['Europe']] # , 'rltvUncrt NorthAmerica': [rltvUncrtCont['NorthAmerica']] # , 'rltvUncrt SouthAmerica': [rltvUncrtCont['SouthAmerica']] # # , 'emission total': [emissionTotal] # , 'emission land': [emissionLandSea['land']] # , 'emission sea': [emissionLandSea['sea']] # , 'emission Africa': [emissionCont['Africa']] # , 'emission Antarctica': [emissionCont['Antarctica']] # , 'emission Asia': [emissionCont['Asia']] # , 'Australia': [emissionCont['Australia']] # , 'emission Europe': [emissionCont['Europe']] # , 'emission NorthAmerica': [emissionCont['NorthAmerica']] # , 'emission SouthAmerica': [emissionCont['SouthAmerica']] # } # # statData = statData.append(pd.DataFrame.from_dict(dict)) # except Exception as e: # log.error("Exception : {}".format(e)) # # # 시각화 # mainTitle = '[{}] {} {}'.format(dtYear, keyInfo, 'emission') # saveImg = '{}/{}_{}_{}-{}.png'.format(globalVar['figPath'], serviceName, keyInfo, 'emission', dtYear) # rtnInfo = makeMapPlot(lon2D, lat2D, mean2D['emission'].values, mainTitle, saveImg, True) # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo)) # # mainTitle = '[{}] {} {}'.format(dtYear, keyInfo, 'relative uncertainty') # saveImg = '{}/{}_{}_{}-{}.png'.format(globalVar['figPath'], serviceName, keyInfo, 'relativeUncertainty', dtYear) # rtnInfo = makeMapPlot(lon2D, lat2D, rltvUncrt['emission'].values, mainTitle, saveImg, None) # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo)) # # saveXlsxFile = '{}/{}_{}.xlsx'.format(globalVar['outPath'], serviceName, 'statData') # os.makedirs(os.path.dirname(saveXlsxFile), exist_ok=True) # statData.to_excel(saveXlsxFile, index=False) # log.info("[CHECK] saveXlsxFile : {}".format(saveXlsxFile)) except Exception as e: log.error("Exception : 
{}".format(e)) raise e finally: log.info('[END] {}'.format("exec"))
def get_era5_daily(var, date_from_arg, date_to_arg=None, reduce_func=None, cache_dir='era5', resample='1D'): """ Download and return an variable from the European Centre for Medium Range Weather Forecasts (ECMWF) global climate reanalysis product (ERA5) for a defined time window. Parameters ---------- var : string Name of the ERA5 climate variable to download, e.g "air_temperature_at_2_metres" date_from_arg: string or datetime object Starting date of the time window. date_to_arg: string or datetime object End date of the time window. If not supplied, set to be the same as starting date. reduce_func: numpy function lets you specify a function to apply to each day's worth of data. The default is np.mean, which computes daily average. To get a sum, use np.sum. cache_dir: sting Path to save downloaded ERA5 data. The path will be created if not already exists. The default is 'era5'. resample: string Temporal resampling frequency to be used for xarray's resample function. The default is '1D', which is daily. Since ERA5 data is provided as one file per month, maximum resampling period is '1M'. Returns ------- A lazy-loaded xarray dataset containing an ERA5 variable for the selected time window. """ # Massage input data assert var in ERA5_VARS, "var must be one of [{}] (got {})".format( ','.join(ERA5_VARS), var) if not os.path.exists(cache_dir): os.mkdir(cache_dir) if reduce_func is None: reduce_func = np.mean if type(date_from_arg) == str: date_from_arg = parse(date_from_arg) if type(date_to_arg) == str: date_to_arg = parse(date_to_arg) if date_to_arg is None: date_to_arg = date_from_arg # Make sure our dates are in the correct order from_date = min(date_from_arg, date_to_arg) to_date = max(date_from_arg, date_to_arg) # Download ERA5 files to local cache if they don't already exist client = None # Boto client (if needed) local_files = [] # Will hold list of local filenames Y, M = from_date.year, from_date.month # Loop vars loop_end = to_date.year * 12 + to_date.month # Loop sentinel while Y * 12 + M <= loop_end: local_file = os.path.join( cache_dir, "{Y:04}_{M:02}_{var}.nc".format(Y=Y, M=M, var=var)) data_key = "{Y:04}/{M:02}/data/{var}.nc".format(Y=Y, M=M, var=var) if not os.path.isfile( local_file ): # check if file already exists (TODO: move to temp, catch failed download) if client is None: client = boto3.client('s3', config=botocore.client.Config( signature_version=botocore.UNSIGNED)) client.download_file('era5-pds', data_key, local_file) local_files.append(local_file) if M == 12: Y += 1 M = 1 else: M += 1 # Load and merge the locally-cached ERA5 data from the list of filenames date_slice = slice(str(from_date.date()), str(to_date.date( ))) # I do this to INCLUDE the whole end date, not just 00:00 def prepro(ds): if 'time0' in ds.dims: ds = ds.rename({"time0": "time"}) if 'time1' in ds.dims: ds = ds.rename({ "time1": "time" }) # This should INTENTIONALLY error if both times are defined ds = ds[[var]] output = ds.sel(time=date_slice).resample( time=resample).reduce(reduce_func) output.attrs = ds.attrs for v in output.data_vars: output[v].attrs = ds[v].attrs return output return xr.open_mfdataset(local_files, combine='by_coords', compat='equals', preprocess=prepro, parallel=True)
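# Usage sketch for get_era5_daily; the variable name comes from the docstring
# above, while the dates and cache directory are illustrative placeholders:
t2m_daily = get_era5_daily("air_temperature_at_2_metres",
                           "2020-01-01", "2020-01-31",
                           reduce_func=np.mean,
                           cache_dir="era5",
                           resample="1D")
print(t2m_daily)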
# open L2 products
print('Loading data\n')


def preprocess(ds):
    ds['time'] = pd.to_datetime(
        np.array([attributes[ds.attrs['source_product']]['time_coverage_start']])).values
    return ds


DS = xr.open_mfdataset([filename.replace('L2', 'L3')
                        for filename in L2_files_urls
                        if exists(filename.replace('L2', 'L3'))],
                       combine='nested',
                       concat_dim='time',
                       preprocess=preprocess,
                       chunks={'time': 2000})

DS = DS.sortby('time')

# filter pixels
if args.shp is not None:
    print('\nApplying shapefile\n')
    mask = make_country_mask(args.shp, DS.longitude, DS.latitude)
    for column in [column_name for column_name in list(DS.variables)
                   if DS[column_name].dims == ('time', 'latitude', 'longitude')]:
        DS[column] = DS[column].where(mask)
def nc_to_xr_dataset(liste_fichiers_nc, verbose=1, patch_xr_open_mfdataset=True):
    """Read a list of netCDF files and return an xarray Dataset."""
    assert (isinstance(liste_fichiers_nc, list))
    assert (len(liste_fichiers_nc) > 0)

    # timing
    t00 = time.time()

    # print the list of processed files if verbose
    if verbose:
        print('opening the following files:')
        for f in liste_fichiers_nc:
            print(f)

    # workaround for the slowness of xr.open_mfdataset
    #
    # xr.open_mfdataset takes roughly 10x longer than nc.MFDataset. The time
    # can be cut in half by using decode_cf=False and calling xr.decode_cf()
    # afterwards, but the date conversion does not work if the time units are
    # not identical across all files.
    #
    # the workaround uses nc.MFDataset to compute the dates and imposes them
    # on the dataset returned by xr.open_mfdataset with decode_cf=False
    #
    if patch_xr_open_mfdataset:
        # extract the dates with netCDF4
        ds = nc.MFDataset(liste_fichiers_nc, 'r')
        vartime = nc.MFTime(ds.variables['time'])
        dates = cftime.num2date(vartime[:], vartime.units, vartime.calendar)

        # check whether any variable is of type int16
        def _has_int16(ds):
            for nv in ds.variables:
                if ds.variables[nv].dtype == 'int16':
                    return True
            return False

        has_int16 = _has_int16(ds)
        ds.close()

        # open the files without decoding time
        #
        # branch depending on whether the dataset contains int16 variables
        #
        # with a short/int16 field, xr.decode_cf does not apply the
        # scale_factor and add_offset
        #
        # todo: raise an issue about this
        if has_int16:
            # slower option, but it decodes int16
            print('*** int16 variables present ***')
            ds = xr.open_mfdataset(liste_fichiers_nc,
                                   decode_times=False,
                                   combine='nested',
                                   concat_dim='time')
        else:
            # faster option, but it does not decode int16
            ds = xr.open_mfdataset(liste_fichiers_nc,
                                   decode_cf=False,
                                   combine='nested',
                                   concat_dim='time',
                                   coords='minimal',
                                   compat='override',
                                   data_vars='minimal')
            ds = xr.decode_cf(ds)

        # drop the time variable to make sure no attributes remain that could
        # cause problems later
        ds = ds.drop('time')

        # set the time variable to the dates computed above
        ds['time'] = ('time', dates)

        # add the units and calendar attributes
        ds.time.attrs['units'] = vartime.units
        ds.time.attrs['calendar'] = vartime.calendar
    else:
        ds = xr.open_mfdataset(liste_fichiers_nc)

    if verbose:
        print(f'opened {len(liste_fichiers_nc)} files in {time.time() - t00:6.2f}s')

    return ds
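# Usage sketch for nc_to_xr_dataset (assumes the glob module is imported);
# the directory below is an illustrative placeholder for a set of netCDF
# files that share a 'time' dimension:
files = sorted(glob.glob("/path/to/archive/*.nc"))
ds = nc_to_xr_dataset(files, verbose=1, patch_xr_open_mfdataset=True)
print(ds.time)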
if pressure_adjust:
    ds = get_pressure_coord_fields(case,
                                   varlist,
                                   from_time,
                                   to_time,
                                   history_fld,
                                   model=model)
    return ds
else:
    fl = []  # initialize here so the length check below also works when varlist is None
    if varlist is not None:
        vl_lacking = []
        for var in varlist:
            fn = get_filename_ng_field(var, model, case, from_time, to_time)
            if os.path.isfile(fn):
                fl.append(fn)
            else:
                vl_lacking.append(var)
    else:
        vl_lacking = varlist

    ds = xr_import_NorESM(case, vl_lacking, from_time, to_time,
                          path=raw_data_path,
                          model=model,
                          history_fld=history_fld,
                          comp=comp, chunks=chunks)
    ds = xr_fix(ds, model_name=model)

    if len(fl) > 0:
        ds_f_file = xr.open_mfdataset(fl, combine='by_coords')
        ds = xr.merge([ds, ds_f_file])

    return ds
'/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_199801-199812.nc', '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_199901-199912.nc', '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200001-200012.nc', '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200101-200112.nc', '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200201-200212.nc', '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200301-200312.nc', '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200401-200412.nc', '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200501-200512.nc', '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200601-200612.nc', '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200701-200712.nc', '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200801-200812.nc', '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200901-200912.nc', '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_201001-201012.nc', '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_201101-201112.nc' ] elif variable == "tas": pathList = [ '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/tas/tas_Amon_reanalysis_IFS-Cy31r2_197901-201712.nc' ] else: raise Exception(f"Unknown variable: {variable}") start = time.time() dset: xr.Dataset = xr.open_mfdataset(pathList, data_vars=[variable], parallel=True) var: xr.Variable = dset.variables.get(variable) print( f"Opened dataset, shape: {var.shape}, completed in {str(time.time() - start)} seconds" )
def ocn_modelvsobs(config, field): """ Plots a comparison of ACME/MPAS output to SST or MLD observations Parameters ---------- config : instance of MpasAnalysisConfigParser Contains configuration options field : {'sst', 'sss', 'mld'} The name of a field to be analyized Authors ------- Luke Van Roekel, Xylar Asay-Davis, Milena Veneziani Last Modified ------------- 03/23/2017 """ # perform common setup for the task namelist, runStreams, historyStreams, calendar, streamMap, \ variableMap, plotsDirectory = setup_task(config, componentName='ocean') simulationStartTime = get_simulation_start_time(runStreams) # get a list of timeSeriesStats output files from the streams file, # reading only those that are between the start and end dates startDate = config.get('climatology', 'startDate') endDate = config.get('climatology', 'endDate') streamName = historyStreams.find_stream(streamMap['timeSeriesStats']) inputFiles = historyStreams.readpath(streamName, startDate=startDate, endDate=endDate, calendar=calendar) print 'Reading files {} through {}'.format(inputFiles[0], inputFiles[-1]) observationsDirectory = build_config_full_path( config, 'oceanObservations', '{}Subdirectory'.format(field)) mainRunName = config.get('runs', 'mainRunName') overwriteMpasClimatology = config.getWithDefault( 'climatology', 'overwriteMpasClimatology', False) overwriteObsClimatology = config.getWithDefault('oceanObservations', 'overwriteObsClimatology', False) try: restartFileName = runStreams.readpath('restart')[0] except ValueError: raise IOError('No MPAS-O restart file found: need at least one ' 'restart file for ocn_modelvsobs calculation') sectionName = 'regridded{}'.format(field.upper()) outputTimes = config.getExpression(sectionName, 'comparisonTimes') # get a list of regridded observations files and check if they exist. If # they are all there, we don't have to do anything else with the # observations obsFileNames = \ {'mld': "{}/holtetalley_mld_climatology.nc".format( observationsDirectory), 'sst': "{}/MODEL.SST.HAD187001-198110.OI198111-201203.nc".format( observationsDirectory), 'sss': "{}/Aquarius_V3_SSS_Monthly.nc".format( observationsDirectory)} obsFileName = obsFileNames[field] buildObsClimatologies = overwriteObsClimatology for months in outputTimes: (climatologyFileName, regriddedFileName) = \ climatology.get_observation_climatology_file_names( config=config, fieldName=field, monthNames=months, componentName='ocean', gridFileName=obsFileName, latVarName='lat', lonVarName='lon') if not os.path.exists(regriddedFileName): buildObsClimatologies = True break varList = [field] if field == 'mld': iselvals = None if buildObsClimatologies: # Load MLD observational data dsObs = xr.open_mfdataset(obsFileName) # Increment month value to be consistent with the model output dsObs.iMONTH.values += 1 # Rename the dimensions to be consistent with other obs. data sets dsObs.rename( { 'month': 'calmonth', 'lat': 'latCoord', 'lon': 'lonCoord' }, inplace=True) dsObs.rename({ 'iMONTH': 'month', 'iLAT': 'lat', 'iLON': 'lon' }, inplace=True) # set the coordinates now that the dimensions have the same names dsObs.coords['lat'] = dsObs['latCoord'] dsObs.coords['lon'] = dsObs['lonCoord'] dsObs.coords['month'] = dsObs['calmonth'] # Reorder dataset for consistence with other obs. 
data sets dsObs = dsObs.transpose('month', 'lat', 'lon') obsFieldName = 'mld_dt_mean' # Set appropriate MLD figure labels observationTitleLabel = \ "Observations (HolteTalley density threshold MLD)" outFileLabel = "mldHolteTalleyARGO" unitsLabel = 'm' elif field == 'sst': iselvals = {'nVertLevels': 0} climStartYear = config.getint('oceanObservations', 'sstClimatologyStartYear') climEndYear = config.getint('oceanObservations', 'sstClimatologyEndYear') timeStart = datetime.datetime(year=climStartYear, month=1, day=1) timeEnd = datetime.datetime(year=climEndYear, month=12, day=31) if climStartYear < 1925: period = 'pre-industrial' else: period = 'present-day' if buildObsClimatologies: dsObs = xr.open_mfdataset(obsFileName) dsTimeSlice = dsObs.sel(time=slice(timeStart, timeEnd)) monthlyClimatology = dsTimeSlice.groupby('time.month').mean('time') dsObs = monthlyClimatology.transpose('month', 'lat', 'lon') obsFieldName = 'SST' # Set appropriate figure labels for SST observationTitleLabel = \ "Observations (Hadley/OI, {} {:04d}-{:04d})".format(period, climStartYear, climEndYear) outFileLabel = "sstHADOI" unitsLabel = r'$^o$C' elif field == 'sss': iselvals = {'nVertLevels': 0} timeStart = datetime.datetime(2011, 8, 1) timeEnd = datetime.datetime(2014, 12, 31) if buildObsClimatologies: dsObs = xr.open_mfdataset(obsFileName) dsTimeSlice = dsObs.sel(time=slice(timeStart, timeEnd)) # The following line converts from DASK to numpy to supress an odd # warning that doesn't influence the figure output dsTimeSlice.SSS.values monthlyClimatology = dsTimeSlice.groupby('time.month').mean('time') # Rename the observation data for code compactness dsObs = monthlyClimatology.transpose('month', 'lat', 'lon') obsFieldName = 'SSS' observationTitleLabel = "Observations (Aquarius, 2011-2014)" outFileLabel = 'sssAquarius' unitsLabel = 'PSU' ds = open_multifile_dataset(fileNames=inputFiles, calendar=calendar, config=config, simulationStartTime=simulationStartTime, timeVariableName='Time', variableList=varList, iselValues=iselvals, variableMap=variableMap, startDate=startDate, endDate=endDate) changed, startYear, endYear = \ climatology.update_start_end_year(ds, config, calendar) monthlyClimatology = climatology.compute_monthly_climatology(ds, calendar) mpasMappingFileName = climatology.write_mpas_mapping_file( config=config, meshFileName=restartFileName) if buildObsClimatologies: obsMappingFileName = \ climatology.write_observations_mapping_file( config=config, componentName='ocean', fieldName=field, gridFileName=obsFileName, latVarName='lat', lonVarName='lon') else: obsMappingFileName = None (colormapResult, colorbarLevelsResult) = setup_colormap(config, sectionName, suffix='Result') (colormapDifference, colorbarLevelsDifference) = setup_colormap(config, sectionName, suffix='Difference') # Interpolate and compute biases for months in outputTimes: monthValues = constants.monthDictionary[months] (climatologyFileName, regriddedFileName) = \ climatology.get_mpas_climatology_file_names(config=config, fieldName=field, monthNames=months) if overwriteMpasClimatology or not os.path.exists(climatologyFileName): seasonalClimatology = climatology.compute_seasonal_climatology( monthlyClimatology, monthValues, field) # write out the climatology so we can interpolate it with # interpolate.remap seasonalClimatology.to_netcdf(climatologyFileName) interpolate.remap(inFileName=climatologyFileName, outFileName=regriddedFileName, inWeightFileName=mpasMappingFileName, sourceFileType='mpas', overwrite=overwriteMpasClimatology) ncFile = 
netCDF4.Dataset(regriddedFileName, mode='r') modelOutput = ncFile.variables[field][:] lons = ncFile.variables["lon"][:] lats = ncFile.variables["lat"][:] ncFile.close() lonTarg, latTarg = np.meshgrid(lons, lats) # now the observations (climatologyFileName, regriddedFileName) = \ climatology.get_observation_climatology_file_names( config=config, fieldName=field, monthNames=months, componentName='ocean', gridFileName=obsFileName, latVarName='lat', lonVarName='lon') if buildObsClimatologies: if (overwriteObsClimatology or (not os.path.exists(climatologyFileName) and not os.path.exists(regriddedFileName))): seasonalClimatology = climatology.compute_seasonal_climatology( dsObs, monthValues, obsFieldName) # Either we want to overwite files or neither the climatology # nor its regridded counterpart exist. Write out the # climatology so we can interpolate it with interpolate.remap seasonalClimatology.to_netcdf(climatologyFileName) if obsMappingFileName is None: # no remapping is needed regriddedFileName = climatologyFileName else: interpolate.remap(inFileName=climatologyFileName, outFileName=regriddedFileName, inWeightFileName=obsMappingFileName, sourceFileType='latlon', overwrite=overwriteObsClimatology) # read in the results from the remapped files ncFile = netCDF4.Dataset(regriddedFileName, mode='r') observations = ncFile.variables[obsFieldName][:] ncFile.close() bias = modelOutput - observations outFileName = "{}/{}_{}_{}_years{:04d}-{:04d}.png".format( plotsDirectory, outFileLabel, mainRunName, months, startYear, endYear) title = "{} ({}, years {:04d}-{:04d})".format(field.upper(), months, startYear, endYear) plot_global_comparison(config, lonTarg, latTarg, modelOutput, observations, bias, colormapResult, colorbarLevelsResult, colormapDifference, colorbarLevelsDifference, fileout=outFileName, title=title, modelTitle="{}".format(mainRunName), obsTitle=observationTitleLabel, diffTitle="Model-Observations", cbarlabel=unitsLabel)
urls = re.findall(r'href=[\'"]?([^\'" >]+)', datasets)
x = re.findall(r'(ooi/.*?.nc)', datasets)

# filter the matches without mutating the list while iterating over it
x = [i for i in x if i.endswith('.nc')]


def _has_numeric_suffix(i):
    try:
        float(i[-4])
        return True
    except ValueError:
        return False


x = [i for i in x if _has_numeric_suffix(i)]

datasets = [os.path.join(tds_url, i) for i in x]
datasets

# Load all files into a single xarray dataset:
ds = xr.open_mfdataset(datasets)
ds = ds.swap_dims({'obs': 'time'})
ds = ds.chunk({'time': 100})
ds = ds.sortby('time')  # data from different deployments can overlap so we want to sort all data by time stamp.
ds

# Create dataframe
df = ds.to_dataframe()

# comment these lines back in, if you want to check the sampling frequency (takes a while)
# res = (pd.Series(df.index[1:]) - pd.Series(df.index[:-1])).value_counts()
# res

# Choose a variable to examine:
def load(self):
    # Load the raw data, i.e. an array of each image.
    # Note: the original also called
    #     xr.merge([xr.open_dataset(f) for f in glob.glob(self.path + '/*.nc')])
    # but discarded the result; open_mfdataset below already merges the files
    # from the given path.
    ds = xr.open_mfdataset(self.path + '/*.nc')   # load the files as one dataset
    self.raw_data = np.array(ds.variables[self.data_key])
    self.dataset = [self.raw_data]
import xarray as xr

#url="http://10.10.11.103:5000/thredds/dodsC/historical/2017-03/*.grb2"
#url="http://10.10.11.103:5000/thredds/catalog/historical/2017-03/catalog.html?dataset=historical_grib/2017-03/2017-03-25-06.grb2"
#url="http://10.10.11.103:5000/thredds/catalog/historical/2017-03/catalog.html?dataset=historical_grib/2017-03/2017-03-25-18.grb2"
#url="http://10.10.11.103:5000/thredds/dodsC/historical/2017-03/2017-03-25-18.grb2.html"
#url="http://10.10.11.103:5000/thredds/fileServer/historical/2017-03/2017-03-25-18.grb2"
url = 'http://10.10.11.103:5000/thredds/dodsC/historical/2017-03/2017-03-25-18.grb2?Temperature_surface'
#ds=xr.open_mfdataset(url,engine='pydap')

print(url)
ds = xr.open_dataset(url)
print(ds)
ds2 = xr.open_mfdataset([url])
print(ds2)
    # (Fragment of the preprocess() function that is passed to open_mfdataset below.)
    # Check if it is the most recent forecast; if so, grab the 48 hr forecast
    if (x.datetime[-1] == c_for_end):
        x = x.isel(datetime=np.arange(1, 47))
    # Otherwise only grab 24 hours
    else:
        # Grab forecast hours 02 - 25(01).  We can't use the first 01 forecast
        # hour, because radiation vars were saved accumulated, thus we don't
        # have the first value.  This is fine; we just use forecast hours
        # shifted later by 1 hour.
        x = x.isel(datetime=np.arange(1, 25))
    x.load()
    return x


ds = xr.open_mfdataset(all_files,
                       concat_dim='datetime',
                       engine='netcdf4',
                       preprocess=lambda x: preprocess(x))

# Adjust to local time zone (i.e. from UTC to MST, local_time_offset should = -7)
ds['datetime'] = pd.to_datetime(
    ds.datetime.values) + datetime.timedelta(hours=local_time_offset)

# Move to ascii dir
if not os.path.isdir(ascii_dir):
    os.mkdir(ascii_dir)
os.chdir(ascii_dir)

# Extract grid cells we want to export
print('Extracting cells within lat/long box')
ds = ds.where((ds.gridlat_0 > lat_r[0]) & (ds.gridlat_0 < lat_r[1]) &
              (ds.gridlon_0 > lon_r[0]) & (ds.gridlon_0 < lon_r[1]),
              )  # the remaining argument to .where() is truncated in the original snippet
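# Minimal, self-contained sketch (not from the original script; file names, the
# variable 't2m', and the synthetic data are invented) of how a preprocess callable
# plugs into open_mfdataset: it is applied to each file's Dataset before the files
# are concatenated, here dropping the first (unusable) step of every file.
import os
import tempfile

import numpy as np
import xarray as xr

tmpdir = tempfile.mkdtemp()
paths = []
for day in (1, 2):
    ds = xr.Dataset(
        {"t2m": ("time", np.random.rand(4))},
        coords={"time": np.arange(4) + (day - 1) * 4},
    )
    path = os.path.join(tmpdir, f"forecast_{day}.nc")
    ds.to_netcdf(path)
    paths.append(path)

def drop_first_step(ds):
    # analogous to the snippet above: skip the first forecast hour of each file
    return ds.isel(time=slice(1, None))

combined = xr.open_mfdataset(paths, combine="nested", concat_dim="time",
                             preprocess=drop_first_step)
print(combined.dims)   # time has 6 entries: (4 - 1) steps from each of the 2 files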
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 22 11:00:05 2019

@author: julia_wagemann
"""

import xarray as xr
from era5_in_gee_functions import createFileList
import time

execTime = time.time()

directory = '/Volumes/FREECOM HDD/era5_tp/nc/1979/'
month_list = [
    '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'
]

for i in month_list:
    fileList = createFileList(directory, './era5_tp_1979_' + i + '*')
    fileList.sort()
    print(fileList)
    array = xr.open_mfdataset(fileList)
    outFileName = '/Volumes/FREECOM HDD/era5_tp/nc/monthly/1979/era5_tp_1979_' + i + '.nc'
    array.resample(time='1M').sum().to_netcdf(outFileName, mode='w', compute=True)

print("The script took {0} seconds!".format(time.time() - execTime))
def __init__( self, out_var=None, out_mean=None, forecast_dir=None, forcings_dir=None, reanalysis_dir=None, transform=None, hparams=None, **kwargs, ): """ Constructor for the ModelDataset class :param out_var: Variance of the output variable, defaults to None :type out_var: float, optional :param out_mean: Mean of the output variable, defaults to None :type out_mean: float, optional :param forecast_dir: The directory containing the FWI-Forecast data, defaults \ to None :type forecast_dir: str, optional :param forcings_dir: The directory containing the FWI-Forcings data, defaults \ to None :type forcings_dir: str, optional :param reanalysis_dir: The directory containing the FWI-Reanalysis data, \ to defaults to None :type reanalysis_dir: str, optional :param transform: Custom transform for the input variable, defaults to None :type transform: torch.transforms, optional :param hparams: Holds configuration values, defaults to None :type hparams: Namespace, optional """ super().__init__( out_var=out_var, out_mean=out_mean, forecast_dir=forecast_dir, forcings_dir=forcings_dir, reanalysis_dir=reanalysis_dir, transform=transform, hparams=hparams, **kwargs, ) # Number of input and prediction days assert (self.hparams.in_days > 0 and self.hparams.out_days > 0 ), "The number of input and output days must be > 0." self.n_input = self.hparams.in_days self.n_output = self.hparams.out_days # Generate the list of all valid files in the specified directories get_inp_time = (lambda x: int(x.split("_20")[1][:2]) * 10000 + int( x.split("_20")[1][2:].split("_1200_hr_")[0][:2]) * 100 + int( x.split("_20")[1][2:].split("_1200_hr_")[0][2:])) inp_files = sorted( sorted(glob(f"{forcings_dir}/ECMWF_FO_20*.nc")), # Extracting the month and date from filenames to sort by time. key=get_inp_time, ) get_out_time = (lambda x: int(x[-24:-22]) * 10000 + int(x[-22:-20]) * 100 + int(x[-20:-18])) out_files = sorted( glob(f"{reanalysis_dir}/ECMWF_FWI_20*_1200_hr_fwi_e5.nc"), # Extracting the month and date from filenames to sort by time. key=get_out_time, ) # Loading list of test-set files if self.hparams.test_set: with open(self.hparams.test_set, "rb") as f: test_out = pickle.load(f) time_indices = set(map(get_inp_time, inp_files)) inp_index = { k: [x for x in inp_files if get_inp_time(x) == k] for k in time_indices } test_inp = sum( [ inp_index[t] for f in test_out for t in (get_out_time(f), ) ], [], ) # Handling the input and output files using test-set files if not self.hparams.dry_run and "test_inp" in locals(): if hasattr(self.hparams, "eval"): inp_files = test_inp out_files = test_out else: inp_files = list(set(inp_files) - set(test_inp)) out_files = list(set(out_files) - set(test_out)) if self.hparams.dry_run: inp_files = inp_files[:8 * (self.n_output + self.n_input)] out_files = out_files[:2 * (self.n_output + self.n_input)] # Align the output files with the input files offset = len(out_files) - len(inp_files) // 4 out_files = out_files[offset:] if offset > 0 else out_files # Checking for valid date format out_invalid = lambda x: not (1 <= int(x[-22:-20]) <= 12 and 1 <= int(x[ -20:-18]) <= 31) assert not (sum([out_invalid(x) for x in out_files ])), ("Invalid date format for output file(s)." "The dates should be formatted as YYMMDD.") self.out_files = out_files inp_invalid = lambda x: not ( 1 <= int(x.split("_20")[1][2:].split("_1200_hr_")[0][:2]) <= 12 and 1 <= int(x.split("_20")[1][2:].split("_1200_hr_")[0][2:]) <= 31) assert not (sum([inp_invalid(x) for x in inp_files ])), ("Invalid date format for input file(s)." 
"The dates should be formatted as YYMMDD.") self.inp_files = inp_files # Consider only ground truth and discard forecast values preprocess = lambda x: x.isel(time=slice(0, 1)) with xr.open_mfdataset( inp_files, preprocess=preprocess, engine="h5netcdf", parallel=False if self.hparams.dry_run else True, combine="by_coords", ) as ds: self.input = ds.load() with xr.open_mfdataset( out_files, preprocess=preprocess, engine="h5netcdf", parallel=False if self.hparams.dry_run else True, combine="by_coords", ) as ds: self.output = ds.load() # Ensure timestamp matches for both the input and output assert self.output.fwi.time.min(skipna=True) == self.input.rh.time.min( skipna=True) assert self.output.fwi.time.max(skipna=True) == self.input.rh.time.max( skipna=True) assert len(self.input.time) == len(self.output.time) log.info( f"Start date: {self.output.fwi.time.min(skipna=True)}", f"\nEnd date: {self.output.fwi.time.max(skipna=True)}", ) # Loading the mask for output variable if provided as generating from NaN mask self.mask = (torch.nn.functional.max_pool2d( (~torch.from_numpy( np.load(self.hparams.mask) if self.hparams.mask else ~np.isnan(self.output["fwi"][0].values))).unsqueeze(0).float(), kernel_size=3, stride=1, padding=1, ).squeeze() == 0).cuda() # Mean of output variable used for bias-initialization. self.out_mean = out_mean if out_mean else 15.292629 # Variance of output variable used to scale the training loss. self.out_var = (out_var if out_var else 18.819166 if self.hparams.loss == "mae" else 414.2136 if self.hparams.mask else 621.65894) # Input transforms including mean and std normalization self.transform = ( transform if transform else transforms.Compose([ transforms.ToTensor(), # Mean and standard deviation stats used to normalize the input data # to the mean of zero and standard deviation of one. transforms.Normalize( [ x for i in range(self.n_input) for x in ( 72.47605, 279.96622, 2.4548044, 6.4765906, ) ], [ x for i in range(self.n_input) for x in ( 17.7426847, 21.2802498, 6.3852794, 3.69688883, ) ], ), ]))
# In[6]:

#############################################################
# Load in Data
#############################################################
E = ed.EsioData.load()

# In[7]:

# Load obs already aggregated by region
import timeit
ds_obs = xr.open_mfdataset(E.obs['NSIDC_0081']['sipn_nc'] + '_yearly_agg/*.nc',
                           concat_dim='time')
ds_obs = ds_obs.Extent

# use smoothed obs to compute damped anom
# 10 days is assumed but would be better to embed this smoothing window in alpha
# and then use it here
ds_obs_smooth = ds_obs.rolling(time=10, min_periods=1, center=True).mean()

print(ds_obs.region_names.values)

# In[8]:

# Load obs already aggregated by region; these are also computed after smoothing
# the obs with a 10 day running mean
ds_climo = xr.open_mfdataset(E.obs['NSIDC_0079']['sipn_nc'] + '_yearly_agg_climatology/*.nc',
                             concat_dim='time')
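# Quick illustration (synthetic one-dimensional series, not the sea-ice data above)
# of the centred rolling mean used for smoothing: min_periods=1 keeps the window
# valid at the series edges.
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(10.0), dims="time")
smooth = da.rolling(time=3, min_periods=1, center=True).mean()
print(smooth.values)   # edge values are averages of the 2 available points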
def main(config_path): config = {} with open(config_path) as f_config: config = json.load(f_config) doms = sorted( set( map(lambda x: x.split('_')[0], os.listdir(os.path.join(config['output-wrf']))))) ds = [ xr.open_mfdataset(os.path.join(config['output-wrf'], '{}*.nc'.format(dom)), concat_dim='time') for dom in doms ] extents = { 'ireland': [-12, -3, 51, 55.5], 'europe': [ ds[0].lon.min(), ds[0].lon.max(), ds[0].lat.min(), ds[0].lat.max() - 1 ] } for z in zip(*map(lambda x: list(x.groupby('time')), ds)): for i, (t, d) in enumerate(z): is_fst = i == 0 is_lst = i == len(doms) - 1 t = pd.to_datetime(t) t_save = t.strftime('%Y%m%d%H%M') if is_fst: print(t) print('\t{} - temperature & pressure (Ireland)'.format(i)) figs = ['t2-p-ir_{}'.format(t_save)] plot2d(d.lon, d.lat, d.t2 - 273.15, fig=figs[-1], newfig=is_fst, t=t, levels=LEVELS['t2-ir'], norm=MidpointNormalize(midpoint=0), cmap=CMAP['t2'], extent=extents['ireland'], extend='both', label='$^o$C', title='Temperature and Pressure', colorbar=is_lst, config=config) if is_fst: plot2d(d.lon.loc[extents['ireland'][0]:extents['ireland'][1]], d.lat.loc[extents['ireland'][2]:extents['ireland'][3]], d.p_sl.sel(lon=slice(extents['ireland'][0], extents['ireland'][1]), lat=slice(extents['ireland'][2], extents['ireland'][3])) * 1e-2, fig=figs[-1], newfig=False, levels_n=10, what='contour', config=config) print('\t{} - temperature & pressure (Europe)'.format(i)) figs += ['t2-p-e_{}'.format(t_save)] plot2d(d.lon, d.lat, d.t2 - 273.15, fig=figs[-1], newfig=is_fst, t=t, levels=LEVELS['t2-e'], norm=MidpointNormalize(midpoint=0), cmap=CMAP['t2'], extent=extents['europe'], extend='both', label='$^o$C', title='Temperature and Pressure', colorbar=is_lst, config=config) if is_fst: plot2d(d.lon, d.lat, d.p_sl * 1e-2, fig=figs[-1], newfig=False, levels_n=20, what='contour', config=config) print('\t{} - rain (Ireland)'.format(i)) figs += ['rain-ir_{}'.format(t_save)] plot2d(d.lon, d.lat, d.rain, fig=figs[-1], newfig=is_fst, t=t, levels=LEVELS['rain'], cmap=CMAP['rain'], extent=extents['ireland'], label='mm/h', format='%.1f', title='Precipitation', colorbar=is_lst, config=config) print('\t{} - rain (Europe)'.format(i)) figs += ['rain-e_{}'.format(t_save)] plot2d(d.lon, d.lat, d.rain, fig=figs[-1], newfig=is_fst, t=t, levels=LEVELS['rain'], cmap=CMAP['rain'], extent=extents['europe'], label='mm/h', format='%.1f', title='Precipitation', colorbar=is_lst, config=config) print('\t{} - wind (Ireland)'.format(i)) step = 2 figs += ['wind-ir_{}'.format(t_save)] plot2d(d.lon, d.lat, xu.sqrt(d.u10**2 + d.v10**2), fig=figs[-1], newfig=is_fst, t=t, levels=LEVELS['wind'], extent=extents['ireland'], cmap=CMAP['wind'], label='m/s', format='%.0f', title='Wind speed & direction', colorbar=is_lst, config=config) if is_fst: plot2d(d.lon[::step], d.lat[::step], d.isel(lat=slice(None, None, step), lon=slice(None, None, step)), fig=figs[-1], newfig=False, t=t, what='quiver', config=config) print('\t{} - wind (Europe)'.format(i)) step = 4 figs += ['wind-e_{}'.format(t_save)] plot2d(d.lon, d.lat, xu.sqrt(d.u10**2 + d.v10**2), fig=figs[-1], newfig=is_fst, t=t, levels=LEVELS['wind'], extent=extents['europe'], cmap=CMAP['wind'], label='m/s', format='%.0f', title='Wind speed & direction', colorbar=is_lst, config=config) if is_fst: plot2d(d.lon[::step], d.lat[::step], d.isel(lat=slice(None, None, step), lon=slice(None, None, step)), fig=figs[-1], newfig=False, t=t, what='quiver', config=config) print('\t -PM2.5 (Ireland)') figs += ['pm25-ir_{}'.format(t_save)] plot2d(d.lon, d.lat, d.pm25[0, :, 
:], fig=figs[-1], newfig=is_fst, t=t, extent=extents['ireland'], levels=LEVELS['pm25'], cmap=CMAP['a'], label='PM2.5 (ug/m$^3$)', format='%.1f', title='PM2.5', colorbar=is_lst, config=config) print('\t -PM2.5 (Europe)') figs += ['pm25-eu_{}'.format(t_save)] plot2d(d.lon, d.lat, d.pm25[0, :, :], fig=figs[-1], newfig=is_fst, t=t, extent=extents['europe'], levels=LEVELS['pm25'], cmap=CMAP['a'], label='PM2.5 (ug/m$^3$)', format='%.1f', title='PM2.5', colorbar=is_lst, config=config) print('\t -PM10 (Ireland)') figs += ['pm10-ir_{}'.format(t_save)] plot2d(d.lon, d.lat, d.pm10[0, :, :], fig=figs[-1], newfig=is_fst, t=t, extent=extents['ireland'], levels=LEVELS['pm10'], cmap=CMAP['a'], label='PM10 (ug/m$^3$)', format='%.1f', title='PM10', colorbar=is_lst, config=config) print('\t -PM10 (Europe)') figs += ['pm10-eu_{}'.format(t_save)] plot2d(d.lon, d.lat, d.pm10[0, :, :], fig=figs[-1], newfig=is_fst, t=t, extent=extents['europe'], levels=LEVELS['pm10'], cmap=CMAP['a'], label='PM10 (ug/m$^3$)', format='%.1f', title='PM10', colorbar=is_lst, config=config) print('\t -SO2 (Ireland)') figs += ['so2-ir_{}'.format(t_save)] plot2d(d.lon, d.lat, d.so2_concentration[0, :, :] * 1e3, fig=figs[-1], newfig=is_fst, t=t, extent=extents['ireland'], extend='both', levels=LEVELS['so2'], cmap=CMAP['a'], label='SO2 (ppbv)', format='%.1f', title='SO2', colorbar=is_lst, config=config) print('\t -SO2 (Europe)') figs += ['so2-eu_{}'.format(t_save)] plot2d(d.lon, d.lat, d.so2_concentration[0, :, :] * 1e3, fig=figs[-1], newfig=is_fst, t=t, extent=extents['europe'], extend='both', levels=LEVELS['so2'], cmap=CMAP['a'], label='SO2 (ppbv)', format='%.1f', title='SO2', colorbar=is_lst, config=config) print('\t -O3 (Ireland)') figs += ['o3-ir_{}'.format(t_save)] plot2d(d.lon, d.lat, d.o3_concentration[0, :, :] * 1e3, fig=figs[-1], newfig=is_fst, t=t, extent=extents['ireland'], levels=LEVELS['o3'], cmap=CMAP['a'], label='O3 (ppbv)', format='%.1f', title='O3', colorbar=is_lst, config=config) print('\t -O3 (Europe)') figs += ['o3-eu_{}'.format(t_save)] plot2d(d.lon, d.lat, d.o3_concentration[0, :, :] * 1e3, fig=figs[-1], newfig=is_fst, t=t, extent=extents['europe'], levels=LEVELS['o3'], cmap=CMAP['a'], label='O3 (ppbv)', format='%.1f', title='O3', colorbar=is_lst, config=config) print('\t -NOx (Ireland)') figs += ['nox-ir_{}'.format(t_save)] plot2d(d.lon, d.lat, d.nox_concentration[0, :, :] * 1e3, fig=figs[-1], newfig=is_fst, t=t, extent=extents['ireland'], extend='both', levels=LEVELS['nox'], cmap=CMAP['a'], label='NOx (ppbv)', format='%.1f', title='NOx', colorbar=is_lst, config=config) print('\t -NOx (Europe)') figs += ['nox-eu_{}'.format(t_save)] plot2d(d.lon, d.lat, d.nox_concentration[0, :, :] * 1e3, fig=figs[-1], newfig=is_fst, t=t, extent=extents['europe'], extend='both', levels=LEVELS['nox'], cmap=CMAP['a'], label='NOx (ppbv)', format='%.1f', title='NOx', colorbar=is_lst, config=config) # plot2d( # d.lon, d.lat, d.rh, # fig=figs[-1], newfig=is_fst, t=t, # extend=extents['europe'], # levels=LEVELS['pm25'], cmap=CMAP['a'], # label='PM2.5 (ug/m$^2$)', # title='PM2.5', # config=config) print('\t{} - relative humidity (Ireland)'.format(i)) figs += ['rh-ir_{}'.format(t_save)] plot2d(d.lon, d.lat, d.rh, fig=figs[-1], newfig=is_fst, t=t, extent=extents['ireland'], levels=LEVELS['rh'], cmap=CMAP['rh'], label='%', format='%.1f', title='Relative Humidity', colorbar=is_lst, config=config) print('\t{} - relative humidity (Europe)'.format(i)) figs += ['rh-e_{}'.format(t_save)] plot2d(d.lon, d.lat, d.rh, fig=figs[-1], newfig=is_fst, t=t, 
extent=extents['europe'], levels=LEVELS['rh'], cmap=CMAP['rh'], label='%', format='%.1f', title='Relative Humidity', colorbar=is_lst, config=config) for fig in figs: plt.figure(fig) plt.savefig(os.path.join(config['imgs'], fig)) plt.close(fig)
def draw_sst(sel_year, sel_month): st = time.time() np.seterr(divide='ignore', invalid='ignore') print("darwing sst plot for " + str(sel_month).zfill(2) + " " + str(sel_year)) files_cli = sorted( glob.glob( os.path.join( '/home/alley/work/Dong/mongo/seasonal_analysis/data/data/download_from_mongo/cli', 'sst_*.grb'))) f_cli = xr.open_mfdataset(files_cli, concat_dim="time", combine="nested", engine="cfgrib", parallel=True) h_cli = f_cli["sst"] h_cli_ori = h_cli.mean(dim="time").values file_cur = "/home/alley/new_disk/data/sst_" + str(sel_year) + str( sel_month).zfill(2) + ".grb" f_cur = xr.open_mfdataset(file_cur, engine="cfgrib", parallel=True) h_cur_ori = f_cur["sst"] h_cur = (h_cur_ori.values - 273.15) h_cur = np.nan_to_num(h_cur, nan=-999) lat = f_cur["latitude"].values lon = f_cur["longitude"].values h_ano = h_cur_ori - h_cli_ori h_ano = np.nan_to_num(h_ano.values, nan=-999) # print(h_ano) et1 = time.time() # print(et1 - st) # leftString = "SST in " + str(sel_month) + str(sel_year) # rightString = "~S~o~N~C" wks_type = 'png' wks = Ngl.open_wks( wks_type, '/home/alley/work/Dong/mongo/seasonal_analysis/images/sst_' + str(sel_year) + str(sel_month).zfill(2) + '.png') res = Ngl.Resources() res.nglFrame = False res.nglDraw = False res.mpLimitMode = "LatLon" # res.mpFillOn = True #-- turn on map fill # res.mpLandFillColor = "gray" #-- change land color to gray # res.mpMinLonF= 50 # res.mpMaxLonF = 280 res.mpMinLatF = -45 res.mpMaxLatF = 45 res.cnFillOn = True res.mpCenterLonF = 120 res.sfMissingValueV = -999 res.sfXArray = lon res.sfYArray = lat res.lbOrientation = "Horizontal" # horizontal labelbar res.cnLinesOn = False res.tiMainFontHeightF = 0.015 res.cnLineLabelsOn = False res.cnFillDrawOrder = "Predraw" res.cnFillPalette = "BlAqGrYeOrRe" res.pmLabelBarDisplayMode = "Always" #-- turn on a labelbar res.tiMainString = "SST in " + str(sel_month).zfill(2) + " " + str( sel_year) + " (degC)" res.cnLevelSelectionMode = "ExplicitLevels" res.cnLevels = np.arange(20, 35, 1) plot_cur = Ngl.contour_map(wks, h_cur, res) res.cnLevelSelectionMode = "ExplicitLevels" res.tiMainString = "SST anomaly in " + str(sel_month).zfill(2) + " " + str( sel_year) + " (degC)" res.cnFillPalette = "GMT_polar" res.cnLevels = np.arange(-3, 4, 1) res.pmLabelBarHeightF = 0.3 plot_ano = Ngl.contour_map(wks, h_ano, res) Ngl.panel(wks, [plot_cur, plot_ano], [2, 1], False) Ngl.end() et2 = time.time() # print(et2 - et1) print("Finish darwing sst plot for " + str(sel_month).zfill(2) + " " + str(sel_year))
for v in range(len(varnames)):
    vstart = time.time()

    # Get variable name and path
    vn = varnames[v]
    datpath = "/stormtrack/data3/glliu/01_Data/02_AMV_Project/02_stochmod/%s/" % vn

    # Create list of files, one per ensemble member
    nclist = ["%s%s_ens%03d.nc" % (datpath, vn, e) for e in mnum]

    # Open dataset
    ds = xr.open_mfdataset(
        nclist,
        concat_dim='ensemble',
        combine='nested',
        compat='identical',  # seems to be strictest setting...not sure if necessary
        parallel=True,       # note: a boolean, not the string "True"
        join="exact"         # another strict selection...
    )

    # Add ensemble as a dimension
    ds = ds.assign_coords({'ensemble': np.arange(1, len(mnum) + 1, 1)})

    # Merge variables to Dataset (assuming they have the same coordinates)
    if v == 0:
        dsall = ds.copy()
    else:
        dsall = xr.merge([dsall, ds])

#%% Get the DJFM and Regional cuts for EOF calculation
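# Small sketch with invented data (not the CESM files above) of the same idea:
# concatenating per-member datasets along a new 'ensemble' dimension and then
# labelling that dimension with assign_coords.  open_mfdataset(..., combine="nested",
# concat_dim="ensemble") does the equivalent lazily from files on disk.
import numpy as np
import xarray as xr

members = [xr.Dataset({"ts": ("time", np.random.rand(3))},
                      coords={"time": np.arange(3)})
           for _ in range(4)]

ens = xr.concat(members, dim="ensemble")
ens = ens.assign_coords(ensemble=np.arange(1, 5))
print(ens.ts.sizes)   # ensemble: 4, time: 3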
import glob
import time

import matplotlib.pyplot as plt
import numpy as np    # assumed missing in the original; needed for np.zeros / np.where below
import xarray as xr   # assumed missing in the original; needed for xr.open_mfdataset below

t0 = time.time()

M03_dir = "/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD03/"
M06_dir = "/home/savio1/cybertrn_common/common/Data/Satellite_Observations/MODIS/MYD06_L2/"
M03_files = sorted(glob.glob(M03_dir + "MYD03.A2008*.hdf"))
M06_files = sorted(glob.glob(M06_dir + "MYD06_L2.A2008*.hdf"))

total_pix = np.zeros((180, 360))
cloud_pix = np.zeros((180, 360))

for M03, M06 in zip(M03_files, M06_files):
    # Cloud mask: subsample to every 3rd pixel and decode bits 1-2
    d06 = xr.open_mfdataset(M06[:])['Cloud_Mask_1km'][:, :, :].values
    d06CM = d06[::3, ::3, 0]
    ds06_decoded = (np.array(d06CM, dtype="byte") & 0b00000110) >> 1

    # Geolocation, subsampled the same way
    d03_lat = xr.open_mfdataset(
        M03[:], drop_variables="Scan Type")['Latitude'][:, :].values
    d03_lon = xr.open_mfdataset(
        M03[:], drop_variables="Scan Type")['Longitude'][:, :].values
    lat = d03_lat[::3, ::3]
    lon = d03_lon[::3, ::3]

    # Map lat/lon to 1-degree grid indices
    l_index = (lat + 89.5).astype(int).reshape(lat.shape[0] * lat.shape[1])
    lat_index = np.where(l_index > -1, l_index, 0)
    ll_index = (lon + 179.5).astype(int).reshape(lon.shape[0] * lon.shape[1])
    lon_index = np.where(ll_index > -1, ll_index, 0)

    for i, j in zip(lat_index, lon_index):  # (the loop body is truncated in the original snippet)
def get_files_type(case_name,case_type,var_cam,years) : type_desc = {} type_desc['cam'] = ['/glade/p/rneale'] allowed_types = ['cam','reanal'] if case_type not in allowed_types : print(case_type+ ' files - type not allowed') if case_type in allowed_types : print(case_type+ ' files - type allowed') print('-Grabbing data type/case -- '+case_type+' '+case_name) yr0 = years[0] yr1 = years[1] ## GRAB ANALYSIS ## lat_rev = False lcoord_names = False if var_cam != 'TS': if case_type=='reanal' : dir_rda = '/glade/collections/rda/data/' if case_name=='ERA5' : var_anal_fmap = {'T': 't', 'Q':'q'} var_anal_vmap = {'T': 'T', 'Q':'Q'} var_vname = var_anal_vmap[var_cam] ; var_fname = var_anal_fmap[var_cam] rda_cat = 'ds633.1' dir_glade = dir_rda+rda_cat+'/' files_glade = np.array([dir_rda+rda_cat+"/e5.moda.an.pl/%03d/e5.moda.an.pl.128_130_%s.ll025sc.%03d010100_%03d120100.nc"%(y,var_fname,y,y) for y in range(yr0,yr1+1)]) print(files_glade) lat_rev = True lcoord_names = True if case_name=='ERAI' : var_anal_fmap = {'T': 't', 'Q':'q'} var_anal_vmap = {'T': 'T', 'Q':'Q'} var_vname = var_anal_vmap[var_cam] ; var_fname = var_anal_fmap[var_cam] if var_cam in ['T'] : var_fname = 'sc' if var_cam in ['U','V'] : var_fname = 'uv' rda_cat = 'ds627.1' dir_glade = dir_rda+rda_cat+'/' files_glade = np.array([dir_rda+rda_cat+"/ei.moda.an.pl/ei.moda.an.pl.regn128%s.%03d%02d0100.nc"%(var_fname,y,m) for y in range(yr0,yr1+1) for m in range(1,12)]) print(files_glade) print('hi4') if case_name=='MERRA2' : #### NOT CLEAR MMEAN DATA AVAILABLE resn = '1.9x2.5' # var_anal_fmap = {'T': '', 'Q':'q'} var_anal_vmap = {'T': 'T', 'Q':'Q'} var_vname = var_anal_vmap[var_cam] rda_cat = 'ds613.3' dir_glade = dir_rda+rda_cat+'/' files_glade = np.array([dir_rda+rda_cat+"/%s/%03d/MERRA2%03d010100_%03d120100.nc"%(resn,y,y,y) for y in range(yr0,yr1+1)]) print(files_glade) #### GRAB CAM SST AMIP DATASET FOR NOW FOR ANALYSES if (var_cam=='TS') : print('- Grabbing file(s) for AMIP and REANALYSES from CESM inputdata -') dir_inputdata = '/glade/p/cesmdata/cseg/inputdata/atm/cam/sst/' hadisst_file = 'sst_HadOIBl_bc_0.9x1.25_1850_2020_c210521.nc' files_glade = dir_inputdata+hadisst_file var_vname = 'SST_cpl' ## POINT TO FILES ## data_files = xr.open_mfdataset(files_glade,parallel=True,chunks={"time": 1}) # data_files = xr.open_mfdataset(files_glade) ## STANDARDIZE COORDS/DIMS ## if lcoord_names : data_files = data_files.rename({'latitude':'lat', 'longitude':'lon', 'level':'lev'}) # Reverse lat array to get S->N if needed if lat_rev : data_files = data_files.reindex(lat=list(reversed(data_files.lat))) # print(data_files) return data_files,var_vname
outpath = '/proj/bolinc/users/x_sebsc/pr_disagg/smhi/preprocessed/'
os.system(f'mkdir -p {outpath}')

# create list of available files
dates_all = pd.date_range(startdate, enddate, freq='1d')
ifiles = []
for date in dates_all:
    fname = f'{datapath}/smhi_radar_{date.strftime("%Y%m%d")}.nc'
    if os.path.exists(fname):
        ifiles.append(fname)

if len(ifiles) == 0:
    raise Exception('no input files found!')

# now open all files lazily;
# they are automatically chunked per file (thus per day)
data_raw = xr.open_mfdataset(ifiles, combine='nested', concat_dim='time')
data_raw = data_raw['__xarray_dataarray_variable__']

# convert to 32bit
data_raw = data_raw.astype('float32')

# sum to desired time resolution
agg = data_raw.resample(time=f'{tres}h', label='left').sum(skipna=False)

time_daily = agg.time[::int(24 / tres)]
doy = time_daily.dt.dayofyear.values
np.save(f'{outpath}/{startdate}-{enddate}_tres{tres}_doy', doy)
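# Sketch with synthetic data (not the SMHI radar files above) of the aggregation
# step: resampling a time series to a coarser resolution while keeping NaNs
# (skipna=False), so an interval with any missing scan stays missing.
import numpy as np
import pandas as pd
import xarray as xr

times = pd.date_range("2000-01-01", periods=12, freq="1h")
da = xr.DataArray(np.ones(12), coords={"time": times}, dims="time")
da[5] = np.nan

agg = da.resample(time="3h", label="left").sum(skipna=False)
print(agg.values)   # the 3-hour block containing the NaN is NaN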
import numpy as np    # assumed missing in the original; needed for np.array below
import pandas as pd   # assumed missing in the original; needed for pd.read_csv below
import xarray as xr
from scipy.stats import pearsonr
from paths_usa import *
from dask.diagnostics import ProgressBar
ProgressBar().register()

# MERRA-2 and ERA5: only unique interpolated locations
print('prepare turbine location data')

# open turbine files
wt_mer = pd.read_csv(usa_path + '/turbine_data_mer.csv', index_col=0)
wt_era = pd.read_csv(usa_path + '/turbine_data_era.csv', index_col=0)

# open wind files
wind_mer = xr.open_mfdataset(mer_path + "/eff_ws/merra2_wind_USA_*.nc",
                             chunks={'time': 38})
alpha_mer = xr.open_mfdataset(mer_path + "/eff_ws/merra2_alpha_USA_*.nc",
                              chunks={'time': 38})
wind_era = xr.open_mfdataset(era_path + "/eff_ws/era5_wind_USA_*.nc",
                             chunks={'time': 38})
alpha_era = xr.open_mfdataset(era_path + "/eff_ws/era5_alpha_USA_*.nc",
                              chunks={'time': 38})

# Create a dataset with a sequence the size of the MERRA-2 grid to find out
# which turbines interpolate to the same point
in_seq_mer = xr.Dataset(
    {
        'x': (['lat', 'lon'],
              np.array(range(wind_mer.wh50.isel(time=0).values.size)).reshape(
                  wind_mer.wh50.isel(time=0).values.shape))
    },
    # (the coords argument and closing parenthesis are truncated in the original snippet)
# (tail of a commented-out alternative listing more fields)
# 'name_std' : ['sst', 'patm', 'eta', 'sss', 'u_s', 'v_s']} )
fields = pd.DataFrame(
    {'name_CAFE': ['sst'],
     'name_std': ['sst']})   # the closing parenthesis was missing in the original

name_dict = fields.set_index('name_CAFE').to_dict()['name_std']

# Initial dates to include (takes approximately 1 min 30 sec per date) -----
init_dates = pd.date_range('2002-2', '2016-5', freq='1MS')

# Ensembles to include -----
ensembles = range(1, 12)

path = fcst_folder + '/yr2016/mn1/OUTPUT.1/' + fcst_filename + '.nc'
dataset = xr.open_mfdataset(path, autoclose=True)
time_use = dataset.time[:366]

years = range(2002, 2017)
months = range(1, 13)
ensembles = range(1, 12)

for year in years:
    print(year)
    print('----------')
    for idx, variable in enumerate(fields['name_CAFE']):
        print(variable)
        savename = ('cafe.fcst.v1.ocean.' + fields['name_std'][idx] + '.' +
                    str(year) + '.clim.nc')
        try:
            temp = xr.open_mfdataset(
                '/OSM/CBR/OA_DCFP/data/intermediate_products/pylatte_climatologies/'
                + savename,
                autoclose=True)
        # (the except clause and the rest of the loop are truncated in the original snippet)
def FLDAS_to_csv(year, min_month, max_month, min_lat, max_lat,\ min_lon, max_lon): ''' Prepare file for WGENW This method prepares the csvfile to run the weather generator Args: - year (int): The year of interest. Ontained from findYear method - min_month (int): The first month of the season of interest - max_month (int): The last month of the season of interest - min_lat (float): The minimum latitude for the bounding box - max_lat (float): The maximum latitude for the bounding box - min_lon (float): The minimum longitude for the bounding box - max_lon (float): The maximum longitude for the bounding box ''' #Get the season season = np.arange(min_month,max_month+1,1) # Get a csv file ready csv_file = str(os.getcwd())+'/FLDAS_WGEN.csv' #Start a counting index for the station ID. id_idx = 1 #Density of water rho_w = 997 with open(csv_file,'w',newline='') as csvfile: wgenwriter = csv.writer(csvfile, delimiter=',') wgenwriter.writerow(['station id','lon','lat',\ 'year','month','min. temperature', 'max. temperature','cloud fraction', 'wind speed','precipitation','wet']) for month in season: if month<10: month_str='0'+str(month) else: month_str=str(month) subdir = path_daily+'/'+str(year)+'/'+month_str nc_files = (glob.glob(subdir+'/*.nc')) # open the data nc_fid = xr.open_mfdataset(nc_files) # Get the index for lat/lon lats = np.where(np.logical_and(nc_fid['Y'].values>=min_lat, nc_fid['Y'].values<=max_lat))[0] lons = np.where(np.logical_and(nc_fid['X'].values>=min_lon, nc_fid['X'].values<=max_lon))[0] # Get the missing value flag flag_miss = nc_fid.attrs['missing_value'] for lat_idx in lats: for lon_idx in lons: #Next generate a station_id if len(str(id_idx))==1: station_ID = 'FLDAS_0000'+str(id_idx) elif len(str(id_idx))==2: station_ID = 'FLDAS_000'+str(id_idx) elif len(str(id_idx))==3: station_ID = 'FLDAS_00'+str(id_idx) elif len(str(id_idx))==4: station_ID = 'FLDAS_0'+str(id_idx) elif len(str(id_idx))==5: station_ID = 'FLDAS_'+str(id_idx) else: sys.exit('Station_ID index out of bounds') ## Deal with temperature # Replace missing values data = nc_fid['Tair_f_tavg'].values data = data.astype('float') data[data==flag_miss]=np.nan #Calculate minT and maxT minT = np.nanmin(data[:,lat_idx,lon_idx])- 273.15 #Convert to C maxT = np.nanmax(data[:,lat_idx,lon_idx])- 273.15 #Convert to C ## wind speed # Replace missing values data = nc_fid['Wind_f_tavg'].values data = data.astype('float') data[data==flag_miss]=np.nan # Take the average for the month avWind =np.nanmean(data[:,lat_idx,lon_idx]) # Precipitation data = nc_fid['Rainf_f_tavg'].values data = data.astype('float') data[data==flag_miss]=np.nan # Calculate the total for the month totP = np.nansum(data[:,lat_idx,lon_idx]) # Convert to mm/day totP = totP*1000*86400/rho_w # Number of wet days in a month wet_days = len(np.where(data[:,lat_idx,lon_idx]>0)[0]) # Write it out wgenwriter.writerow([station_ID,str(round(nc_fid['X'].values[lon_idx],2)),\ str(round(nc_fid['Y'].values[lat_idx],2)),\ str(year), str(month),str(round(minT,2)),\ str(round(maxT,2)),'0.5',str(round(avWind,2)),\ str(round(totP,2)),str(wet_days)]) # Close netcdf file nc_fid.close() # update the counter id_idx+=1
def open_output(self):
    filenames = [self._get_output_filename(times) for times in self._times]
    return xr.open_mfdataset(filenames)
def __init__(self, **kwargs): rpath = kwargs.get("rpath", "./d3d/") folders = kwargs.get( "folders", None ) # [os.path.join(os.path.abspath(loc),name) for name in os.listdir(loc) if os.path.isdir(os.path.join(loc,name))] if folders: self.folders = folders else: self.folders = [rpath] # check if many tags present ifiles = glob.glob(self.folders[0] + "/*_model.json") if len(ifiles) > 1: # --------------------------------------------------------------------- logger.warning( "more than one configuration, specify tag argument \n") # --------------------------------------------------------------------- tag = kwargs.get("tag", None) if tag: ifile = self.folders[0] + "/" + tag + "_model.json" else: ifile = ifiles[0] # --------------------------------------------------------------------- logger.info("reading data based on {} \n".format(ifile)) # --------------------------------------------------------------------- with open(ifile, "rb") as f: info = pd.read_json(f, lines=True).T info[info.isnull().values] = None self.info = info.to_dict()[0] grid = r2d.read_file(self.folders[0] + "/" + self.info["tag"] + ".grd") deb = np.loadtxt(self.folders[0] + "/" + self.info["tag"] + ".dep") # create mask d = deb[1:-1, 1:-1] self.w = d == -999.0 b = deb[:-1, :-1] b[b == -999.0] = np.nan self.dem = xr.Dataset( {"bathymetry": (["latitude", "longitude"], -b)}, coords={ "longitude": ("longitude", grid.lons[0, :].values), "latitude": ("latitude", grid.lats[:, 0].values), }, ) self.grid = grid # READ DATA nfiles = [ folder + "/" + "trim-" + self.info["tag"] + ".nc" for folder in self.folders ] ds = xr.open_mfdataset(nfiles, combine="by_coords", data_vars="minimal") self.Dataset = ds # clean duplicates self.Dataset = self.Dataset.sel( time=~self.Dataset.indexes["time"].duplicated()) dic = self.info.copy() # start with x's keys and values dic.update( kwargs) # modifies z with y's keys and values & returns None if "sa_date" not in dic.keys(): dic.update({"sa_date": self.Dataset.time.values[0]}) if "se_date" not in dic.keys(): dic.update({"se_date": self.Dataset.time.values[-1]}) self.obs = obs(**dic)
def __init__(self, **kwargs): rpath = kwargs.get("rpath", "./schism/") folders = kwargs.get( "folders", None ) # [os.path.join(os.path.abspath(loc),name) for name in os.listdir(loc) if os.path.isdir(os.path.join(loc,name))] if folders: self.folders = folders else: self.folders = [rpath] datai = [] tag = kwargs.get("tag", "schism") misc = kwargs.get("misc", {}) for folder in self.folders: logger.info(" Combining output for folder {}\n".format(folder)) xdat = glob.glob(folder + "/outputs/schout_[!0]*.nc") xdat.sort(key=lambda f: int("".join(filter(str.isdigit, f)))) if len(xdat) > 0: datai.append(xdat) # append to list else: # run merge output with open(folder + "/" + tag + "_model.json", "r") as f: info = pd.read_json(f, lines=True).T info[info.isnull().values] = None info = info.to_dict()[0] p = pm.set(**info) p.misc = misc p.results() self.misc = p.misc xdat = glob.glob(folder + "/outputs/schout_[!0]*.nc") xdat.sort(key=lambda f: int("".join(filter(str.isdigit, f)))) datai.append(xdat) # append to list merge = kwargs.get("merge", True) if merge: datai = flat_list(datai) self.Dataset = xr.open_mfdataset(datai, combine="by_coords", data_vars="minimal") with open(self.folders[-1] + "/" + tag + "_model.json", "r") as f: info = pd.read_json(f, lines=True).T info[info.isnull().values] = None info = info.to_dict()[0] p = pm.set(**info) if hasattr(p, "stations"): logger.info(" Retrieve station timeseries\n") dstamp = kwargs.get("dstamp", info["date"]) p.get_station_data(dstamp=dstamp) self.time_series = p.time_series else: self.Dataset = [ xr.open_mfdataset(x, combine="by_coords", data_vars="minimal") for x in datai ] ts = [] for folder in self.folders: p = pm.read_model(folder + "/{}_model.json".format(tag)) # read model if hasattr(p, "stations"): logger.info(" Retrieve station timeseries\n") dstamp = kwargs.get("dstamp", p.date) p.get_station_data(dstamp=dstamp) ts.append(p.time_series) self.time_series = ts
def split_list(alist, wanted_parts=1):
    length = len(alist)
    return np.array([
        alist[i * length // wanted_parts:(i + 1) * length // wanted_parts]
        for i in range(wanted_parts)
    ])


files2analyse = split_list(onlyfiles, divisions)

print('Analyzing the year ', year, 'from files [', files2analyse[index_files][0],
      '-', files2analyse[index_files][-1], ']')

data = xr.open_mfdataset(files2analyse[index_files])
lon = data.longitude.values
lat = data.latitude.values

init_time = datetime.datetime.strptime(
    str(data.time.isel(time=0).values).split('T')[0], "%Y-%m-%d")
print(init_time)

outfolder = '/g/data/v45/jm5970/trackeddy_output/AVISO+/'

sshatime = data.sla.values
# Mask the integer fill value (-2147483647) before analysis
sshatime = ma.masked_where(sshatime <= -2147483647, sshatime)
print('End loading data')

sshashape = np.shape(sshatime)
# tracking.py creates a dataset which contains the radius (r) as a function of
# time and height, and the thermal midpoint (x_c, y_c) as functions of time
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

ds = xr.open_mfdataset('/work/bnm/buoyant_entrainment/no_g/data/slice*.nc', concat_dim='t')
contour = np.loadtxt('/work/bnm/buoyant_entrainment/data/no_g/1e4_sim5_no_g/contour_flux.dat')
midpoint = np.loadtxt('/work/bnm/buoyant_entrainment/data/no_g/1e4_sim5_no_g/thermal_midpoint_1e4_g0.dat')

rout = '/work/bnm/buoyant_entrainment/data/no_g/1e4_sim5_no_g/thermal_boundary.nc'
mout = '/work/bnm/buoyant_entrainment/data/no_g/1e4_sim5_no_g/mask.nc'

tracking = xr.Dataset({'r': (['t', 'z'], contour),
                       'x_c': (['t'], midpoint[1]),
                       'y_c': (['t'], midpoint[2])},
                      coords={'t': (['t'], ds.t.values[:-1]),  # remove last element from array
                              'z': (['z'], ds.z.values)})
tracking.to_netcdf(rout, engine='scipy')
tracking = xr.open_dataset(rout)

# Broadcast the thermal boundary so it has the same dimensions as rho, u, v, w, ...
r, foo, bar = xr.broadcast(tracking.r, ds.x, ds.y)
delta_x = ds.x - tracking.x_c
delta_y = ds.y - tracking.y_c
# np.sqrt works directly on DataArrays (xr.ufuncs.sqrt is deprecated)
mask = np.sqrt(delta_x ** 2 + delta_y ** 2) < r
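# Toy version of the masking pattern above, using made-up arrays rather than the
# thermal-tracking data: broadcast a per-time radius against x/y coordinates and
# build a boolean mask of points inside that radius.
import numpy as np
import xarray as xr

r = xr.DataArray(np.array([1.0, 2.0]), dims="t")   # radius per time
x = xr.DataArray(np.linspace(-3, 3, 7), dims="x")
y = xr.DataArray(np.linspace(-3, 3, 7), dims="y")

r_b, x_b, y_b = xr.broadcast(r, x, y)              # all now have dims (t, x, y)
mask = np.sqrt(x_b**2 + y_b**2) < r_b
print(mask.sum(dim=("x", "y")).values)             # more points fall inside the larger radius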
rootdir = '/home/ocean_personal_data/graemem/ariane/'
model = 'orca025_global_5d'
experiment = 'quant_back_seedNAn1_t*_sign27.7-28_MLrefz8delsig0.01'
filepath = rootdir + 'experiments/' + model + '/' + experiment + '/ariane_positions_quantitative.nc'
filepath_initial = rootdir + 'experiments/' + model + '/' + experiment + '/ariane_initial.nc'
filepath_time = rootdir + 'time/time_orca025_global_5d.mat'
filepath_region = rootdir + 'experiments/' + model + '/quant_back_seedNAn1_t3560-sep-4217_sign27.7-28_MLrefz8delsig0.01/region_limits'

# Universal variables
spy = 365 * 24 * 60 * 60   # seconds per year
yrst = 1958
yrend = 2016
ventsec = 7
lastinit = 4217

# Ariane input
ds_initial = xr.open_mfdataset(filepath_initial, combine='nested', concat_dim='ntraj')
ds_initial.init_volume.name = 'init_volume'

# Ariane output
ds = xr.open_mfdataset(filepath, combine='nested', concat_dim='ntraj')
ds = xr.merge([ds, ds_initial.init_volume])
ds['final_age'] = ds.final_age.astype('timedelta64[s]').astype('float64') / spy
ds['final_dens'] = calc_sigmantr(ds.final_temp, ds.final_salt)

# Model times
time_vals = np.append(np.array([0]), sio.loadmat(filepath_time)['time'].squeeze())
time = xr.DataArray(time_vals, dims=['nfile'], coords={'nfile': np.arange(time_vals.size)})

# Region limits
region_limits = np.loadtxt(filepath_region)

# Bins
years = np.arange(yrst, yrend + 1)
ages = np.arange(-3 / 12, yrend - yrst + 9 / 12)