def try_to_open_grib_file(path: str) -> xr.Dataset:
    """Try a few different ways to open a grib file.

    Parameters
    ----------
    path : str
        Path pointing to the location of the grib file.

    Returns
    -------
    ds : xr.Dataset
        The xarray Dataset that contains information from the grib file.
    """
    try:
        ds = xr.open_dataset(path, engine="cfgrib")
    except Exception as e:
        try:
            import cfgrib
            ds = cfgrib.open_datasets(path)
            ds = xr.combine_by_coords(ds)
        except Exception:
            logger.error(f"Oh no! There was a problem opening up {path}: {e}")
            return
    return ds
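
# Hedged usage sketch for the helper above; the path is hypothetical.
# cfgrib.open_datasets returns a *list* of datasets (one per GRIB
# "hypercube"), which is why the fallback combines them with
# xr.combine_by_coords before returning a single Dataset.
ds = try_to_open_grib_file("/tmp/example.grib2")  # hypothetical path
if ds is not None:
    print(ds.data_vars)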
def load_wholesale_gribs_to_xarray(filepattern, channels):
    # load in all the wholesale file numbers needed
    cfgrib.dataset.LOG.disabled = True
    datasets = {
        i: cfgrib.open_datasets(filepattern.format(i))
        for i in load_filenumbers
    }
    datasets = adhoc_merge(datasets, filepattern)
    cfgrib.dataset.LOG.disabled = False

    # rename channels from grib names to unique short names,
    # and select only the channels we need
    dataset_list = []
    # for all the files we want variables from
    for whsl_num in load_filenumbers:
        file_channels_meta = channels_meta_data.loc[channels] \
            .query("wholesale_file_number==@whsl_num")
        # for all the chunks of this file we want variables from
        for i in file_channels_meta.index_after_loading.unique():
            # rename the variables to defined non-clashing names
            df_ = file_channels_meta.query("index_after_loading==@i")
            rename = dict(zip(df_.wholesale_index_variable_name, df_.index))
            datasets[whsl_num][i] = \
                datasets[whsl_num][i].rename(rename)[list(df_.index)]
            # gather the needed parts of the dataset
            dataset_list.append(datasets[whsl_num][i])

    # merge into a single dataset
    merged_datasets = xr.merge(dataset_list, compat='override')
    return merged_datasets
def get_dset(vars_2d=[], vars_3d=[], f_times=0):
    if vars_2d or vars_3d:
        date_string, _ = get_run()
        urls = find_file_name(vars_2d=vars_2d, vars_3d=vars_3d,
                              f_times=f_times)
        fils = download_extract_files(urls)

        # We cat the files on Linux and read the resulting grib; this is
        # much, much faster! But it will not work everywhere.
        if isinstance(fils, list) and len(fils) > 3:
            # multiple-files extractor
            merged_file = ('/tmp/' + date_string + '_'
                           + '_'.join(vars_3d + vars_2d) + '.grib2')
            if not os.path.exists(merged_file):
                os.system('cat %s > %s' % (' '.join(fils), merged_file))
            dss = cfgrib.open_datasets(merged_file)
            for i, _ in enumerate(dss):
                dss[i] = preprocess(dss[i])
            ds = xr.merge(dss)
        else:
            ds = xr.open_mfdataset(fils,
                                   engine='cfgrib',
                                   preprocess=preprocess,
                                   combine="by_coords",
                                   concat_dim='step',
                                   parallel=False)
        return ds
def main():
    args = getRuntimeArgs()
    run = Run.get_context()

    file_name = '_D2D05150000051703001'
    file_path = os.path.join(args.data_path, file_name)
    print(f'Data Path: {args.data_path}')
    print(f'Files in data path: {glob.glob(args.data_path)}')
    print(f'Filename: {file_name}')
    print(f'Full file path: {file_path}')

    ds = cfgrib.open_datasets(file_path,
                              backend_kwargs={
                                  'read_keys': ['pv'],
                                  'indexpath': ''
                              })
    x = ds[0].t.attrs['GRIB_pv']
    nl = len(x)
    if nl == 184:  # 91 vertical levels
        a = dict(zip(range(92), list(x[0:92])))
        b = dict(zip(range(92), list(x[92:184])))
    elif nl == 276:  # 137 vertical levels
        a = dict(zip(range(138), list(x[0:138])))
        b = dict(zip(range(138), list(x[138:276])))
    else:
        raise Exception("Cannot retrieve a,b parameters for vertical levels!")

    ab = pd.DataFrame(index=a.keys(), columns=['A'], data=a.values())
    ab['B'] = b.values()
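
# Context for the GRIB_pv handling in main() above (a sketch, not part
# of the original script): for ECMWF hybrid model levels, the 'pv' key
# stores the A and B half-level coefficients concatenated, so
# len(pv) == 2 * (n_levels + 1); e.g. a 91-level model has 92 half
# levels, hence 184 values (92 A's followed by 92 B's). A generic
# equivalent of the branching, using `x` and `nl` as defined in main():
#
#     n_half = nl // 2                 # number of half levels
#     a = dict(enumerate(x[:n_half]))  # A coefficients (Pa)
#     b = dict(enumerate(x[n_half:]))  # B coefficients (dimensionless)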
def pv(input_file):
    # Vars
    grib_vars = ['t', 'u', 'v']

    # Load a list of datasets, one for each variable we want
    ds_list = [cfgrib.open_datasets(
                   input_file,
                   backend_kwargs={
                       'filter_by_keys': {'typeOfLevel': 'isobaricInhPa',
                                          'shortName': v},
                       'indexpath': ''
                   })
               for v in grib_vars]

    # Flatten the list of lists to a single list of datasets,
    # keeping only levels at or above 100 hPa
    ds_flat = [x.sel(isobaricInhPa=x.isobaricInhPa[x.isobaricInhPa >= 100.0].values)
               for ds in ds_list for x in ds]

    # Merge the variables into a single dataset
    ds = xr.merge(ds_flat)

    # Add pressure
    ds['p'] = xr.DataArray(ds.isobaricInhPa.values,
                           dims=['isobaricInhPa'],
                           coords={'isobaricInhPa': ds.isobaricInhPa.values},
                           attrs={'units': 'hPa'}).broadcast_like(ds['t'])

    # Calculate potential temperature
    ds['theta'] = mpcalc.potential_temperature(
        ds['p'].metpy.convert_units('Pa'), ds['t'])

    # Compute baroclinic PV, converted to PVU
    ds['pv'] = mpcalc.potential_vorticity_baroclinic(
        ds['theta'], ds['p'].metpy.convert_units('Pa'),
        ds['u'], ds['v'], latitude=ds.latitude) / 1.0e-6

    # Average PV over the configured pressure layer
    met_data = ds['pv'].sel(isobaricInhPa=slice(
        float(os.environ.get('PV_LAYER_MAX_PRESSURE', 1000.0)),
        float(os.environ.get('PV_LAYER_MIN_PRESSURE', 100.0)))) \
        .mean(axis=0).values

    return met_data
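
# The function above leans on cfgrib's `filter_by_keys` to load one
# variable at a time on pressure levels. A standalone sketch of that
# pattern ('input.grib2' is a hypothetical file; 'indexpath': '' skips
# writing .idx sidecar files next to the data):
import cfgrib

t_only = cfgrib.open_datasets(
    'input.grib2',  # hypothetical file
    backend_kwargs={
        'filter_by_keys': {'typeOfLevel': 'isobaricInhPa', 'shortName': 't'},
        'indexpath': '',
    })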
def load_forecast(self, subset_lat=None, subset_lon=None):
    try:
        flist = [
            n for n in glob.glob(f'{self.paths["data_store"]}/*')
            if self.stat in n and '.idx' not in n
        ]
        try:
            new_gefs = xr.open_mfdataset(
                flist,
                engine='cfgrib',
                combine='nested',
                concat_dim='time',
                chunks={'lat': 5, 'lon': 5},
                backend_kwargs=dict(filter_by_keys=self.key_filter,
                                    indexpath='')).compute()
        except OSError:
            raise FileNotFoundError(
                'sprd files likely not in download folder, please check!')
    except KeyError:
        import cfgrib
        new_gefs = cfgrib.open_datasets(
            f'{self.paths["data_store"]}gefs_mean_000.grib2')

    subset_gefs = self._get_var(new_gefs)
    subset_gefs = self._rename_latlon(subset_gefs)
    if subset_lat is not None and subset_lon is not None:
        subset_gefs = self._subset_latlon(subset_gefs, subset_lat,
                                          subset_lon)
    self.date = str(subset_gefs.time.values).partition('T')[0]
    subset_gefs = self._map(subset_gefs)
    return subset_gefs
def read_fnl_grib2(filename):
    """
    Read an fnl analysis data file.

    Args:
        filename (string): file path name.

    Return:
        A list of xarray objects.
    """
    return cfgrib.open_datasets(filename)
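
# Hedged usage sketch for read_fnl_grib2 (the file name is
# hypothetical): because cfgrib.open_datasets returns one Dataset per
# hypercube, callers typically merge or index into the returned list.
import xarray as xr

dss = read_fnl_grib2('fnl_20200101_00_00.grib2')  # hypothetical file
analysis = xr.merge(dss, compat='override')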
def load_rainfall_values(file):
    datasets = cfgrib.open_datasets(file)
    print("executed open datasets")
    rainfall_values = None
    i = 0
    # search the returned datasets for the one containing total precipitation
    for cur in datasets:
        try:
            rainfall_values = cur['tp'].values
            print(f"Found at iteration {i}")
            break
        except KeyError:
            i += 1
    # rainfall_values = dataset[26]['tp'].values
    return rainfall_values
def ingest_gfs(f):
    # print('Reading %s' % os.path.basename(f))
    datasets = cfgrib.open_datasets(f)

    keep_keys = ['tp', 'q', 't', 'u', 'v', 'absv', 'w', 'gh', 'r', 'd',
                 'u10', 'v10', 'u100', 'v100', 't2m', 'd2m',
                 'cape', 'prmsl', 'sp', 'orog', 'hpbl']

    sfc, iso = [], []
    for ds in datasets:
        key_match = [v for v in ds.data_vars if v in keep_keys]
        if len(key_match) > 0:
            dims = ds.dims.keys()
            coords = ds[key_match].coords
            if 'heightAboveGround' in coords and 'heightAboveGround' not in dims:
                sfc.append(ds[key_match].drop('heightAboveGround'))
            elif 'isobaricInhPa' in coords:
                iso.append(ds[key_match])
            elif 'surface' in coords or 'meanSea' in coords:
                sfc.append(ds[key_match])
            elif 'prmsl' in list(ds.data_vars):
                sfc.append(ds['prmsl'])
        ds.close()

    sfc = xr.merge(sfc).drop('t')
    iso = xr.merge(iso).rename({'isobaricInhPa': 'level'})
    iso = iso.sel(level=iso.level[::-1])
    sfc['longitude'] = sfc['longitude'] - 360
    iso['longitude'] = iso['longitude'] - 360
    return [sfc, iso]
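
# Usage sketch for ingest_gfs (the GFS file name is hypothetical): the
# function returns surface fields and pressure-level fields as two
# separate datasets.
#
#     sfc, iso = ingest_gfs('gfs.t00z.pgrb2.0p25.f000')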
def __init__(self, datea, fhr, config):
    init = dt.datetime.strptime(datea, '%Y%m%d%H')
    init_s = init.strftime("%m%d%H%M")
    datef = init + dt.timedelta(hours=fhr)
    datef_s = datef.strftime("%m%d%H%M")

    # Construct the grib file dictionary for a particular forecast hour
    file_name = os.path.join(config['work_dir'],
                             "E1E{0}{1}1".format(init_s, datef_s))
    try:
        ds = cfgrib.open_datasets(file_name)
        self.grib_dict = {}
        for d in ds:
            for tt in d:
                if 'number' in d[tt].dims:
                    self.grib_dict.update({'{0}_pf'.format(tt): d[tt]})
                else:
                    self.grib_dict.update({'{0}_cf'.format(tt): d[tt]})
    except IOError as exc:
        raise RuntimeError('Failed to open {0}'.format(file_name)) from exc

    # This dictionary maps generic variable names to the names of the
    # variables in the file
    self.var_dict = {'zonal_wind': 'u',
                     'meridional_wind': 'v',
                     'zonal_wind_10m': 'u10',
                     'meridional_wind_10m': 'v10',
                     'geopotential_height': 'gh',
                     'temperature': 't',
                     'relative_humidity': 'r',
                     'specific_humidity': 'q',
                     'sea_level_pressure': 'msl',
                     'precipitation': 'tp'}

    # Shift longitudes from [0, 360) to [-180, 180)
    for key in self.grib_dict:
        if np.max(self.grib_dict[key].coords['longitude']) > 180:
            self.grib_dict[key].coords['longitude'] = \
                (self.grib_dict[key].coords['longitude'] + 180) % 360 - 180

    self.has_specific_humidity = \
        '{0}_cf'.format(self.var_dict['specific_humidity']) in self.grib_dict

    self.nens = int(self.grib_dict['gh_pf'].attrs['GRIB_totalNumber'])
def convert_and_write_df(filepath):
    filename = os.path.basename(filepath)
    print(f'Processing file {filename}')

    ds = cfgrib.open_datasets(filepath,
                              backend_kwargs={
                                  'read_keys': ['pv'],
                                  'indexpath': ''
                              })
    # Write one parquet file per hypercube returned by cfgrib
    for i in range(len(ds)):
        outfile = os.path.join(
            data_output_path,
            "converted" + filename + str(i) + ".parquet.gzip")
        pdf = ds[i].to_dataframe()
        pdf.reset_index(inplace=True)
        pdf = pdf.drop(columns='step')
        print(f'Writing results to {outfile}')
        pdf.to_parquet(outfile, compression='gzip')
# plt.figure()
# pcs.plot(color='DarkRed',
#          label='pcs',
#          linestyle='--')
# pseudo_pcs.sel(mode=0).plot(linestyle='-.',
#                             color='DarkBlue',
#                             label='pseudo pcs')
# plt.legend()
# plt.show(block=False)

""" Computing EOFs and PCs using ERA5 """
# single file
# reading grib file with 1000 hPa geopotential height
dataDIR = '/home/aline/Documents/Dados/ERA5/geopotential_1979_2020.grib'
DS = cfgrib.open_datasets(dataDIR)[0]
DS = DS.assign(hgt=DS.z / 9.81)
DS = DS.assign(hgt_mean=DS.hgt.mean(axis=0))
DS = DS.assign(hgt_anomalie=DS.hgt - DS.hgt_mean)

# extract only HGT to keep the computation light
hgt = DS['hgt'].to_dataset()
solver, eof1, var1 = calcEOF(hgt, 'hgt', 'latitude')
eof1 = eof1.sel(mode=0) * (-1)

# Plot the leading EOF expressed as covariance in the European/Atlantic domain.
clevs = np.linspace(-50, 50, 12)
proj = ccrs.Orthographic(0, 90)
# ax = plt.axes(projection=proj)
plt.figure()
https://www.sciencedirect.com/science/article/pii/B9780128117149000061
https://www.sciencedirect.com/science/article/pii/B9780127329512500120
"""
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import xarray as xr
import cfgrib

g = 9.81
file = '/home/aline/Documents/IST_investigation/ERA5/geopotential_1989_2019.grib'
DS = cfgrib.open_datasets(file)[0]

height = DS / g
Hmean = height.mean(dim='time')
anomalie = height - Hmean

# making plot area
ax1 = plt.subplot(projection=ccrs.Orthographic(0, 90))
DS.isel(time=0).z.plot(transform=ccrs.PlateCarree(),
                       subplot_kws={"projection": ccrs.Orthographic(0, 90)},
                       cmap='RdBu_r')
ax1.coastlines(zorder=3)
plt.title('Geopotential 1989-01-01')
plt.savefig(
    '/home/aline/Documents/IST_investigation/ERA5/images/geopotencialt0.png')
plt.show(block=False)
'''
******************************************************************************
                               WAVE CORRELATION
******************************************************************************
'''
# reading wave file
fwave = '/home/aline/Documents/Dados/ERA5/montly_mean_1979_2020.grib'
dwave = cfgrib.open_datasets(fwave)[0]

# cropping index time series to match data
index_crop = indices[slice(dwave.isel(time=0).time.values,
                           dwave.isel(time=-1).time.values)]
index_crop = index_crop.id.to_xarray()

# selecting winter season for index data
index_winter = index_crop.sel(time=index_crop['time.season'] == 'DJF')

'''
# Compute the Pearson correlation coefficient between
# two DataArray objects along a shared dimension
'''
# correlacao = {'Hs': xr.corr(dwave.swh.round(3),
#                             index_crop.round(3), dim='time'),
def xhrrr(DATE, searchString, fxx=0, *,
          DATE_is_valid_time=False,
          remove_grib2=True,
          add_crs=True,
          **download_kwargs):
    """
    Download HRRR data and return as an xarray Dataset (or Datasets).

    You may only request one `DATE` and one `fxx` (forecast lead time).

    .. note::
        See https://github.com/ecmwf/cfgrib/issues/187 for why there is
        a problem with reading multiple accumulated precipitation fields
        when searchString=':APCP:'.

    Parameters
    ----------
    DATE : datetime
        A single datetime object.
    searchString : string
        A string representing a field or fields from the GRIB2 file.
        See more details in the ``download_hrrr`` docstring. Some examples:

        ============================= ===============================================
        ``searchString``              Messages that will be downloaded
        ============================= ===============================================
        ':TMP:2 m'                    Temperature at 2 m.
        ':TMP:'                       Temperature fields at all levels.
        ':UGRD:.* mb'                 U wind at all pressure levels.
        ':500 mb:'                    All variables on the 500 mb level.
        ':APCP:'                      All accumulated precipitation fields.
        ':APCP:surface:0-[1-9]*'      Accumulated precip since initialization time.
        ':APCP:surface:[1-9]*-[1-9]*' Accumulated precip over the last hour.
        ':UGRD:10 m'                  U wind component at 10 meters.
        ':(U|V)GRD:'                  U and V wind components at all levels.
        ':.GRD:'                      (Same as above.)
        ':(TMP|DPT):'                 Temperature and dew point at all levels.
        ':(TMP|DPT|RH):'              TMP, DPT, and relative humidity at all levels.
        ':REFC:'                      Composite reflectivity.
        ':surface:'                   All variables at the surface.
        ============================= ===============================================

    fxx : int
        Forecast lead time. Default will get the analysis, F00.
    DATE_is_valid_time : bool
        False - (default) The DATE argument represents the model
        initialization datetime.
        True - The DATE argument represents the model valid time.
        This is handy when you want a specific forecast lead time that
        is valid at a certain date.
    remove_grib2 : bool
        True - (default) Delete the GRIB2 file after reading it into a
        Dataset. This requires a copy to memory, so it might slow
        things down.
        False - Keep the downloaded GRIB2 file. This might be a better
        option performance-wise, because it does not need to copy the
        data, but it keeps the file on disk. You would be responsible
        for removing files when you don't need them.
    add_crs : bool
        True - (default) Add the Cartopy coordinate reference system
        (crs) projection as an attribute to the Dataset.
    **download_kwargs :
        Any other keyword argument accepted by ``download_hrrr``:

        - model : {'hrrr', 'hrrrak', 'hrrrX'}
        - field : {'sfc', 'prs', 'nat', 'subh'}
        - save_dir : pathlib.Path
        - download_source_priority : a list of download sources
        - dryrun : bool
        - verbose : bool
    """
    # Convert the DATE input to a pandas datetime
    # (pandas can parse some strings as dates).
    DATE = pd.to_datetime(DATE)

    inputs = locals()

    assert not hasattr(DATE, '__len__'), \
        "`DATE` must be a single datetime, not a list."
    assert not hasattr(fxx, '__len__'), \
        "`fxx` must be a single integer, not a list."

    if DATE_is_valid_time:
        # Change DATE to the model run initialization DATE so that, when
        # we take the forecast lead time offset into account, the
        # returned data will be valid for the DATE the user requested.
        DATE = DATE - timedelta(hours=fxx)

    # Download the GRIB2 file
    grib2file, url = download_hrrr(DATE, searchString, fxx=fxx,
                                   **download_kwargs)

    # Some extra backend kwargs for cfgrib
    backend_kwargs = {
        'indexpath': '',
        'read_keys': ['parameterName', 'parameterUnits'],
        'errors': 'raise'
    }

    # Use cfgrib.open_datasets, just in case there are multiple
    # "hypercubes" for what we requested.
    H = cfgrib.open_datasets(grib2file, backend_kwargs=backend_kwargs)

    # Create a cartopy projection object
    if add_crs:
        crs = get_crs(H[0])

    for ds in H:
        ds.attrs['history'] = inputs
        ds.attrs['url'] = url

        # CF 1.8 map projection information for the HRRR model
        # http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#_lambert_conformal
        #
        # I'm not sure why, but when I assign an attribute for the main
        # Dataset with a cartopy.crs and then copy it (for the
        # remove_grib2 case), the cartopy.crs is reset to the default
        # proj4_params. This isn't an issue when I set cartopy.crs as an
        # attribute for a variable (DataArray). For this reason, I will
        # return the 'crs' as an attribute for each variable's
        # DataArray. Later, I will add the 'crs' as a Dataset attribute
        # for convenience.
        ds.attrs['grid_mapping_name'] = 'lambert_conformal_conic'
        ds.attrs['standard_parallel'] = (38.5, 38.5)
        ds.attrs['longitude_of_central_meridian'] = 262.5
        ds.attrs['latitude_of_projection_origin'] = 38.5

        # This is redundant, but I want every variable to also have the
        # map projection information...
        for var in list(ds):
            ds[var].attrs['grid_mapping_name'] = 'lambert_conformal_conic'
            ds[var].attrs['standard_parallel'] = (38.5, 38.5)
            ds[var].attrs['longitude_of_central_meridian'] = 262.5
            ds[var].attrs['latitude_of_projection_origin'] = 38.5
            if add_crs:
                ds[var].attrs['crs'] = crs

    if remove_grib2:
        # Load the data to memory before removing the file
        H = [ds.load() for ds in H]
        # OK, now we can remove the grib2 file
        os.remove(grib2file)

    if len(H) == 1:
        H = H[0]
        if add_crs:
            # Add the cartopy map projection object as an attribute,
            # for convenience.
            H.attrs['crs'] = crs
    else:
        warnings.warn(
            '⚠ ALERT! Could not load grib2 data into a single '
            'xarray Dataset. You might consider refining your '
            '`searchString` if you are getting data you do not need.')
        if add_crs:
            # Add the cartopy map projection object as an attribute,
            # for convenience.
            for i in H:
                i.attrs['crs'] = crs

    return H
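
# Hedged usage sketch for xhrrr above (the date is illustrative; the
# searchString follows the docstring examples):
#
#     from datetime import datetime
#     t2m = xhrrr(datetime(2021, 1, 1, 0), ':TMP:2 m', fxx=6)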
def xarray(
    self,
    searchString=None,
    backend_kwargs=None,
    remove_grib=True,
    **download_kwargs,
):
    """
    Open GRIB2 data as an xarray Dataset.

    Parameters
    ----------
    searchString : str
        Variables to read into the xarray Dataset.
    remove_grib : bool
        If True, the grib file will be removed ONLY IF it didn't exist
        before we downloaded it.
    """
    # Avoid a mutable default argument; cfgrib defaults are set below.
    backend_kwargs = dict(backend_kwargs or {})
    download_kwargs = {**dict(overwrite=False), **download_kwargs}

    # Download the file if the local file does not exist
    local_file = self.get_localFilePath(searchString=searchString)

    # ! \/ This is critical...
    # Only remove the file if it did not exist before we downloaded it
    remove_grib = not local_file.exists() and remove_grib

    # ! \/ Fail-safe; never remove a file if the source is 'local'
    if self.grib_source == "local":
        remove_grib = False

    if not local_file.exists() or download_kwargs["overwrite"]:
        self.download(searchString=searchString, **download_kwargs)

    # Backend kwargs for cfgrib
    backend_kwargs.setdefault("indexpath", "")
    backend_kwargs.setdefault(
        "read_keys", ["parameterName", "parameterUnits", "stepRange"])
    backend_kwargs.setdefault("errors", "raise")

    # Use cfgrib.open_datasets, just in case there are multiple
    # "hypercubes" for what we requested.
    Hxr = cfgrib.open_datasets(
        local_file,
        backend_kwargs=backend_kwargs,
    )

    # Get CF grid projection information with pygrib and pyproj because
    # this is something cfgrib doesn't do
    # (https://github.com/ecmwf/cfgrib/issues/251).
    # NOTE: Assumes the projection is the same for all variables.
    grib = pygrib.open(str(local_file))
    msg = grib.message(1)
    cf_params = CRS(msg.projparams).to_cf()

    # Funny stuff with polar stereographic
    # (https://github.com/pyproj4/pyproj/issues/856)
    # TODO: Is there a better way to handle this? What about the south pole?
    if cf_params["grid_mapping_name"] == "polar_stereographic":
        cf_params["latitude_of_projection_origin"] = cf_params.get(
            "latitude_of_projection_origin", 90)

    # Here I'm looping over each dataset in the list returned by cfgrib
    for ds in Hxr:
        # Add some details
        # ----------------
        ds.attrs["model"] = self.model
        ds.attrs["product"] = self.product
        ds.attrs["description"] = self.DESCRIPTION
        ds.attrs["remote_grib"] = self.grib
        ds.attrs["local_grib"] = local_file
        ds.attrs["searchString"] = searchString

        # Attach CF grid mapping
        # ----------------------
        # http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#appendix-grid-mappings
        ds["gribfile_projection"] = None
        ds["gribfile_projection"].attrs = cf_params
        ds["gribfile_projection"].attrs["long_name"] = \
            f"{self.model.upper()} model grid projection"

        # Assign this grid_mapping for all variables
        for var in list(ds):
            if var == "gribfile_projection":
                continue
            ds[var].attrs["grid_mapping"] = "gribfile_projection"

    # ! DO NOT REMOVE GRIB FILES IF THE SOURCE IS LOCAL
    # ! (I know I already checked this; I am just so worried about
    # ! erasing my local data.)
    if self.grib_source != "local":
        if remove_grib:
            # Load the datasets into memory before removing the file
            Hxr = [ds.load() for ds in Hxr]
            _ = [ds.close() for ds in Hxr]

            # TODO: Forcefully close the files so the grib can be
            # removed (this is a Windows-specific requirement).
            # os.close(?WHAT IS THE FILE HANDLER?)
            #
            # https://docs.python.org/3/library/os.html#os.remove
            # "On Windows, attempting to remove a file that is in use
            # causes an exception to be raised; on Unix, the directory
            # entry is removed but the storage allocated to the file is
            # not made available until the original file is no longer
            # in use."
            # >> HOW DO I COMPLETELY CLOSE THE FILE OPENED BY CFGRIB??
""" if not sys.platform == "win32": # Removes file local_file.unlink() else: warnings.warn( "sorry, on windows I couldn't remove the file.") if len(Hxr) == 1: return Hxr[0] else: # cfgrib returned multiple hypercubes. try: # Handle case where HRRR subh returns multiple hypercubes (see #73) data_vars = set(itertools.chain(*[list(i) for i in Hxr])) data_vars.remove("gribfile_projection") Hxr = xr.concat(Hxr, dim="step", data_vars=data_vars) except: print( f"Note: Returning a list of [{len(Hxr)}] xarray.Datasets because cfgrib opened with multiple hypercubes." ) return Hxr
def get_HRRR(DATE, searchString, *,
             fxx=0,
             DATE_is_valid_time=False,
             remove_grib2=True,
             add_crs=True,
             **download_kwargs):
    """
    Download HRRR data and return as an xarray Dataset (or Datasets).

    Only request one `DATE` and one `fxx` (forecast lead time).

    Parameters
    ----------
    DATE : datetime
        A single datetime object.
    searchString : string
        A string representing a field or fields from the GRIB2 file.
        See more details in the ``download_hrrr`` docstring. Some examples:

        ================ ===============================================
        ``searchString`` Messages that will be downloaded
        ================ ===============================================
        ':TMP:2 m'       Temperature at 2 m.
        ':TMP:'          Temperature fields at all levels.
        ':500 mb:'       All variables on the 500 mb level.
        ':APCP:'         All accumulated precipitation fields.
        ':UGRD:10 m'     U wind component at 10 meters.
        ':(U|V)GRD:'     U and V wind components at all levels.
        ':.GRD:'         (Same as above.)
        ':(TMP|DPT):'    Temperature and dew point at all levels.
        ':(TMP|DPT|RH):' TMP, DPT, and relative humidity at all levels.
        ':REFC:'         Composite reflectivity.
        ':surface:'      All variables at the surface.
        ================ ===============================================

    fxx : int
        Forecast lead time. Default will get the analysis, F00.
    DATE_is_valid_time : bool
        False - (default) The DATE argument represents the model
        initialization datetime.
        True - The DATE argument represents the model valid time.
        This is handy when you want a specific forecast lead time that
        is valid at a certain date.
    remove_grib2 : bool
        True - (default) Delete the GRIB2 file after reading it into a
        Dataset. This requires a copy to memory, so it might slow
        things down.
        False - Keep the downloaded GRIB2 file. This might be a better
        option performance-wise, because it does not need to copy the
        data, but it keeps the file on disk. You would be responsible
        for removing files when you don't need them.
    add_crs : bool
        True - (default) Append the Cartopy coordinate reference system
        (crs) projection as an attribute to the Dataset.
    **download_kwargs :
        Any other keyword argument accepted by ``download_HRRR``:
        {model, field, SAVEDIR, dryrun, verbose}
    """
    inputs = locals()

    assert not hasattr(DATE, '__len__'), \
        "`DATE` must be a single datetime, not a list."
    assert not hasattr(fxx, '__len__'), \
        "`fxx` must be a single integer, not a list."

    if DATE_is_valid_time:
        # Change DATE to the model run initialization DATE so that, when
        # we take the forecast lead time offset into account, the
        # returned data will be valid for the DATE the user requested.
        DATE = DATE - timedelta(hours=fxx)

    # Download the GRIB2 file
    grib2file, url = download_HRRR(DATE, searchString, fxx=fxx,
                                   **download_kwargs)

    # Some extra backend kwargs for cfgrib
    backend_kwargs = {
        'indexpath': '',
        'read_keys': ['parameterName', 'parameterUnits'],
        'errors': 'raise'
    }

    # Use cfgrib.open_datasets, just in case there are multiple
    # "hypercubes" for what we requested.
    H = cfgrib.open_datasets(grib2file, backend_kwargs=backend_kwargs)

    # Create a cartopy projection object
    if add_crs:
        crs = get_crs(H[0])

    for ds in H:
        ds.attrs['get_HRRR inputs'] = inputs
        ds.attrs['url'] = url
        if add_crs:
            # Add the crs projection info as a Dataset attribute...
            ds.attrs['crs'] = crs
            # ...and add attrs for each variable for ease of access.
            for var in list(ds):
                ds[var].attrs['crs'] = crs

    if remove_grib2:
        H = [ds.copy(deep=True) for ds in H]
        os.remove(grib2file)

    if len(H) == 1:
        H = H[0]
    else:
        warnings.warn(
            '⚠ ALERT! Could not load grib2 data into a single xarray Dataset. '
            'You might consider refining your `searchString` '
            'if you are getting data you do not need.')

    return H
def le_indice_onda(modelo, tempo, parametro):
    if modelo == 'ERA5':
        # Reading the index file as txt
        findex = '/home/aline/Documents/Dados/indices/calculados/index_era5.txt'
        pseudo_pcs = pd.read_csv(findex,
                                 header=0,
                                 parse_dates=True,
                                 index_col='time',
                                 names=['time', 'indice'])
        # selecting 30 years of ERA5 that represent the present period:
        # from 1980 to 2020
        # reading wave file
        fwave = '/home/aline/Documents/Dados/ERA5/montly_mean_1979_2020.grib'
        dwave = cfgrib.open_datasets(fwave)[0]
        if parametro == 'WND':
            fwave = '/home/aline/Documents/Dados/ERA5/wind_monthly_1979_2020.grib'
            dwave = cfgrib.open_datasets(fwave)[0]
        dwave = dwave.sel(time=slice('1980-01-01', '2009-12-01'),
                          latitude=slice(90, 20),
                          longitude=slice(-101, 35))
    elif modelo == 'CMIP5':
        indexpath = '/home/aline/Documents/Dados/Jerry/GEOPOT_1000hPa/'
        # mypath = '/home/aline/Documents/Dados/Jerry/WW3_Marta/' + parametro
        if parametro == 'swh':
            mypath = '/home/aline/Documents/Dados/Jerry/WW3_Marta/Hs/'
            # fwave = mypath + 'presente_hs_mean_mensal2.nc'
            if tempo == 'presente':
                fwave = mypath + 'mensalmean_1980_2009.nc'
            elif tempo == 'futuro':
                fwave = mypath + 'mensalmean_2070_2099.nc'
                # fwave = mypath + 'futuro_hs_mean_mensal2.nc'
            dwave = xr.open_dataset(fwave).rename({'hs': 'swh'})
        elif parametro == 'mwp':
            mypath = '/home/aline/Documents/Dados/Jerry/WW3_Marta/T02/'
            # fwave = mypath + 'presente_hs_mean_mensal2.nc'
            if tempo == 'presente':
                fwave = mypath + 'mensalmean_1980_2009.nc'
            elif tempo == 'futuro':
                fwave = mypath + 'mensalmean_2070_2099.nc'
                # fwave = mypath + 'futuro_hs_mean_mensal2.nc'
            dwave = xr.open_dataset(fwave).rename({'t02': 'mwp'})
        elif parametro == 'mwd':
            mypath = '/home/aline/Documents/Dados/Jerry/WW3_Marta/DIR/'
            # fwave = mypath + 'presente_hs_mean_mensal2.nc'
            if tempo == 'presente':
                fwave = mypath + 'mensalmean_1980_2009.nc'
            elif tempo == 'futuro':
                fwave = mypath + 'mensalmean_2070_2099.nc'
                # fwave = mypath + 'futuro_hs_mean_mensal2.nc'
            dwave = xr.open_dataset(fwave).rename({'dir': 'mwd'})
        elif parametro == 'WND':
            mypath = '/home/aline/Documents/Dados/Jerry/WW3_Marta/WND/'
            # fwave = mypath + 'presente_hs_mean_mensal2.nc'
            if tempo == 'presente':
                fwave = mypath + 'mensalmean_1980_2009.nc'
            elif tempo == 'futuro':
                fwave = mypath + 'mensalmean_2070_2099.nc'
                # fwave = mypath + 'futuro_hs_mean_mensal2.nc'
            dwave = xr.open_dataset(fwave).rename({'uwnd': 'u10',
                                                   'vwnd': 'v10'})
        pseudo_pcs = xr.open_dataset(indexpath + 'index_historical1.nc')
        pseudo_pcs = pseudo_pcs.rename(
            {'__xarray_dataarray_variable__': 'indice'})
        pseudo_pcs = pd.DataFrame(data=pseudo_pcs.indice.values,
                                  index=pseudo_pcs.time.values,
                                  columns=['indice'])

    # I needed these transformations to compute the correlation:
    # the time dimension was not being recognized in the ppcs series
    # (it was stored as the index), and for some reason some of the
    # monthly means had their index on the second day of the month...
    datetime_indices = {
        'YEAR': pseudo_pcs.index.year,
        'MONTH': pseudo_pcs.index.month,
        'DAY': np.ones(len(pseudo_pcs), dtype=int)
    }
    pseudo_pcs.index = pd.to_datetime(datetime_indices)
    pseudo_pcs.index.name = 'time'
    return pseudo_pcs, dwave
    # rename the file so it doesn't get processed by another instance
    pathTo = path.replace('/D2', '/_D2')
    dbutils.fs.mv(path, pathTo)
    path = pathTo.replace('dbfs:', '/dbfs')
    print(path)
    break

# COMMAND ----------

import cfgrib
import xarray as xr

ds = cfgrib.open_datasets(path,
                          backend_kwargs={
                              'read_keys': ['pv'],
                              'indexpath': ''
                          })

# COMMAND ----------

import pandas as pd

x = ds[0].t.attrs['GRIB_pv']
nl = len(x)
if nl == 184:  # 91 vertical levels
    a = dict(zip(range(92), list(x[0:92])))
    b = dict(zip(range(92), list(x[92:184])))
elif nl == 276:  # 137 vertical levels
    a = dict(zip(range(138), list(x[0:138])))
    print('invalid log level: {}, setting to INFO by default'.format(LOGLEVEL),
          file=sys.stderr)
    logger.setLevel(logging.INFO)
logging.basicConfig()

# Create the inserter from this config
inserter = insert.InsertGFS(windb2, windb2_config)

# Insert the file; domainKey should be None if it wasn't set, which will
# create a new domain
for var in windb2_config.config['vars']:
    var_config = windb2_config.config['vars'][var]
    if isinstance(var_config['insert'], list):  # will fail if insert does not exist

        # Debug
        logger.debug(cfgrib.open_datasets(args.gribfile))

        # Calculate the level required
        backend_kwargs = {
            'filter_by_keys': {
                'typeOfLevel': var_config['cfgribTypeOfLevel']
            }
        }
        # if var_config['insert'][0] != 0:
        backend_kwargs['filter_by_keys']['level'] = var_config['insert'][0]

        # Open the GRIB2 file using cfgrib
        logger.debug('Trying to open variable: {}'.format(var))
        with xarray.open_dataset(args.gribfile,
                                 engine='cfgrib',
                                 backend_kwargs=backend_kwargs) as gribfile: