def try_to_open_grib_file(path: str) -> xr.Dataset:
    """Try a few different ways to open a grib file.

    Parameters
    ----------
    path : str
        Path pointing to the location of the grib file.

    Returns
    -------
    ds : xr.Dataset
        The xarray Dataset that contains information from the grib file.
    """
    try:
        ds = xr.open_dataset(path, engine="cfgrib")
    except Exception as e:
        try:
            import cfgrib
            ds = cfgrib.open_datasets(path)
            ds = xr.combine_by_coords(ds)
        except Exception:
            logger.error(f"Oh no! There was a problem opening up {path}: {e}")
            return
    return ds
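
# Hedged usage sketch for the helper above; the path is hypothetical.
# cfgrib.open_datasets returns a *list* of datasets (one per GRIB
# "hypercube"), which is why the fallback combines them with
# xr.combine_by_coords before returning a single Dataset.
ds = try_to_open_grib_file("/tmp/example.grib2")  # hypothetical path
if ds is not None:
    print(ds.data_vars)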
def load_wholesale_gribs_to_xarray(filepattern, channels):
    # load in all the wholesale file numbers needed
    cfgrib.dataset.LOG.disabled = True
    datasets = {
        i: cfgrib.open_datasets(filepattern.format(i))
        for i in load_filenumbers
    }
    datasets = adhoc_merge(datasets, filepattern)
    cfgrib.dataset.LOG.disabled = False

    # rename channels from grib names to unique short names,
    # and select only the channels we need
    dataset_list = []
    # for all the files we want variables from
    for whsl_num in load_filenumbers:
        file_channels_meta = channels_meta_data.loc[channels] \
            .query("wholesale_file_number==@whsl_num")
        # for all the chunks of this file we want variables from
        for i in file_channels_meta.index_after_loading.unique():
            # rename the variables to defined non-clashing names
            df_ = file_channels_meta.query("index_after_loading==@i")
            rename = dict(zip(df_.wholesale_index_variable_name, df_.index))
            datasets[whsl_num][i] = \
                datasets[whsl_num][i].rename(rename)[list(df_.index)]
            # gather the needed parts of the dataset
            dataset_list.append(datasets[whsl_num][i])

    # merge into a single dataset
    merged_datasets = xr.merge(dataset_list, compat='override')
    return merged_datasets
def get_dset(vars_2d=[], vars_3d=[], f_times=0):
    if vars_2d or vars_3d:
        date_string, _ = get_run()
        urls = find_file_name(vars_2d=vars_2d, vars_3d=vars_3d,
                              f_times=f_times)
        fils = download_extract_files(urls)

        # We cat the files on Linux and read the resulting grib; this is
        # much, much faster! But it will not work everywhere.
        if isinstance(fils, list) and len(fils) > 3:
            # multiple-files extractor
            merged_file = ('/tmp/' + date_string + '_'
                           + '_'.join(vars_3d + vars_2d) + '.grib2')
            if not os.path.exists(merged_file):
                os.system('cat %s > %s' % (' '.join(fils), merged_file))
            dss = cfgrib.open_datasets(merged_file)
            for i, _ in enumerate(dss):
                dss[i] = preprocess(dss[i])
            ds = xr.merge(dss)
        else:
            ds = xr.open_mfdataset(fils,
                                   engine='cfgrib',
                                   preprocess=preprocess,
                                   combine="by_coords",
                                   concat_dim='step',
                                   parallel=False)
        return ds
def main():
    args = getRuntimeArgs()
    run = Run.get_context()

    file_name = '_D2D05150000051703001'
    file_path = os.path.join(args.data_path, file_name)
    print(f'Data Path: {args.data_path}')
    print(f'Files in data path: {glob.glob(args.data_path)}')
    print(f'Filename: {file_name}')
    print(f'Full file path: {file_path}')

    ds = cfgrib.open_datasets(file_path,
                              backend_kwargs={
                                  'read_keys': ['pv'],
                                  'indexpath': ''
                              })
    x = ds[0].t.attrs['GRIB_pv']
    nl = len(x)
    if nl == 184:  # 91 vertical levels
        a = dict(zip(range(92), list(x[0:92])))
        b = dict(zip(range(92), list(x[92:184])))
    elif nl == 276:  # 137 vertical levels
        a = dict(zip(range(138), list(x[0:138])))
        b = dict(zip(range(138), list(x[138:276])))
    else:
        raise Exception("Cannot retrieve a,b parameters for vertical levels!")

    ab = pd.DataFrame(index=a.keys(), columns=['A'], data=a.values())
    ab['B'] = b.values()
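
# Context for the GRIB_pv handling in main() above (a sketch, not part
# of the original script): for ECMWF hybrid model levels, the 'pv' key
# stores the A and B half-level coefficients concatenated, so
# len(pv) == 2 * (n_levels + 1); e.g. a 91-level model has 92 half
# levels, hence 184 values (92 A's followed by 92 B's). A generic
# equivalent of the branching, using `x` and `nl` as defined in main():
#
#     n_half = nl // 2                 # number of half levels
#     a = dict(enumerate(x[:n_half]))  # A coefficients (Pa)
#     b = dict(enumerate(x[n_half:]))  # B coefficients (dimensionless)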
def pv(input_file):
    # Vars
    grib_vars = ['t', 'u', 'v']

    # Load a list of datasets, one for each variable we want
    ds_list = [cfgrib.open_datasets(
                   input_file,
                   backend_kwargs={
                       'filter_by_keys': {'typeOfLevel': 'isobaricInhPa',
                                          'shortName': v},
                       'indexpath': ''
                   })
               for v in grib_vars]

    # Flatten the list of lists to a single list of datasets,
    # keeping only levels at or above 100 hPa
    ds_flat = [x.sel(isobaricInhPa=x.isobaricInhPa[x.isobaricInhPa >= 100.0].values)
               for ds in ds_list for x in ds]

    # Merge the variables into a single dataset
    ds = xr.merge(ds_flat)

    # Add pressure
    ds['p'] = xr.DataArray(ds.isobaricInhPa.values,
                           dims=['isobaricInhPa'],
                           coords={'isobaricInhPa': ds.isobaricInhPa.values},
                           attrs={'units': 'hPa'}).broadcast_like(ds['t'])

    # Calculate potential temperature
    ds['theta'] = mpcalc.potential_temperature(
        ds['p'].metpy.convert_units('Pa'), ds['t'])

    # Compute baroclinic PV, converted to PVU
    ds['pv'] = mpcalc.potential_vorticity_baroclinic(
        ds['theta'], ds['p'].metpy.convert_units('Pa'),
        ds['u'], ds['v'], latitude=ds.latitude) / 1.0e-6

    # Average PV over the configured pressure layer
    met_data = ds['pv'].sel(isobaricInhPa=slice(
        float(os.environ.get('PV_LAYER_MAX_PRESSURE', 1000.0)),
        float(os.environ.get('PV_LAYER_MIN_PRESSURE', 100.0)))) \
        .mean(axis=0).values

    return met_data
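
# The function above leans on cfgrib's `filter_by_keys` to load one
# variable at a time on pressure levels. A standalone sketch of that
# pattern ('input.grib2' is a hypothetical file; 'indexpath': '' skips
# writing .idx sidecar files next to the data):
import cfgrib

t_only = cfgrib.open_datasets(
    'input.grib2',  # hypothetical file
    backend_kwargs={
        'filter_by_keys': {'typeOfLevel': 'isobaricInhPa', 'shortName': 't'},
        'indexpath': '',
    })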
def load_forecast(self, subset_lat=None, subset_lon=None):
    try:
        flist = [
            n for n in glob.glob(f'{self.paths["data_store"]}/*')
            if self.stat in n and '.idx' not in n
        ]
        try:
            new_gefs = xr.open_mfdataset(
                flist,
                engine='cfgrib',
                combine='nested',
                concat_dim='time',
                chunks={'lat': 5, 'lon': 5},
                backend_kwargs=dict(filter_by_keys=self.key_filter,
                                    indexpath='')).compute()
        except OSError:
            raise FileNotFoundError(
                'sprd files likely not in download folder, please check!')
    except KeyError:
        import cfgrib
        new_gefs = cfgrib.open_datasets(
            f'{self.paths["data_store"]}gefs_mean_000.grib2')

    subset_gefs = self._get_var(new_gefs)
    subset_gefs = self._rename_latlon(subset_gefs)
    if subset_lat is not None and subset_lon is not None:
        subset_gefs = self._subset_latlon(subset_gefs, subset_lat,
                                          subset_lon)
    self.date = str(subset_gefs.time.values).partition('T')[0]
    subset_gefs = self._map(subset_gefs)
    return subset_gefs
def read_fnl_grib2(filename):
    """
    Read an fnl analysis data file.

    Args:
        filename (string): file path name.

    Return:
        A list of xarray objects.
    """
    return cfgrib.open_datasets(filename)
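
# Hedged usage sketch for read_fnl_grib2 (the file name is
# hypothetical): because cfgrib.open_datasets returns one Dataset per
# hypercube, callers typically merge or index into the returned list.
import xarray as xr

dss = read_fnl_grib2('fnl_20200101_00_00.grib2')  # hypothetical file
analysis = xr.merge(dss, compat='override')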
def load_rainfall_values(file):
    datasets = cfgrib.open_datasets(file)
    print("executed open datasets")
    rainfall_values = None
    i = 0
    # search the returned datasets for the one containing total precipitation
    for cur in datasets:
        try:
            rainfall_values = cur['tp'].values
            print(f"Found at iteration {i}")
            break
        except KeyError:
            i += 1
    # rainfall_values = dataset[26]['tp'].values
    return rainfall_values
def ingest_gfs(f):
    # print('Reading %s' % os.path.basename(f))
    datasets = cfgrib.open_datasets(f)

    keep_keys = ['tp', 'q', 't', 'u', 'v', 'absv', 'w', 'gh', 'r', 'd',
                 'u10', 'v10', 'u100', 'v100', 't2m', 'd2m',
                 'cape', 'prmsl', 'sp', 'orog', 'hpbl']

    sfc, iso = [], []
    for ds in datasets:
        key_match = [v for v in ds.data_vars if v in keep_keys]
        if len(key_match) > 0:
            dims = ds.dims.keys()
            coords = ds[key_match].coords
            if 'heightAboveGround' in coords and 'heightAboveGround' not in dims:
                sfc.append(ds[key_match].drop('heightAboveGround'))
            elif 'isobaricInhPa' in coords:
                iso.append(ds[key_match])
            elif 'surface' in coords or 'meanSea' in coords:
                sfc.append(ds[key_match])
            elif 'prmsl' in list(ds.data_vars):
                sfc.append(ds['prmsl'])
        ds.close()

    sfc = xr.merge(sfc).drop('t')
    iso = xr.merge(iso).rename({'isobaricInhPa': 'level'})
    iso = iso.sel(level=iso.level[::-1])
    sfc['longitude'] = sfc['longitude'] - 360
    iso['longitude'] = iso['longitude'] - 360
    return [sfc, iso]
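
# Usage sketch for ingest_gfs (the GFS file name is hypothetical): the
# function returns surface fields and pressure-level fields as two
# separate datasets.
#
#     sfc, iso = ingest_gfs('gfs.t00z.pgrb2.0p25.f000')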
def __init__(self, datea, fhr, config):
    init = dt.datetime.strptime(datea, '%Y%m%d%H')
    init_s = init.strftime("%m%d%H%M")
    datef = init + dt.timedelta(hours=fhr)
    datef_s = datef.strftime("%m%d%H%M")

    # Construct the grib file dictionary for a particular forecast hour
    file_name = os.path.join(config['work_dir'],
                             "E1E{0}{1}1".format(init_s, datef_s))
    try:
        ds = cfgrib.open_datasets(file_name)
        self.grib_dict = {}
        for d in ds:
            for tt in d:
                if 'number' in d[tt].dims:
                    self.grib_dict.update({'{0}_pf'.format(tt): d[tt]})
                else:
                    self.grib_dict.update({'{0}_cf'.format(tt): d[tt]})
    except IOError as exc:
        raise RuntimeError('Failed to open {0}'.format(file_name)) from exc

    # This dictionary maps generic variable names to the names of the
    # variables in the file
    self.var_dict = {'zonal_wind': 'u',
                     'meridional_wind': 'v',
                     'zonal_wind_10m': 'u10',
                     'meridional_wind_10m': 'v10',
                     'geopotential_height': 'gh',
                     'temperature': 't',
                     'relative_humidity': 'r',
                     'specific_humidity': 'q',
                     'sea_level_pressure': 'msl',
                     'precipitation': 'tp'}

    # Shift longitudes from [0, 360) to [-180, 180)
    for key in self.grib_dict:
        if np.max(self.grib_dict[key].coords['longitude']) > 180:
            self.grib_dict[key].coords['longitude'] = \
                (self.grib_dict[key].coords['longitude'] + 180) % 360 - 180

    self.has_specific_humidity = \
        '{0}_cf'.format(self.var_dict['specific_humidity']) in self.grib_dict

    self.nens = int(self.grib_dict['gh_pf'].attrs['GRIB_totalNumber'])
def convert_and_write_df(filepath):
    filename = os.path.basename(filepath)
    print(f'Processing file {filename}')

    ds = cfgrib.open_datasets(filepath,
                              backend_kwargs={
                                  'read_keys': ['pv'],
                                  'indexpath': ''
                              })
    # Write one parquet file per hypercube returned by cfgrib
    for i in range(len(ds)):
        outfile = os.path.join(
            data_output_path,
            "converted" + filename + str(i) + ".parquet.gzip")
        pdf = ds[i].to_dataframe()
        pdf.reset_index(inplace=True)
        pdf = pdf.drop(columns='step')
        print(f'Writing results to {outfile}')
        pdf.to_parquet(outfile, compression='gzip')
# plt.figure()
# pcs.plot(color='DarkRed',
#          label='pcs',
#          linestyle='--')
# pseudo_pcs.sel(mode=0).plot(linestyle='-.',
#                             color='DarkBlue',
#                             label='pseudo pcs')
# plt.legend()
# plt.show(block=False)

""" Computing EOFs and PCs using ERA5 """
# single file
# reading grib file with 1000 hPa geopotential height
dataDIR = '/home/aline/Documents/Dados/ERA5/geopotential_1979_2020.grib'
DS = cfgrib.open_datasets(dataDIR)[0]
DS = DS.assign(hgt=DS.z / 9.81)
DS = DS.assign(hgt_mean=DS.hgt.mean(axis=0))
DS = DS.assign(hgt_anomalie=DS.hgt - DS.hgt_mean)

# extract only HGT to keep the computation light
hgt = DS['hgt'].to_dataset()
solver, eof1, var1 = calcEOF(hgt, 'hgt', 'latitude')
eof1 = eof1.sel(mode=0) * (-1)

# Plot the leading EOF expressed as covariance in the European/Atlantic domain.
clevs = np.linspace(-50, 50, 12)
proj = ccrs.Orthographic(0, 90)
# ax = plt.axes(projection=proj)
plt.figure()
https://www.sciencedirect.com/science/article/pii/B9780128117149000061
https://www.sciencedirect.com/science/article/pii/B9780127329512500120
"""
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import xarray as xr
import cfgrib

g = 9.81
file = '/home/aline/Documents/IST_investigation/ERA5/geopotential_1989_2019.grib'
DS = cfgrib.open_datasets(file)[0]

height = DS / g
Hmean = height.mean(dim='time')
anomalie = height - Hmean

# making plot area
ax1 = plt.subplot(projection=ccrs.Orthographic(0, 90))
DS.isel(time=0).z.plot(transform=ccrs.PlateCarree(),
                       subplot_kws={"projection": ccrs.Orthographic(0, 90)},
                       cmap='RdBu_r')
ax1.coastlines(zorder=3)
plt.title('Geopotential 1989-01-01')
plt.savefig(
    '/home/aline/Documents/IST_investigation/ERA5/images/geopotencialt0.png')
plt.show(block=False)
'''
******************************************************************************
                               WAVE CORRELATION
******************************************************************************
'''
# reading wave file
fwave = '/home/aline/Documents/Dados/ERA5/montly_mean_1979_2020.grib'
dwave = cfgrib.open_datasets(fwave)[0]

# cropping index time series to match data
index_crop = indices[slice(dwave.isel(time=0).time.values,
                           dwave.isel(time=-1).time.values)]
index_crop = index_crop.id.to_xarray()

# selecting winter season for index data
index_winter = index_crop.sel(time=index_crop['time.season'] == 'DJF')

'''
# Compute the Pearson correlation coefficient between
# two DataArray objects along a shared dimension
'''
# correlacao = {'Hs': xr.corr(dwave.swh.round(3),
#                             index_crop.round(3), dim='time'),
def xhrrr(DATE, searchString, fxx=0, *,
          DATE_is_valid_time=False,
          remove_grib2=True,
          add_crs=True,
          **download_kwargs):
    """
    Download HRRR data and return as an xarray Dataset (or Datasets).

    You may only request one `DATE` and one `fxx` (forecast lead time).

    .. note::
        See https://github.com/ecmwf/cfgrib/issues/187 for why there is
        a problem with reading multiple accumulated precipitation fields
        when searchString=':APCP:'.

    Parameters
    ----------
    DATE : datetime
        A single datetime object.
    searchString : string
        A string representing a field or fields from the GRIB2 file.
        See more details in the ``download_hrrr`` docstring. Some examples:

        ============================= ===============================================
        ``searchString``              Messages that will be downloaded
        ============================= ===============================================
        ':TMP:2 m'                    Temperature at 2 m.
        ':TMP:'                       Temperature fields at all levels.
        ':UGRD:.* mb'                 U wind at all pressure levels.
        ':500 mb:'                    All variables on the 500 mb level.
        ':APCP:'                      All accumulated precipitation fields.
        ':APCP:surface:0-[1-9]*'      Accumulated precip since initialization time.
        ':APCP:surface:[1-9]*-[1-9]*' Accumulated precip over the last hour.
        ':UGRD:10 m'                  U wind component at 10 meters.
        ':(U|V)GRD:'                  U and V wind components at all levels.
        ':.GRD:'                      (Same as above.)
        ':(TMP|DPT):'                 Temperature and dew point at all levels.
        ':(TMP|DPT|RH):'              TMP, DPT, and relative humidity at all levels.
        ':REFC:'                      Composite reflectivity.
        ':surface:'                   All variables at the surface.
        ============================= ===============================================

    fxx : int
        Forecast lead time. Default will get the analysis, F00.
    DATE_is_valid_time : bool
        False - (default) The DATE argument represents the model
        initialization datetime.
        True - The DATE argument represents the model valid time.
        This is handy when you want a specific forecast lead time that
        is valid at a certain date.
    remove_grib2 : bool
        True - (default) Delete the GRIB2 file after reading it into a
        Dataset. This requires a copy to memory, so it might slow
        things down.
        False - Keep the downloaded GRIB2 file. This might be a better
        option performance-wise, because it does not need to copy the
        data, but it keeps the file on disk. You would be responsible
        for removing files when you don't need them.
    add_crs : bool
        True - (default) Add the Cartopy coordinate reference system
        (crs) projection as an attribute to the Dataset.
    **download_kwargs :
        Any other keyword argument accepted by ``download_hrrr``:

        - model : {'hrrr', 'hrrrak', 'hrrrX'}
        - field : {'sfc', 'prs', 'nat', 'subh'}
        - save_dir : pathlib.Path
        - download_source_priority : a list of download sources
        - dryrun : bool
        - verbose : bool
    """
    # Convert the DATE input to a pandas datetime
    # (pandas can parse some strings as dates).
    DATE = pd.to_datetime(DATE)

    inputs = locals()

    assert not hasattr(DATE, '__len__'), \
        "`DATE` must be a single datetime, not a list."
    assert not hasattr(fxx, '__len__'), \
        "`fxx` must be a single integer, not a list."

    if DATE_is_valid_time:
        # Change DATE to the model run initialization DATE so that, when
        # we take the forecast lead time offset into account, the
        # returned data will be valid for the DATE the user requested.
        DATE = DATE - timedelta(hours=fxx)

    # Download the GRIB2 file
    grib2file, url = download_hrrr(DATE, searchString, fxx=fxx,
                                   **download_kwargs)

    # Some extra backend kwargs for cfgrib
    backend_kwargs = {
        'indexpath': '',
        'read_keys': ['parameterName', 'parameterUnits'],
        'errors': 'raise'
    }

    # Use cfgrib.open_datasets, just in case there are multiple
    # "hypercubes" for what we requested.
    H = cfgrib.open_datasets(grib2file, backend_kwargs=backend_kwargs)

    # Create a cartopy projection object
    if add_crs:
        crs = get_crs(H[0])

    for ds in H:
        ds.attrs['history'] = inputs
        ds.attrs['url'] = url

        # CF 1.8 map projection information for the HRRR model
        # http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#_lambert_conformal
        #
        # I'm not sure why, but when I assign an attribute for the main
        # Dataset with a cartopy.crs and then copy it (for the
        # remove_grib2 case), the cartopy.crs is reset to the default
        # proj4_params. This isn't an issue when I set cartopy.crs as an
        # attribute for a variable (DataArray). For this reason, I will
        # return the 'crs' as an attribute for each variable's
        # DataArray. Later, I will add the 'crs' as a Dataset attribute
        # for convenience.
        ds.attrs['grid_mapping_name'] = 'lambert_conformal_conic'
        ds.attrs['standard_parallel'] = (38.5, 38.5)
        ds.attrs['longitude_of_central_meridian'] = 262.5
        ds.attrs['latitude_of_projection_origin'] = 38.5

        # This is redundant, but I want every variable to also have the
        # map projection information...
        for var in list(ds):
            ds[var].attrs['grid_mapping_name'] = 'lambert_conformal_conic'
            ds[var].attrs['standard_parallel'] = (38.5, 38.5)
            ds[var].attrs['longitude_of_central_meridian'] = 262.5
            ds[var].attrs['latitude_of_projection_origin'] = 38.5
            if add_crs:
                ds[var].attrs['crs'] = crs

    if remove_grib2:
        # Load the data to memory before removing the file
        H = [ds.load() for ds in H]
        # OK, now we can remove the grib2 file
        os.remove(grib2file)

    if len(H) == 1:
        H = H[0]
        if add_crs:
            # Add the cartopy map projection object as an attribute,
            # for convenience.
            H.attrs['crs'] = crs
    else:
        warnings.warn(
            '⚠ ALERT! Could not load grib2 data into a single '
            'xarray Dataset. You might consider refining your '
            '`searchString` if you are getting data you do not need.')
        if add_crs:
            # Add the cartopy map projection object as an attribute,
            # for convenience.
            for i in H:
                i.attrs['crs'] = crs

    return H
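
# Hedged usage sketch for xhrrr above (the date is illustrative; the
# searchString follows the docstring examples):
#
#     from datetime import datetime
#     t2m = xhrrr(datetime(2021, 1, 1, 0), ':TMP:2 m', fxx=6)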
def xarray(
    self,
    searchString=None,
    backend_kwargs=None,
    remove_grib=True,
    **download_kwargs,
):
    """
    Open GRIB2 data as an xarray Dataset.

    Parameters
    ----------
    searchString : str
        Variables to read into the xarray Dataset.
    remove_grib : bool
        If True, the grib file will be removed ONLY IF it didn't exist
        before we downloaded it.
    """
    # Avoid a mutable default argument; cfgrib defaults are set below.
    backend_kwargs = dict(backend_kwargs or {})
    download_kwargs = {**dict(overwrite=False), **download_kwargs}

    # Download the file if the local file does not exist
    local_file = self.get_localFilePath(searchString=searchString)

    # ! \/ This is critical...
    # Only remove the file if it did not exist before we downloaded it
    remove_grib = not local_file.exists() and remove_grib

    # ! \/ Fail-safe; never remove a file if the source is 'local'
    if self.grib_source == "local":
        remove_grib = False

    if not local_file.exists() or download_kwargs["overwrite"]:
        self.download(searchString=searchString, **download_kwargs)

    # Backend kwargs for cfgrib
    backend_kwargs.setdefault("indexpath", "")
    backend_kwargs.setdefault(
        "read_keys", ["parameterName", "parameterUnits", "stepRange"])
    backend_kwargs.setdefault("errors", "raise")

    # Use cfgrib.open_datasets, just in case there are multiple
    # "hypercubes" for what we requested.
    Hxr = cfgrib.open_datasets(
        local_file,
        backend_kwargs=backend_kwargs,
    )

    # Get CF grid projection information with pygrib and pyproj because
    # this is something cfgrib doesn't do
    # (https://github.com/ecmwf/cfgrib/issues/251).
    # NOTE: Assumes the projection is the same for all variables.
    grib = pygrib.open(str(local_file))
    msg = grib.message(1)
    cf_params = CRS(msg.projparams).to_cf()

    # Funny stuff with polar stereographic
    # (https://github.com/pyproj4/pyproj/issues/856)
    # TODO: Is there a better way to handle this? What about the south pole?
    if cf_params["grid_mapping_name"] == "polar_stereographic":
        cf_params["latitude_of_projection_origin"] = cf_params.get(
            "latitude_of_projection_origin", 90)

    # Here I'm looping over each dataset in the list returned by cfgrib
    for ds in Hxr:
        # Add some details
        # ----------------
        ds.attrs["model"] = self.model
        ds.attrs["product"] = self.product
        ds.attrs["description"] = self.DESCRIPTION
        ds.attrs["remote_grib"] = self.grib
        ds.attrs["local_grib"] = local_file
        ds.attrs["searchString"] = searchString

        # Attach CF grid mapping
        # ----------------------
        # http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#appendix-grid-mappings
        ds["gribfile_projection"] = None
        ds["gribfile_projection"].attrs = cf_params
        ds["gribfile_projection"].attrs["long_name"] = \
            f"{self.model.upper()} model grid projection"

        # Assign this grid_mapping for all variables
        for var in list(ds):
            if var == "gribfile_projection":
                continue
            ds[var].attrs["grid_mapping"] = "gribfile_projection"

    # ! DO NOT REMOVE GRIB FILES IF THE SOURCE IS LOCAL
    # ! (I know I already checked this; I am just so worried about
    # ! erasing my local data.)
    if self.grib_source != "local":
        if remove_grib:
            # Load the datasets into memory before removing the file
            Hxr = [ds.load() for ds in Hxr]
            _ = [ds.close() for ds in Hxr]

            # TODO: Forcefully close the files so the grib can be
            # removed (this is a Windows-specific requirement).
            # os.close(?WHAT IS THE FILE HANDLER?)
            #
            # https://docs.python.org/3/library/os.html#os.remove
            # "On Windows, attempting to remove a file that is in use
            # causes an exception to be raised; on Unix, the directory
            # entry is removed but the storage allocated to the file is
            # not made available until the original file is no longer
            # in use."
            # >> HOW DO I COMPLETELY CLOSE THE FILE OPENED BY CFGRIB??
""" if not sys.platform == "win32": # Removes file local_file.unlink() else: warnings.warn( "sorry, on windows I couldn't remove the file.") if len(Hxr) == 1: return Hxr[0] else: # cfgrib returned multiple hypercubes. try: # Handle case where HRRR subh returns multiple hypercubes (see #73) data_vars = set(itertools.chain(*[list(i) for i in Hxr])) data_vars.remove("gribfile_projection") Hxr = xr.concat(Hxr, dim="step", data_vars=data_vars) except: print( f"Note: Returning a list of [{len(Hxr)}] xarray.Datasets because cfgrib opened with multiple hypercubes." ) return Hxr
def get_HRRR(DATE, searchString, *,
             fxx=0,
             DATE_is_valid_time=False,
             remove_grib2=True,
             add_crs=True,
             **download_kwargs):
    """
    Download HRRR data and return as an xarray Dataset (or Datasets).

    Only request one `DATE` and one `fxx` (forecast lead time).

    Parameters
    ----------
    DATE : datetime
        A single datetime object.
    searchString : string
        A string representing a field or fields from the GRIB2 file.
        See more details in the ``download_hrrr`` docstring. Some examples:

        ================ ===============================================
        ``searchString`` Messages that will be downloaded
        ================ ===============================================
        ':TMP:2 m'       Temperature at 2 m.
        ':TMP:'          Temperature fields at all levels.
        ':500 mb:'       All variables on the 500 mb level.
        ':APCP:'         All accumulated precipitation fields.
        ':UGRD:10 m'     U wind component at 10 meters.
        ':(U|V)GRD:'     U and V wind components at all levels.
        ':.GRD:'         (Same as above.)
        ':(TMP|DPT):'    Temperature and dew point at all levels.
        ':(TMP|DPT|RH):' TMP, DPT, and relative humidity at all levels.
        ':REFC:'         Composite reflectivity.
        ':surface:'      All variables at the surface.
        ================ ===============================================

    fxx : int
        Forecast lead time. Default will get the analysis, F00.
    DATE_is_valid_time : bool
        False - (default) The DATE argument represents the model
        initialization datetime.
        True - The DATE argument represents the model valid time.
        This is handy when you want a specific forecast lead time that
        is valid at a certain date.
    remove_grib2 : bool
        True - (default) Delete the GRIB2 file after reading it into a
        Dataset. This requires a copy to memory, so it might slow
        things down.
        False - Keep the downloaded GRIB2 file. This might be a better
        option performance-wise, because it does not need to copy the
        data, but it keeps the file on disk. You would be responsible
        for removing files when you don't need them.
    add_crs : bool
        True - (default) Append the Cartopy coordinate reference system
        (crs) projection as an attribute to the Dataset.
    **download_kwargs :
        Any other keyword argument accepted by ``download_HRRR``:
        {model, field, SAVEDIR, dryrun, verbose}
    """
    inputs = locals()

    assert not hasattr(DATE, '__len__'), \
        "`DATE` must be a single datetime, not a list."
    assert not hasattr(fxx, '__len__'), \
        "`fxx` must be a single integer, not a list."

    if DATE_is_valid_time:
        # Change DATE to the model run initialization DATE so that, when
        # we take the forecast lead time offset into account, the
        # returned data will be valid for the DATE the user requested.
        DATE = DATE - timedelta(hours=fxx)

    # Download the GRIB2 file
    grib2file, url = download_HRRR(DATE, searchString, fxx=fxx,
                                   **download_kwargs)

    # Some extra backend kwargs for cfgrib
    backend_kwargs = {
        'indexpath': '',
        'read_keys': ['parameterName', 'parameterUnits'],
        'errors': 'raise'
    }

    # Use cfgrib.open_datasets, just in case there are multiple
    # "hypercubes" for what we requested.
    H = cfgrib.open_datasets(grib2file, backend_kwargs=backend_kwargs)

    # Create a cartopy projection object
    if add_crs:
        crs = get_crs(H[0])

    for ds in H:
        ds.attrs['get_HRRR inputs'] = inputs
        ds.attrs['url'] = url
        if add_crs:
            # Add the crs projection info as a Dataset attribute...
            ds.attrs['crs'] = crs
            # ...and add attrs for each variable for ease of access.
            for var in list(ds):
                ds[var].attrs['crs'] = crs

    if remove_grib2:
        H = [ds.copy(deep=True) for ds in H]
        os.remove(grib2file)

    if len(H) == 1:
        H = H[0]
    else:
        warnings.warn(
            '⚠ ALERT! Could not load grib2 data into a single xarray Dataset. '
            'You might consider refining your `searchString` '
            'if you are getting data you do not need.')

    return H
def le_indice_onda(modelo, tempo, parametro):
    if modelo == 'ERA5':
        # Reading the index file as txt
        findex = '/home/aline/Documents/Dados/indices/calculados/index_era5.txt'
        pseudo_pcs = pd.read_csv(findex,
                                 header=0,
                                 parse_dates=True,
                                 index_col='time',
                                 names=['time', 'indice'])
        # selecting 30 years of ERA5 that represent the present period:
        # from 1980 to 2020
        # reading wave file
        fwave = '/home/aline/Documents/Dados/ERA5/montly_mean_1979_2020.grib'
        dwave = cfgrib.open_datasets(fwave)[0]
        if parametro == 'WND':
            fwave = '/home/aline/Documents/Dados/ERA5/wind_monthly_1979_2020.grib'
            dwave = cfgrib.open_datasets(fwave)[0]
        dwave = dwave.sel(time=slice('1980-01-01', '2009-12-01'),
                          latitude=slice(90, 20),
                          longitude=slice(-101, 35))
    elif modelo == 'CMIP5':
        indexpath = '/home/aline/Documents/Dados/Jerry/GEOPOT_1000hPa/'
        # mypath = '/home/aline/Documents/Dados/Jerry/WW3_Marta/' + parametro
        if parametro == 'swh':
            mypath = '/home/aline/Documents/Dados/Jerry/WW3_Marta/Hs/'
            # fwave = mypath + 'presente_hs_mean_mensal2.nc'
            if tempo == 'presente':
                fwave = mypath + 'mensalmean_1980_2009.nc'
            elif tempo == 'futuro':
                fwave = mypath + 'mensalmean_2070_2099.nc'
                # fwave = mypath + 'futuro_hs_mean_mensal2.nc'
            dwave = xr.open_dataset(fwave).rename({'hs': 'swh'})
        elif parametro == 'mwp':
            mypath = '/home/aline/Documents/Dados/Jerry/WW3_Marta/T02/'
            # fwave = mypath + 'presente_hs_mean_mensal2.nc'
            if tempo == 'presente':
                fwave = mypath + 'mensalmean_1980_2009.nc'
            elif tempo == 'futuro':
                fwave = mypath + 'mensalmean_2070_2099.nc'
                # fwave = mypath + 'futuro_hs_mean_mensal2.nc'
            dwave = xr.open_dataset(fwave).rename({'t02': 'mwp'})
        elif parametro == 'mwd':
            mypath = '/home/aline/Documents/Dados/Jerry/WW3_Marta/DIR/'
            # fwave = mypath + 'presente_hs_mean_mensal2.nc'
            if tempo == 'presente':
                fwave = mypath + 'mensalmean_1980_2009.nc'
            elif tempo == 'futuro':
                fwave = mypath + 'mensalmean_2070_2099.nc'
                # fwave = mypath + 'futuro_hs_mean_mensal2.nc'
            dwave = xr.open_dataset(fwave).rename({'dir': 'mwd'})
        elif parametro == 'WND':
            mypath = '/home/aline/Documents/Dados/Jerry/WW3_Marta/WND/'
            # fwave = mypath + 'presente_hs_mean_mensal2.nc'
            if tempo == 'presente':
                fwave = mypath + 'mensalmean_1980_2009.nc'
            elif tempo == 'futuro':
                fwave = mypath + 'mensalmean_2070_2099.nc'
                # fwave = mypath + 'futuro_hs_mean_mensal2.nc'
            dwave = xr.open_dataset(fwave).rename({'uwnd': 'u10',
                                                   'vwnd': 'v10'})
        pseudo_pcs = xr.open_dataset(indexpath + 'index_historical1.nc')
        pseudo_pcs = pseudo_pcs.rename(
            {'__xarray_dataarray_variable__': 'indice'})
        pseudo_pcs = pd.DataFrame(data=pseudo_pcs.indice.values,
                                  index=pseudo_pcs.time.values,
                                  columns=['indice'])

    # I needed these transformations to compute the correlation:
    # the time dimension was not being recognized in the ppcs series
    # (it was stored as the index), and for some reason some of the
    # monthly means had their index on the second day of the month...
    datetime_indices = {
        'YEAR': pseudo_pcs.index.year,
        'MONTH': pseudo_pcs.index.month,
        'DAY': np.ones(len(pseudo_pcs), dtype=int)
    }
    pseudo_pcs.index = pd.to_datetime(datetime_indices)
    pseudo_pcs.index.name = 'time'
    return pseudo_pcs, dwave
    # rename the file so it doesn't get processed by another instance
    pathTo = path.replace('/D2', '/_D2')
    dbutils.fs.mv(path, pathTo)
    path = pathTo.replace('dbfs:', '/dbfs')
    print(path)
    break

# COMMAND ----------

import cfgrib
import xarray as xr

ds = cfgrib.open_datasets(path,
                          backend_kwargs={
                              'read_keys': ['pv'],
                              'indexpath': ''
                          })

# COMMAND ----------

import pandas as pd

x = ds[0].t.attrs['GRIB_pv']
nl = len(x)
if nl == 184:  # 91 vertical levels
    a = dict(zip(range(92), list(x[0:92])))
    b = dict(zip(range(92), list(x[92:184])))
elif nl == 276:  # 137 vertical levels
    a = dict(zip(range(138), list(x[0:138])))
    print('invalid log level: {}, setting to INFO by default'.format(LOGLEVEL),
          file=sys.stderr)
    logger.setLevel(logging.INFO)
logging.basicConfig()

# Create the inserter from this config
inserter = insert.InsertGFS(windb2, windb2_config)

# Insert the file; domainKey should be None if it wasn't set, which will
# create a new domain
for var in windb2_config.config['vars']:
    var_config = windb2_config.config['vars'][var]
    if isinstance(var_config['insert'], list):  # will fail if insert does not exist

        # Debug
        logger.debug(cfgrib.open_datasets(args.gribfile))

        # Calculate the level required
        backend_kwargs = {
            'filter_by_keys': {
                'typeOfLevel': var_config['cfgribTypeOfLevel']
            }
        }
        # if var_config['insert'][0] != 0:
        backend_kwargs['filter_by_keys']['level'] = var_config['insert'][0]

        # Open the GRIB2 file using cfgrib
        logger.debug('Trying to open variable: {}'.format(var))
        with xarray.open_dataset(args.gribfile,
                                 engine='cfgrib',
                                 backend_kwargs=backend_kwargs) as gribfile: