def grib_url(reftime, forecast):
    '''The URL to a GRIB for the given reference and forecast times.

    This method resolves the URL from one of two sources. The production
    NAM forecasts are hosted by the National Centers for Environmental
    Prediction (NCEP, <https://www.ncep.noaa.gov/>). After seven days, the
    forecasts are moved to an eleven month archive hosted by the National
    Climatic Data Center (NCDC, <https://www.ncdc.noaa.gov/>). Older
    forecasts will resolve to the NCDC URL, but they are unlikely to exist.

    Arguments:
        reftime (timestamp):
            The reference time.
        forecast (int):
            The forecast hour.

    Returns:
        str: A URL to a GRIB file.
    '''
    reftime = apollo.Timestamp(reftime).floor('6h')
    now = apollo.Timestamp('now').floor('6h')
    delta = now - reftime
    if pd.Timedelta(7, 'd') < delta:
        url_fmt = ARCHIVE_URL
    else:
        url_fmt = PROD_URL
    return url_fmt.format(ref=reftime, forecast=forecast)
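# A usage sketch. The exact URL depends on the PROD_URL and ARCHIVE_URL
# module constants, so only the shape of the call is shown here:
#
#     >>> grib_url('2018-01-01T06:00', forecast=12)     # doctest: +SKIP
#     'https://.../nam.t06z.awphys12.tm00.grib'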
def open_range(start, stop='now', on_miss='skip', **kwargs):
    '''Open a forecast for a range of reference times.

    Arguments:
        start (timestamp):
            The first time in the range.
        stop (timestamp):
            The last time in the range.
        on_miss (str):
            Determines the behavior on a cache miss:

            - ``'raise'``: Raise a :class:`CacheMiss` exception.
            - ``'download'``: Attempt to download the forecast.
            - ``'skip'``: Skip missing forecasts.
        **kwargs:
            Additional keyword arguments are forwarded to :func:`download`.

    Returns:
        xarray.Dataset:
            A single dataset containing all forecasts at the given
            reference times.
    '''
    start = apollo.Timestamp(start).floor('6h')
    stop = apollo.Timestamp(stop).floor('6h')
    reftimes = pd.date_range(start, stop, freq='6h')
    return open(reftimes, on_miss=on_miss, **kwargs)
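# Usage sketch: open all cached forecasts for the first week of 2018,
# silently skipping any missing reftimes (assumes the local Apollo data
# store is populated):
#
#     >>> ds = open_range('2018-01-01', '2018-01-08')   # doctest: +SKIP
#     >>> ds.reftime                                    # doctest: +SKIP
#     <xarray.DataArray 'reftime' ...>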
def open(start='2016-01-01', stop='now'):
    '''Load Georgia Power irradiance data between two timestamps.

    Args:
        start (timestamp-like):
            The timestamp of the first reftime which should be read. The
            default is January 1st, 2016; we have no earlier data.
        stop (timestamp-like):
            The timestamp of the last reftime which will be read. The
            default is the current time when the function is called.

    Returns:
        pd.DataFrame: The contents of the query.
    '''
    start = apollo.Timestamp(start)
    stop = apollo.Timestamp(stop)

    # The bounds are interpolated as numeric epoch seconds, not strings.
    with connect() as con:
        df = pd.read_sql_query(f'''
            SELECT * FROM IRRADIANCE
            WHERE TIMESTAMP BETWEEN {start.timestamp()} AND {stop.timestamp()}
        ''', con=con, index_col='TIMESTAMP', parse_dates=['TIMESTAMP'])

    df.index.name = 'time'
    df.index = df.index.tz_localize('UTC')
    return df
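# Usage sketch: load one month of irradiance observations (assumes the
# IRRADIANCE table exists in the configured database):
#
#     >>> df = open(start='2017-06-01', stop='2017-07-01')  # doctest: +SKIP
#     >>> df.index.tz                                       # doctest: +SKIP
#     <UTC>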
def open(reftimes='now', on_miss='raise', **kwargs):
    '''Open a forecast for one or more reference times.

    Arguments:
        reftimes (timestamp or sequence):
            The reference time(s) to open. The default is to load the most
            recent forecast.
        on_miss ('raise' or 'download' or 'skip'):
            Determines the behavior on a cache miss:

            - ``'raise'``: Raise a :class:`CacheMiss` exception.
            - ``'download'``: Attempt to download the missing forecast.
            - ``'skip'``: Skip missing forecasts. This mode will raise a
              :class:`CacheMiss` exception only if the resulting dataset
              would be empty.
        **kwargs:
            Additional keyword arguments are forwarded to :func:`download`.

    Returns:
        xarray.Dataset:
            A single dataset containing all forecasts at the given
            reference times.
    '''
    if on_miss not in ('raise', 'download', 'skip'):
        raise ValueError(f'Unknown cache miss strategy: {repr(on_miss)}')

    # A single timestamp-like argument is wrapped into a one-element list.
    try:
        reftimes = [apollo.Timestamp(reftimes).floor('6h')]
    except TypeError:
        reftimes = [apollo.Timestamp(r).floor('6h') for r in reftimes]

    paths = []
    for reftime in reftimes:
        path = nc_path(reftime)
        if path.exists():
            paths.append(path)
        elif on_miss == 'download':
            download(reftime)
            paths.append(path)
        elif on_miss == 'skip':
            continue
        else:
            raise CacheMiss(f'Missing forecast for reftime {reftime}')

    if len(paths) == 0:
        raise CacheMiss('No applicable forecasts were found')

    ds = _open_dataset(paths)

    # Reconstruct the `time` dimension by combining `reftime` and `forecast`.
    # - `reftime` is the time the forecast was made.
    # - `forecast` is the offset of the data relative to the reftime.
    # - `time` is the time being forecasted.
    time = ds.reftime + ds.forecast
    ds = ds.assign_coords(time=time)

    return ds
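# Usage sketch: open the most recent forecast, downloading it on a cache
# miss (network access and NCEP availability are assumed):
#
#     >>> ds = open(on_miss='download')                 # doctest: +SKIP
#     >>> 'time' in ds.coords                           # doctest: +SKIP
#     True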
def get_times(args):
    '''Load the target timestamps from CLI arguments.

    Returns an hourly range from ``args.start`` to ``args.stop``. If
    ``args.stop`` is not given, the range covers 24 hours from the start.
    '''
    import pandas as pd
    import apollo

    start = apollo.Timestamp(args.start).floor('1H')

    if args.stop is None:
        stop = start + pd.Timedelta(24, 'H')
    else:
        stop = apollo.Timestamp(args.stop).floor('1H')

    return apollo.date_range(start, stop, freq='1H')
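# Usage sketch with a hypothetical argparse namespace; ``start`` and
# ``stop`` are the only attributes this helper reads. The exact length of
# the result depends on whether apollo.date_range is endpoint-inclusive
# like pandas.date_range:
#
#     >>> from argparse import Namespace
#     >>> times = get_times(Namespace(start='2018-01-01', stop=None))  # doctest: +SKIP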
def reftimes(args):
    '''Iterate over the reftimes specified by the command-line arguments.

    Yields:
        Timestamp: A timestamp for the reftime.
    '''
    import pandas as pd
    import apollo

    import logging
    logger = logging.getLogger(__name__)

    # The ``reftime`` mode gives a single reftime.
    if args.reftime is not None:
        reftime = apollo.Timestamp(args.reftime)
        logger.info(f'selected the forecast for reftime {reftime}')
        yield reftime

    # The ``range`` mode gives the reftimes between two inclusive endpoints.
    elif args.range is not None:
        start = apollo.Timestamp(args.range[0])
        stop = apollo.Timestamp(args.range[1])
        step = pd.Timedelta(6, 'h')
        logger.info(
            f'selected the forecasts between {start} and {stop} (inclusive)')
        while start <= stop:
            yield start
            start += step

    # The ``count`` mode gives the N most recent reftimes.
    elif args.count is not None:
        n = args.count
        reftime = apollo.Timestamp('now').floor('6h')
        step = pd.Timedelta(6, 'h')
        logger.info(
            f'selected the {n} most recent forecasts (ending at {reftime})')
        for _ in range(n):
            yield reftime
            reftime -= step

    # The default is to use the most recent reftime.
    else:
        reftime = apollo.Timestamp('now').floor('6h')
        logger.info(f'selected the most recent forecast ({reftime})')
        yield reftime
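# Usage sketch: the generator is driven by a parsed argparse namespace.
# With a hypothetical ``count`` of 2, it yields the two most recent
# reftimes, newest first:
#
#     >>> from argparse import Namespace
#     >>> args = Namespace(reftime=None, range=None, count=2)
#     >>> list(reftimes(args))                          # doctest: +SKIP
#     [Timestamp('...'), Timestamp('...')]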
def main(argv=None):
    from pathlib import Path

    import apollo
    from apollo import nam

    import logging
    logger = logging.getLogger(__name__)

    args = parse_args(argv)
    logger.debug('called with the following options:')
    for arg, val in vars(args).items():
        logger.debug(f' {arg}: {val}')

    now = apollo.Timestamp('now')

    for (a, b) in dataset_pairs(args):
        time_a = apollo.Timestamp(a.reftime.data[0]).floor('6h')
        time_b = apollo.Timestamp(b.reftime.data[0]).floor('6h')
        vars_a = set(a.variables.keys())
        vars_b = set(b.variables.keys())
        path_a = nam.nc_path(time_a)
        path_backup = Path(f'{path_a}.bak')

        if vars_a - vars_b:
            diff = list(vars_a - vars_b)
            print(f'Variables found for {time_a} but not for {time_b}: {diff}')
            if not args.dry_run:
                fix = input(f'Delete these variables from {time_a} [y/N]? ')
                if fix.upper().startswith('Y'):
                    logger.info(f'backing up dataset to {path_backup}')
                    path_a.rename(path_backup)

                    logger.info(f'deleting spurious variables from {path_a}')
                    history = a.attrs['history']
                    if not history.endswith('\n'):
                        history += '\n'
                    for var in diff:
                        history += f'{now.isoformat()} Delete variable {var}\n'

                    ds = a.load()
                    ds = ds.assign_attrs(history=history)
                    ds = ds.drop(diff)
                    ds.to_netcdf(path_a)
                    assert path_a.exists()
def nc_path(reftime):
    '''The path to a netCDF file for the given reference time.

    NetCDF files are generated after forecasts are processed from the raw
    GRIB data. This file does not necessarily exist.

    Arguments:
        reftime (timestamp):
            The reference time.

    Returns:
        pathlib.Path: The local path to a netCDF file, which may not exist.
    '''
    reftime = apollo.Timestamp(reftime).floor('6h')
    prefix = f'nam.{reftime.year:04d}{reftime.month:02d}{reftime.day:02d}'
    filename = f'nam.t{reftime.hour:02d}z.awphys.tm00.nc'
    return apollo.path(f'NAM-NMM/{prefix}/{filename}')
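# Usage sketch. The path is rooted at the Apollo data store, so only its
# tail (which follows directly from the format strings above) is shown:
#
#     >>> nc_path('2018-01-01T06:00')                   # doctest: +SKIP
#     PosixPath('.../NAM-NMM/nam.20180101/nam.t06z.awphys.tm00.nc')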
def iter_available_forecasts():
    '''Iterate over the reftimes of available forecasts.

    Yields:
        pandas.Timestamp: The forecast's reference time, with UTC timezone.
    '''
    for day_dir in sorted(apollo.path('NAM-NMM').glob('nam.*')):
        name = day_dir.name  # Formatted like "nam.20180528".
        year = int(name[4:8])
        month = int(name[8:10])
        day = int(name[10:12])

        for path in sorted(day_dir.glob('nam.*')):
            name = path.name  # Formatted like "nam.t18z.awphys.tm00.nc".
            if not name.endswith('.nc'):
                continue
            hour = int(name[5:7])

            yield apollo.Timestamp(f'{year:04}-{month:02}-{day:02}T{hour:02}Z')
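# Usage sketch: collect the reftimes of everything in the local store
# (assumes the NAM-NMM directory exists; the timestamp shown matches the
# example filenames in the comments above):
#
#     >>> available = list(iter_available_forecasts())  # doctest: +SKIP
#     >>> available[0]                                  # doctest: +SKIP
#     Timestamp('2018-05-28 18:00:00+0000', tz='UTC')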
def grib_path(reftime, forecast):
    '''The path to a GRIB for the given reference and forecast times.

    GRIB forecasts are downloaded to this path and may be deleted once the
    forecast is processed into netCDF. This file does not necessarily exist.

    Arguments:
        reftime (timestamp):
            The reference time.
        forecast (int):
            The forecast hour.

    Returns:
        pathlib.Path: The local path for a GRIB file, which may not exist.
    '''
    reftime = apollo.Timestamp(reftime).floor('6h')
    prefix_fmt = 'nam.{ref.year:04d}{ref.month:02d}{ref.day:02d}'
    filename_fmt = 'nam.t{ref.hour:02d}z.awphys{forecast:02d}.tm00.grib'
    prefix = prefix_fmt.format(forecast=forecast, ref=reftime)
    filename = filename_fmt.format(forecast=forecast, ref=reftime)
    return apollo.path(f'NAM-NMM/{prefix}/{filename}')
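# Usage sketch; the path tail follows directly from the format strings
# above, while the root depends on the Apollo data store:
#
#     >>> grib_path('2018-01-01T06:00', forecast=12)    # doctest: +SKIP
#     PosixPath('.../NAM-NMM/nam.20180101/nam.t06z.awphys12.tm00.grib')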
def _process_grib(ds, reftime, forecast):
    '''Process a forecast loaded from GRIB.

    GRIB files contain a forecast for a specific forecast hour at a
    specific reftime, including all NAM data variables for the entire
    NAM 218 grid. This method trims the dataset to the subset of variables
    and geographic region that we are interested in, normalizes variable
    names and shapes to a more consistent format, and adds additional
    metadata.

    Arguments:
        ds (xarray.Dataset):
            The dataset to process.
        reftime (timestamp):
            The reference time associated with the dataset.
        forecast (int):
            The forecast hour associated with the dataset.

    Returns:
        xarray.Dataset: A processed dataset.
    '''
    features = {
        # Data variables
        'DLWRF_P0_L1_GLC0': 'DLWRF_SFC', 'DSWRF_P0_L1_GLC0': 'DSWRF_SFC',
        'PRES_P0_L1_GLC0': 'PRES_SFC', 'PRES_P0_L6_GLC0': 'PRES_MWSL',
        'PRES_P0_L7_GLC0': 'PRES_TRO', 'TCDC_P0_L200_GLC0': 'TCC_EATM',
        'TMP_P0_2L108_GLC0': 'TMP_SPDY', 'TMP_P0_L1_GLC0': 'TMP_SFC',
        'TMP_P0_L100_GLC0': 'TMP_ISBL', 'TMP_P0_L103_GLC0': 'TMP_HTGL',
        'TMP_P0_L7_GLC0': 'TMP_TRO', 'RH_P0_2L104_GLC0': 'RH_SIGY',
        'RH_P0_2L108_GLC0': 'RH_SPDY', 'RH_P0_L100_GLC0': 'RH_ISBL',
        'RH_P0_L4_GLC0': 'RH_0DEG', 'UGRD_P0_2L108_GLC0': 'UGRD_SPDY',
        'UGRD_P0_L100_GLC0': 'UGRD_ISBL', 'UGRD_P0_L103_GLC0': 'UGRD_HTGL',
        'UGRD_P0_L220_GLC0': 'UGRD_TOA', 'UGRD_P0_L6_GLC0': 'UGRD_MWSL',
        'UGRD_P0_L7_GLC0': 'UGRD_TRO', 'VGRD_P0_2L108_GLC0': 'VGRD_SPDY',
        'VGRD_P0_L100_GLC0': 'VGRD_ISBL', 'VGRD_P0_L103_GLC0': 'VGRD_HTGL',
        'VGRD_P0_L220_GLC0': 'VGRD_TOA', 'VGRD_P0_L6_GLC0': 'VGRD_MWSL',
        'VGRD_P0_L7_GLC0': 'VGRD_TRO', 'VIS_P0_L1_GLC0': 'VIS_SFC',
        'LHTFL_P0_L1_GLC0': 'LHTFL_SFC', 'SHTFL_P0_L1_GLC0': 'SHTFL_SFC',
        'REFC_P0_L200_GLC0': 'REFC_EATM', 'REFD_P0_L103_GLC0': 'REFD_HTGL',
        'REFD_P0_L105_GLC0': 'REFD_HYBL', 'VVEL_P0_L100_GLC0': 'VVEL_ISBL',
        'HGT_P0_L1_GLC0': 'HGT_SFC', 'HGT_P0_L100_GLC0': 'HGT_ISBL',
        'HGT_P0_L2_GLC0': 'HGT_CBL', 'HGT_P0_L220_GLC0': 'HGT_TOA',
        'HGT_P0_L245_GLC0': 'HGT_LLTW', 'HGT_P0_L4_GLC0': 'HGT_0DEG',
        'PWAT_P0_L200_GLC0': 'PWAT_EATM', 'TKE_P0_L100_GLC0': 'TKE_ISBL',

        # Coordinate variables
        'lv_HTGL1': 'z_HTGL1', 'lv_HTGL3': 'z_HTGL2', 'lv_HTGL6': 'z_HTGL3',
        'lv_ISBL0': 'z_ISBL', 'lv_SPDL2': 'z_SPDY', 'xgrid_0': 'x',
        'ygrid_0': 'y', 'gridlat_0': 'lat', 'gridlon_0': 'lon',
    }
    unwanted = [k for k in ds.variables.keys() if k not in features]
    ds = ds.drop(unwanted)
    ds = ds.rename(features)

    # Subset the geographic region to a square area centered around Macon, GA.
    ds = ds.isel(y=slice(63, 223, None), x=slice(355, 515, None))

    # Free memory from unused features and areas.
    ds = ds.copy(deep=True)

    # Compute the coordinates for x and y.
    x, y = proj_coords(ds.lat.data, ds.lon.data)
    x, y = x[0, :], y[:, 0]
    ds = ds.assign_coords(x=x, y=y)

    # Add a z dimension to variables that don't have one.
    for v in ds.data_vars:
        if ds[v].dims == ('y', 'x'):
            layer = ds[v].name.split('_')[1]
            ds[v] = ds[v].expand_dims(f'z_{layer}')

    # Create reftime and forecast dimensions.
    # Both are stored as integers with appropriate units.
    # The reftime dimension is hours since the Unix epoch (1970-01-01 00:00).
    # The forecast dimension is hours since the reftime.
    reftime = apollo.Timestamp(reftime).floor('6h')
    epoch = apollo.Timestamp('1970-01-01 00:00')
    delta_seconds = int((reftime - epoch).total_seconds())
    delta_hours = delta_seconds // 60 // 60
    ds = ds.assign_coords(
        reftime=delta_hours,
        forecast=forecast,
    )
    for v in ds.data_vars:
        ds[v] = ds[v].expand_dims(('reftime', 'forecast'))

    # Fix the z_SPDY coordinate.
    # The layer is defined in terms of bounds above and below. The dataset
    # expresses this as three coordinates: the index, lower bound, and upper
    # bound. We kept the index and now replace its values with the upper
    # bound, in Pascals.
    ds['z_SPDY'] = ds['z_SPDY'].assign_attrs(
        comment='The values give the upper bound of the layer; '
                'the lower bound is 3000 Pa less',
    )
    ds['z_SPDY'].data = np.array([3000, 6000, 9000, 12000, 15000, 18000])

    # Set metadata according to CF conventions:
    # http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/cf-conventions.html
    metadata = {
        # Data variables
        # TODO: The wind directions may be backwards; confirm with NCEP.
        'DLWRF_SFC': {'standard_name': 'downwelling_longwave_flux', 'units': 'W/m^2'},
        'DSWRF_SFC': {'standard_name': 'downwelling_shortwave_flux', 'units': 'W/m^2'},
        'HGT_0DEG': {'standard_name': 'geopotential_height', 'units': 'gpm'},
        'HGT_CBL': {'standard_name': 'geopotential_height', 'units': 'gpm'},
        'HGT_ISBL': {'standard_name': 'geopotential_height', 'units': 'gpm'},
        'HGT_LLTW': {'standard_name': 'geopotential_height', 'units': 'gpm'},
        'HGT_TOA': {'standard_name': 'geopotential_height', 'units': 'gpm'},
        'HGT_SFC': {'standard_name': 'geopotential_height', 'units': 'gpm'},
        'PRES_MWSL': {'standard_name': 'air_pressure', 'units': 'Pa'},
        'PRES_SFC': {'standard_name': 'air_pressure', 'units': 'Pa'},
        'PRES_TRO': {'standard_name': 'air_pressure', 'units': 'Pa'},
        'PWAT_EATM': {'standard_name': 'atmosphere_water_vapor_content', 'units': 'kg/m^2'},
        'REFC_EATM': {'standard_name': 'equivalent_reflectivity_factor', 'units': 'dBZ'},
        'REFD_HTGL': {'standard_name': 'equivalent_reflectivity_factor', 'units': 'dBZ'},
        'REFD_HYBL': {'standard_name': 'equivalent_reflectivity_factor', 'units': 'dBZ'},
        'RH_0DEG': {'standard_name': 'relative_humidity', 'units': '%'},
        'RH_ISBL': {'standard_name': 'relative_humidity', 'units': '%'},
        'RH_SIGY': {'standard_name': 'relative_humidity', 'units': '%'},
        'RH_SPDY': {'standard_name': 'relative_humidity', 'units': '%'},
        'LHTFL_SFC': {'standard_name': 'upward_latent_heat_flux', 'units': 'W/m^2'},
        'SHTFL_SFC': {'standard_name': 'upward_sensible_heat_flux', 'units': 'W/m^2'},
        'TCC_EATM': {'standard_name': 'cloud_area_fraction', 'units': '%'},
        'TKE_ISBL': {'standard_name': 'atmosphere_kinetic_energy_content', 'units': 'J/kg'},
        'TMP_HTGL': {'standard_name': 'air_temperature', 'units': 'K'},
        'TMP_ISBL': {'standard_name': 'air_temperature', 'units': 'K'},
        'TMP_SFC': {'standard_name': 'air_temperature', 'units': 'K'},
        'TMP_SPDY': {'standard_name': 'air_temperature', 'units': 'K'},
        'TMP_TRO': {'standard_name': 'air_temperature', 'units': 'K'},
        'UGRD_HTGL': {'standard_name': 'eastward_wind', 'units': 'm/s'},
        'UGRD_ISBL': {'standard_name': 'eastward_wind', 'units': 'm/s'},
        'UGRD_MWSL': {'standard_name': 'eastward_wind', 'units': 'm/s'},
        'UGRD_TOA': {'standard_name': 'eastward_wind', 'units': 'm/s'},
        'UGRD_SPDY': {'standard_name': 'eastward_wind', 'units': 'm/s'},
        'UGRD_TRO': {'standard_name': 'eastward_wind', 'units': 'm/s'},
        'VGRD_HTGL': {'standard_name': 'northward_wind', 'units': 'm/s'},
        'VGRD_ISBL': {'standard_name': 'northward_wind', 'units': 'm/s'},
        'VGRD_MWSL': {'standard_name': 'northward_wind', 'units': 'm/s'},
        'VGRD_TOA': {'standard_name': 'northward_wind', 'units': 'm/s'},
        'VGRD_SPDY': {'standard_name': 'northward_wind', 'units': 'm/s'},
        'VGRD_TRO': {'standard_name': 'northward_wind', 'units': 'm/s'},
        'VIS_SFC': {'standard_name': 'visibility', 'units': 'm'},
        'VVEL_ISBL': {
            'standard_name': 'vertical_air_velocity_expressed_as_tendency_of_pressure',
            'units': 'Pa/s',
        },

        # Coordinates
        # I couldn't find standard names for all of the layers...
        # I'm not sure if both forecast and reftime should be marked as axis T...
        'x': {'axis': 'X', 'standard_name': 'projection_x_coordinate', 'units': 'm'},
        'y': {'axis': 'Y', 'standard_name': 'projection_y_coordinate', 'units': 'm'},
        'z_CBL': {'axis': 'Z', 'standard_name': 'cloud_base'},
        'z_HYBL': {'axis': 'Z', 'standard_name': 'atmosphere_hybrid_sigma_pressure_coordinate'},
        'z_TOA': {'axis': 'Z', 'standard_name': 'toa'},
        'z_SFC': {'axis': 'Z', 'standard_name': 'surface'},
        'z_SIGY': {'axis': 'Z', 'standard_name': 'atmosphere_sigma_coordinate'},
        'z_TRO': {'axis': 'Z', 'standard_name': 'tropopause'},
        'z_SPDY': {'axis': 'Z', 'long_name': 'specified pressure difference', 'units': 'Pa'},
        'z_HTGL1': {'axis': 'Z', 'long_name': 'fixed_height_above_ground', 'units': 'm'},
        'z_HTGL2': {'axis': 'Z', 'long_name': 'fixed_height_above_ground', 'units': 'm'},
        'z_HTGL3': {'axis': 'Z', 'long_name': 'fixed_height_above_ground', 'units': 'm'},
        'z_ISBL': {'axis': 'Z', 'long_name': 'isobaric_level', 'units': 'Pa'},
        'z_0DEG': {'axis': 'Z', 'long_name': '0_degree_C_isotherm'},
        'z_EATM': {'axis': 'Z', 'long_name': 'entire_atmosphere'},
        'z_LLTW': {'axis': 'Z', 'long_name': 'lowest_level_of_the_wet_bulb_zero'},
        'z_MWSL': {'axis': 'Z', 'long_name': 'max_wind_surface_layer'},
        'forecast': {'axis': 'T', 'standard_name': 'forecast_period', 'units': 'hours'},
        'reftime': {'axis': 'T', 'standard_name': 'forecast_reference_time', 'units': 'hours since 1970-01-01T00:00'},
        'lat': {'standard_name': 'latitude', 'units': 'degree_north'},
        'lon': {'standard_name': 'longitude', 'units': 'degree_east'},
    }
    for v in metadata:
        ds[v] = ds[v].assign_attrs(metadata[v])

    now = apollo.Timestamp('now')
    ds.attrs['title'] = ('NAM-UGA, a subset of NAM-NMM '
                         'for solar forecasting research in Georgia')
    ds.attrs['history'] = (f'{now.isoformat()} '
                           'Initial conversion from GRIB files released by NCEP\n')

    ds = xr.decode_cf(ds)
    return ds
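# For reference, the integer reftime encoding above is whole hours since
# the Unix epoch. A minimal standalone check with plain pandas:
#
#     >>> import pandas as pd
#     >>> ts = pd.Timestamp('2018-01-01T06:00')
#     >>> int((ts - pd.Timestamp('1970-01-01')).total_seconds()) // 3600
#     420774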
def nam_figure(xrds, feature, reftime=0, forecast=0, level=0,
               title=None, detail='states', scale='10m', cmap='viridis',
               **kwargs):
    '''Plot a variable from a NAM forecast on a new figure.

    This function downloads shape files to draw the map. This may take a
    while the first time you use a specific combination of ``detail`` and
    ``scale``.

    Arguments:
        xrds (xarray.Dataset):
            The dataset of NAM data containing the feature to plot.
        feature (str):
            The name of the feature being plotted.
        reftime (int or timestamp):
            The reference time of the data being plotted. If given as an
            integer, it is interpreted as an index along the reftime axis.
            Otherwise, it is interpreted as a :class:`pandas.Timestamp`
            naming the reftime.
        forecast (int):
            The forecast hour of the data being plotted.
        level (int):
            The index along the z-axis of the data to plot.
        title (str or None):
            The title of the figure. The default title combines the reftime
            and forecast hour.
        detail (str):
            The level of detail of the map. Recognized values from most to
            least detailed include ``'states'``, ``'countries'``, and
            ``'coastlines'``.
        scale (str):
            The scale of the map details. The value ``'110m'`` means a
            scale of 1:110,000,000, so smaller values yield greater detail.
            Recognized values from most to least detailed include ``'10m'``,
            ``'50m'``, and ``'110m'``.
        cmap (matplotlib.colors.Colormap or str or None):
            The colormap for the plot.
        **kwargs:
            Forwarded to :meth:`xarray.DataArray.plot.contourf`.

    Returns:
        matplotlib.figure.Figure: The figure that was drawn.
    '''
    from apollo import nam

    # Select the feature.
    data = xrds[feature]

    # Select along the reftime, forecast, and z dimensions.
    if 'forecast' in data.dims:
        data = data.isel(forecast=forecast)
    if 'reftime' in data.dims:
        if isinstance(reftime, int):
            data = data.isel(reftime=reftime)
        else:
            data = data.sel(reftime=reftime)
    if len(data.dims) == 3:
        z_dim = data.dims[0]
        data = data.isel({z_dim: level})

    # Get the axes.
    fig = plt.figure()
    ax = plt.axes(projection=nam.NAM218)

    # Plot the data, forwarding extra keyword arguments as documented.
    contours = data.plot.contourf(ax=ax, transform=nam.NAM218, cmap=cmap,
                                  **kwargs)

    # Draw the map.
    feature = MAP_FEATURES[detail].with_scale(scale)
    ax.add_feature(feature, edgecolor='black', facecolor='none')
    ax.set_global()
    ax.autoscale()

    # Set the title.
    if title is None:
        reftime_iso = apollo.Timestamp(data.reftime.data).isoformat()
        plt.title(f'{reftime_iso}Z + {forecast} hours')
    else:
        plt.title(title)

    return fig
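# Usage sketch: plot surface downward shortwave radiation from a cached
# forecast (the variable name comes from the _process_grib rename table;
# the reftime is illustrative):
#
#     >>> ds = open('2018-01-01T06:00')                  # doctest: +SKIP
#     >>> fig = nam_figure(ds, 'DSWRF_SFC', forecast=6)  # doctest: +SKIP
#     >>> fig.savefig('dswrf.png')                       # doctest: +SKIP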