def __init__(
        self,
        start_date: datetime = None,
        rnday: Union[int, float, timedelta] = timedelta(days=5.),
        product='medium_range_mem1',
        verbose=False,
        fallback=True,
):
    """Set up an inventory of the latest National Water Model data.

    NetCDF files are saved to the system's temporary directory. The AWS
    data goes back 30 days; requesting hindcast data from before that
    needs a different data source.

    Args:
        start_date: Model start date. When ``None`` the value returned by
            ``nearest_cycle_date()`` is used; otherwise the date is
            localized and converted to UTC.
        rnday: Run length, either a ``timedelta`` or a number of days.
        product: Stored on ``self.product``.
        verbose: Accepted but not used by this method.
        fallback: Stored on ``self.fallback``.

    Raises:
        NotImplementedError: If ``start_date`` does not coincide with
            ``nearest_cycle_date(start_date)``.
    """
    if start_date is None:
        self.start_date = nearest_cycle_date()
    else:
        self.start_date = localize_datetime(start_date).astimezone(pytz.utc)

    if isinstance(rnday, timedelta):
        self.rnday = rnday
    else:
        self.rnday = timedelta(days=rnday)

    self.product = product
    self.fallback = fallback

    # One empty slot per requested time; the slots are created before the
    # alignment check below, exactly as the fetch step expects them.
    self._files = dict.fromkeys(self.timevector)

    # If start_date does not align with the NWM data a "zero" entry would
    # have to be injected at the beginning (the suggestion is to repeat the
    # first value). That special case is not handled yet, so bail out.
    # TODO: Put a "zero" entry if start_date and NWM dates do not align.
    if self.start_date != nearest_cycle_date(self.start_date):
        raise NotImplementedError(
            f'Model start_date={str(self.start_date)} is not a "pivot" '
            'time.')

    # Aligned start date: fetching the data is trivial.
    self._fetch_data()
def __init__(self, start_date=None, rnday=2, bbox=None):
    """Locate the HRRR datasets covering the requested time window.

    A slot is created for every time from ``start_date`` in steps of
    ``self.output_interval`` up to (but excluding)
    ``start_date + rnday + output_interval``. The pivot times in
    ``self.pivot_times`` are then visited and, for each, the forecast
    cycles are probed from the latest to the earliest. Every empty slot
    whose time appears in an opened dataset's date vector is assigned that
    dataset.

    Args:
        start_date: Start of the window. When ``None`` the value returned
            by ``nearest_cycle_date()`` is used; otherwise the date is
            localized and converted to UTC.
        rnday: Window length, either a ``timedelta`` or a number of days.
        bbox: Passed to ``self._modified_bbox``; the result is stored on
            ``self._bbox``.

    Raises:
        NotImplementedError: If ``start_date`` does not coincide with
            ``nearest_cycle_date(start_date)``.
        OSError: If opening a dataset fails with an errno other than
            -70 or -73.
        ValueError: If some requested times are not covered by any dataset.
    """
    if start_date is None:
        self.start_date = nearest_cycle_date()
    else:
        self.start_date = localize_datetime(start_date).astimezone(pytz.utc)
    if isinstance(rnday, timedelta):
        self.rnday = rnday
    else:
        self.rnday = timedelta(days=rnday)

    if self.start_date != nearest_cycle_date(self.start_date):
        raise NotImplementedError(
            'Argument start_date does not align with any HRRR cycle '
            'times.')

    self._files = {_: None for _ in np.arange(
        self.start_date,
        self.start_date + self.rnday + self.output_interval,
        self.output_interval
    ).astype(datetime)}

    for dt in self.pivot_times:
        if all(found is not None for found in self._files.values()):
            break
        base_url = BASE_URL + f'/{self.product}' + \
            f'/hrrr{pivot_time(dt).strftime("%Y%m%d")}'
        cycle_step = int(self.output_interval.total_seconds() / 3600)
        # Probe the most recent cycle first.
        for cycle in reversed(range(0, 24, cycle_step)):
            test_url = f'{base_url}/' + f'hrrr_sfc.t{cycle:02d}z'
            logger.info(f'Checking url: {test_url}')
            try:
                nc = Dataset(test_url)
            except OSError as e:
                # NOTE(review): -70 and -73 look like netCDF-C DAP error
                # codes (-70 apparently "not available on the server") --
                # confirm against the netCDF error code table.
                if e.errno == -70:
                    # This cycle cannot be used; try the previous one.
                    continue
                if e.errno != -73:
                    raise
                # errno -73 is treated as transient: keep trying the same
                # URL until it opens.
                # NOTE(review): this retry is unbounded and has no back-off;
                # a persistent failure hangs here. Consider a retry limit.
                while True:
                    try:
                        nc = Dataset(test_url)
                    except Exception:
                        continue
                    break
            else:
                logger.info('Success!')

            file_dates = self.get_nc_datevector(nc)
            for _datetime in reversed(list(self._files.keys())):
                # Never replace a slot that an earlier (more recent) dataset
                # has already filled.
                if _datetime in file_dates \
                        and self._files[_datetime] is None:
                    self._files[_datetime] = nc
            if all(found is not None for found in self._files.values()):
                break

    missing_records = [dt for dt, found in self._files.items()
                       if found is None]
    if missing_records:
        raise ValueError(f'No HRRR data for dates: {missing_records}.')
    self._bbox = self._modified_bbox(bbox)
def get_nc_datevector(self, nc, max_retries: int = 100):
    """Return the datetimes covered by ``nc`` as a numpy object array.

    The base date is parsed from the ``minimum`` attribute of the ``time``
    variable, which is read as ``<hours>z<ddMonYYYY>`` (the text before the
    first ``z`` is the hour, the text after the last ``z`` is the date).
    The returned vector starts one ``self.output_interval`` after the base
    date and stops before ``base_date + len(time) * output_interval``.

    Reading the dataset may raise ``RuntimeError`` (presumably a transient
    remote-read failure -- confirm); the read is then retried. The retry
    used to be implemented as unbounded recursion, which ended in a
    ``RecursionError`` that hid the real error. It is now a bounded loop
    that re-raises the last ``RuntimeError``.

    Args:
        nc: Open dataset exposing a ``time`` variable.
        max_retries: Number of additional attempts after the first failed
            one. Values below zero are treated as zero.

    Returns:
        Array of ``datetime`` objects.

    Raises:
        RuntimeError: If every attempt fails.
    """
    attempts = max(int(max_retries), 0) + 1
    for attempt in range(attempts):
        try:
            stamp = nc['time'].minimum.split('z')
            base_date = localize_datetime(
                datetime.strptime(stamp[-1], '%d%b%Y')
            ) + timedelta(hours=float(stamp[0]))
            return np.arange(
                base_date + self.output_interval,
                base_date + len(nc['time'][:]) * self.output_interval,
                self.output_interval).astype(datetime)
        except RuntimeError:
            # Out of attempts: surface the original error to the caller.
            if attempt == attempts - 1:
                raise
def add_data(
        self,
        time: datetime,
        element_id: str,
        flow: float,
        temperature: float = np.nan,
        salinity: float = np.nan,
):
    """Accumulate a flow record for one element at one time.

    The record is stored in ``self._data[time][element_id]`` with keys
    ``'flow'``, ``'temperature'`` and ``'salinity'``. ``time`` is localized
    and converted to UTC before being used as the key.

    If a record already exists for the same time and element, the flows
    are summed (NaN-aware). Temperature and salinity are reconciled as
    follows: NaN means "not specified", so a NaN on either side yields the
    other side's value; two differing non-NaN values are rejected.

    The existing record used to be looked up with the literal key
    ``'element_id'`` rather than the ``element_id`` argument, so it was
    never found: flows were overwritten instead of summed and conflicts
    were never detected.

    Args:
        time: Timestamp of the record.
        element_id: Identifier of the element receiving the flow.
        flow: Flow to add to the element at ``time``.
        temperature: Temperature of the flow, NaN when unspecified.
        salinity: Salinity of the flow, NaN when unspecified.

    Raises:
        NotImplementedError: If the stored and the new temperature (or
            salinity) are both specified and differ.
    """
    time = localize_datetime(time).astimezone(pytz.utc)
    data_for_element = self._data.get(time, {}).get(element_id, {})

    # TODO: What happens if we have two different flows that both are
    # assigned to the same element? Example: 100 m^3/s @ 1 psu then
    # another flow on the same element of 1 m^3/s @ 100 psu. How do we
    # combine these on a single element? Flow is just simple summation,
    def reconcile(stored, new, message):
        # NaN stands for "no value given": keep whichever side is defined.
        if np.isnan(stored):
            return new
        if np.isnan(new):
            return stored
        if stored != new:
            raise NotImplementedError(message)
        return new

    temperature = reconcile(
        data_for_element.get('temperature', np.nan),
        temperature,
        'Two different values of temperature for same '
        'time/element.')
    salinity = reconcile(
        data_for_element.get('salinity', np.nan),
        salinity,
        'Two different values of salinity for same time/element.')

    self._data.setdefault(time, {}).setdefault(element_id, {}).update(
        {
            'flow': np.nansum(
                [data_for_element.get('flow', np.nan), flow]),
            'temperature': temperature,
            'salinity': salinity
        })

    # Drop the cached ``_df`` attribute, if present, since the data changed.
    if hasattr(self, '_df'):
        del self._df
def start_date(self, start_date):
    """Store ``start_date`` on ``self._start_date`` and return it.

    ``None`` is stored unchanged. Any other value is localized and
    converted to UTC before being stored.

    NOTE(review): this looks like a property setter whose decorator is not
    visible here -- confirm.
    """
    # The raw value is stored first, so it stays in place should the
    # conversion below raise.
    self._start_date = start_date
    if start_date is None:
        return self._start_date
    localized = localize_datetime(start_date)
    self._start_date = localized.astimezone(pytz.utc)
    return self._start_date
def __call__(self, model_driver, nramp_ss: bool = False, dramp_ss=None,
             nprocs=-1):
    """Initializes the NWM data.

    Used by :class:`pyschism.driver.ModelDriver`

    Will pick the "best" data source based on start date and rnday. There
    are fringe cases not yet covered, for example when the data spans more
    than 1 data source.

    Args:
        model_driver: Driver whose ``param.opt.start_date``,
            ``param.core.rnday`` and ``model_domain.hgrid`` are read, and
            whose ``param.opt`` source/sink options are set.
        nramp_ss: Ramp-up flag for source/sinks. When non-zero it is
            copied to ``model_driver.param.opt.nramp_ss``.
        dramp_ss: Ramp-up period in days. Only applied when ``nramp_ss``
            is non-zero and ``dramp_ss`` is not ``None``.
        nprocs: Number of worker processes for the streamflow lookup.
            ``None`` or any value below 1 (the default is -1) uses
            ``cpu_count()``.

    Raises:
        NotImplementedError: If the start date is more than 30 days before
            ``pivot_time()`` (hindcast).
    """
    super().__init__(model_driver.param.opt.start_date,
                     model_driver.param.core.rnday)
    logger.info('Initializing NationalWaterModel data.')
    pairings = NWMElementPairings(model_driver.model_domain.hgrid)

    # hindcast
    if self.start_date < pivot_time() - timedelta(days=30):
        raise NotImplementedError(
            'Hindcast (start_date older than 30 days) is not implemented.')

    # forecast
    logger.info('Fetching NWM data.')
    inventory = AWSDataInventory(start_date=self.start_date,
                                 rnday=self.rnday,
                                 product='medium_range_mem1',
                                 verbose=False)

    logger.info('Launching streamflow lookup...')
    source_indexes, sinks_indexes = inventory.get_nc_pairing_indexes(
        pairings)
    files = inventory.files
    processes = cpu_count() if nprocs is None or nprocs < 1 \
        else int(nprocs)
    start = time()
    with Pool(processes=processes) as pool:
        sources = pool.starmap(
            streamflow_lookup,
            [(file, source_indexes) for file in files])
        sinks = pool.starmap(
            streamflow_lookup,
            [(file, sinks_indexes) for file in files])
    # Leaving the ``with`` block terminates the pool; join() is only valid
    # after that and waits for the workers to exit.
    pool.join()
    logger.info(f'streamflow lookup took {time()-start}...')

    # Pass NWM data to Hydrology class.
    logger.info('Generating per-element hydrologic timeseries...')
    start = time()
    hydro = Hydrology(self.start_date, self.rnday)
    for i, file in enumerate(files):
        # Only the valid-time attribute is needed; close the file right
        # after reading it so handles do not pile up.
        with Dataset(file) as nc:
            _time = localize_datetime(
                datetime.strptime(nc.model_output_valid_time,
                                  "%Y-%m-%d_%H:%M:%S"))
        for j, element_id in enumerate(pairings.sources.keys()):
            hydro.add_data(_time, element_id, sources[i][j], -9999, 0.)
        # Sinks are stored as negative flows, without temperature or
        # salinity.
        for k, element_id in enumerate(pairings.sinks.keys()):
            hydro.add_data(_time, element_id, -sinks[i][k])
    logger.info('Generating per-element hydrologic timeseries took '
                f'{time() - start}.')

    # aggregate timeseries
    if self.aggregation_radius is not None:
        aggregation_radius = float(self.aggregation_radius)
        logger.info('Aggregating hydrology timeseries/elements using a '
                    f'radius of {aggregation_radius} meters.')
        start = time()
        hydro.aggregate_by_radius(model_driver.model_domain.hgrid,
                                  aggregation_radius)
        logger.info(f'Aggregating NWM elements took {time() - start}.')

    # turn 'on' the source/sink system in SCHISM.
    model_driver.param.opt.if_source = 1

    # set the ramps if applicable, no ramp by default.
    if int(nramp_ss) != 0:
        # nramp_ss: needed if if_source=1; ramp-up flag for source/sinks
        model_driver.param.opt.nramp_ss = nramp_ss
        # dramp_ss: needed if if_source=1; ramp-up period in days
        if dramp_ss is not None:
            model_driver.param.opt.dramp_ss = dramp_ss
def fetch_data(
        self,
        start_date: datetime = None,
        rnday: Union[float, timedelta] = 4,
        air: bool = True,
        prc: bool = True,
        rad: bool = True,
        bbox: Bbox = None,
):
    """Fetches HRRR data from NOMADS server.

    Builds an ``HRRRInventory`` for the requested window and writes up to
    three NETCDF3_CLASSIC files into ``self.tmpdir``, one per enabled
    component, named ``<air|prc|rad>_<product>_<start_date>.nc``. Afterwards
    ``self.resource`` points at ``self.tmpdir`` and the ``air``, ``prc`` and
    ``rad`` components are rebuilt from ``self.fields``.

    The ``prate`` variable used to be tagged with the standard name
    ``air_pressure_at_sea_level`` (a copy/paste slip); it is now tagged
    ``precipitation_flux``.

    Args:
        start_date: Start of the window. When ``None`` the value returned
            by ``nearest_cycle_date()`` is used; otherwise the date is
            localized and converted to UTC.
        rnday: Window length, either a ``timedelta`` or a number of days.
        air: Write the air file only when this is exactly ``True``.
        prc: Write the precipitation file only when this is exactly
            ``True``.
        rad: Write the radiation file only when this is exactly ``True``.
        bbox: Passed through to ``HRRRInventory``.
    """
    logger.info('Fetching HRRR data.')
    if start_date is None:
        self.start_date = nearest_cycle_date()
    else:
        self.start_date = localize_datetime(start_date).astimezone(pytz.utc)
    if isinstance(rnday, timedelta):
        self.rnday = rnday
    else:
        self.rnday = timedelta(days=rnday)

    inventory = HRRRInventory(
        self.start_date,
        self.rnday + self.output_interval,
        bbox
    )
    nx_grid, ny_grid = inventory.xy_grid()

    def write_sflux_file(prefix, var_types, dtype, attributes):
        # Write one component file: shared header, fields, then metadata.
        filename = f"{prefix}_{inventory.product}_" \
                   f"{str(self.start_date)}.nc"
        with Dataset(self.tmpdir / filename, 'w',
                     format='NETCDF3_CLASSIC') as dst:
            # global attributes
            dst.setncatts({"Conventions": "CF-1.0"})
            # dimensions
            dst.createDimension('nx_grid', nx_grid.shape[1])
            dst.createDimension('ny_grid', ny_grid.shape[0])
            dst.createDimension('time', None)
            # lon
            dst.createVariable('lon', 'f4', ('ny_grid', 'nx_grid'))
            dst['lon'].long_name = "Longitude"
            dst['lon'].standard_name = "longitude"
            dst['lon'].units = "degrees_east"
            dst['lon'][:] = nx_grid
            # lat
            dst.createVariable('lat', 'f4', ('ny_grid', 'nx_grid'))
            dst['lat'].long_name = "Latitude"
            dst['lat'].standard_name = "latitude"
            dst['lat'].units = "degrees_north"
            dst['lat'][:] = ny_grid
            # time
            dst.createVariable('time', 'f4', ('time',))
            dst['time'].long_name = 'Time'
            dst['time'].standard_name = 'time'
            date = pivot_time(self.start_date)
            dst['time'].units = f'days since {date.year}-{date.month}'\
                                f'-{date.day} 00:00'\
                                f'{date.tzinfo}'
            dst['time'].base_date = (date.year, date.month, date.day, 0)
            dst['time'][:] = inventory.get_sflux_timevector()
            # fields
            for var in var_types:
                dst.createVariable(
                    var, dtype, ('time', 'ny_grid', 'nx_grid'))
                logger.info(f'Put field {var}')
                inventory.put_sflux_field(
                    getattr(self, f'{var}_name'), dst, var)
            # per-variable metadata, applied once the data is in place
            for name, (long_name, standard_name, units) \
                    in attributes.items():
                dst[name].long_name = long_name
                dst[name].standard_name = standard_name
                dst[name].units = units

    # NOTE(review): the air fields are stored as 'f4' while prc/rad use the
    # Python ``float`` type (double precision). This is kept as found;
    # confirm whether the difference is intended.
    if air is True:
        write_sflux_file('air', AirComponent.var_types, 'f4', {
            'prmsl': ("Pressure reduced to MSL",
                      "air_pressure_at_sea_level",
                      "Pa"),
            'spfh': ("Surface Specific Humidity (2m AGL)",
                     "specific_humidity",
                     "1"),
            'stmp': ("Surface Air Temperature (2m AGL)",
                     "air_temperature",
                     "K"),
            'uwind': ("Surface Eastward Air Velocity (10m AGL)",
                      "eastward_wind",
                      "m/s"),
            'vwind': ("Surface Northward Air Velocity (10m AGL)",
                      "northward_wind",
                      "m/s"),
        })

    if prc is True:
        write_sflux_file('prc', PrcComponent.var_types, float, {
            'prate': ("Surface Precipitation Rate",
                      "precipitation_flux",
                      "kg/m^2/s"),
        })

    if rad is True:
        write_sflux_file('rad', RadComponent.var_types, float, {
            'dlwrf': ("Downward Long Wave Radiation Flux",
                      "surface_downwelling_longwave_flux_in_air",
                      "W/m^2"),
            'dswrf': ("Downward Short Wave Radiation Flux",
                      "surface_downwelling_shortwave_flux_in_air",
                      "W/m^2"),
        })

    self.resource = self.tmpdir
    self.air = AirComponent(self.fields)
    self.prc = PrcComponent(self.fields)
    self.rad = RadComponent(self.fields)
def get_sflux_timevector(self):
    """Return the inventory times as fractional days.

    Each key of ``self._files`` is localized and expressed as a number of
    days elapsed since ``pivot_time`` of the earliest key.

    Returns:
        List of day offsets, in the iteration order of ``self._files``.
    """
    stamps = list(self._files.keys())
    reference = pivot_time(np.min(stamps))
    offsets = []
    for stamp in stamps:
        elapsed = localize_datetime(stamp) - reference
        offsets.append(elapsed / timedelta(days=1))
    return offsets
def write(self, outdir: Union[str, os.PathLike], level: int,
          overwrite: bool = False, start_date: datetime = None,
          rnday: Union[float, int, timedelta] = None):
    """Write the component's variables to ``sflux_<name>_<level>.NNNN.nc``.

    One NETCDF3_CLASSIC file is created in ``outdir`` for every field
    returned by ``get_fields(start_date, rnday)`` of the first variable
    type. Each file receives the lon/lat grids and time vector of that
    first variable, followed by one data variable per entry of
    ``self.var_types``.

    Args:
        outdir: Directory receiving the files.
        level: Must be 1 or 2; used in the file names.
        overwrite: Accepted but not consulted by this method.
        start_date: Passed to ``get_fields``. When ``None`` the earliest
            entry of the first variable's ``datetime_array`` is used. A
            naive datetime is localized to UTC.
        rnday: Passed to ``get_fields``.

    NOTE(review): every field yielded by ``get_fields`` is assigned to the
    whole variable in turn, so only the last one remains in each file, and
    all files receive the same content -- confirm this is intended.
    """
    assert level in [1, 2]
    outdir = pathlib.Path(outdir)

    if start_date is None:
        for vartype in self.var_types:
            candidate = getattr(self, vartype)
            if start_date is not None:
                continue
            start_date = np.min(candidate.datetime_array)

    if start_date is not None:
        # naive condition
        tz = start_date.tzinfo
        if tz is None or tz.utcoffset(start_date) is None:
            start_date = pytz.timezone('UTC').localize(start_date)
        timezone = start_date.tzinfo

    # One output file per field of the first variable type.
    first_fields = getattr(
        self, self.var_types[0]).get_fields(start_date, rnday)
    stacks = [f"sflux_{self.name}_{level}.{index+1:04d}.nc"
              for index, _ in enumerate(first_fields)]

    grid_dims = ('ny_grid', 'nx_grid')
    for filename in stacks:
        with Dataset(outdir / filename, 'w',
                     format='NETCDF3_CLASSIC') as dst:
            dst.setncatts({"Conventions": "CF-1.0"})

            # dimensions
            variable = getattr(self, self.var_types[0])
            dst.createDimension('nx_grid', variable.nx_grids[0].shape[1])
            dst.createDimension('ny_grid', variable.ny_grids[0].shape[0])
            dst.createDimension('time', None)

            # lon
            dst.createVariable('lon', 'f4', grid_dims)
            dst['lon'].long_name = "Longitude"
            dst['lon'].standard_name = "longitude"
            dst['lon'].units = "degrees_east"
            dst['lon'][:] = variable.nx_grids[0]

            # lat
            dst.createVariable('lat', 'f4', grid_dims)
            dst['lat'].long_name = "Latitude"
            dst['lat'].standard_name = "latitude"
            dst['lat'].units = "degrees_north"
            dst['lat'][:] = variable.ny_grids[0]

            # time, relative to the first reference datetime
            reference = list(variable.reference_datetimes)[0]
            reference = reference.astimezone(timezone)
            dst.createVariable('time', 'f4', ('time',))
            dst['time'].long_name = 'Time'
            dst['time'].standard_name = 'time'
            dst['time'].units = (
                f'days since {reference.year}-{reference.month}-'
                f'{reference.day} 00:00:00+{reference.tzinfo}')
            dst['time'].base_date = (
                reference.year, reference.month, reference.day, 0)
            one_day = timedelta(days=1)
            dst['time'][:] = [
                (localize_datetime(stamp) - reference) / one_day
                for stamp in variable.datetime_array]

            # data variables
            for vartype in self.var_types:
                variable = getattr(self, vartype)
                dst.createVariable(
                    variable.name, 'f4', ('time',) + grid_dims)
                for field in variable.get_fields(start_date, rnday):
                    dst[variable.name][:] = field
                dst[variable.name].long_name = variable.long_name
                dst[variable.name].standard_name = variable.standard_name
                dst[variable.name].units = variable.units