def __call__(self, model_driver, nramp_ss: bool = False, dramp_ss=None,
             nprocs=-1):
    """Initializes the NWM data.

    Used by :class:`pyschism.driver.ModelDriver`.

    Will pick the "best" data source based on start date and rnday.
    There are fringe cases not yet covered, for example when the data
    spans more than one data source.
    """
    super().__init__(model_driver.param.opt.start_date,
                     model_driver.param.core.rnday)
    logger.info('Initializing NationalWaterModel data.')
    pairings = NWMElementPairings(model_driver.model_domain.hgrid)

    # forecast
    if self.start_date >= pivot_time() - timedelta(days=30):
        logger.info('Fetching NWM data.')
        inventory = AWSDataInventory(
            start_date=self.start_date,
            rnday=self.rnday,
            product='medium_range_mem1',
            verbose=False)

        logger.info('Launching streamflow lookup...')
        source_indexes, sinks_indexes = inventory.get_nc_pairing_indexes(
            pairings)
        start = time()
        # starmap blocks until all workers are done; the context manager
        # tears the pool down on exit, so no explicit join is needed.
        with Pool(processes=cpu_count()) as pool:
            sources = pool.starmap(
                streamflow_lookup,
                [(file, source_indexes) for file in inventory.files])
            sinks = pool.starmap(
                streamflow_lookup,
                [(file, sinks_indexes) for file in inventory.files])
        logger.info(f'streamflow lookup took {time()-start}...')

        # Pass NWM data to the Hydrology class.
        logger.info('Generating per-element hydrologic timeseries...')
        start = time()
        hydro = Hydrology(self.start_date, self.rnday)
        for i, file in enumerate(inventory.files):
            with Dataset(file) as nc:
                _time = localize_datetime(datetime.strptime(
                    nc.model_output_valid_time, "%Y-%m-%d_%H:%M:%S"))
            for j, element_id in enumerate(pairings.sources.keys()):
                hydro.add_data(_time, element_id, sources[i][j], -9999, 0.)
            for k, element_id in enumerate(pairings.sinks.keys()):
                hydro.add_data(_time, element_id, -sinks[i][k])
        logger.info('Generating per-element hydrologic timeseries took '
                    f'{time() - start}.')

    # hindcast
    else:
        raise NotImplementedError(
            'Hindcast (start date more than 30 days before the pivot '
            'time) is not implemented.')

    # aggregate timeseries
    if self.aggregation_radius is not None:
        aggregation_radius = float(self.aggregation_radius)
        logger.info('Aggregating hydrology timeseries/elements using a '
                    f'radius of {aggregation_radius} meters.')
        start = time()
        hydro.aggregate_by_radius(model_driver.model_domain.hgrid,
                                  aggregation_radius)
        logger.info(f'Aggregating NWM elements took {time() - start}.')

    # turn 'on' the source/sink system in SCHISM.
    model_driver.param.opt.if_source = 1

    # set the ramps if applicable; no ramp by default.
    if int(nramp_ss) != 0:
        # nramp_ss = 1  # needed if if_source=1; ramp-up flag for
        #                 source/sinks
        model_driver.param.opt.nramp_ss = nramp_ss
        # dramp_ss = 2  # needed if if_source=1; ramp-up period in days
        if dramp_ss is not None:
            model_driver.param.opt.dramp_ss = dramp_ss
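
# A minimal sketch of the worker that the Pool above fans out, assuming
# each NWM channel file exposes a `streamflow` variable indexed by reach
# and that `indexes` holds, per paired element, the reach indexes feeding
# it. Illustrative only; the actual `streamflow_lookup` lives elsewhere
# in the library and may differ.
def streamflow_lookup_sketch(file, indexes):
    from netCDF4 import Dataset
    with Dataset(file) as nc:
        streamflow = nc['streamflow'][:]
        # one aggregated discharge value per paired element
        return [float(streamflow[idxs].sum()) for idxs in indexes]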
def _fetch_data(self):
    # TODO: this needs to be checked if there is no "zero" alignment
    requested_time = pivot_time(self.start_date)

    # This download could be more robust. Right now it tries the
    # "nowcast" first and, since that is expected to fail (NWM lags the
    # nowcast by 6 hours), immediately falls back to the previous
    # nowcast. It has been kept like this as a reminder that the
    # "nowcast" never exists for NWM, although that is not true for
    # other models with shorter lags (for example GFS).
    nwm_time = requested_time.strftime('%Y%m%d')
    res = self.s3.list_objects_v2(
        Bucket=self.bucket,
        Delimiter='/',
        Prefix=f'nwm.{nwm_time}/{self.product}/')

    if 'Contents' in res:
        # contents will be empty when t00z is called.
        data = list(reversed(sorted([
            data['Key'] for data in res['Contents']
            if 'channel' in data['Key']])))
    else:
        data = []

    # In practice the request above always comes back empty, so the
    # previous day's cycle must be queried as well.
    nwm_time = (requested_time - timedelta(days=1)).strftime('%Y%m%d')
    res = self.s3.list_objects_v2(
        Bucket=self.bucket,
        Delimiter='/',
        Prefix=f'nwm.{nwm_time}/{self.product}/')
    data.extend(list(reversed(sorted([
        data['Key'] for data in res['Contents']
        if 'channel' in data['Key']]))))

    nearest_cycle = int(6 * np.floor(requested_time.hour / 6))
    previous_cycle = (nearest_cycle - 6) % 24
    if (f't{nearest_cycle:02d}z' not in data[0]
            and f't{previous_cycle:02d}z' in data[0]):
        if self.fallback is True:
            warnings.warn(
                f'NWM data for cycle t{nearest_cycle:02d}z is not yet '
                'on the server, defaulting to previous cycle.')
        else:
            raise IOError('Unknown error while fetching NWM data.')

    for d in data:
        # decompose the key into base date + cycle hour + forecast lead
        base_date_str = d.split("/")[0].split(".")[-1]
        timedelta_str = d.split(
            'channel_rt_1.')[-1].split('.')[0].strip('f')
        file_datetime = (
            datetime.strptime(base_date_str, '%Y%m%d')
            + timedelta(hours=int(d.split('.')[2].strip('tz')))
            + timedelta(hours=float(timedelta_str)))
        if file_datetime in self._files:
            file = self._files[file_datetime]
            if file is None:
                filename = pathlib.Path(self.tmpdir.name) / d
                filename.parent.mkdir(parents=True, exist_ok=True)
                with open(filename, 'wb') as f:
                    logger.info(f'Downloading file {d}.')
                    self.s3.download_fileobj(self.bucket, d, f)
                self._files[file_datetime] = filename

    for dt, file in self._files.items():
        if file is None:
            raise IOError(f'No NWM data for time {str(dt)}.')
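
# Worked example of the key-to-datetime decomposition used above, with a
# hypothetical medium_range_mem1 key (the exact key is illustrative):
def _nwm_key_datetime_example():
    from datetime import datetime, timedelta
    d = ('nwm.20230115/medium_range_mem1/'
         'nwm.t06z.medium_range.channel_rt_1.f003.conus.nc')
    base_date = datetime.strptime(
        d.split('/')[0].split('.')[-1], '%Y%m%d')                # 20230115
    cycle = timedelta(hours=int(d.split('.')[2].strip('tz')))    # t06z -> 6 h
    lead = timedelta(hours=float(
        d.split('channel_rt_1.')[-1].split('.')[0].strip('f')))  # f003 -> 3 h
    return base_date + cycle + lead  # datetime(2023, 1, 15, 9, 0)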
def pivot_time(self):
    # compute once and cache on the instance
    if not hasattr(self, '_pivot_time'):
        self._pivot_time = pivot_time()
    return self._pivot_time
def fetch_data(
        self,
        start_date: datetime = None,
        rnday: Union[float, timedelta] = 4,
        air: bool = True,
        prc: bool = True,
        rad: bool = True,
        bbox: Bbox = None,
):
    """Fetches HRRR data from the NOMADS server."""
    logger.info('Fetching HRRR data.')
    self.start_date = nearest_cycle_date() if start_date is None \
        else localize_datetime(start_date).astimezone(pytz.utc)
    self.rnday = rnday if isinstance(rnday, timedelta) \
        else timedelta(days=rnday)
    inventory = HRRRInventory(
        self.start_date,
        self.rnday + self.output_interval,
        bbox)
    nx_grid, ny_grid = inventory.xy_grid()

    if air is True:
        with Dataset(
                self.tmpdir / f"air_{inventory.product}_"
                f"{str(self.start_date)}.nc",
                'w', format='NETCDF3_CLASSIC') as dst:
            # global attributes
            dst.setncatts({"Conventions": "CF-1.0"})
            # dimensions
            dst.createDimension('nx_grid', nx_grid.shape[1])
            dst.createDimension('ny_grid', ny_grid.shape[0])
            dst.createDimension('time', None)
            # variables
            # lon
            dst.createVariable('lon', 'f4', ('ny_grid', 'nx_grid'))
            dst['lon'].long_name = "Longitude"
            dst['lon'].standard_name = "longitude"
            dst['lon'].units = "degrees_east"
            dst['lon'][:] = nx_grid
            # lat
            dst.createVariable('lat', 'f4', ('ny_grid', 'nx_grid'))
            dst['lat'].long_name = "Latitude"
            dst['lat'].standard_name = "latitude"
            dst['lat'].units = "degrees_north"
            dst['lat'][:] = ny_grid
            # time
            dst.createVariable('time', 'f4', ('time',))
            dst['time'].long_name = 'Time'
            dst['time'].standard_name = 'time'
            date = pivot_time(self.start_date)
            dst['time'].units = f'days since {date.year}-{date.month}' \
                                f'-{date.day} 00:00' \
                                f'{date.tzinfo}'
            dst['time'].base_date = (date.year, date.month, date.day, 0)
            dst['time'][:] = inventory.get_sflux_timevector()
            for var in AirComponent.var_types:
                dst.createVariable(
                    var, 'f4', ('time', 'ny_grid', 'nx_grid'))
                logger.info(f'Put field {var}')
                inventory.put_sflux_field(
                    getattr(self, f'{var}_name'), dst, var)
            # prmsl
            dst['prmsl'].long_name = "Pressure reduced to MSL"
            dst['prmsl'].standard_name = "air_pressure_at_sea_level"
            dst['prmsl'].units = "Pa"
            # spfh
            dst['spfh'].long_name = "Surface Specific Humidity (2m AGL)"
            dst['spfh'].standard_name = "specific_humidity"
            dst['spfh'].units = "1"
            # stmp
            dst['stmp'].long_name = "Surface Air Temperature (2m AGL)"
            dst['stmp'].standard_name = "air_temperature"
            dst['stmp'].units = "K"
            # uwind
            dst['uwind'].long_name = "Surface Eastward Air Velocity " \
                                     "(10m AGL)"
            dst['uwind'].standard_name = "eastward_wind"
            dst['uwind'].units = "m/s"
            # vwind
            dst['vwind'].long_name = "Surface Northward Air Velocity " \
                                     "(10m AGL)"
            dst['vwind'].standard_name = "northward_wind"
            dst['vwind'].units = "m/s"

    if prc is True:
        with Dataset(
                self.tmpdir / f"prc_{inventory.product}_"
                f"{str(self.start_date)}.nc",
                'w', format='NETCDF3_CLASSIC') as dst:
            # global attributes
            dst.setncatts({"Conventions": "CF-1.0"})
            # dimensions
            dst.createDimension('nx_grid', nx_grid.shape[1])
            dst.createDimension('ny_grid', ny_grid.shape[0])
            dst.createDimension('time', None)
            # lon
            dst.createVariable('lon', 'f4', ('ny_grid', 'nx_grid'))
            dst['lon'].long_name = "Longitude"
            dst['lon'].standard_name = "longitude"
            dst['lon'].units = "degrees_east"
            dst['lon'][:] = nx_grid
            # lat
            dst.createVariable('lat', 'f4', ('ny_grid', 'nx_grid'))
            dst['lat'].long_name = "Latitude"
            dst['lat'].standard_name = "latitude"
            dst['lat'].units = "degrees_north"
            dst['lat'][:] = ny_grid
            # time
            dst.createVariable('time', 'f4', ('time',))
            dst['time'].long_name = 'Time'
            dst['time'].standard_name = 'time'
            date = pivot_time(self.start_date)
            dst['time'].units = f'days since {date.year}-{date.month}' \
                                f'-{date.day} 00:00' \
                                f'{date.tzinfo}'
            dst['time'].base_date = (date.year, date.month, date.day, 0)
            dst['time'][:] = inventory.get_sflux_timevector()
            for var in PrcComponent.var_types:
                dst.createVariable(
                    var, 'f4', ('time', 'ny_grid', 'nx_grid'))
                logger.info(f'Put field {var}')
                inventory.put_sflux_field(
                    getattr(self, f'{var}_name'), dst, var)
            # prate
            dst['prate'].long_name = "Surface Precipitation Rate"
            dst['prate'].standard_name = "precipitation_flux"
            dst['prate'].units = "kg/m^2/s"

    if rad is True:
        with Dataset(
                self.tmpdir / f"rad_{inventory.product}_"
                f"{str(self.start_date)}.nc",
                'w', format='NETCDF3_CLASSIC') as dst:
            # global attributes
            dst.setncatts({"Conventions": "CF-1.0"})
            # dimensions
            dst.createDimension('nx_grid', nx_grid.shape[1])
            dst.createDimension('ny_grid', ny_grid.shape[0])
            dst.createDimension('time', None)
            # lon
            dst.createVariable('lon', 'f4', ('ny_grid', 'nx_grid'))
            dst['lon'].long_name = "Longitude"
            dst['lon'].standard_name = "longitude"
            dst['lon'].units = "degrees_east"
            dst['lon'][:] = nx_grid
            # lat
            dst.createVariable('lat', 'f4', ('ny_grid', 'nx_grid'))
            dst['lat'].long_name = "Latitude"
            dst['lat'].standard_name = "latitude"
            dst['lat'].units = "degrees_north"
            dst['lat'][:] = ny_grid
            # time
            dst.createVariable('time', 'f4', ('time',))
            dst['time'].long_name = 'Time'
            dst['time'].standard_name = 'time'
            date = pivot_time(self.start_date)
            dst['time'].units = f'days since {date.year}-{date.month}' \
                                f'-{date.day} 00:00' \
                                f'{date.tzinfo}'
            dst['time'].base_date = (date.year, date.month, date.day, 0)
            dst['time'][:] = inventory.get_sflux_timevector()
            for var in RadComponent.var_types:
                dst.createVariable(
                    var, 'f4', ('time', 'ny_grid', 'nx_grid'))
                logger.info(f'Put field {var}')
                inventory.put_sflux_field(
                    getattr(self, f'{var}_name'), dst, var)
            # dlwrf
            dst['dlwrf'].long_name = "Downward Long Wave Radiation Flux"
            dst['dlwrf'].standard_name = "surface_downwelling_" \
                                         "longwave_flux_in_air"
            dst['dlwrf'].units = "W/m^2"
            # dswrf
            dst['dswrf'].long_name = "Downward Short Wave Radiation Flux"
            dst['dswrf'].standard_name = "surface_downwelling_" \
                                         "shortwave_flux_in_air"
            dst['dswrf'].units = "W/m^2"

    self.resource = self.tmpdir
    self.air = AirComponent(self.fields)
    self.prc = PrcComponent(self.fields)
    self.rad = RadComponent(self.fields)
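
# A minimal usage sketch, assuming `HRRR` is the class that owns this
# `fetch_data` method and that it can be constructed without arguments
# (both assumptions; consult the class definition for the real signature):
#
#   hrrr = HRRR()
#   hrrr.fetch_data(rnday=2)  # 2-day forecast from the nearest cycle
#   # hrrr.air, hrrr.prc and hrrr.rad now wrap the sflux netCDF files
#   # written under hrrr.tmpdir.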
def get_sflux_timevector(self):
    timevec = list(self._files.keys())
    _pivot_time = pivot_time(np.min(timevec))
    return [(localize_datetime(x) - _pivot_time) / timedelta(days=1)
            for x in timevec]
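
# Worked example: files stamped 2023-01-15 at 00:00, 06:00 and 12:00 UTC
# map to fractional days since the pivot. This sketch assumes pivot_time()
# resolves to 2023-01-15 00:00 for the earliest stamp:
def _sflux_timevector_example():
    from datetime import datetime, timedelta
    pivot = datetime(2023, 1, 15)
    times = [pivot + timedelta(hours=h) for h in (0, 6, 12)]
    return [(t - pivot) / timedelta(days=1) for t in times]  # [0.0, 0.25, 0.5]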