def dataWithoutGaps(self, messages, fill_method, missing, grib_indexes, grid_shape_2D, grid_mask, debug=False):
    """Build a gap-free sequence of ('ndfd', utc_time, grid) records from
    grib messages.

    Walks consecutive messages; whenever two messages are more than one
    hour apart, self.fillTimeGap() is asked to synthesize the missing
    hourly records using fill_method.

    Arguments:
        messages: sequence of grib message objects (must be non-empty,
            assumed in ascending time order)
        fill_method: gap-fill strategy passed through to self.fillTimeGap
        missing: value in the grib grids that marks missing data
        grib_indexes, grid_shape_2D, grid_mask: reshape parameters
            passed through to reshapeGrid()
        debug: when True, print per-message value ranges

    Returns:
        list of ('ndfd', utc_datetime, 2D grid) tuples
    """
    data_records = []
    first_msg = messages[0]
    prev_grid = reshapeGrid(first_msg, missing, grib_indexes, grid_shape_2D, grid_mask)
    prev_time = asUTCTime(first_msg.validDate)
    prev_record = ('ndfd', prev_time, prev_grid)
    if debug:
        stats = (N.nanmin(prev_grid), N.nanmax(prev_grid))
        print 'value range :', prev_time, stats

    open_gap = False
    for msg in messages[1:]:
        grid = reshapeGrid(msg, missing, grib_indexes, grid_shape_2D, grid_mask)
        this_time = asUTCTime(msg.validDate)
        this_record = ('ndfd', this_time, grid)
        if debug:
            stats = (N.nanmin(grid), N.nanmax(grid))
            print 'value stats :', msg.validDate, stats

        # hours between the previous message and this one; > 1 means a gap
        gap = hoursInTimespan(prev_time, this_time, inclusive=False)
        if gap > 1:
            records = \
                self.fillTimeGap(prev_record, this_record, fill_method)
            # check whether previous record was replaced
            # if the list comprehension is empty, it needs to be added
            if not [rec for rec in records if rec[1] == prev_record[1]]:
                data_records.append(prev_record)
            # add the gap records
            data_records.extend(records)
            # there is an open gap
            open_gap = True
        else:
            # no gap, add previous record
            data_records.append(prev_record)
            open_gap = False

        prev_record = this_record
        prev_time = this_time

    if not open_gap:
        # at this point prev_record == last this_record
        data_records.append(prev_record)
        # NOTE(review): when the final interval WAS a gap (open_gap True),
        # the last record is only present if fillTimeGap included it —
        # assumed to be fillTimeGap's contract; verify against that method.

    if debug:
        msg = messages[-1]
        grid = reshapeGrid(msg, missing, grib_indexes, grid_shape_2D, grid_mask)
        print '\nlast msg :', msg.validDate, N.nanmin(grid), N.nanmax(grid)
    return data_records
def dataWithoutGaps(self, messages, varconfig, grib_indexes, grid_shape_2D, grid_mask, debug=False):
    """Build a gap-free sequence of ('ndfd', utc_time, grid) records from
    grib messages, using the variable's configuration for gap filling.

    Like the fill_method variant, but takes the whole variable config and
    derives the missing-data marker from the first message. After the main
    loop, fillTimeGap(prev_record, None, varconfig) is given a chance to
    extend/replace the final record.

    Arguments:
        messages: sequence of grib message objects (must be non-empty,
            assumed in ascending time order)
        varconfig: variable configuration passed through to fillTimeGap
        grib_indexes, grid_shape_2D, grid_mask: reshape parameters
            passed through to reshapeGrid()
        debug: when True, print per-message value stats

    Returns:
        list of ('ndfd', utc_datetime, 2D grid) tuples
    """
    data_records = []
    first_msg = messages[0]
    # grib files encode their own missing-data marker
    missing = float(first_msg.missingValue)
    prev_grid = reshapeGrid(first_msg, missing, grib_indexes, grid_shape_2D, grid_mask)
    prev_time = asUTCTime(first_msg.validDate)
    prev_record = ('ndfd', prev_time, prev_grid)
    if debug:
        print 'processing %d grib messages :' % len(messages)
        stats = (N.nanmin(prev_grid), N.nanmax(prev_grid))
        print ' stats :', first_msg.validDate, stats

    # NOTE(review): open_gap is vestigial in this version — it is assigned
    # here but never read or updated anywhere below
    open_gap = False
    for msg in messages[1:]:
        grid = reshapeGrid(msg, missing, grib_indexes, grid_shape_2D, grid_mask)
        this_time = asUTCTime(msg.validDate)
        this_record = ('ndfd', this_time, grid)
        if debug:
            stats = (N.nanmin(grid), N.nanmax(grid))
            print ' stats :', msg.validDate, stats

        # hours between the previous message and this one; > 1 means a gap
        gap = hoursInTimespan(prev_time, this_time, inclusive=False)
        if gap > 1:
            records = \
                self.fillTimeGap(prev_record, this_record, varconfig)
            # add the gap records
            data_records.extend(records)
        else:
            # no gap, add previous record
            data_records.append(prev_record)

        prev_record = this_record
        prev_time = this_time

    # at this point prev_record == last this_record
    # let fillTimeGap decide how the sequence ends; fall back to simply
    # appending the final record when it returns nothing
    records = self.fillTimeGap(prev_record, None, varconfig)
    if len(records) > 0:
        data_records.extend(records)
    else:
        data_records.append(prev_record)
    return data_records
def threatWeatherEndTime(self, threat_key, target_date, period_key):
    """Return the UTC datetime that input data must reach to cover
    target_date.

    Turf threat/disease models run over a "standard" 24 hour day that
    starts on the morning of the previous calendar day, so the required
    end time is the threat's configured day-end hour combined with the
    day after target_date in local time, converted to UTC.

    Arguments:
        threat_key: short name for threat/disease configuration
        target_date: datetime.date for the last date desired in results
        period_key: key string for accessing period configuration
            (not used by the current implementation)

    Returns:
        timezone-aware datetime.datetime in UTC
    """
    local_end = datetime.datetime.combine(target_date + ONE_DAY,
                                          self.threatDayEnds(threat_key))
    return tzutils.asUTCTime(local_end, self.project.local_timezone)
def commonObsEnd(self, threat_key, date):
    """Return the latest observation time that is available for ALL
    weather variables required by the threat, or None when any variable
    has no 'last_obs_time' attribute.
    """
    factory = self.smartWeatherReader()
    # start at the last hour of the year and ratchet downward
    common_end = tzutils.asUTCTime(datetime.datetime(date.year, 12, 31, 23))
    for weather_key, variables in self.threatWeather(threat_key).attrs.items():
        reader = factory.weatherFileReader(weather_key, date.year,
                                           date.month, 'NE')
        for variable in variables:
            end_time = reader.timeAttribute(variable, 'last_obs_time')
            if end_time is None:
                # one variable with no obs means no common end at all
                return None
            common_end = min(common_end, end_time)
    return common_end
def commonFcastEnd(self, threat_key, date):
    """Return the earliest forecast end time shared by all weather
    variables required by the threat, or None when a required variable
    has no 'fcast_end_time' attribute.

    Precipitation ('pcpn') is exempt: a missing forecast end for pcpn is
    skipped rather than treated as a failure.
    """
    factory = self.smartWeatherReader()
    # start at the last hour of the year and ratchet downward
    common_end = tzutils.asUTCTime(datetime.datetime(date.year, 12, 31, 23))
    for weather_key, variables in self.threatWeather(threat_key).attrs.items():
        reader = factory.weatherFileReader(weather_key, date.year,
                                           date.month, 'NE')
        for variable in variables:
            end_time = reader.timeAttribute(variable, 'fcast_end_time')
            if end_time is None:
                # precip forecasts may legitimately be absent
                if variable.lower() == 'pcpn': continue
                return None
            common_end = min(common_end, end_time)
    return common_end
def gridForRegion(self, fcast_date, variable, timespan, grid_region, grid_source, fill_gaps=False, graceful_fail=False, debug=False): """ Returns a 3D NumPy grid containing data at all nodes in the grid region for all messages in file. Shape of returned grid is [num_hours, num_lons, num_lats] Assumes file contains a range of time periods for a single variable. """ self.openGribfile(fcast_date, variable, timespan) # retrieve pointers to all messages in the file messages = self.gribs.select() first_msg = messages[0] first_hour = first_msg.validDate missing = float(first_msg.missingValue) units = first_msg.units if debug: print '\nfirst message :' print ' anal date :', first_msg.analDate print 'forecast hour :', first_msg.forecastTime print 'forecast date :', first_hour print 'missing value :', missing print ' units :', units print '\n 2nd message :' print ' anal date :', messages[1].analDate print 'forecast hour :', messages[1].forecastTime print 'forecast date :', messages[1].validDate print '\nlast message :', len(messages) print ' anal date :', messages[-1].analDate print 'forecast hour :', messages[-1].forecastTime print 'forecast date :', messages[-1].validDate num_hours = hoursInTimespan(messages[-1].validDate, first_hour, True) if debug: print ' time span :', num_hours print '\n' # parameters for reshaping the grib arrays grid_shape_2D, grib_indexes, grid_mask = \ self.gribToGridParameters(grid_source, grid_region) grid = N.empty((num_hours, ) + grid_shape_2D, dtype=float) grid.fill(N.nan) times = [] prev_time = first_hour prev_index = None for msg in messages: values = reshapeGrid(msg, missing_value, grib_indexes, grid_shape_2D, grid_mask) next_time = msg.validDate next_index = hoursInTimespan(next_time, first_hour, inclusive=False) grid[next_index, :, :] = values next_record = ('ndfd', next_time, values) if fill_gaps and prev_index: fill = self.variableConfig(variable, timespan).fill_gaps_with gap_info = self.fillTimeGap(prev_record, next_record, fill) for 
src, fcast_time, values in gap_info: index = hoursInTimespan(fcast_time, prev_time, False) grid[index, :, :] = values if debug: print ' gap date :', index, fcast_time if debug: print 'forecast date :', next_index, next_time prev_record = next_record prev_index = next_index gribs.close() return asUTCTime(first_hour), units, grid
# ensure the grid file for the start of the update period exists
if not os.path.exists(filepath):
    grid_factory.buildForecastGridFile(update_start, grid_dataset,
                                       region=grid_region)
update_end = end_date
# make sure that update_end file exists when the update spans more than one month
# NOTE(review): month-number comparison assumes update_start and update_end
# fall in the same year — a Dec->Jan span would not trigger this branch;
# confirm callers never span a year boundary
if update_end.month > update_start.month:
    filepath = grid_factory.ndfdGridFilepath(update_end, grid_dataset,
                                             grid_region)
    if not os.path.exists(filepath):
        grid_factory.buildForecastGridFile(update_end, grid_dataset,
                                           region=grid_region)

# sentinel extremes: min starts impossibly late, max impossibly early,
# so the first real forecast time replaces both
min_fcast_time = tzutils.asUTCTime(datetime.datetime(2099, 12, 31, 23))
max_fcast_time = tzutils.asUTCTime(datetime.datetime(1900, 1, 1, 0))
manager = None
prev_date_filepath = None

if debug:
    print 'Processing %s files for :' % grib_variable
    print ' update start :', update_start
    print ' update end :', update_end

# filter annoying numpy warnings
warnings.filterwarnings('ignore', "All-NaN axis encountered")
warnings.filterwarnings('ignore', "All-NaN slice encountered")
warnings.filterwarnings('ignore', "invalid value encountered in greater")
warnings.filterwarnings('ignore', "invalid value encountered in less")
weather_key = 'wetness' # default to current year & month year = now.year month = current_month = now.month num_time_args = len(args) if num_time_args == 0: # no time args, discover them from times in the files start_time = None end_time = None elif num_time_args == 1: month = int(args[0]) start_time = tzutils.asUTCTime(datetime.datetime(year, month, 1, 0)) last_day = lastDayOfMonth(year, month) end_time = tzutils.asUTCTime(datetime.datetime(year, month, last_day, 23)) elif num_time_args == 2: arg_0 = int(args[0]) if arg_0 > 99: year = arg_0 month = int(args[1]) start = tzutils.asUTCTime(datetime.datetime(year, month, 1, 0)) last_day = lastDayOfMonth(year, month) end_time = tzutils.asUTCTime( datetime.datetime(year, month, last_day, 23)) elif 'h' in args[1]: hour = int(args[1].replace('h', '')) start_time = end_time = tzutils.asUTCTime(
target_hour = options.target_hour
use_time_in_path = options.use_time_in_path
utc_file = options.utc_file
verbose = options.verbose or debug
if max_hours is not None:
    max_hours = datetime.timedelta(hours=max_hours)

reanalysis = 'rtma'
file_variable = args[0].upper()

# remaining args narrow down the repair time; missing leading components
# default to the current date
now = datetime.datetime.now()
num_date_args = len(args) - 1
if num_date_args == 1:
    # hour only — today's date
    repair_time = asUTCTime(
        datetime.datetime(now.year, now.month, now.day, int(args[1])))
elif num_date_args == 2:
    # day and hour — current year and month
    repair_time = asUTCTime(
        datetime.datetime(now.year, now.month, int(args[1]), int(args[2])))
elif num_date_args == 3:
    # month, day and hour — current year
    repair_time = asUTCTime(
        datetime.datetime(now.year, int(args[1]), int(args[2]), int(args[3])))
else:
    # full year, month, day, hour
    repair_time = asUTCTime(
        datetime.datetime(int(args[1]), int(args[2]), int(args[3]), int(args[4])))

# create a factory for access to grid files
factory = ReanalysisDownloadFactory(reanalysis, grib_source, grib_server)
if dev_mode:
    factory.useDirpathsForMode('dev')
factory._initStaticResources_()
def endTimeForDate(factory, threat_key, date):
    """Return the UTC end of the threat's "standard" day for date.

    The local end time is the threat's configured day-end hour on the
    day AFTER date, interpreted in US/Eastern and converted to UTC.
    """
    local_end = datetime.datetime.combine(date + ONE_DAY,
                                          factory.threatDayEnds(threat_key))
    return tzutils.asUTCTime(local_end, 'US/Eastern')
def maxReanalysisEndTime(factory, threat_key):
    """Return the latest reanalysis end time currently reachable for the
    threat.

    Uses yesterday's day-end boundary; if the current UTC time has not
    yet reached it, back up by one day.
    """
    utc_now = tzutils.asUTCTime(datetime.datetime.now(), 'US/Eastern')
    yesterday_end = endTimeForDate(factory, threat_key, YESTERDAY)
    if utc_now >= yesterday_end:
        return yesterday_end
    # yesterday's boundary has not arrived yet — use the prior day's end
    return yesterday_end - HOURS_IN_DAY
grib_region = options.grib_region
grib_server = options.grib_server
grib_source = options.grib_source
max_backward = options.max_backward
num_hours = options.num_hours
only_missing = options.only_missing
utc_date = options.utc_date
verbose = options.verbose or debug

reanalysis = 'rtma'
reanalysis_name = 'RTMA'

if len(args) > 0:
    # explicit date/time components were supplied on the command line
    date_args = tuple([int(n) for n in args])
    if utc_date:
        # input date is already UTC corrected
        end_hour = asUTCTime(datetime.datetime(*date_args))
    else:
        end_hour = asUTCTime(datetime.datetime(*date_args), 'US/Eastern')
    # work backward num_hours (inclusive of end_hour) to get the start
    start_hour = end_hour - datetime.timedelta(hours=num_hours - 1)
else:
    # no args: end at the top of the current local hour; start is
    # discovered later
    now = datetime.datetime.now()
    end_hour = asUTCTime(
        datetime.datetime.combine(now.date(), datetime.time(now.hour)),
        'US/Eastern')
    start_hour = None

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

def utcTimes(utc_time):
    return {
def dataForRegion(self, fcast_date, variable, timespan, grib_region, grid_region, grid_source, fill_gaps=False, graceful_fail=False, debug=False): """ Returns a sequence containing (relative hour, full forecast date, numpy array) tuples for each message in the grib files for each timezone in the list. Arrays are cleaned so that both masked and missing values are set to N.nan. The shape of each returned array is the same (num_lons x num_lats) Assumes file contains a range of times for a single variable. """ data_records = [] # parameters for reshaping the grib arrays grid_shape_2D, grib_indexes, grid_mask = \ self.gribToGridParameters(grid_source, grid_region) # check whether varible supports filling gaps between records varconfig = self.variableConfig(variable, timespan) fill_method = varconfig.get('fill_method', None) # code for filling gaps between records if fill_gaps and fill_method is not None: prev_record = None if debug: info = (timespan, variable, str(fcast_date)) print '\nReading %s %s grib file for %s' % info # retrieve pointers to all messages in the file self.openGribFile(fcast_date, variable, timespan, grib_region) messages = self.gribs.select() first_msg = messages[0] missing = float(first_msg.missingValue) units = first_msg.units # fill the gap between this timespan and the previous one if not prev_record is None: grid = reshapeGrid(first_msg, missing, grib_indexes, grid_shape_2D, grid_mask) next_record = ('ndfd', asUTCTime(first_msg.validDate), grid) data_records.extend( self.fillTimeGap(prev_record, next_record, varconfig)) # update with records for the current timespan data = self.dataWithoutGaps(messages, varconfig, grib_indexes, grid_shape_2D, grid_mask, debug) data_records.extend(data) # track last record in previous timespan msg = messages[-1] grid = reshapeGrid(msg, missing, grib_indexes, grid_shape_2D, grid_mask) prev_record = ('ndfd', asUTCTime(msg.validDate), grid) self.closeGribfile() # code that preserves gaps between records else: if 
debug: info = (variable, timespan, str(fcast_date)) print '\nReading %s %s grib file for %s' % info # retrieve pointers to all messages in the file self.openGribFile(fcast_date, variable, timespan, grib_region) messages = self.gribs.select() first_msg = messages[0] missing = float(first_msg.missingValue) units = first_msg.units for msg in messages: grid = reshapeGrid(msg, missing, grib_indexes, grid_shape_2D, grid_mask) this_time = asUTCTime(msg.validDate) data_records.append(('ndfd', this_time, grid)) if debug: stats = (N.nanmin(grid), N.nanmax(grid)) print 'value stats :', msg.validDate, stats self.closeGribfile() return units, data_records
def dataForRegion(self, fcast_date, variable, timespans, grid_region, grid_source, fill_gaps=False, graceful_fail=False, debug=False): """ Returns a sequence containing (relative hour, full forecast date, numpy array) tuples for each message in the grib files for each timezone in the list. Arrays are cleaned so that both masked and missing values are set to N.nan. The shape of each returned array is the same (num_lons x num_lats) Assumes file contains a range of times for a single variable. """ data_records = [] if isinstance(timespans, basestring): timespans = (timespans, ) elif not isinstance(timespans, (tuple, list)): errmsg = '"%s" is an invalid type for timespans argument.' errmsg += '\nArgument type must be one of string, list, tuple.' raise TypeError, errmsg % type(timespan) for timespan in timespans: if timespan not in VALID_TIMESPANS: raise ValueError, BAD_TIMESPAN % timespan # parameters for reshaping the grib arrays grid_shape_2D, grib_indexes, grid_mask = \ self.gribToGridParameters(grid_source, grid_region) # code for filling gaps between records if fill_gaps: prev_record = None fill_method = \ self.variableConfig(variable, timespan).fill_gaps_with for timespan in timespans: self.openGribfile(fcast_date, variable, timespan) # retrieve pointers to all messages in the file messages = self.gribs.select() first_msg = messages[0] missing = float(first_msg.missingValue) units = first_msg.units # fill the gap between this timespan and the previous one if not prev_record is None: grid = reshapeGrid(first_msg, missing, grib_indexes, grid_shape_2D, grid_mask) next_record = ('ndfd', asUTCTime(first_msg.validDate), grid) data_records.extend( self.fillTimeGap(prev_record, next_record, fill_method)) # update with records for the current timespan data = self.dataWithoutGaps(messages, fill_method, missing, grib_indexes, grid_shape_2D, grid_mask, debug) data_records.extend(data) # track last record in previous timespan msg = messages[-1] grid = reshapeGrid(msg, 
missing, grib_indexes, grid_shape_2D, grid_mask) prev_record = ('ndfd', asUTCTime(msg.validDate), grid) self.closeGribfile() # code that preserves gaps between records else: for timespan in timespans: self.openGribfile(fcast_date, variable, timespan) # retrieve pointers to all messages in the file messages = self.gribs.select() first_msg = messages[0] missing = float(first_msg.missingValue) units = first_msg.units for msg in messages: grid = reshapeGrid(msg, missing, grib_indexes, grid_shape_2D, grid_mask) this_time = asUTCTime(msg.validDate) data_records.append(('ndfd', this_time, grid)) if debug: stats = (N.nanmin(grid), N.nanmax(grid)) print 'value stats :', msg.validDate, stats self.closeGribfile() return units, data_records
def startTimeForDate(factory, threat_key, date):
    """Return the UTC start of the threat's "standard" day for date.

    The day starts one hour after the previous day's configured end
    hour, interpreted in US/Eastern and converted to UTC.
    """
    local_start = datetime.datetime.combine(
        date, factory.threatDayEnds(threat_key)) + ONE_HOUR
    return tzutils.asUTCTime(local_start, 'US/Eastern')
# default to current year & month year = now.year month = current_month = now.month last_day = datetime.date(year, month, lastDayOfMonth(year, month)) last_day_end = tzutils.asUtcTime(datetime.datetime.combine(last_day, datetime.time(hour=23))) num_time_args = len(args) if num_time_args == 0 : # no time args, discover them from times in the files start_time = None end_time = None elif num_time_args == 1: month = int(args[0]) start_time = tzutils.asUTCTime(datetime.datetime(year, month, 1, 0)) end_time = tzutils.asUTCTime(datetime.datetime(year, month, last_day, 23)) elif num_time_args == 2: arg_0 = int(args[0]) if arg_0 > 99: year = arg_0 month = int(args[1]) start = tzutils.asUTCTime(datetime.datetime(year, month, 1, 0)) end_time = tzutils.asUTCTime(datetime.datetime(year, month, last_day, 23)) else: start_time = tzutils.asUTCTime(datetime.datetime(year, month, arg_0, int(args[1]))) end_time = None elif num_time_args == 4: arg_0 = int(args[0])
grib_server = options.grib_server
grib_source = options.grib_source
max_backward = options.max_backward
num_hours = options.num_hours
only_missing = options.only_missing
utc_date = options.utc_date
verbose = options.verbose or debug

reanalysis = 'rtma'
factory = ReanalysisDownloadFactory(reanalysis, grib_source, grib_server)
if dev_mode:
    factory.useDirpathsForMode('dev')

if len(args) > 0:
    # explicit date/time components were supplied on the command line
    date_args = tuple([int(n) for n in args])
    if utc_date:
        # input date is already UTC corrected
        end_hour = asUTCTime(datetime.datetime(*date_args))
    else:
        end_hour = asUTCTime(datetime.datetime(*date_args), 'US/Eastern')
    # work backward num_hours (inclusive of end_hour) to get the start
    start_hour = end_hour - datetime.timedelta(hours=num_hours - 1)
else:
    # no args: end now; start is discovered later from the data
    end_hour = asUTCTime(datetime.datetime.now(), 'US/Eastern')
    start_hour = None

# start with data file downloads
data_count = 0
data_files = []
if download_data:
    if start_hour is None:
        # no explicit start: let the factory decide how far back to go
        download_hours = determineTimespan(factory, end_hour, 'DATA',
                                           grib_region, max_backward, debug)