class StationDataFileBuilder(object):
    """ Retrieves station data for a group of states and writes it to a file.
    """
    REQUIRED_METADATA = ('uid', 'll', 'elev')
    REQUIRED_ELEMS = ()

    def __init__(self, station_manager_class, station_data_filepath, elems=(),
                       metadata=(), base_url=None, file_attrs=(),
                       server_reset_wait_time=30, reporter_or_filepath=None,
                       **request_args):
        self.station_manager_class = station_manager_class
        self.station_data_filepath = station_data_filepath

        if elems:
            self.elements = self._guaranteeRequiredElements(elems)
        else:
            self.elements = self.REQUIRED_ELEMS
        self.element_ids = [ ]
        for element in self.elements:
            elem_id = indexableElementID(element)
            self.element_ids.append(elem_id)

        if metadata:
            self.metadata = tuple(set(self.REQUIRED_METADATA) | set(metadata))
        else:
            self.metadata = self.REQUIRED_METADATA

        if base_url is None:
            self.client = AcisMultiStationDataClient()
        else:
            self.client = AcisMultiStationDataClient(base_url)

        self.file_attrs = file_attrs
        self.server_reset_wait_time = server_reset_wait_time

        # create a reporter for performance and debug
        if isinstance(reporter_or_filepath, Reporter):
            self.reporter = reporter_or_filepath
        else:
            self.reporter = Reporter(self.__class__.__name__,
                                     reporter_or_filepath)
            self.reporter.close()

        self.request_args = request_args
        self.extension_data = { }

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def __call__(self, date, states, max_attempts=1, debug=False,
                       performance=False):
        state_error = "attempt %d to retrieve stations for %s failed"
        self.client.debug = debug
        reporter = self.reporter

        if performance:
            msg = "Attempting to download stations for %d states :"
            reporter.logInfo(msg % len(states))
            start_perf = datetime.now()  # for performance reporting

        num_states = len(states)
        station_data = None
        required_meta_key = self.REQUIRED_METADATA[0]

        attempts = 0
        total_stations = 0
        total_valid = 0

        while len(states) > 0 and attempts < max_attempts:
            attempts += 1
            do_over_states = [ ]

            for state in states:
                try:
                    results = self.allStationsInState(state, date, performance)
                except urllib2.HTTPError as e:
                    if attempts >= max_attempts: raise
                    if e.code >= 400 and e.code < 500:
                        reporter.logError('REQUEST : '
                                          + state_error % (attempts, state))
                    elif e.code >= 500:
                        reporter.logError('ACIS SERVER : '
                                          + state_error % (attempts, state))
                    reporter.logError('HTTP response code = %s' % str(e.code))
                    # recoverable errors
                    if e.code in (500, 502, 503, 504, 598, 599):
                        do_over_states.append(state)
                        reporter.logInfo('waiting for server to clear ...')
                        time.sleep(self.server_reset_wait_time)
                    else:
                        # no recovery path
                        errmsg = 'Build of station data file failed : %s'
                        reporter.reportError(errmsg % self.station_data_filepath)
                        raise
                except urllib2.URLError as e:
                    if attempts >= max_attempts: raise
                    reporter.logError('urllib2 : '
                                      + state_error % (attempts, state))
                    reporter.logException('urllib2.URLError')
                    # these errors are temporary and often recoverable
                    do_over_states.append(state)
                except Exception as e:
                    reporter.logException(state_error % (attempts, state))
                    # must assume that unknown exceptions are not recoverable
                    raise
                else:
                    num_stations, state_data = results
                    total_stations += num_stations
                    if state_data:
                        if station_data is not None:
                            for key in station_data:
                                station_data[key] += state_data[key]
                            total_valid += len(state_data[required_meta_key])
                        else:
                            station_data = state_data
                            total_stations = num_stations
                            total_valid = len(station_data[required_meta_key])

            # reset state list to those that failed
            states = do_over_states

        if len(do_over_states) > 0:
            errmsg = "SERVER ERROR : Unable to download data for %d states : "
            errmsg += str(tuple(do_over_states))
            raise RuntimeError, errmsg % len(do_over_states)

        if total_stations == 0:
            errmsg = "No station data available at the time of this run"
            raise LookupError, errmsg

        if performance:
            msg = 'Download %d stations from %d states in'
            reporter.logPerformance(start_perf,
                                    msg % (total_stations, num_states))
            start_save = datetime.now()  # for performance reporting

        if 'obs_date' not in self.file_attrs:
            file_attrs = { 'obs_date':date, }
            file_attrs.update(self.file_attrs)
            manager = self.newStationFileManager(station_data['lon'],
                                                 station_data['lat'],
                                                 file_attrs)
        else:
            manager = self.newStationFileManager(station_data['lon'],
                                                 station_data['lat'],
                                                 self.file_attrs)
        del station_data['lon']
        del station_data['lat']

        num_datasets = len(station_data.keys()) + 2
        self._saveDatasets(date, manager, station_data)
        manager.closeFile()

        del station_data
        del self.extension_data
        self.extension_data = { }

        if performance:
            msg = 'Saved %d datasets of %d observations each in'
            reporter.logPerformance(start_save,
                                    msg % (num_datasets, total_valid))

        return total_valid, total_stations

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def allStationsInState(self, state, date, performance):
        start_state = datetime.now()
        num_stations = 0
        num_valid = 0
        ncdc_code = postal2ncdc(state)
        required = set(self.REQUIRED_METADATA)
        station_data = { }

        stations = self.client.allStationsInState(state, self.elements, date,
                                                  self.metadata,
                                                  **self.request_args)
        num_stations = len(stations['data'])
        if num_stations < 1:
            if performance:
                self.statePerfSummary(start_state, state, 0, 0)
            return 0, station_data

        # list of data elements returned by ACIS
        elements = deepcopy(stations['elems'])
        data_keys = [indexableElementID(element) for element in elements]

        # only keep stations that have all required metadata
        usable = [row for row in stations['data']
                  if (set(row['meta'].keys()) & required) == required]
        if len(usable) < 1:
            if performance:
                self.statePerfSummary(start_state, state, num_stations, 0)
            return 0, station_data
        del stations

        meta_keys = usable[0]['meta'].keys()
        # merge metadata values and data values into a single list
        merged = [row['meta'].values() + row['data'] for row in usable]
        del usable

        # reorganize merged station data into ordered lists of values
        # corresponding to each metadata and obs data key
        all_keys = meta_keys + data_keys
        for indx in range(len(all_keys)):
            station_data[all_keys[indx]] = [row[indx] for row in merged]
        del merged

        # break up multi-component values in station data arrays
        if 'll' in station_data:
            station_data['lon'] = [value[0] for value in station_data['ll']]
            station_data['lat'] = [value[1] for value in station_data['ll']]
            del station_data['ll']

        for element in elements:
            key = indexableElementID(element)
            data = station_data[key]
            # additional data descriptors requested
            if 'add' in element:
                # observation time
                if 't' in element['add']:
                    t_key = OBS_TIME_KEYS.get(key, key+'_obs_time')
                    indx = element['add'].index('t') + 1
                    station_data[t_key] = [obs[indx] for obs in data]
                # data "correctness" flag
                if 'f' in element['add']:
                    f_key = OBS_FLAG_KEYS.get(key, key+'_obs_flag')
                    indx = element['add'].index('f') + 1
                    flags = [evalObsFlag(obs[0], obs[indx]) for obs in data]
                    station_data[f_key] = flags
                    data = [evalObsValue(key, obs[0], obs[indx]) for obs in data]
                else:
                    data = [evalObsFlag(key, obs[0], ' ') for obs in data]
            else:
                data = [evalObsFlag(key, obs[0], ' ') for obs in data]

            if key == 'pcpn':
                bad_indexes = [indx for indx in range(len(data))
                               if N.isnan(data[indx])]
                if bad_indexes:
                    print 'WARNING: Invalid precip values at', str(bad_indexes)
                    sys.stdout.flush()
            station_data[key] = data

        # handle change in name of dataset sent by ACIS web services
        # used to be named 'postal', downstream now expects 'state'
        if 'state' not in station_data:
            station_data['state'] = [state for stn in station_data['lon']]
        if 'ncdc' not in station_data:
            station_data['ncdc'] = [ncdc_code for stn in station_data['lon']]
        if 'name' in station_data:
            station_data['name'] = [name.encode('iso-8859-1')
                                    for name in station_data['name']]

        station_data = self.validStations(state, station_data)
        num_valid = len(station_data[self.REQUIRED_METADATA[0]])

        self._processExtensionDatasets(station_data)

        if performance:
            self.statePerfSummary(start_state, state, num_stations, num_valid)

        return num_stations, station_data

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def statePerfSummary(self, elapsed_time, state, num_stations, num_valid):
        msg = 'downloaded %d stations for %s (%d usable) in'
        self.reporter.logPerformance(elapsed_time,
                                     msg % (num_stations, state, num_valid))

    def validStations(self, state, station_data):
        return station_data

    def newStationFileManager(self, lons, lats, file_attrs):
        datasets = ( ('lon', N.array(lons), None),
                     ('lat', N.array(lats), None), )
        return self.station_manager_class.newFile(self.station_data_filepath,
                                                  file_attrs, datasets)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def _guaranteeRequiredElements(self, elements):
        elements = list(elements)
        elem_ids = [elementID(element) for element in elements]
        for required in self.REQUIRED_ELEMS:
            req_id = elementID(required)
            if req_id not in elem_ids:
                elements.append(required)
        return tuple(elements)

    def _processExtensionDatasets(self, station_data):
        pass

    def _saveDataset(self, manager, dataset_name, data, attributes=None):
        if not isinstance(data, N.ndarray):
            data = manager._dataAsArray(dataset_name, data)
        try:
            manager.createDataset(dataset_name, data, attributes)
        except Exception as e:
            errmsg = "Failed to create '%s' dataset."
            self.reporter.logException(errmsg % dataset_name)
            raise

    def _saveDatasets(self, obs_date, manager, data_arrays):
        manager.setOpenState(True)
        elems = [elem_id for elem_id in self.element_ids]
        elems.extend([OBSERVED_PREFIX+elem_id for elem_id in self.element_ids])

        for dataset_name, data in data_arrays.items():
            if dataset_name in elems:
                attrs = { 'obs_date':obs_date, }
                self._saveDataset(manager, dataset_name, data, attrs)
            else:
                self._saveDataset(manager, dataset_name, data)

        for dataset_name, data in self.extension_data.items():
            self._saveDataset(manager, dataset_name, data)

        manager.setOpenState(False)
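
# Illustrative usage sketch (an assumption, not part of this module): how a
# StationDataFileBuilder might be driven for one observation date.  The manager
# class, output filepath, element list and state list below are hypothetical
# placeholders chosen only to match the signatures defined above.
#
#   builder = StationDataFileBuilder(
#                 SomeStationFileManager,          # hypothetical manager class
#                 '/data/stations/20120615.h5',    # hypothetical output path
#                 elems=({'name':'maxt', 'add':['f',]},
#                        {'name':'mint', 'add':['f',]},
#                        {'name':'pcpn', 'add':['f',]}),
#                 metadata=('uid', 'll', 'elev', 'name'),
#                 server_reset_wait_time=30)
#   num_valid, num_stations = builder(datetime(2012, 6, 15),
#                                     states=('NY', 'PA', 'VT'),
#                                     max_attempts=3, performance=True)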
        day = int(args[6])
        end_date = datetime(year, month, day)
    else:
        end_date = start_date
else:
    start_date = datetime.now() - relativedelta(days=options.days_ago)
    end_date = start_date

target_year = targetYearFromDate(start_date)
if target_year is None: exit()

log_filepath = options.log_filepath
if log_filepath is None:
    log_filename = '%%s-apple-variety-%s-build.log' % nameToFilepath(variety)
    log_filepath = buildLogFilepath(target_year, 'apple', log_filename,
                                    os.getpid())
reporter = Reporter(PID, log_filepath)

process_server = ProcessServer(reporter, variety, build_grids, draw_maps,
                               debug, test_run)

date = start_date
while date <= end_date:
    # do not start new date after quit time
    if quit_time is None or datetime.now() < quit_time:
        process_server.run(date)
    else:
        reason = 'time limit exceeded'
        exit()
    date += ONE_DAY

reporter.logInfo('Processing ended gracefully')
class BaseAcisDataClient:

    def __init__(self, base_url=DEFAULT_URL, outliers=OUTLIERS,
                       precip_trace=0.005, accumulated_precip=N.inf,
                       missing=N.inf, reporter_or_filepath=None,
                       version=DEFAULT_VERSION, debug=False,
                       performance=False, **kwargs):
        self.base_url = base_url
        self.outliers = outliers
        self.precip_trace = precip_trace
        self.accumulated_precip = accumulated_precip
        self.missing = missing

        # create a reporter for performance and debug
        if isinstance(reporter_or_filepath, Reporter):
            self.reporter = reporter_or_filepath
        else:
            self.reporter = Reporter(self.__class__.__name__,
                                     reporter_or_filepath)
            self.reporter.close()

        self.version = version
        self.debug = debug
        self.performance = performance
        self.keyword_args = kwargs

        if version < 2.0:
            self.ignore_awdn = kwargs.get('ignore_awdn', True)
            self.default_elements = ('pcpn', 'maxt', 'mint')
        else:
            self.default_elements = ( { 'name':'maxt', 'add':['f',] },
                                      { 'name':'mint', 'add':['f',] },
                                      { 'name':'pcpn', 'add':['f',] }, )
        self.default_metadata = ('uid', 'll', 'elev')

        self.element_name_map = ELEMENT_NAME_MAP
        self.temp_elem_ids = ALL_TEMP_ELEMENTS
        self.precip_elem_ids = ALL_PRECIP_ELEMENTS
        self.non_numeric_elem_ids = ALL_NON_NUMERIC_ELEMS
        self.elem_names = TEMP_ELEM_NAMES + PRECIP_ELEM_NAMES
        self.vx_ids = TEMP_VX_IDS + PRECIP_VX_IDS + NON_NUMERIC_VX_IDS

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def request(self, data_type, method='POST', **kwargs):
        if self.version >= 2.0 and method != 'POST':
            errmsg = "only 'POST' method is supported by this version of ACIS"
            raise ValueError, errmsg
        ERROR_MSG = 'Error processing request : %s %s'

        # make sure that `elems` is a list and POSTed if necessary
        elems = self.validateElements(kwargs.get('elems',
                                                 self.default_elements))
        kwargs['elems'] = elems

        if self.version < 2.0:
            if 'no_awdn' not in kwargs and self.ignore_awdn:
                kwargs['no_awdn'] = 1
        else:
            if 'no_awdn' in kwargs:
                del kwargs['no_awdn']

        for date_key in ('date', 'sDate', 'eDate'):
            date = kwargs.get(date_key, None)
            if date is None: continue  # date key is not present
            if isinstance(date, (datetime, dt_date)):
                date = date.strftime('%Y%m%d')
            elif isinstance(date, (list, tuple)):
                if date[0] > 31:  # year is first element in sequence
                    date = '%d%02d%02d' % date
                else:  # year is last element in sequence
                    date = '%d%02d%02d' % (date[2], date[0], date[1])
            elif type(date) not in (str, unicode):
                raise ValueError, "Bad data type for '%s' argument" % date_key
            kwargs[date_key] = date

        url = self.base_url
        if url.endswith('/'):
            if data_type.startswith('/'):
                url += data_type[1:]
            else:
                url += data_type
        else:
            if data_type.startswith('/'):
                url += data_type
            else:
                url += '/' + data_type

        if method == 'POST':
            post_args = json.dumps(kwargs)
            if self.debug:
                print 'POST', url
                print 'params =', post_args
            post_params = urllib.urlencode({'params': post_args})
            req = urllib2.Request(url, post_params,
                                  {'Accept': 'application/json'})
            url += ' json=' + post_params

            start_time = datetime.now()
            try:
                response = urllib2.urlopen(req)
            except Exception:
                self.reporter.logError(ERROR_MSG % (method, url))
                raise
            end_time = datetime.now()
        else:
            url += '?' + urllib.urlencode(kwargs) + '&output=json'
            if self.debug:
                print 'GET', url

            start_time = datetime.now()
            try:
                response = urllib2.urlopen(url)
            except Exception:
                self.reporter.logError(ERROR_MSG % (method, url))
                raise
            end_time = datetime.now()

        try:
            response_data = response.read()
        except Exception:
            self.reporter.logError(ERROR_MSG % (method, url))
            raise
        end_time = datetime.now()

        if self.performance:
            msg = 'Time to retrieve data from ACIS web service ='
            self.reporter.logPerformance(start_time, msg)

        return kwargs['elems'], response_data, response

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def dateAsString(self, date):
        if isinstance(date, (str, unicode)):
            if '/' in date:
                return date.replace('/', '-')
            else:
                return date
        elif isinstance(date, (tuple, list)):
            return self._dateTupleAsString(date)
        try:
            return date.strftime('%Y-%m-%d')
        except:
            raise ValueError, 'Invalid type for date.'

    def _dateTupleAsString(self, date):
        return '%d-%02d-%02d' % date

    def _validDateRange(self, start_date, end_date):
        start_date = self._dateAsTuple(start_date)
        if end_date is not None:
            end_date = self._dateAsTuple(end_date)
        return start_date, end_date

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def findOutliers(self, data, min_value, max_value):
        bogus = []
        i = 0
        while i < len(data):
            value = data[i]
            if value > max_value:
                bogus.append((i, value))
            elif value < min_value and value != -32768.:
                bogus.append((i, value))
            i += 1
        return tuple(bogus)

    def responseAsDict(self, json_string):
        return json.loads(json_string)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def serializeDataValue(self, element, data):
        if self.version < 2.0:
            # do nothing for non-numeric data
            if element in self.non_numeric_elem_ids:
                return data
            # handle characters in numeric data types
            if data == 'M':
                return self.missing
            if element in self.precip_elem_ids:
                if data == 'T':
                    return self.precip_trace
                if data == 'S':
                    return self.missing
                if data.endswith('A'):
                    return self.accumulated_precip
            # all other numeric values want to be floating point
            return float(data)
        else:
            elem_id = element.get('name', element.get('vX', None))
            # do nothing for non-numeric data or unrecognized elements
            if elem_id is None or elem_id in self.non_numeric_elem_ids:
                return data
            # handle characters in numeric data types
            if data[0] in ('A', 'M', 'S', ' '):
                data[0] = self.missing
            elif data[0] == 'T':
                data[0] = self.precip_trace
            elif data[0] == 'S':
                data[0] = self.missing
            # all other numeric values want to be floating point
            else:
                data[0] = float(data[0])
            return data

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def validateElementDict(self, elem):
        if 'name' in elem:
            name = elem['name']
            elem_name = self.element_name_map.get(name, name)
            if elem_name in self.elem_names:
                elem['name'] = elem_name
                return elem
            raise ValueError, "Invalid value for 'name' in element dictionary"
        elif 'vX' in elem:
            if elem['vX'] in self.vx_ids:
                return elem
            raise ValueError, "Invalid value for 'vX' in element dictionary"
        errmsg = "Element dictionary must contain either the 'name' or 'vX' key"
        raise KeyError, errmsg

    def validateElementString(self, elem):
        if elem.isdigit() and int(elem) in self.vx_ids:
            return { 'vX': int(elem), }
        elif elem in self.elem_names:
            return { 'name': elem, }
        name = self.element_name_map.get(elem, None)
        if name is not None:
            return { 'name': name, }
        errmsg = "String contains invalid element identifier '%s'"
        raise ValueError, errmsg % elem

    def validateElements(self, elements):
        if elements is None:
            return None
        if isinstance(elements, dict):
            return (self.validateElementDict(elements), )
        elif isinstance(elements, (str, unicode)):
            return (self.validateElementString(elements), )
        elif isinstance(elements, (tuple, list)):
            valid_elems = []
            for elem in elements:
                if isinstance(elem, (str, unicode)):
                    valid_elems.append(self.validateElementString(elem))
                elif isinstance(elem, dict):
                    valid_elems.append(self.validateElementDict(elem))
            return tuple(valid_elems)
        raise ValueError, "Invalid type for element identifier"

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def _pythonObjectsFromJson(self, json_string, response,
                                     request_detail=None):
        """ Convert a JSON string to Python objects ... handle known instances
        where the server injects badly formed JSON into the stream.
        """
        if 'DOCTYPE HTML PUBLIC' in json_string:
            errmsg = 'SERVER ERROR : '
            if 'server encountered an internal error' in json_string:
                errmsg += 'server encountered an unspecified internal error.'
                if request_detail is not None:
                    errmsg += '\n' + request_detail
                ecode = 503
            else:
                ecode = 500
                errmsg += 'server returned HTML, not valid JSON.\n'
                if request_detail is not None:
                    errmsg += request_detail + '\n'
                errmsg += json_string
            raise urllib2.HTTPError(response.geturl(), ecode, errmsg, None, None)

        server_error = 'SERVER ERROR : '
        errors = []
        if '[Failure instance:' in json_string:
            found_start = json_string.find('[Failure instance:')
            while found_start > 0:
                found_end = json_string.find('\n],', found_start)
                error = json_string[found_start:found_end + 3]
                errors.append(''.join(error.splitlines()))
                before = json_string[:found_start]
                after = json_string[found_end + 3:]
                json_string = before + after
                found_start = json_string.find('[Failure instance:')

        if errors:
            errmsg = 'the following errors found in returned JSON string :'
            print server_error, errmsg
            for error in errors:
                print error
            if request_detail is not None:
                errmsg = 'Station data block may be incomplete for'
                print errmsg, request_detail
            else:
                errmsg = 'The resulting station data block may be incomplete.'
                print errmsg
            sys.stdout.flush()

        try:
            return json.loads(json_string)
        except:
            errmsg = server_error
            errmsg += 'unable to handle improperly formatted JSON from server.\n'
            errmsg += response.geturl() + '\n'
            if request_detail is not None:
                errmsg += request_detail + '\n'
            errmsg += json_string
            reportException(errmsg)
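
# Illustrative usage sketch (an assumption, not part of this module): one
# request/response round trip through BaseAcisDataClient.request().  'StnData'
# is believed to be a standard ACIS call type, and the uid, dates and element
# are made-up values; request() itself normalizes datetime, date-tuple, or
# string dates, and validateElements() expands the element shorthand.
#
#   client = BaseAcisDataClient(version=2.0)
#   elems, response_data, response = client.request('StnData',
#                                                    uid=3036,            # hypothetical station uid
#                                                    sDate=(2012, 6, 1),  # year-first date tuple
#                                                    eDate=(2012, 6, 30),
#                                                    elems='maxt')
#   results = client._pythonObjectsFromJson(response_data, response,
#                                           request_detail='uid 3036')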
class StationBiasTool(object):

    def __init__(self, region_bbox, search_radius, c_parm, vicinity,
                       relative_nodes, node_reach, reporter_or_filepath=None):
        self.c_parm = c_parm
        self.node_reach = node_reach
        self.region_bbox = region_bbox
        self.relative_nodes = relative_nodes
        self.search_radius = search_radius
        self.vicinity = vicinity

        # create a reporter for performance and debug
        if isinstance(reporter_or_filepath, Reporter):
            self.reporter = reporter_or_filepath
        else:
            self.reporter = Reporter(self.__class__.__name__,
                                     reporter_or_filepath)
            self.reporter.close()

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def applyBias(self, dem_lons, dem_lats, dem_data, dem_data_units,
                        stn_lons, stn_lats, stn_bias, stn_bias_units,
                        report_rate=1000, debug=False, performance=False):
        """ Apply the calculated station temperature bias to the grid nodes.
        """
        PERF_MSG = 'processed %d grid nodes in'
        PERF_MSG_SUFFIX = ' ... total = %d of %d'

        reporter = self.reporter
        search_radius = self.search_radius
        c_parm = self.c_parm
        vicinity = self.vicinity
        min_count = report_rate - 1

        dem_grid_shape = dem_lons.shape
        dem_grid_size = dem_lons.size

        # create empty in-memory arrays for calculated grids
        biased_data = N.empty(shape=dem_grid_shape, dtype=float)
        dem_data_bias = N.empty(shape=dem_grid_shape, dtype=float)

        num_nodes_processed = 0
        no_change = 0
        start_count = datetime.now()

        # make sure station and dem data are in the same units
        if stn_bias_units != dem_data_units:
            stn_bias = convertUnits(stn_bias, stn_bias_units, dem_data_units)

        # loop thru the nodes of the raw grid and apply the station bias
        for x in range(dem_grid_shape[0]):
            for y in range(dem_grid_shape[1]):
                if performance:
                    # report performance every 'report_rate' passes thru loop
                    if num_nodes_processed > min_count and \
                       num_nodes_processed % report_rate == 0:
                        msg = PERF_MSG % (report_rate)
                        sfx = PERF_MSG_SUFFIX % (num_nodes_processed,
                                                 dem_grid_size)
                        reporter.logPerformance(start_count, msg, sfx)
                        start_count = datetime.now()

                node_lon = dem_lons[x, y]
                node_lat = dem_lats[x, y]
                node_value = dem_data[x, y]

                if not self._passesApplyBiasTest(node_value, node_lon,
                                                 node_lat, stn_bias,
                                                 stn_lons, stn_lats):
                    biased_data[x, y] = dem_data[x, y]
                    dem_data_bias[x, y] = 0.
                    num_nodes_processed += 1
                    no_change += 1
                    continue

                # get indexes of all stations within search radius of grid node
                # bbox will be different for each grid node
                bbox = (node_lon - search_radius, node_lon + search_radius,
                        node_lat - search_radius, node_lat + search_radius)
                indexes = N.where((stn_lons >= bbox[0]) & (stn_lons <= bbox[1]) &
                                  (stn_lats >= bbox[2]) & (stn_lats <= bbox[3]))

                # no stations within search radius
                if len(indexes[0]) < 1:
                    # NO ADJUSTMENT CAN BE MADE
                    biased_data[x, y] = dem_data[x, y]
                    dem_data_bias[x, y] = 0.
                    num_nodes_processed += 1
                    no_change += 1
                    continue

                # coordinates of all stations in search area
                area_lons = stn_lons[indexes]
                area_lats = stn_lats[indexes]

                # test stations for 'nearness' to the grid node
                bbox = (node_lon - vicinity, node_lon + vicinity,
                        node_lat - vicinity, node_lat + vicinity)
                nearby = N.where((area_lons >= bbox[0]) & (area_lons <= bbox[1]) &
                                 (area_lats >= bbox[2]) & (area_lats <= bbox[3]))

                # in order to use MQ we must have at least one 'nearby' station
                # or stations in all quadrants surrounding the node
                if (len(nearby[0]) < 1 and not allQuadrants(
                        node_lon, node_lat, area_lons, area_lats)):
                    # NO ADJUSTMENT CAN BE MADE
                    biased_data[x, y] = dem_data[x, y]
                    dem_data_bias[x, y] = 0.
                    num_nodes_processed += 1
                    no_change += 1
                    continue

                # run multiquadric interpolation on BIAS
                data_bias = interp.mq(node_lat, node_lon, area_lats, area_lons,
                                      stn_bias[indexes], c_parm)
                if N.isfinite(data_bias):
                    # apply valid bias
                    value = dem_data[x, y] - data_bias
                else:
                    # invalid bias ... NO ADJUSTMENT CAN BE MADE
                    value = dem_data[x, y]
                    data_bias = 0.
                    no_change += 1

                if N.isfinite(value):
                    biased_data[x, y] = value
                    dem_data_bias[x, y] = data_bias
                else:
                    biased_data[x, y] = dem_data[x, y]
                    dem_data_bias[x, y] = 0.
                    no_change += 1

                num_nodes_processed += 1

        # log performance for nodes not yet reported
        unreported = num_nodes_processed % report_rate
        if performance and unreported > 0:
            msg = PERF_MSG % (unreported)
            sfx = PERF_MSG_SUFFIX % (num_nodes_processed, dem_grid_size)
            reporter.logPerformance(start_count, msg, sfx)

        return biased_data, dem_data_bias, (num_nodes_processed, no_change)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def calculateBias(self, algorithm, stn_uids, stn_lons, stn_lats, stn_data,
                            stn_data_units, raw_lons, raw_lats, raw_data,
                            raw_data_units, report_rate=100, debug=False,
                            performance=False):
        """ Calculate the weighted difference between the data value at each
        station and the nearby grid nodes. It will use multiquadric
        interpolation except when there are an insufficient number of grid
        nodes nearby, then it will use a simple inverse distance weighted
        average.
        """
        # local references to instance attributes
        reporter = self.reporter
        vicinity = self.vicinity
        min_count = report_rate - 1

        PERF_MSG = 'processed %d stations (%d total) in'

        # initialize station temperature bias arrays
        stn_interp_data = []
        stn_data_bias = []
        num_stations = len(stn_uids)

        # initialize tracking variables
        algorithm_counts = [0, 0, 0]
        station_count = 0
        stations_bad_data = 0
        stations_outside = 0
        insufficient_coverage = 0
        bias_not_calculated = 0
        start_report = datetime.now()

        # make sure station and dem data are in the same units
        if raw_data_units != stn_data_units:
            raw_data = convertUnits(raw_data, raw_data_units, stn_data_units)

        # loop through list of stations making adjustments to both station
        # and grid node temperature extremes
        # (the following is good for a limited test loop)
        # for indx in (84,85,278,330,337,345,360,368,444,476):
        for indx in range(num_stations):
            # report performance every 'report_rate' passes thru the loop
            if performance and (station_count > min_count and
                                station_count % report_rate == 0):
                reporter.logPerformance(start_report,
                                        PERF_MSG % (report_rate, station_count))
                start_report = datetime.now()

            # extract observation data for this station
            stn_id = stn_uids[indx]
            stn_lon = stn_lons[indx]
            stn_lat = stn_lats[indx]
            stn_info = 'station %d (%s) at [%-9.5f, %-9.5f]' % (indx, stn_id,
                                                                stn_lon,
                                                                stn_lat)

            # station is not within the bounding box for this run
            if not self._pointInBounds(stn_lon, stn_lat):
                stn_interp_data.append(N.inf)
                stn_data_bias.append(N.inf)
                stations_outside += 1
                station_count += 1
                continue

            stn_value = stn_data[indx]

            # check for invalid data value for this station
            # this shouldn't happen if station data prep is done right !!!
            if not N.isfinite(stn_value):
                # set missing values and skip to next iteration
                stn_interp_data.append(N.inf)
                stn_data_bias.append(N.inf)
                stations_bad_data += 1
                station_count += 1
                if debug:
                    print 'skipped ', stn_info
                    print '... bad data value', stn_value
                continue

            # additional check that may be required by sub-classed data types
            if not self._passesCalcBiasTest(stn_value, stn_lon, stn_lat,
                                            raw_data, raw_lons, raw_lats):
                stn_interp_data.append(stn_value)
                stn_data_bias.append(0.)
                station_count += 1
                bias_not_calculated += 1
                continue

            # apply appropriate bias calculation algorithm
            if algorithm == 'mq':
                result = self.doMQInterp(stn_lon, stn_lat, stn_info,
                                         raw_lons, raw_lats, raw_data, debug)
            else:
                result = self.doIDWInterp(stn_lon, stn_lat, stn_info,
                                          raw_lons, raw_lats, raw_data, debug)

            if result is None:
                # set missing values and skip to next iteration
                stn_interp_data.append(N.inf)
                stn_data_bias.append(N.inf)
                insufficient_coverage += 1
                station_count += 1
                continue

            interpolated_value = result[1]
            data_bias = interpolated_value - stn_value
            estimated_value = interpolated_value - data_bias

            stn_data_bias.append(data_bias)
            stn_interp_data.append(estimated_value)
            station_count += 1
            algorithm_counts[result[0]] += 1

        if performance:
            unreported = station_count % report_rate
            if unreported > 0:
                reporter.logPerformance(start_report,
                                        PERF_MSG % (unreported, station_count))

        # convert the interpolated data and bias to numpy arrays
        stn_interp_data = N.array(stn_interp_data, dtype=float)
        stn_data_bias = N.array(stn_data_bias, dtype=float)

        indexes = N.where(N.isnan(stn_data_bias) | N.isinf(stn_data_bias))
        bad_bias_count = len(indexes[0])

        statistics = (station_count, algorithm_counts[2], algorithm_counts[1],
                      bad_bias_count, stations_bad_data, stations_outside,
                      insufficient_coverage, bias_not_calculated)

        return stn_interp_data, stn_data_bias, statistics

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def doMQInterp(self, stn_lon, stn_lat, stn_info, node_lons, node_lats,
                         node_data, debug):
        """ determine value at station from values at nearby grid nodes
        using the Multi-Quadric Distance algorithm
        """
        c_param = self.c_parm
        search_radius = self.search_radius

        # get indexes of all grid nodes within search radius of station
        # that have valid data values
        bbox = (stn_lon - search_radius, stn_lon + search_radius,
                stn_lat - search_radius, stn_lat + search_radius)
        indexes = N.where((node_lons >= bbox[0]) & (node_lats >= bbox[2]) &
                          (node_lons <= bbox[1]) & (node_lats <= bbox[3]) &
                          N.isfinite(node_data))

        # no grid nodes near this station
        if len(indexes) == 0 or len(indexes[0]) == 0:
            # set missing values and skip to next iteration
            if debug:
                print 'skipped ', stn_info
                print ' ... no grid nodes within search radius.'
            return None

        min_x = min(indexes[0])
        max_x = max(indexes[0]) + 1
        min_y = min(indexes[1])
        max_y = max(indexes[1]) + 1

        # must have at least one node in each quadrant
        area_lons = node_lons[min_x:max_x, min_y:max_y]
        area_lons = area_lons.flatten()
        area_lats = node_lats[min_x:max_x, min_y:max_y]
        area_lats = area_lats.flatten()
        area_values = node_data[min_x:max_x, min_y:max_y]
        area_values = area_values.flatten()
        num_nodes = len(area_values)

        # grid nodes are present in all 4 quadrants around the station
        # so, we can use Multiquadric interpolation
        if allQuadrants(stn_lon, stn_lat, area_lons, area_lats):
            interp_value = interp.mq(stn_lat, stn_lon, area_lats, area_lons,
                                     area_values, c_param)
            algorithm = 2
        # grid nodes NOT present in all 4 quadrants around the station
        # so we must use Inverse Distance Weighted Average
        elif num_nodes > 3:
            interp_value = interp.idw(stn_lat, stn_lon, area_lats, area_lons,
                                      area_values)
            algorithm = 1
        # too few nodes, take simple average
        else:
            interp_value = area_values.sum() / float(num_nodes)
            algorithm = 0

        return algorithm, interp_value

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def doIDWInterp(self, stn_lon, stn_lat, stn_info, node_lons, node_lats,
                          node_data, debug):
        """ determine value at station from values at nearby grid nodes
        using the Inverse Distance Weighted algorithm
        """
        relative_nodes = self.relative_nodes
        node_reach = self.node_reach

        # indexes of all grid nodes within search domain
        indexes = indexesOfNeighborNodes(stn_lon, stn_lat, relative_nodes,
                                         node_lons, node_lats, node_reach)
        rel_lons = node_lons[indexes]
        rel_lats = node_lats[indexes]
        rel_data = node_data[indexes]

        # narrow it down to those with valid values
        indexes = N.where(N.isfinite(rel_data))
        rel_lons = rel_lons[indexes]
        rel_lats = rel_lats[indexes]
        rel_data = rel_data[indexes]
        num_relatives = len(rel_data)

        # Inverse Distance Weighted Average
        if num_relatives > 3:
            interp_value = interp.idw(stn_lat, stn_lon, rel_lats, rel_lons,
                                      rel_data)
            algorithm = 1
        # too few nodes, take simple average
        elif num_relatives > 0:
            interp_value = rel_data.sum() / float(num_relatives)
            algorithm = 0
        # no valid nearby nodes, no bias ?
        else:
            if debug:
                print 'skipped ', stn_info
                print "... no valid grid nodes within relative neighborhood"
            return None

        return algorithm, interp_value

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def _pointInBounds(self, lon, lat):
        # check whether input point is within bounds for this run
        bbox = self.region_bbox
        if lon < bbox[0]: return False
        if lon > bbox[2]: return False
        if lat < bbox[1]: return False
        if lat > bbox[3]: return False
        return True

    def _passesApplyBiasTest(self, node_value, node_lon, node_lat, stn_bias,
                                   stn_lons, stn_lats):
        return True

    def _passesCalcBiasTest(self, stn_value, stn_lon, stn_lat, raw_data,
                                  raw_lons, raw_lats):
        return True
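
# Illustrative usage sketch (an assumption, not part of this module): the
# intended calculateBias -> applyBias workflow.  The bounding box, grid node
# arrays and station arrays are hypothetical placeholders; in practice they
# would come from the DEM grid and the station data file built elsewhere.
# region_bbox follows the (min_lon, min_lat, max_lon, max_lat) order expected
# by _pointInBounds().
#
#   tool = StationBiasTool(region_bbox=(-80.0, 40.0, -71.0, 46.0),
#                          search_radius=0.5, c_parm=0.05, vicinity=0.25,
#                          relative_nodes=3, node_reach=0.3)
#   interp_data, stn_bias, stats = tool.calculateBias('mq', stn_uids, stn_lons,
#                                                     stn_lats, stn_maxt, 'F',
#                                                     dem_lons, dem_lats,
#                                                     raw_maxt, 'F')
#   biased, grid_bias, counts = tool.applyBias(dem_lons, dem_lats, raw_maxt,
#                                              'F', stn_lons, stn_lats,
#                                              stn_bias, 'F', performance=True)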