Example #1
class StationDataFileBuilder(object):
    """ Retrieves station data for a group of states and writes it to a file.
    """

    REQUIRED_METADATA = ('uid','ll','elev')
    REQUIRED_ELEMS = ()

    def __init__(self, station_manager_class, station_data_filepath, elems=(),
                       metadata=(), base_url=None, file_attrs=(),
                       server_reset_wait_time=30, reporter_or_filepath=None,
                       **request_args):
        self.station_manager_class = station_manager_class
        self.station_data_filepath = station_data_filepath

        if elems:
            self.elements = self._guaranteeRequiredElements(elems)
        else:
            self.elements = self.REQUIRED_ELEMS

        self.element_ids = [ ]
        for element in self.elements:
            elem_id = indexableElementID(element)
            self.element_ids.append(elem_id)

        if metadata:
            self.metadata = tuple( set(self.REQUIRED_METADATA) | set(metadata) )
        else:
            self.metadata = self.REQUIRED_METADATA

        if base_url is None:
            self.client = AcisMultiStationDataClient()
        else:
            self.client = AcisMultiStationDataClient(base_url)

        self.file_attrs = file_attrs
        self.server_reset_wait_time = server_reset_wait_time

        # create a reporter for performance and debug
        if isinstance(reporter_or_filepath, Reporter):
            self.reporter = reporter_or_filepath
        else:
            self.reporter = Reporter(self.__class__.__name__,
                                     reporter_or_filepath)
            self.reporter.close()

        self.request_args = request_args
        self.extension_data = { }

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def __call__(self, date, states, max_attempts=1, debug=False,
                       performance=False):
        state_error = "attempt %d to retrieve stations for %s failed"
        self.client.debug = debug
        reporter = self.reporter
        
        if performance:
            msg = "Attempting to download stations for %d states :"
            reporter.logInfo(msg % len(states))

        start_perf = datetime.now() # for performance reporting

        num_states = len(states)
        station_data = None
        required_meta_key = self.REQUIRED_METADATA[0]

        attempts = 0
        total_stations = 0
        total_valid = 0
        while len(states) > 0 and attempts < max_attempts:
            attempts += 1
            do_over_states = [ ]

            for state in states:
                try:
                    results = self.allStationsInState(state, date, performance)
                except urllib2.HTTPError as e:
                    if attempts >= max_attempts: raise
                    
                    if e.code >= 400 and e.code < 500:
                        reporter.logError('REQUEST : ' +
                                          state_error % (attempts, state))
                    elif e.code >= 500:
                        reporter.logError('ACIS SERVER : ' +
                                          state_error % (attempts, state))
                    reporter.logError('HTTP response code = %s' % str(e.code))

                    # recoverable errors
                    if e.code in (500, 502, 503, 504, 598, 599):
                        do_over_states.append(state)
                        reporter.logInfo('waiting for server to clear ...')
                        time.sleep(self.server_reset_wait_time)

                    else: # no recovery path
                        errmsg = 'Build of station data file failed : %s'
                        reporter.reportError(errmsg %
                                             self.station_data_filepath)
                        raise

                except urllib2.URLError as e:
                    if attempts >= max_attempts: raise
                    
                    reporter.logError('urllib2 : ' +
                                      state_error % (attempts, state))
                    reporter.logException('urllib2.URLError')
                    # these errors are temporary and often recoverable
                    do_over_states.append(state)

                except Exception as e:
                    reporter.logException(state_error % (attempts, state))
                    # must assume that unknown exceptions are not recoverable
                    raise

                else:
                    num_stations, state_data = results
                    total_stations += num_stations
                    if state_data:
                        if station_data is not None:
                            for key in station_data:
                                station_data[key] += state_data[key]
                            total_valid += len(state_data[required_meta_key])
                        else:
                            station_data = state_data
                            total_valid = len(station_data[required_meta_key])

            # reset state list to those that failed
            states = do_over_states

        if len(states) > 0:
            errmsg = "SERVER ERROR : Unable to download data for %d states : "
            errmsg += str(tuple(states))
            raise RuntimeError(errmsg % len(states))

        if total_stations == 0:
            errmsg = "No station data available at the time of this run"
            raise LookupError(errmsg)

        if performance:
            msg = 'Download %d stations from %d states in'
            reporter.logPerformance(start_perf,
                                    msg % (total_stations,num_states))

        start_save = datetime.now() # for performance reporting
        if 'obs_date' not in self.file_attrs:
            file_attrs = { 'obs_date':date, }
            file_attrs.update(self.file_attrs)
            manager = self.newStationFileManager(station_data['lon'],
                                                 station_data['lat'],
                                                 file_attrs)
        else:
            manager = self.newStationFileManager(station_data['lon'],
                                                 station_data['lat'],
                                                 self.file_attrs)
        del station_data['lon']
        del station_data['lat']

        num_datasets = len(station_data.keys()) + 2
        self._saveDatasets(date, manager, station_data)
        manager.closeFile()
        del station_data
        del self.extension_data
        self.extension_data = { }

        if performance:
            msg = 'Saved %d datasets of %d observations each in'
            reporter.logPerformance(start_save,
                                    msg % (num_datasets,total_valid))

        return total_valid, total_stations

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def allStationsInState(self, state, date, performance):
        start_state = datetime.now()
        num_stations = 0
        num_valid = 0
        ncdc_code = postal2ncdc(state)
        required = set(self.REQUIRED_METADATA)
        station_data = { }

        stations = self.client.allStationsInState(state, self.elements, date, 
                                                  self.metadata,
                                                  **self.request_args)
        num_stations = len(stations['data'])
        if num_stations < 1:
            if performance:
                self.statePerfSummary(start_state, state, 0, 0) 
            return 0, station_data

        # list of data elements returned by ACIS
        elements = deepcopy(stations['elems'])
        data_keys = [indexableElementID(element) for element in elements]

        # only keep stations that have all required metadata
        usable = [row for row in stations['data']
                  if (set(row['meta'].keys()) & required) == required]
        if len(usable) < 1:
            if performance:
                self.statePerfSummary(start_state, state, num_stations, 0) 
            return 0, station_data

        del stations
        meta_keys = usable[0]['meta'].keys()

        # merge metadata values and data values into a single list
        merged = [row['meta'].values() + row['data'] for row in usable]
        del usable

        # reorganize merged station data into ordered lists of values
        # corresponding to each metadata and obs data key
        all_keys = meta_keys + data_keys
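        # e.g. with meta_keys = ['uid', 'll', 'elev'] and data_keys = ['maxt'],
        # a merged row [uid, (lon, lat), elev, maxt_obs] becomes the columns
        # station_data['uid'], station_data['ll'], etc. (illustrative only)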

        for indx in range(len(all_keys)):
            station_data[all_keys[indx]] = [row[indx] for row in merged]
        del merged

        # break up multi-component values in station data arrays
        if 'll' in station_data:
            station_data['lon'] = [value[0] for value in station_data['ll']]
            station_data['lat'] = [value[1] for value in station_data['ll']]
            del station_data['ll']

        for element in elements:
            key = indexableElementID(element)
            data = station_data[key]

            # additional data descriptors requested
            if 'add' in element:
                # observation time
                if 't' in element['add']:
                    t_key = OBS_TIME_KEYS.get(key, key+'_obs_time')
                    indx = element['add'].index('t') + 1
                    station_data[t_key] = [obs[indx] for obs in data]
                # data "correctness" flag
                if 'f' in element['add']:
                    f_key = OBS_FLAG_KEYS.get(key, key+'_obs_flag')
                    indx = element['add'].index('f') + 1
                    flags = [evalObsFlag(key,obs[0],obs[indx]) for obs in data]
                    station_data[f_key] = flags
                    data = [evalObsValue(key,obs[0],obs[indx]) for obs in data]
                else:
                    data = [evalObsValue(key,obs[0],' ') for obs in data]
            else:
                data = [evalObsValue(key,obs[0],' ') for obs in data]
            if key == 'pcpn':
                bad_indexes = [indx for indx in range(len(data)) 
                                    if N.isnan(data[indx])]
                if bad_indexes:
                    print 'WARNING: Invalid precip values at', str(bad_indexes)
                    sys.stdout.flush()

            station_data[key] = data

        # handle change in name of dataset sent by ACIS web services
        # used to be named 'postal', downstream now expects 'state'
        if 'state' not in station_data:
            station_data['state'] = [state for stn in station_data['lon']]
        if 'ncdc' not in station_data:
            station_data['ncdc'] = [ncdc_code for stn in station_data['lon']]
        if 'name' in station_data:
            station_data['name'] = [name.encode('iso-8859-1')
                                    for name in station_data['name']]

        station_data = self.validStations(state, station_data)
        num_valid = len(station_data[self.REQUIRED_METADATA[0]])

        self._processExtensionDatasets(station_data)

        if performance:
            self.statePerfSummary(start_state, state, num_stations, num_valid) 

        return num_stations, station_data

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def statePerfSummary(self, start_time, state, num_stations, num_valid):
        msg = 'downloaded %d stations for %s (%d usable) in'
        self.reporter.logPerformance(start_time,
                                     msg % (num_stations, state, num_valid))

    def validStations(self, state, station_data):
        return station_data

    def newStationFileManager(self, lons, lats, file_attrs):
        datasets = ( ('lon', N.array(lons), None),
                     ('lat', N.array(lats), None),
                   )
        return self.station_manager_class.newFile(self.station_data_filepath,
                                                  file_attrs, datasets)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def _guaranteeRequiredElements(self, elements):
        elements = list(elements)
        elem_ids = [elementID(element) for element in elements]
        for required in self.REQUIRED_ELEMS:
            req_id = elementID(required)
            if req_id not in elem_ids:
                elements.append(required)
        return tuple(elements)

    def _processExtensionDatasets(self, station_data):
        pass

    def _saveDataset(self, manager, dataset_name, data, attributes=None):
        if not isinstance(data, N.ndarray):
            data = manager._dataAsArray(dataset_name, data)
        try:
            manager.createDataset(dataset_name, data, attributes)
        except Exception as e:
            errmsg = "Failed to create '%s' dataset."
            self.reporter.logException(errmsg % dataset_name)
            raise

    def _saveDatasets(self, obs_date, manager, data_arrays):
        manager.setOpenState(True)

        elems = list(self.element_ids)
        elems.extend([OBSERVED_PREFIX+elem_id for elem_id in self.element_ids])

        for dataset_name, data in data_arrays.items():
            if dataset_name in elems:
                attrs = { 'obs_date':obs_date, }
                self._saveDataset(manager, dataset_name, data, attrs)
            else:
                self._saveDataset(manager, dataset_name, data)
        
        for dataset_name, data in self.extension_data.items():
            self._saveDataset(manager, dataset_name, data)

        manager.setOpenState(False)
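
A hedged usage sketch for the StationDataFileBuilder class above; the manager class, output path, element names and state list are illustrative assumptions, not taken from the listing:

from datetime import datetime

# StationFileManager and the output path are hypothetical
builder = StationDataFileBuilder(StationFileManager,
                                 '/data/stations_20120701.h5',
                                 elems=('maxt', 'mint', 'pcpn'),
                                 metadata=('uid', 'll', 'elev', 'name'),
                                 server_reset_wait_time=60)

# retry each failed state up to 3 times, then write the station data file
num_valid, num_total = builder(datetime(2012, 7, 1), ['NY', 'PA', 'VT'],
                               max_attempts=3, performance=True)
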
Example #5
        day = int(args[6])
        end_date = datetime(year, month, day)
    else:
        end_date = start_date
else:
    start_date = datetime.now() - relativedelta(days=options.days_ago)
    end_date = start_date
target_year = targetYearFromDate(start_date)
if target_year is None: exit()

log_filepath = options.log_filepath
if log_filepath is None:
    log_filename = '%%s-apple-variety-%s-build.log' % nameToFilepath(variety)
    log_filepath = buildLogFilepath(target_year, 'apple', log_filename,
                                    os.getpid())
reporter = Reporter(PID, log_filepath)
process_server = ProcessServer(reporter, variety, build_grids, draw_maps,
                               debug, test_run)

date = start_date
while date <= end_date:
    # do not start new date after quit time
    if quit_time is None or datetime.now() < quit_time:
        process_server.run(date)
    else:
        reason = 'time limit exceeded'
        exit(reason)
    date += ONE_DAY

reporter.logInfo('Processing ended gracefully')
Example #6
class BaseAcisDataClient(object):
    def __init__(self,
                 base_url=DEFAULT_URL,
                 outliers=OUTLIERS,
                 precip_trace=0.005,
                 accumulated_precip=N.inf,
                 missing=N.inf,
                 reporter_or_filepath=None,
                 version=DEFAULT_VERSION,
                 debug=False,
                 performance=False,
                 **kwargs):

        self.base_url = base_url
        self.outliers = outliers
        self.precip_trace = precip_trace
        self.accumulated_precip = accumulated_precip
        self.missing = missing
        if isinstance(reporter_or_filepath, Reporter):
            self.reporter = reporter_or_filepath
        else:
            self.reporter = Reporter(self.__class__.__name__,
                                     reporter_or_filepath)
            self.reporter.close()
        self.version = version
        self.debug = debug
        self.performance = performance
        self.keyword_args = kwargs
        if version < 2.0:
            self.ignore_awdn = kwargs.get('ignore_awdn', True)
            self.default_elements = ('pcpn', 'maxt', 'mint')
        else:
            self.default_elements = (
                {
                    'name': 'maxt',
                    'add': [
                        'f',
                    ]
                },
                {
                    'name': 'mint',
                    'add': [
                        'f',
                    ]
                },
                {
                    'name': 'pcpn',
                    'add': [
                        'f',
                    ]
                },
            )
        self.default_metadata = ('uid', 'll', 'elev')

        self.element_name_map = ELEMENT_NAME_MAP
        self.temp_elem_ids = ALL_TEMP_ELEMENTS
        self.precip_elem_ids = ALL_PRECIP_ELEMENTS
        self.non_numeric_elem_ids = ALL_NON_NUMERIC_ELEMS
        self.elem_names = TEMP_ELEM_NAMES + PRECIP_ELEM_NAMES
        self.vx_ids = TEMP_VX_IDS + PRECIP_VX_IDS + NON_NUMERIC_VX_IDS

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def request(self, data_type, method='POST', **kwargs):
        if self.version >= 2.0 and method != 'POST':
            errmsg = "only 'POST' method is supported by this version ACIS"
            raise ValueError, errmsg

        ERROR_MSG = 'Error processing request : %s %s'
        # normalize 'elems' into a validated tuple of element dicts
        elems = self.validateElements(
            kwargs.get('elems', self.default_elements))
        kwargs['elems'] = elems

        if self.version < 2.0:
            if 'no_awdn' not in kwargs and self.ignore_awdn:
                kwargs['no_awdn'] = 1
        else:
            if 'no_awdn' in kwargs:
                del kwargs['no_awdn']

        for date_key in ('date', 'sDate', 'eDate'):
            date = kwargs.get(date_key, None)
            if date is None: continue  # date key is not present

            if isinstance(date, (datetime, dt_date)):
                date = date.strftime('%Y%m%d')
            elif isinstance(date, (list, tuple)):
                if date[0] > 31:  # year is first element in sequence
                    date = '%d%02d%02d' % date
                else:  # year is last element in sequence
                    date = '%d%02d%02d' % (date[2], date[0], date[1])
            elif not isinstance(date, (str, unicode)):
                raise ValueError("Bad data type for '%s' argument" % date_key)

            kwargs[date_key] = date

        # join the base URL and data type path with exactly one '/'
        url = self.base_url.rstrip('/') + '/' + data_type.lstrip('/')

        if method == 'POST':
            post_args = json.dumps(kwargs)
            if self.debug:
                print 'POST', url
                print 'params =', post_args
            post_params = urllib.urlencode({'params': post_args})
            req = urllib2.Request(url, post_params,
                                  {'Accept': 'application/json'})
            url += ' json=' + post_params  # used only in error messages below
            start_time = datetime.now()
            try:
                response = urllib2.urlopen(req)
            except Exception:
                self.reporter.logError(ERROR_MSG % (method, url))
                raise
            end_time = datetime.now()

        else:
            url += '?' + urllib.urlencode(kwargs) + '&output=json'
            if self.debug:
                print 'GET', url
            start_time = datetime.now()
            try:
                response = urllib2.urlopen(url)
            except Exception:
                self.reporter.logError(ERROR_MSG % (method, url))
                raise
            end_time = datetime.now()

        try:
            response_data = response.read()
        except Exception:
            self.reporter.logError(ERROR_MSG % (method, url))
            raise
        end_time = datetime.now()

        if self.performance:
            msg = 'Time to retrieve data from ACIS web service ='
            self.reporter.logPerformance(start_time, msg)

        return kwargs['elems'], response_data, response

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def dateAsString(self, date):
        if isinstance(date, (str, unicode)):
            if '/' in date:
                return date.replace('/', '-')
            else:
                return date
        elif isinstance(date, (tuple, list)):
            return self._dateTupleAsString(date)
        try:
            return date.strftime('%Y-%m-%d')
        except AttributeError:
            raise ValueError('Invalid type for date.')

    def _dateTupleAsString(self, date):
        return '%d-%02d-%02d' % date

    def _validDateRange(self, start_date, end_date):
        start_date = self._dateAsTuple(start_date)
        if end_date is not None:
            end_date = self._dateAsTuple(end_date)
        return start_date, end_date

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def findOutliers(self, data, min_value, max_value):
        bogus = []
        for i, value in enumerate(data):
            if value > max_value:
                bogus.append((i, value))
            elif value < min_value and value != -32768.:
                bogus.append((i, value))
        return tuple(bogus)

    def responseAsDict(self, json_string):
        return json.loads(json_string)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def serializeDataValue(self, element, data):
        if self.version < 2.0:
            # do nothing for non-numeric data
            if element in self.non_numeric_elem_ids:
                return data
            # handle characters in numeric data types
            if data == 'M': return self.missing
            if element in self.precip_elem_ids:
                if data == 'T': return self.precip_trace
                if data == 'S': return self.missing
                if data.endswith('A'): return self.accumulated_precip
            # all other numeric values are returned as floats
            return float(data)

        else:
            elem_id = element.get('name', element.get('vX', None))
            # do nothing for non-numeric data or unrecognized elements
            if elem_id is None or elem_id in self.non_numeric_elem_ids:
                return data
            # handle characters in numeric data types
            if data[0] in ('A', 'M', 'S', ' '): data[0] = self.missing
            elif data[0] == 'T': data[0] = self.precip_trace
            # all other numeric values are returned as floats
            else:
                data[0] = float(data[0])
            return data
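
    # e.g. with version >= 2.0 and element {'name': 'pcpn', 'add': ['f']},
    # serializeDataValue maps ['T',' '] -> [0.005,' '], ['M',' '] -> [inf,' ']
    # and ['0.42',' '] -> [0.42,' ']   (illustrative values, default settings)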

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def validateElementDict(self, elem):
        if 'name' in elem:
            name = elem['name']
            elem_name = self.element_name_map.get(name, name)
            if elem_name in self.elem_names:
                elem['name'] = elem_name
                return elem
            raise ValueError, "Invalid value for 'name' in element dictionary"
        elif 'vX' in elem:
            if elem['vX'] in self.vx_ids:
                return elem
            raise ValueError, "Invalid value for 'vX' in element dictionary"

        errmsg = "Element dictionary must contain either the 'name' or 'vX' key"
        raise KeyError(errmsg)

    def validateElementString(self, elem):
        if elem.isdigit() and int(elem) in self.vx_ids:
            return {
                'vX': int(elem),
            }
        elif elem in self.elem_names:
            return {
                'name': elem,
            }
        name = self.element_name_map.get(elem, None)
        if name is not None:
            return {
                'name': name,
            }

        errmsg = "String contains invalid element identifier '%s'"
        raise ValueError(errmsg % elem)

    def validateElements(self, elements):
        if elements is None: return None
        if isinstance(elements, dict):
            return (self.validateElementDict(elements), )

        elif isinstance(elements, (str, unicode)):
            return (self.validateElementString(elements), )

        elif isinstance(elements, (tuple, list)):
            valid_elems = []
            for elem in elements:
                if isinstance(elem, (str, unicode)):
                    valid_elems.append(self.validateElementString(elem))
                elif isinstance(elem, dict):
                    valid_elems.append(self.validateElementDict(elem))
            return tuple(valid_elems)

        raise ValueError, "Invalid type for element identifier"

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def _pythonObjectsFromJson(self,
                               json_string,
                               response,
                               request_detail=None):
        """ Convert a json string to Python objects ... handle known instances
        where server injects badly formed JSON into the stream
        """
        if 'DOCTYPE HTML PUBLIC' in json_string:
            errmsg = 'SERVER ERROR : '
            if 'server encountered an internal error' in json_string:
                errmsg += 'server encountered an unspecified internal error.'
                if request_detail is not None:
                    errmsg += '\n' + request_detail
                ecode = 503
            else:
                ecode = 500
                errmsg += 'server returned HTML, not valid JSON.\n'
                if request_detail is not None:
                    errmsg += request_detail + '\n'
                errmsg += json_string
            raise urllib2.HTTPError(response.geturl(), ecode, errmsg, None,
                                    None)

        server_error = 'SERVER ERROR : '
        errors = []
        if '[Failure instance:' in json_string:
            found_start = json_string.find('[Failure instance:')
            while found_start > 0:
                found_end = json_string.find('\n],', found_start)
                error = json_string[found_start:found_end + 3]
                errors.append(''.join(error.splitlines()))
                before = json_string[:found_start]
                after = json_string[found_end + 3:]
                json_string = before + after
                found_start = json_string.find('[Failure instance:')

        if errors:
            errmsg = 'the following errors found in returned JSON string :'
            print server_error, errmsg
            for error in errors:
                print error
            if request_detail is not None:
                errmsg = 'Station data block may be incomplete for'
                print errmsg, request_detail
            else:
                errmsg = 'The resulting station data block may be incomplete.'
            sys.stdout.flush()

        try:
            return json.loads(json_string)
        except ValueError:
            errmsg = 'unable to handle improperly formatted JSON from server.\n'
            errmsg += response.geturl() + '\n'
            if request_detail is not None:
                errmsg += request_detail + '\n'
            errmsg += json_string
            reportException(errmsg)
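
A minimal usage sketch for BaseAcisDataClient, assuming the defaults shown above; the 'StnData' endpoint name, the station id and the request arguments are assumptions about the ACIS web service, not confirmed by this listing:

client = BaseAcisDataClient(debug=True)

# request() validates 'elems' and serializes the date tuple to 'YYYYMMDD'
elems, json_text, response = client.request('StnData',    # assumed endpoint
                                            sid='304174', # hypothetical id
                                            date=(2012, 7, 1),
                                            elems=('maxt', 'mint', 'pcpn'))
data = client.responseAsDict(json_text)
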
class StationBiasTool(object):
    def __init__(self,
                 region_bbox,
                 search_radius,
                 c_parm,
                 vicinity,
                 relative_nodes,
                 node_reach,
                 reporter_or_filepath=None):

        self.c_parm = c_parm
        self.node_reach = node_reach
        self.region_bbox = region_bbox
        self.relative_nodes = relative_nodes
        self.search_radius = search_radius
        self.vicinity = vicinity

        # create a reporter for performance and debug
        if isinstance(reporter_or_filepath, Reporter):
            self.reporter = reporter_or_filepath
        else:
            self.reporter = Reporter(self.__class__.__name__,
                                     reporter_or_filepath)
            self.reporter.close()

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def applyBias(self,
                  dem_lons,
                  dem_lats,
                  dem_data,
                  dem_data_units,
                  stn_lons,
                  stn_lats,
                  stn_bias,
                  stn_bias_units,
                  report_rate=1000,
                  debug=False,
                  performance=False):
        """ Apply the calculated station temperature bias to the grid nodes. 
        """
        PERF_MSG = 'processed %d grid nodes in'
        PERF_MSG_SUFFIX = ' ... total = %d of %d'
        reporter = self.reporter

        search_radius = self.search_radius
        c_parm = self.c_parm
        vicinity = self.vicinity

        min_count = report_rate - 1

        dem_grid_shape = dem_lons.shape
        dem_grid_size = dem_lons.size

        # create empty in-memory arrays for calculated grids
        biased_data = N.empty(shape=dem_grid_shape, dtype=float)
        dem_data_bias = N.empty(shape=dem_grid_shape, dtype=float)

        num_nodes_processed = 0
        no_change = 0
        start_count = datetime.now()

        # make sure station and dem data are in the same units
        if stn_bias_units != dem_data_units:
            stn_bias = convertUnits(stn_bias, stn_bias_units, dem_data_units)

        # loop thru the nodes of the raw grid and apply the station bias
        for x in range(dem_grid_shape[0]):
            for y in range(dem_grid_shape[1]):
                if performance:
                    # report performance every 'report_rate' passes thru loop
                    if num_nodes_processed > min_count and\
                       num_nodes_processed % report_rate == 0:
                        msg = PERF_MSG % (report_rate)
                        sfx = PERF_MSG_SUFFIX % (num_nodes_processed,
                                                 dem_grid_size)
                        reporter.logPerformance(start_count, msg, sfx)
                        start_count = datetime.now()

                node_lon = dem_lons[x, y]
                node_lat = dem_lats[x, y]
                node_value = dem_data[x, y]
                if not self._passesApplyBiasTest(node_value, node_lon,
                                                 node_lat, stn_bias, stn_lons,
                                                 stn_lats):
                    biased_data[x, y] = dem_data[x, y]
                    dem_data_bias[x, y] = 0.
                    num_nodes_processed += 1
                    no_change += 1
                    continue

                # get indexes of all stations within search radius of grid node
                # bbox will be different for each grid node
                bbox = (node_lon - search_radius, node_lon + search_radius,
                        node_lat - search_radius, node_lat + search_radius)
                indexes = N.where((stn_lons >= bbox[0]) & (stn_lons <= bbox[1])
                                  & (stn_lats >= bbox[2])
                                  & (stn_lats <= bbox[3]))

                # no stations within search radius
                if len(indexes[0]) < 1:
                    # NO ADJUSTMENT CAN BE MADE
                    biased_data[x, y] = dem_data[x, y]
                    dem_data_bias[x, y] = 0.
                    num_nodes_processed += 1
                    no_change += 1
                    continue

                # coordinates of all stations in the search area
                area_lons = stn_lons[indexes]
                area_lats = stn_lats[indexes]

                # test stations for 'nearness' to the grid node
                bbox = (node_lon - vicinity, node_lon + vicinity,
                        node_lat - vicinity, node_lat + vicinity)
                nearby = N.where((area_lons >= bbox[0])
                                 & (area_lons <= bbox[1])
                                 & (area_lats >= bbox[2])
                                 & (area_lats <= bbox[3]))

                # in order to use MQ we must have at least one 'nearby' station
                # or stations in all 4 quadrants surrounding the node
                if (len(nearby[0]) < 1 and not allQuadrants(
                        node_lon, node_lat, area_lons, area_lats)):
                    # NO ADJUSTMENT CAN BE MADE
                    biased_data[x, y] = dem_data[x, y]
                    dem_data_bias[x, y] = 0.
                    num_nodes_processed += 1
                    no_change += 1
                    continue

                # run multiquadric interpolation on BIAS
                data_bias = interp.mq(node_lat, node_lon, area_lats, area_lons,
                                      stn_bias[indexes], c_parm)
                if N.isfinite(data_bias):
                    # apply valid bias
                    value = dem_data[x, y] - data_bias
                else:
                    # invalid bias ... NO ADJUSTMENT CAN BE MADE
                    value = dem_data[x, y]
                    data_bias = 0.
                    no_change += 1

                if N.isfinite(value):
                    biased_data[x, y] = value
                    dem_data_bias[x, y] = data_bias
                else:
                    biased_data[x, y] = dem_data[x, y]
                    dem_data_bias[x, y] = 0.
                    no_change += 1

                num_nodes_processed += 1

        # log performance for nodes not yet reported
        unreported = num_nodes_processed % report_rate
        if performance and unreported > 0:
            msg = PERF_MSG % (unreported)
            sfx = PERF_MSG_SUFFIX % (num_nodes_processed, dem_grid_size)
            reporter.logPerformance(start_count, msg, sfx)

        return biased_data, dem_data_bias, (num_nodes_processed, no_change)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def calculateBias(self,
                      algorithm,
                      stn_uids,
                      stn_lons,
                      stn_lats,
                      stn_data,
                      stn_data_units,
                      raw_lons,
                      raw_lats,
                      raw_data,
                      raw_data_units,
                      report_rate=100,
                      debug=False,
                      performance=False):
        """ Calculate the weighted difference between the data value at
        each station and the nearby grid nodes. It will use multiquadric
        interpolation except when there are an insufficient number of grid
        nodes nearby, then it will use a simple inverse distance weighted
        average.
        """
        # local references to instance attributes
        reporter = self.reporter
        vicinity = self.vicinity

        min_count = report_rate - 1
        PERF_MSG = 'processed %d stations (%d total) in'

        # initialize station data bias arrays
        stn_interp_data = []
        stn_data_bias = []
        num_stations = len(stn_uids)

        # initialize tracking variables
        algorithm_counts = [0, 0, 0]
        station_count = 0
        stations_bad_data = 0
        stations_outside = 0
        insufficient_coverage = 0
        bias_not_calculated = 0
        start_report = datetime.now()

        # make sure station and dem data are in the same units
        if raw_data_units != stn_data_units:
            raw_data = convertUnits(raw_data, raw_data_units, stn_data_units)

        # loop through the list of stations, calculating the data bias
        # at each one
        for indx in range(num_stations):
            # the following is good for a limited test loop
            #for indx in (84,85,278,330,337,345,360,368,444,476):
            # report performance every 'report_rate' passes thru the loop
            if performance and (station_count > min_count
                                and station_count % report_rate == 0):
                reporter.logPerformance(
                    start_report, PERF_MSG % (report_rate, station_count))
                start_report = datetime.now()

            # extract observation data for this station
            stn_id = stn_uids[indx]
            stn_lon = stn_lons[indx]
            stn_lat = stn_lats[indx]
            stn_info = 'station %d (%s) at [%-9.5f, %-9.5f]' % (
                indx, stn_id, stn_lon, stn_lat)

            # station is not within the bounding box for this run
            if not self._pointInBounds(stn_lon, stn_lat):
                stn_interp_data.append(N.inf)
                stn_data_bias.append(N.inf)
                stations_outside += 1
                station_count += 1
                continue

            stn_value = stn_data[indx]
            # check for invalid data value for this station
            # this shouldn't happen if station data prep is done right !!!
            if not N.isfinite(stn_value):
                # set missing values and skip to next iteration
                stn_interp_data.append(N.inf)
                stn_data_bias.append(N.inf)
                stations_bad_data += 1
                station_count += 1
                if debug:
                    print 'skipped ', stn_info
                    print '... bad data value', stn_value
                continue

            # additional check that may be required by sub-classed data types
            if not self._passesCalcBiasTest(stn_value, stn_lon, stn_lat,
                                            raw_data, raw_lons, raw_lats):
                stn_interp_data.append(stn_value)
                stn_data_bias.append(0.)
                station_count += 1
                bias_not_calculated += 1
                continue

            # apply appropriate bias calculation algorithm
            if algorithm == 'mq':
                result = self.doMQInterp(stn_lon, stn_lat, stn_info, raw_lons,
                                         raw_lats, raw_data, debug)
            else:
                result = self.doIDWInterp(stn_lon, stn_lat, stn_info, raw_lons,
                                          raw_lats, raw_data, debug)

            if result is None:
                # set missing values and skip to next iteration
                stn_interp_data.append(N.inf)
                stn_data_bias.append(N.inf)
                insufficient_coverage += 1
                station_count += 1
                continue

            interpolated_value = result[1]
            data_bias = interpolated_value - stn_value

            stn_data_bias.append(data_bias)
            stn_interp_data.append(interpolated_value)

            station_count += 1
            algorithm_counts[result[0]] += 1

        if performance:
            unreported = station_count % report_rate
            if unreported > 0:
                reporter.logPerformance(start_report,
                                        PERF_MSG % (unreported, station_count))

        # convert the interpolated data and bias to numpy arrays
        stn_interp_data = N.array(stn_interp_data, dtype=float)
        stn_data_bias = N.array(stn_data_bias, dtype=float)
        indexes = N.where(N.isnan(stn_data_bias) | N.isinf(stn_data_bias))
        bad_bias_count = len(indexes[0])

        statistics = (station_count, algorithm_counts[2], algorithm_counts[1],
                      bad_bias_count, stations_bad_data, stations_outside,
                      insufficient_coverage, bias_not_calculated)
        return stn_interp_data, stn_data_bias, statistics

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def doMQInterp(self, stn_lon, stn_lat, stn_info, node_lons, node_lats,
                   node_data, debug):
        """ determine value at station from values at nearby grid nodes
        using Multi-Quadric Distance algorithm
        """
        c_param = self.c_parm
        search_radius = self.search_radius

        # get indexes of all grid nodes within search radius of station
        # that have valid data values
        bbox = (stn_lon - search_radius, stn_lon + search_radius,
                stn_lat - search_radius, stn_lat + search_radius)
        indexes = N.where((node_lons >= bbox[0]) & (node_lats >= bbox[2])
                          & (node_lons <= bbox[1]) & (node_lats <= bbox[3])
                          & N.isfinite(node_data))

        # no grid nodes near this station
        if len(indexes) == 0 or len(indexes[0]) == 0:
            # set missing values and skip to next iteration
            if debug:
                print 'skipped ', stn_info
                print ' ... no grid nodes within search radius.'
            return None

        min_x = min(indexes[0])
        max_x = max(indexes[0]) + 1
        min_y = min(indexes[1])
        max_y = max(indexes[1]) + 1

        # must have at least one node in each quadrant
        area_lons = node_lons[min_x:max_x, min_y:max_y]
        area_lons = area_lons.flatten()

        area_lats = node_lats[min_x:max_x, min_y:max_y]
        area_lats = area_lats.flatten()

        area_values = node_data[min_x:max_x, min_y:max_y]
        area_values = area_values.flatten()
        num_nodes = len(area_values)

        # grid nodes are present in all 4 quadrants around the station
        # so we can use Multiquadric interpolation
        if allQuadrants(stn_lon, stn_lat, area_lons, area_lats):
            interp_value = interp.mq(stn_lat, stn_lon, area_lats, area_lons,
                                     area_values, c_param)
            algorithm = 2

        # grid nodes NOT present in all 4 quadrants around the station
        # so we must use Inverse Distance Weighted Average
        elif num_nodes > 3:
            interp_value = interp.idw(stn_lat, stn_lon, area_lats, area_lons,
                                      area_values)
            algorithm = 1
        # too few nodes, take simple average
        else:
            interp_value = area_values.sum() / float(num_nodes)
            algorithm = 0

        return algorithm, interp_value

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def doIDWInterp(self, stn_lon, stn_lat, stn_info, node_lons, node_lats,
                    node_data, debug):
        """ determine value at station from values at nearby grid nodes
        using Inverse Distance Weigthed algorithm
        """
        relative_nodes = self.relative_nodes
        node_reach = self.node_reach

        # indexes of all grid nodes within search domain
        indexes = indexesOfNeighborNodes(stn_lon, stn_lat, relative_nodes,
                                         node_lons, node_lats, node_reach)
        rel_lons = node_lons[indexes]
        rel_lats = node_lats[indexes]
        rel_data = node_data[indexes]

        # narrow it down to those with valid values
        indexes = N.where(N.isfinite(rel_data))
        rel_lons = rel_lons[indexes]
        rel_lats = rel_lats[indexes]
        rel_data = rel_data[indexes]

        num_relatives = len(rel_data)
        # Inverse Distance Weighted Average
        if num_relatives > 3:
            interp_value = interp.idw(stn_lat, stn_lon, rel_lats, rel_lons,
                                      rel_data)
            algorithm = 1
        # too few nodes, take simple average
        elif num_relatives > 0:
            interp_value = rel_data.sum() / float(num_relatives)
            algorithm = 0
        # no valid nearby nodes, so no bias can be calculated
        else:
            if debug:
                print 'skipped ', stn_info
                print "... no valid grid nodes within relative neighborhood"
            return None

        return algorithm, interp_value

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    def _pointInBounds(self, lon, lat):
        # check whether input point is within bounds for this run
        bbox = self.region_bbox  # (min lon, min lat, max lon, max lat)
        if lon < bbox[0]: return False
        if lon > bbox[2]: return False
        if lat < bbox[1]: return False
        if lat > bbox[3]: return False
        return True

    def _passesApplyBiasTest(self, node_value, node_lon, node_lat, stn_bias,
                             stn_lons, stn_lats):
        return True

    def _passesCalcBiasTest(self, stn_value, stn_lon, stn_lat, raw_data,
                            raw_lons, raw_lats):
        return True
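
The listings above call interp.mq, interp.idw and allQuadrants without showing them. A minimal sketch, assuming simple distances in lon/lat degree space, of what the inverse distance weighted helper and the quadrant test might look like; the real interp module may differ:

import numpy as N

def idw_sketch(lat, lon, lats, lons, values, power=2.0):
    """ hypothetical stand-in for interp.idw """
    dist_sq = (lats - lat)**2 + (lons - lon)**2
    if N.any(dist_sq == 0.):  # exact hit : return the co-located value
        return values[N.argmin(dist_sq)]
    weights = 1. / dist_sq**(power / 2.)
    return (weights * values).sum() / weights.sum()

def all_quadrants_sketch(lon, lat, lons, lats):
    """ hypothetical stand-in for allQuadrants : True when at least one
    point falls in each quadrant surrounding (lon, lat)
    """
    west, east = lons < lon, lons >= lon
    south, north = lats < lat, lats >= lat
    return bool(N.any(west & south) and N.any(west & north) and
                N.any(east & south) and N.any(east & north))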