class HydroForecastIO(WebserviceLoader, XmlParser, TimeSeriesMapper,
                      BaseIO):
    """
    Loads hydrograph forecast data (next 3 days) from weather.gov

    Notes on the returned XML:
     - valid is the time of the forecast in UTC
     - primary is the stage in ft
     - secondary is the flow in kcfs
     - the service appears to return about 3 days of forecast data
    """
    start_date = DateOpt(ignored=True)
    end_date = DateOpt(ignored=True)

    state = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)
    station = FilterOpt(url_param='gage')
    parameter = FilterOpt(ignored=True)
    basin = FilterOpt(ignored=True)

    root_tag = "forecast"
    date_formats = ['%Y-%m-%dT%H:%M:%S']
    url = 'http://water.weather.gov/ahps2/hydrograph_to_xml.php'

    def parse_item(self, elem):
        valid = elem.find('valid')
        primary = elem.find('primary')
        secondary = elem.find('secondary')
        return {
            'date': valid.text.replace('-00:00', ''),
            primary.attrib['name']: primary.text,
            secondary.attrib['name']: secondary.text,
        }
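
# Example usage (a minimal sketch): the gage ID below is illustrative,
# and the attribute names assume the XML "name" attributes are
# Stage/Flow as described above.
forecast = HydroForecastIO(station='GUYC2')
for row in forecast:
    print(row.date, row.stage, row.flow)
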
class NwisLoader(WebserviceLoader):
    """
    Base class for loading data from the USGS NWIS REST services.
    """
    # Map WebserviceLoader options to NWIS equivalents
    start_date = DateOpt(url_param='startDT')
    end_date = DateOpt(url_param='endDT')
    state = FilterOpt(url_param='stateCd')
    county = FilterOpt(url_param='countyCd', multi=True)
    basin = FilterOpt(url_param='huc', multi=True)
    station = FilterOpt(url_param='site', multi=True)
    parameter = FilterOpt(url_param='parameterCd', multi=True)

    # Additional options unique to NWIS
    sitetype = ChoiceOpt(
        url_param='siteType',
        multi=True,
        choices=list(SITE_TYPES.keys()),
    )

    # Each NWIS webservice uses the same base URL, with a service path
    service = None

    @property
    def url(self):
        return "http://waterservices.usgs.gov/nwis/%s/" % self.service
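
# A concrete loader only needs to set `service`, which fills in the URL
# path. Hypothetical subclass sketch ("site" is the NWIS site service
# path; the class itself is not from the original source):
class SiteIO(NwisLoader):
    service = 'site'  # requests http://waterservices.usgs.gov/nwis/site/
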
class HourlyDataIO(TimeSeriesMapper, SnotelIO):
    """
    Wrapper for getHourlyData(), used internally by StationHourlyDataIO
    """
    data_function = 'getHourlyData'

    # TimeSeriesMapper configuration
    date_formats = ['%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M']

    # Applicable WebserviceLoader default options
    station = FilterOpt(required=True, url_param='stationTriplets')
    parameter = FilterOpt(required=True, url_param='elementCd')
    start_date = DateOpt(required=True, url_param='beginDate')
    end_date = DateOpt(required=True, url_param='endDate')

    # Additional options
    begin_hour = FilterOpt(url_param='beginHour')
    end_hour = FilterOpt(url_param='endHour')

    # HeightDepth parameters don't seem to be necessary.
    default_params = {
        'ordinal': 1,
    }

    def load(self):
        super(HourlyDataIO, self).load()
        if self.data and 'values' in self.data[0]:
            self.data = as_list(self.data[0]['values'])
        else:
            raise NoData
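
# Example usage (sketch): the station triplet and element code are
# illustrative; begin_hour/end_hour are optional.
hourly = HourlyDataIO(
    station='302:OR:SNTL',
    parameter='TOBS',
    start_date='2017-01-01',
    end_date='2017-01-02',
)
for row in hourly:
    print(row)  # fields follow the AWDB response (e.g. dateTime, value)
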
class StationMetaIO(AcisIO):
    """
    Retrieves metadata about the climate stations in a region.
    See http://data.rcc-acis.org/doc/#title8
    """
    namespace = "meta"  # For wq.io.parsers.text.JsonParser
    path = "StnMeta"

    # These options are not required for StationMetaIO
    start_date = DateOpt(url_param='sdate')
    end_date = DateOpt(url_param='edate')
    parameter = ParameterOpt()

    def parse(self):
        """
        Convert ACIS 'll' value into separate latitude and longitude.
        """
        super(AcisIO, self).parse()

        # This is more of a "mapping" step than a "parsing" step, but
        # mappers only allow one-to-one mapping from input fields to
        # output fields.
        for row in self.data:
            if 'meta' in row:
                row = row['meta']
            if 'll' in row:
                row['longitude'], row['latitude'] = row['ll']
                del row['ll']

    def map_value(self, field, value):
        """
        Clean up some values returned from the web service.
        (overrides wq.io.mappers.BaseMapper)
        """
        if field == 'sids':
            # Site identifiers are returned as "[id] [auth_id]";
            # map to auth name for easier usability
            ids = {}
            for idinfo in value:
                sid, auth = idinfo.split(' ')
                auth = AUTHORITY_BY_ID[auth]
                ids[auth['name']] = sid
            return ids

        if field == 'valid_daterange':
            # Date ranges for each element are returned in an array
            # (sorted by the order the elements were requested);
            # convert to a dictionary with element id as key
            elems, elems_is_complex = self.getlist('parameter')
            ranges = {}
            for elem, val in zip(elems, value):
                if val:
                    start, end = val
                    ranges[elem] = (parse_date(start), parse_date(end))
                else:
                    ranges[elem] = None, None
            return ranges

        return value
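
# Example usage (sketch): '27053' is an illustrative county FIPS code
# and 'avgt' an ACIS element name; longitude/latitude come from the
# 'll' handling in parse() above.
sites = StationMetaIO(county='27053', parameter='avgt')
for site in sites:
    print(site.name, site.longitude, site.latitude)
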
class RegionDailyDataIO(StationIO):
    """
    All-in-one IO for loading site metadata and daily data for a region
    (i.e. a state, county, or basin).  Internally calls:

     - getStations()
     - getStationMetadata()
     - getStationElements()
     - getData()

    The outer IO is a list of sites in the region - derived from
    StationIO, but with an extra "data" property on each station pointing
    to an inner time series IO for each site.  The inner IO is based on
    StationDailyDataIO but flattened to avoid multiple levels of nesting.

    parameter is optional but recommended (otherwise all available data
    for all sites will be returned).

    Usage:

    sites = RegionDailyDataIO(
        basin='17060105',
        start_date='2014-07-01',
        end_date='2014-07-31',
        parameter='TAVG',
    )
    for site in sites:
        print(site.name)
        for row in site.data:
            print("   ", row.date, row.value, row.storedunitcd)
    """
    nested = True

    # Applicable WebserviceLoader default options
    start_date = DateOpt(required=True)
    end_date = DateOpt(required=True)
    parameter = FilterOpt(url_param='elementCds')

    @property
    def params(self):
        params = super(RegionDailyDataIO, self).params
        # Start and end date are actually only used by the inner IO.
        del params['start_date']
        del params['end_date']
        return params

    def load(self):
        super(RegionDailyDataIO, self).load()
        for station in self.data:
            station['data'] = flattened(
                StationDailyDataIO,
                station=station['stationTriplet'],
                start_date=self.getvalue('start_date'),
                end_date=self.getvalue('end_date'),
                parameter=self.getvalue('parameter'),
                debug=self.debug,
            )
class StationDataIO(StationElementIO):
    """
    Base class for StationDailyDataIO and StationHourlyDataIO.  Retrieves
    all data for a station that matches the specified duration by calling
    the specified inner_io_class.
    """
    nested = True

    # Applicable WebserviceLoader default options
    start_date = DateOpt(url_param='beginDate', required=True)
    end_date = DateOpt(url_param='endDate', required=True)
    parameter = FilterOpt()

    inner_io_class = None
    duration = None

    @property
    def params(self):
        params = super(StationDataIO, self).params
        # Parameter filter (if any) is applied *after* the initial request
        params.pop('parameter', None)
        return params

    def load(self):
        super(StationDataIO, self).load()
        data = []
        for row in self.data:
            # Only include records matching the specified duration
            # and parameter
            if row['duration'] != self.duration:
                continue
            elem = self.getvalue('parameter')
            if elem and row['elementCd'] != elem:
                continue

            # getStationElements() sometimes returns parameters that don't
            # actually have data for the requested timeframe - silently
            # catch the exception and remove the parameter from results.
            try:
                row['data'] = self.inner_io_class(
                    station=row['stationTriplet'],
                    parameter=row['elementCd'],
                    start_date=self.getvalue('start_date'),
                    end_date=self.getvalue('end_date'),
                    debug=self.debug,
                )
            except NoData:
                continue
            data.append(row)
        self.data = data
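
# Sketch of a concrete subclass: bind the 'DAILY' duration to the inner
# IO that loads each matching element (DailyDataIO is defined below).
class StationDailyDataIO(StationDataIO):
    inner_io_class = DailyDataIO
    duration = 'DAILY'
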
class SnotelIO(WebserviceLoader, BaseParser, TupleMapper, BaseIO):
    """
    Base class for accessing SNOTEL AWDB SOAP web services.
    """
    webservice_name = "awdbWebService"
    data_function = None

    # Override default WebserviceLoader options
    start_date = DateOpt(ignored=True)
    end_date = DateOpt(ignored=True)
    state = FilterOpt(ignored=True)
    basin = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)
    station = FilterOpt(ignored=True)
    parameter = FilterOpt(ignored=True)

    # Some records may have additional fields; loop through the entire
    # array to ensure all field names are accounted for.  (Otherwise
    # BaseIO will guess field names using only the first record.)
    scan_fields = True

    def load(self):
        if self.debug:
            self.print_debug()
        params = self.params
        fn = getattr(get_server(), self.data_function)
        self.data = fn(**params)
        if len(self.data) == 0:
            self.data = []
        else:
            self.data = as_list(self.data)
            if isinstance(self.data[0], SudsObject):
                parse = asdict
            else:
                parse = str
            self.data = [parse(row) for row in self.data]

    def print_debug(self):
        print('%s.%s(%s)' % (
            self.webservice_name,
            self.data_function,
            ','.join(
                '%s=%s' % (key, val)
                for key, val in self.params.items()
            )
        ))
class ForecastDataIO(SnotelIO):
    """
    Wrapper for getForecast()
    """
    data_function = 'getForecast'

    station = FilterOpt(required=True, url_param='stationTriplets')
    parameter = FilterOpt(required=True, url_param='elementCd')
    forecast_period = FilterOpt(required=True, url_param='forecastPeriod')
    publication_date = DateOpt(required=True, url_param='publicationDate')
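
# Example usage (sketch): the triplet, element code, and forecast
# period below are illustrative values, not verified against the
# AWDB service.
forecasts = ForecastDataIO(
    station='302:OR:SNTL',
    parameter='SRVO',
    forecast_period='APR-JUL',
    publication_date='2017-04-01',
)
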
class StationElementIO(SnotelIO):
    """
    Wrapper for getStationElements(), incorporating element names from
    getElements()
    """
    data_function = 'getStationElements'

    # Applicable WebserviceLoader default options
    start_date = DateOpt(url_param='beginDate')
    end_date = DateOpt(url_param='endDate')
    station = FilterOpt(required=True, url_param='stationTriplet')

    def load(self):
        super(StationElementIO, self).load()
        names = ElementIO.get_names()
        for elem in self.data:
            elem['element_name'] = names[elem['elementCd']]
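
# Example usage (sketch): the triplet is illustrative; attribute names
# assume TupleMapper's lowercasing of the AWDB field names.
elements = StationElementIO(station='302:OR:SNTL')
for elem in elements:
    print(elem.elementcd, elem.element_name)
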
class HourlyDataIO(TimeSeriesMapper, SnotelIO):
    """
    Wrapper for getHourlyData(), used internally by StationHourlyDataIO
    """
    data_function = 'getHourlyData'

    # TimeSeriesMapper configuration
    date_formats = ['%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M']

    # Applicable WebserviceLoader default options
    station = FilterOpt(
        required=True, url_param='stationTriplets', multi=True,
    )
    parameter = FilterOpt(required=True, url_param='elementCd')
    start_date = DateOpt(required=True, url_param='beginDate')
    end_date = DateOpt(required=True, url_param='endDate')

    # Additional options
    begin_hour = FilterOpt(url_param='beginHour')
    end_hour = FilterOpt(url_param='endHour')

    # HeightDepth parameters don't seem to be necessary.
    default_params = {
        'ordinal': 1,
    }

    def load(self):
        super(HourlyDataIO, self).load()
        results = {}
        # Multiple station triplets can be requested at once, so group
        # the returned values by station.
        for station_data in self.data:
            triplet = station_data['stationTriplet']
            if station_data and 'values' in station_data:
                values = [
                    asdict(row)
                    for row in as_list(station_data['values'])
                ]
            else:
                values = []
            results[triplet] = values
        self.data = results
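
# Example usage (sketch): with multi=True, several (illustrative)
# triplets can be queried at once; .data then maps each triplet to its
# list of hourly value dicts.
hourly = HourlyDataIO(
    station=['302:OR:SNTL', '301:CA:SNTL'],
    parameter='TOBS',
    start_date='2017-01-01',
    end_date='2017-01-02',
)
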
class DailyDataIO(SnotelIO):
    """
    Wrapper for getData(), used internally by StationDailyDataIO
    """
    data_function = 'getData'

    # Applicable WebserviceLoader default options
    station = FilterOpt(required=True, url_param='stationTriplets')
    parameter = FilterOpt(required=True, url_param='elementCd')
    start_date = DateOpt(required=True, url_param='beginDate')
    end_date = DateOpt(required=True, url_param='endDate')

    # HeightDepth parameters don't seem to be necessary.
    default_params = {
        'ordinal': 1,
        'duration': 'DAILY',
        'getFlags': 'true',
        'alwaysReturnDailyFeb29': 'false',
    }

    def parse(self):
        data = self.data[0] if self.data else None
        if not data or 'values' not in data:
            raise NoData
        bd = data['beginDate']
        ed = data['endDate']
        dates = fill_date_range(bd, ed, date_format='%Y-%m-%d %H:%M:%S')
        vals = as_list(data['values'])
        flags = as_list(data['flags'])
        self.data = [{
            'date': date,
            'value': val,
            'flag': flag,
        } for date, val, flag in zip(dates, vals, flags)]
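
# Example usage (sketch): the triplet is illustrative; 'WTEQ' is the
# SNOTEL snow water equivalent element code.
daily = DailyDataIO(
    station='302:OR:SNTL',
    parameter='WTEQ',
    start_date='2017-01-01',
    end_date='2017-01-31',
)
for row in daily:
    print(row.date, row.value, row.flag)
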
class CocorahsIO(WebserviceLoader, XmlParser, TimeSeriesMapper, BaseIO):
    """
    Retrieves CoCoRaHS observations from data.cocorahs.org

    Usage:

    data = CocorahsIO(state='MN', county='HN')
    for row in data:
        print(row.stationname, row.observationdate.date(),
              row.totalprecipamt)
    """
    # Customize date parameters
    start_date = DateOpt(
        required=True,
        date_only=False,
        url_param="StartDate",
    )
    end_date = DateOpt(
        date_only=False,
        url_param="EndDate",
    )

    # These region filters are supported
    state = FilterOpt(required=True)
    county = FilterOpt()

    # Other filters are ignored
    basin = FilterOpt(ignored=True)
    station = FilterOpt(ignored=True)
    parameter = FilterOpt(ignored=True)

    # CoCoRaHS-specific options
    datetype = ChoiceOpt(
        url_param="ReportDateType",
        default="reportdate",
        choices=["reportdate", "timestamp"],
    )
    reporttype = ChoiceOpt(
        url_param="ReportType",
        default="Daily",
        choices=["Daily", "MultiDay"],
    )

    # Configuration for wq.io base classes
    url = "http://data.cocorahs.org/cocorahs/export/exportreports.aspx"
    root_tag = 'Cocorahs'
    date_formats = [
        '%Y-%m-%d %I:%M %p',
        '%Y-%m-%d',
        '%I:%M %p',
        '%m/%d/%Y %I:%M %p',
    ]
    key_fields = [
        "stationnumber",
        "stationname",
        "latitude",
        "longitude",
        "datetimestamp",
        "observationdate",
        "observationtime",
        "entrydatetime",
    ]

    # These params apply to every request
    default_params = {
        'dtf': "1",
        'Format': "XML",
        'TimesInGMT': "False",
        'responsefields': "all",
    }

    @property
    def item_tag(self):
        if self.getvalue('reporttype') == "Daily":
            return 'DailyPrecipReports/DailyPrecipReport'
        else:
            # i.e. self.getvalue('reporttype') == "MultiDay"
            return 'MultiDayPrecipReports/MultiDayPrecipReport'

    def serialize_params(self, params, complex_type):
        params = super(CocorahsIO, self).serialize_params(
            params, complex_type
        )
        fmt = '%m/%d/%Y'
        # Different date parameters and formats depending on use case
        if 'EndDate' in params:
            # Date range (usually used with datetype=reportdate)
            params['StartDate'] = self.getvalue('start_date').strftime(fmt)
            params['EndDate'] = self.getvalue('end_date').strftime(fmt)
        else:
            # Only start date (usually used with datetype=timestamp)
            params['Date'] = self.getvalue('start_date').strftime(
                fmt + " %I:%M %p"
            )
            del params['StartDate']
        return params

    def map_value(self, field, value):
        value = super(CocorahsIO, self).map_value(field, value)
        # CoCoRaHS empty dates are represented as 1/1/0001
        if isinstance(value, datetime) and value.year == 1:
            return None
        return value
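
# Example usage (sketch): with no end_date, serialize_params sends a
# single Date value instead of a StartDate/EndDate pair (usually
# combined with datetype='timestamp'); the date shown is illustrative.
recent = CocorahsIO(
    state='MN',
    start_date=datetime(2018, 4, 1),
    datetype='timestamp',
)
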
class EnsembleForecastIO(ZipWebserviceLoader, EnsembleCsvParser,
                         TupleMapper, BaseIO):
    """
    Load ensemble forecast zip files from the CNRFC website.

     - start_date and basin are required to specify the zip file;
     - station and end_date can be used to filter the downloaded data.
    """
    nested = True

    start_date = DateOpt(required=True)
    end_date = DateOpt()

    # Region filters
    state = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)

    # FIXME: this isn't actually a HUC8 basin
    basin = FilterOpt(required=True)

    station = FilterOpt(multi=True)
    parameter = FilterOpt(ignored=True)

    region = ChoiceOpt(default="cnrfc", choices=["cnrfc"])

    urls = {
        "cnrfc": (
            "http://www.cnrfc.noaa.gov/csv/"
            "{date}12_{basin}_hefs_csv_daily.zip"
        ),
    }

    @property
    def params(self):
        # Don't actually need params, but ensure validation logic is called
        params = super(EnsembleForecastIO, self).params
        return None

    @property
    def url(self):
        url = self.urls[self.getvalue("region")]
        return url.format(
            date=self.getvalue("start_date").strftime("%Y%m%d"),
            basin=self.getvalue("basin"),
        )

    def parse(self):
        super(EnsembleForecastIO, self).parse()

        # Optionally filter by station id
        site_filter = self.getvalue('station')
        if site_filter:
            self.data = [
                item for item in self.data
                if item['site'] in site_filter
            ]

        # Optionally filter by end date
        date_filter = self.getvalue('end_date')
        if date_filter:
            date_filter = date_filter.strftime('%Y-%m-%d') + " 23:59:59"
            for item in self.data:
                item['data'] = [
                    row for row in item['data']
                    if row['date'] <= date_filter
                ]

    def usable_item(self, item):
        item = item.copy()
        item['data'] = TimeSeriesIO(data=item['data'])
        return super(EnsembleForecastIO, self).usable_item(item)
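
# Example usage (sketch): the basin and site IDs are illustrative, and
# the zip file for the given date must exist on the CNRFC server.
data = EnsembleForecastIO(
    basin='SanJoaquin',
    start_date='2018-01-01',
    station=['NHGC1'],
)
for site in data:
    print(site.site)
    for row in site.data:
        print(row.date)
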
class SnotelIO(WebserviceLoader, BaseParser, TupleMapper, BaseIO):
    """
    Base class for accessing SNOTEL AWDB SOAP web services.
    """
    webservice_name = "awdbWebService"
    data_function = None

    # Override default WebserviceLoader options
    start_date = DateOpt(ignored=True)
    end_date = DateOpt(ignored=True)
    state = FilterOpt(ignored=True)
    basin = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)
    station = FilterOpt(ignored=True)
    parameter = FilterOpt(ignored=True)

    # Some records may have additional fields; loop through the entire
    # array to ensure all field names are accounted for.  (Otherwise
    # BaseIO will guess field names using only the first record.)
    scan_fields = True

    def load(self):
        if self.debug:
            self.print_debug()
        params = self.params
        fn = getattr(get_server(), self.data_function)
        self.data = fn(**params)
        if len(self.data) == 0:
            self.data = []
        else:
            self.data = as_list(self.data)
            if isinstance(self.data[0], SudsObject):
                parse = asdict
            else:
                parse = str
            self.data = [parse(row) for row in self.data]

    def serialize_params(self, params, complex_type):
        """
        The AWDB webservice accepts multiple values for some parameters
        (e.g. querying several stations at a time), which is more
        efficient than one request per station.  Unlike the default
        implementation, list values are passed through unjoined so the
        SOAP client can serialize them as repeated elements.
        """
        if complex_type:
            raise NotImplementedError("Cannot serialize %s!" % params)
        return {
            self.get_url_param(key): val
            for key, val in params.items()
        }

    def print_debug(self):
        print('%s.%s(%s)' % (
            self.webservice_name,
            self.data_function,
            ','.join(
                '%s=%s' % (key, val)
                for key, val in self.params.items()
            )
        ))
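
# Illustration (sketch): given the url_param mappings used by the
# hourly wrapper above, a multi-station query would serialize like
# this (values are illustrative):
#
#   {'station': ['302:OR:SNTL', '301:CA:SNTL'], 'parameter': 'TOBS'}
#   -> {'stationTriplets': ['302:OR:SNTL', '301:CA:SNTL'],
#       'elementCd': 'TOBS'}
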
class StationDataIO(StationMetaIO):
    """
    Retrieve daily time series data from the climate stations in a region.
    See http://data.rcc-acis.org/doc/#title19
    """
    nested = True

    namespace = "data"  # For wq.io.parsers.text.JsonParser
    path = "MultiStnData"

    # Specify ACIS-defined URL parameters for start/end date
    start_date = DateOpt(required=True, url_param='sdate')
    end_date = DateOpt(required=True, url_param='edate')

    parameter = ParameterOpt(required=True)

    # Additional information for daily results
    add = ChoiceOpt(multi=True, choices=ADD_IDS)

    def get_field_names(self):
        """
        ACIS web service returns "meta" and "data" for each station;
        use meta attributes as field names
        """
        field_names = super(StationDataIO, self).get_field_names()
        if field_names == ['meta', 'data']:
            meta_fields = self.data[0]['meta'].keys()
            if set(meta_fields) < set(self.getvalue('meta')):
                meta_fields = self.getvalue('meta')
            field_names = list(meta_fields) + ['data']
        return field_names

    def serialize_params(self, params, complex_type):
        # If set, apply the "add" option to each requested element /
        # parameter (rather than as a top-level URL param)
        if 'add' in params:
            complex_type = True
            elems = []
            for elem in params.get('parameter', []):
                if not isinstance(elem, dict):
                    elem = {'name': elem}
                elem['add'] = ",".join(params['add'])
                elems.append(elem)
            params['parameter'] = elems
            del params['add']
        return super(StationDataIO, self).serialize_params(
            params, complex_type
        )

    def usable_item(self, data):
        """
        ACIS web service returns "meta" and "data" for each station; use
        meta attributes as item values, and add an IO for iterating over
        "data"
        """
        # Use metadata as item
        item = data['meta']

        # Add nested IO for data
        elems, elems_is_complex = self.getlist('parameter')
        if elems_is_complex:
            elems = [elem['name'] for elem in elems]
        add, add_is_complex = self.getlist('add')
        item['data'] = DataIO(
            data=data['data'],
            parameter=elems,
            add=add,
            start_date=self.getvalue('start_date'),
            end_date=self.getvalue('end_date'),
        )

        # TupleMapper will convert item to namedtuple
        return super(StationDataIO, self).usable_item(item)
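
# Example usage (sketch): the county FIPS code and element name are
# illustrative; assumes 't' (observation time) is among the ADD_IDS
# choices, and that each data row exposes the element value by name.
stations = StationDataIO(
    county='27053',
    start_date='2018-07-01',
    end_date='2018-07-31',
    parameter='maxt',
    add='t',
)
for stn in stations:
    print(stn.name)
    for row in stn.data:
        print(row.date, row.maxt)
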