Example #1
class HydroForecastIO(WebserviceLoader, XmlParser, TimeSeriesMapper, BaseIO):
    """
    Loads hydrograph forecast data (next 3 days) from weather.gov
    """

    ###########################################
    # valid is the time of the forecast in UTC
    # primary is the stage in ft
    # secondary is the flow in kcfs
    # The service appears to return about 3 days of forecast values
    ###########################################

    start_date = DateOpt(ignored=True)
    end_date = DateOpt(ignored=True)
    state = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)
    station = FilterOpt(url_param='gage')
    parameter = FilterOpt(ignored=True)
    basin = FilterOpt(ignored=True)

    root_tag = "forecast"
    date_formats = ['%Y-%m-%dT%H:%M:%S']
    url = 'http://water.weather.gov/ahps2/hydrograph_to_xml.php'

    def parse_item(self, elem):
        valid = elem.find('valid')
        primary = elem.find('primary')
        secondary = elem.find('secondary')
        return {
            'date': valid.text.replace('-00:00', ''),
            primary.attrib['name']: primary.text,
            secondary.attrib['name']: secondary.text,
        }
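
A minimal usage sketch for this loader; the module path and gage identifier below are assumptions, not taken from the source:

# Hypothetical usage sketch; module path and gage ID are illustrative.
from climata.nws import HydroForecastIO

forecast = HydroForecastIO(station='SOMEGAGE')
for row in forecast:
    # Each row has 'date' plus the stage/flow fields named in the XML
    # (the exact attribute names depend on the gage).
    print(row.date)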
Example #2
class NwisLoader(WebserviceLoader):
    """
    Base class for loading data from the USGS NWIS REST services.
    """

    # Map WebserviceLoader options to NWIS equivalents
    start_date = DateOpt(url_param='startDT')
    end_date = DateOpt(url_param='endDT')

    state = FilterOpt(url_param='stateCd')
    county = FilterOpt(url_param='countyCd', multi=True)
    basin = FilterOpt(url_param='huc', multi=True)

    station = FilterOpt(url_param='site', multi=True)
    parameter = FilterOpt(url_param='parameterCd', multi=True)

    # Additional options unique to NWIS
    sitetype = ChoiceOpt(
        url_param='siteType',
        multi=True,
        choices=list(SITE_TYPES.keys()),
    )

    # Each NWIS webservice uses the same base URL, with a service path
    service = None

    @property
    def url(self):
        return "http://waterservices.usgs.gov/nwis/%s/" % self.service
Example #3
class HourlyDataIO(TimeSeriesMapper, SnotelIO):
    """
    Wrapper for getHourlyData(), used internally by StationHourlyDataIO
    """
    data_function = 'getHourlyData'

    # TimeSeriesMapper configuration
    date_formats = ['%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M']

    # Applicable WebserviceLoader default options
    station = FilterOpt(required=True, url_param='stationTriplets')
    parameter = FilterOpt(required=True, url_param='elementCd')
    start_date = DateOpt(required=True, url_param='beginDate')
    end_date = DateOpt(required=True, url_param='endDate')

    # Additional options
    begin_hour = FilterOpt(url_param='beginHour')
    end_hour = FilterOpt(url_param='endHour')

    # HeightDepth parameters don't seem to be necessary.

    default_params = {
        'ordinal': 1,
    }

    def load(self):
        super(HourlyDataIO, self).load()
        if self.data and 'values' in self.data[0]:
            self.data = as_list(self.data[0]['values'])
        else:
            raise NoData
Example #4
class StationMetaIO(AcisIO):
    """
    Retrieves metadata about the climate stations in a region.
    See http://data.rcc-acis.org/doc/#title8
    """

    namespace = "meta"  # For wq.io.parsers.text.JsonParser
    path = "StnMeta"

    # These options are not required for StationMetaIO
    start_date = DateOpt(url_param='sdate')
    end_date = DateOpt(url_param='edate')
    parameter = ParameterOpt()

    def parse(self):
        """
        Convert ACIS 'll' value into separate latitude and longitude.
        """
        super(AcisIO, self).parse()

        # This is more of a "mapping" step than a "parsing" step, but mappers
        # only allow one-to-one mapping from input fields to output fields.
        for row in self.data:
            if 'meta' in row:
                row = row['meta']
            if 'll' in row:
                row['longitude'], row['latitude'] = row['ll']
                del row['ll']

    def map_value(self, field, value):
        """
        Clean up some values returned from the web service.
        (overrides wq.io.mappers.BaseMapper)
        """

        if field == 'sids':
            # Site identifiers are returned as "[id] [auth_id]";
            # Map to auth name for easier usability
            ids = {}
            for idinfo in value:
                id, auth = idinfo.split(' ')
                auth = AUTHORITY_BY_ID[auth]
                ids[auth['name']] = id
            return ids

        if field == 'valid_daterange':
            # Date ranges for each element are returned in an array
            # (sorted by the order the elements were requested);
            # Convert to dictionary with element id as key
            elems, complex = self.getlist('parameter')
            ranges = {}
            for elem, val in zip(elems, value):
                if val:
                    start, end = val
                    ranges[elem] = (parse_date(start), parse_date(end))
                else:
                    ranges[elem] = None, None
            return ranges
        return value
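
For illustration, the 'sids' branch above reshapes the raw ACIS identifier list into a dict keyed by authority name; a standalone sketch of that logic with an assumed authority table:

# Standalone sketch; the authority table and sample identifiers are illustrative.
AUTHORITY_BY_ID = {'2': {'name': 'COOP'}, '3': {'name': 'FAA'}}

def remap_sids(value):
    ids = {}
    for idinfo in value:
        sid, auth = idinfo.split(' ')
        ids[AUTHORITY_BY_ID[auth]['name']] = sid
    return ids

print(remap_sids(['215435 2', 'MSP 3']))
# {'COOP': '215435', 'FAA': 'MSP'}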
Example #5
class RegionDailyDataIO(StationIO):
    """
    All-in-one IO for loading site metadata and daily data for a region (i.e. a
    state, county, or basin).  Internally calls:
      - getStations()
      - getStationMetadata()
      - getStationElements()
      - getData()

    The outer IO is a list of sites in the region - derived from StationIO, but
    with an extra "data" property on each station pointing to an inner time
    series IO for each site.  The inner IO is based on StationDailyDataIO but
    flattened to avoid multiple levels of nesting.  parameter is optional but
    recommended (otherwise all available data for all sites will be returned).

    Usage:

    sites = RegionDailyDataIO(
        basin='17060105',
        start_date='2014-07-01',
        end_date='2014-07-31',
        parameter='TAVG',
    )
    for site in sites:
        print site.name
        for row in site.data:
            print "   ", row.date, row.value, row.storedunitcd

    """

    nested = True

    # Applicable WebserviceLoader default options
    start_date = DateOpt(required=True)
    end_date = DateOpt(required=True)
    parameter = FilterOpt(url_param='elementCds')

    @property
    def params(self):
        params = super(RegionDailyDataIO, self).params
        # Start and end date are actually only used by inner io.
        del params['start_date']
        del params['end_date']
        return params

    def load(self):
        super(RegionDailyDataIO, self).load()
        for station in self.data:
            station['data'] = flattened(
                StationDailyDataIO,
                station=station['stationTriplet'],
                start_date=self.getvalue('start_date'),
                end_date=self.getvalue('end_date'),
                parameter=self.getvalue('parameter'),
                debug=self.debug,
            )
Example #6
class StationDataIO(StationElementIO):
    """
    Base class for StationDailyDataIO and StationHourlyDataIO.  Retrieves all
    data for a station that matches the specified duration by calling the
    specified inner_io_class.
    """
    nested = True

    # Applicable WebserviceLoader default options
    start_date = DateOpt(url_param='beginDate', required=True)
    end_date = DateOpt(url_param='endDate', required=True)
    parameter = FilterOpt()

    inner_io_class = None
    duration = None

    @property
    def params(self):
        params = super(StationDataIO, self).params
        # Parameter filter (if any) is applied *after* the initial request
        params.pop('parameter', None)
        return params

    def load(self):
        super(StationDataIO, self).load()
        data = []
        for row in self.data:

            # Only include records matching the specified duration
            # and parameter
            if row['duration'] != self.duration:
                continue
            elem = self.getvalue('parameter')
            if elem and row['elementCd'] != elem:
                continue

            # getStationElements() sometimes returns parameters that don't
            # actually have data for the requested timeframe - silently catch
            # the exception and remove parameter from results.
            try:
                row['data'] = self.inner_io_class(
                    station=row['stationTriplet'],
                    parameter=row['elementCd'],
                    start_date=self.getvalue('start_date'),
                    end_date=self.getvalue('end_date'),
                    debug=self.debug,
                )
            except NoData:
                continue

            data.append(row)

        self.data = data
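
A sketch of how the daily variant could be wired on top of this base class; the class names mirror the docstring above, but the exact definitions are assumptions:

# Hypothetical wiring sketch; the real classes may add parser configuration.
class StationDailyDataIO(StationDataIO):
    duration = 'DAILY'
    inner_io_class = DailyDataIO  # the getData() wrapper shown elsewhere in this section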
Example #7
class SnotelIO(WebserviceLoader, BaseParser, TupleMapper, BaseIO):
    """
    Base class for accessing SNOTEL AWDB SOAP web services.
    """

    webservice_name = "awdbWebService"
    data_function = None

    # Override Default WebserviceLoader options
    start_date = DateOpt(ignored=True)
    end_date = DateOpt(ignored=True)
    state = FilterOpt(ignored=True)
    basin = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)
    station = FilterOpt(ignored=True)
    parameter = FilterOpt(ignored=True)

    def load(self):
        if self.debug:
            self.print_debug()
        params = self.params
        fn = getattr(get_server(), self.data_function)
        self.data = fn(**params)
        if len(self.data) == 0:
            self.data = []
        else:
            self.data = as_list(self.data)
            if isinstance(self.data[0], SudsObject):
                parse = asdict
            else:
                parse = str
            self.data = [parse(row) for row in self.data]

    # Some records may have additional fields; loop through entire
    # array to ensure all field names are accounted for.  (Otherwise BaseIO
    # will guess field names using only the first record.)
    scan_fields = True

    def print_debug(self):
        print('%s.%s(%s)' % (
            self.webservice_name,
            self.data_function,
            ','.join(
                '%s=%s' % (key, val)
                for key, val in self.params.items()
            )
        ))
Example #8
class ForecastDataIO(SnotelIO):
    data_function = 'getForecast'

    station = FilterOpt(required=True, url_param='stationTriplets')
    parameter = FilterOpt(required=True, url_param='elementCd')

    forecast_period = FilterOpt(required=True, url_param='forecastPeriod')
    publication_date = DateOpt(required=True, url_param='publicationDate')
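
A hedged usage sketch; the module path, station triplet, element code, and forecast period values are all assumptions:

# Hypothetical usage; argument values are illustrative.
from climata.snotel import ForecastDataIO

forecasts = ForecastDataIO(
    station='302:OR:SNTL',
    parameter='SRVO',
    forecast_period='APR-JUL',
    publication_date='2014-01-01',
)
for row in forecasts:
    print(row)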
Example #9
class StationElementIO(SnotelIO):
    """
    Wrapper for getStationElements(), incorporating element names from
    getElements()
    """

    data_function = 'getStationElements'

    # Applicable WebserviceLoader default options
    start_date = DateOpt(url_param='beginDate')
    end_date = DateOpt(url_param='endDate')
    station = FilterOpt(required=True, url_param='stationTriplet')

    def load(self):
        super(StationElementIO, self).load()
        names = ElementIO.get_names()
        for elem in self.data:
            elem['element_name'] = names[elem['elementCd']]
Example #10
class HourlyDataIO(TimeSeriesMapper, SnotelIO):
    """
    Wrapper for getHourlyData(), used internally by StationHourlyDataIO
    """
    data_function = 'getHourlyData'

    # TimeSeriesMapper configuration
    date_formats = ['%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M']

    # Applicable WebserviceLoader default options
    station = FilterOpt(required=True, url_param='stationTriplets', multi=True)
    parameter = FilterOpt(required=True, url_param='elementCd')
    start_date = DateOpt(required=True, url_param='beginDate')
    end_date = DateOpt(required=True, url_param='endDate')

    # Additional options
    begin_hour = FilterOpt(url_param='beginHour')
    end_hour = FilterOpt(url_param='endHour')

    # HeightDepth parameters don't seem to be necessary.

    default_params = {
        'ordinal': 1,
    }

    def load(self):
        super(HourlyDataIO, self).load()
        # Since multiple station triplets can be requested, group the
        # results by station triplet
        d = {}
        for data in self.data:
            stationTriplet = data['stationTriplet']
            if data and 'values' in data:
                data = [asdict(row) for row in as_list(data['values'])]
            else:
                data = []
            d[stationTriplet] = data
        self.data = d
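
With multiple station triplets requested, self.data ends up as a dict keyed by triplet rather than a flat list; an illustrative sketch of the expected shape (triplets, field names, and values are made up):

# Illustrative shape only; all keys and values are made up.
expected_shape = {
    '302:OR:SNTL': [
        {'dateTime': '2014-07-01 00:00', 'value': 55.4, 'flag': 'V'},
    ],
    '978:ID:SNTL': [],
}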
Example #11
class DailyDataIO(SnotelIO):
    """
    Wrapper for getData(), used internally by StationDailyDataIO
    """
    data_function = 'getData'

    # Applicable WebserviceLoader default options
    station = FilterOpt(required=True, url_param='stationTriplets')
    parameter = FilterOpt(required=True, url_param='elementCd')
    start_date = DateOpt(required=True, url_param='beginDate')
    end_date = DateOpt(required=True, url_param='endDate')

    # HeightDepth parameters don't seem to be necessary.

    default_params = {
        'ordinal': 1,
        'duration': 'DAILY',
        'getFlags': 'true',
        'alwaysReturnDailyFeb29': 'false',
    }

    def parse(self):
        data = self.data[0]
        if not data or 'values' not in data:
            raise NoData
        bd = data['beginDate']
        ed = data['endDate']
        dates = fill_date_range(bd, ed, date_format='%Y-%m-%d %H:%M:%S')
        vals = as_list(data['values'])
        flags = as_list(data['flags'])

        self.data = [{
            'date': date,
            'value': val,
            'flag': flag
        } for date, val, flag in zip(dates, vals, flags)]
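
A hedged usage sketch for this wrapper; the module path and argument values are assumptions (WTEQ is the SNOTEL element code for snow water equivalent):

# Hypothetical usage; argument values are illustrative.
from climata.snotel import DailyDataIO

data = DailyDataIO(
    station='302:OR:SNTL',
    parameter='WTEQ',
    start_date='2014-07-01',
    end_date='2014-07-31',
)
for row in data:
    print(row.date, row.value, row.flag)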
Example #12
class CocorahsIO(WebserviceLoader, XmlParser, TimeSeriesMapper, BaseIO):
    """
    Retrieves CoCoRaHS observations from data.cocorahs.org

    Usage:

    data = CocorahsIO(state='MN', county='HN')
    for row in data:
        print row.stationname, row.observationdate.date(), row.totalprecipamt
    """

    # Customize date parameters
    start_date = DateOpt(
        required=True,
        date_only=False,
        url_param="StartDate",
    )
    end_date = DateOpt(
        date_only=False,
        url_param="EndDate",
    )

    # These region filters are supported
    state = FilterOpt(required=True)
    county = FilterOpt()

    # Other filters are ignored
    basin = FilterOpt(ignored=True)
    station = FilterOpt(ignored=True)
    parameter = FilterOpt(ignored=True)

    # CoCoRaHS-specific options
    datetype = ChoiceOpt(
        url_param="ReportDateType",
        default="reportdate",
        choices=["reportdate", "timestamp"],
    )
    reporttype = ChoiceOpt(
        url_param="ReportType",
        default="Daily",
        choices=["Daily", "MultiDay"],
    )

    # Configuration for wq.io base classes
    url = "http://data.cocorahs.org/cocorahs/export/exportreports.aspx"

    root_tag = 'Cocorahs'

    date_formats = [
        '%Y-%m-%d %I:%M %p', '%Y-%m-%d', '%I:%M %p', '%m/%d/%Y %I:%M %p'
    ]

    key_fields = [
        "stationnumber",
        "stationname",
        "latitude",
        "longitude",
        "datetimestamp",
        "observationdate",
        "observationtime",
        "entrydatetime",
    ]

    # These params apply to every request
    default_params = {
        'dtf': "1",
        'Format': "XML",
        'TimesInGMT': "False",
        'responsefields': "all"
    }

    @property
    def item_tag(self):
        if self.getvalue('reporttype') == "Daily":
            return 'DailyPrecipReports/DailyPrecipReport'
        else:
            # i.e. self.getvalue('reporttype') == "MultiDay"
            return 'MultiDayPrecipReports/MultiDayPrecipReport'

    def serialize_params(self, params, complex):
        params = super(CocorahsIO, self).serialize_params(params, complex)
        fmt = '%m/%d/%Y'

        # Different date parameters and formats depending on use case
        if 'EndDate' in params:
            # Date range (usually used with datetype=reportdate)
            params['StartDate'] = self.getvalue('start_date').strftime(fmt)
            params['EndDate'] = self.getvalue('end_date').strftime(fmt)
        else:
            # Only start date (usually used with datetype=timestamp)
            params['Date'] = self.getvalue('start_date').strftime(fmt +
                                                                  " %I:%M %p")
            del params['StartDate']
        return params

    def map_value(self, field, value):
        value = super(CocorahsIO, self).map_value(field, value)
        # CoCoRaHS empty dates are represented as 1/1/0001
        if isinstance(value, datetime) and value.year == 1:
            return None
        return value
Example #13
class EnsembleForecastIO(ZipWebserviceLoader, EnsembleCsvParser, TupleMapper,
                         BaseIO):
    """
    Load ensemble forecast zip files from the CNRFC website.
     - start_date and basin are required to specify the zip file;
     - station and end_date can be used to filter the downloaded data.
    """

    nested = True

    start_date = DateOpt(required=True)
    end_date = DateOpt()

    # Region filters
    state = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)

    # FIXME: this isn't actually a HUC8 basin
    basin = FilterOpt(required=True)

    station = FilterOpt(multi=True)
    parameter = FilterOpt(ignored=True)

    region = ChoiceOpt(default="cnrfc", choices=["cnrfc"])

    urls = {
        "cnrfc": ("http://www.cnrfc.noaa.gov/csv/" +
                  "{date}12_{basin}_hefs_csv_daily.zip")
    }

    @property
    def params(self):
        # Don't actually need params, but ensure validation logic is called
        params = super(EnsembleForecastIO, self).params
        return None

    @property
    def url(self):
        url = self.urls[self.getvalue("region")]
        return url.format(
            date=self.getvalue("start_date").strftime("%Y%m%d"),
            basin=self.getvalue("basin"),
        )

    def parse(self):
        super(EnsembleForecastIO, self).parse()

        # Optionally filter by station id
        site_filter = self.getvalue('station')
        date_filter = self.getvalue('end_date')
        if not site_filter:
            return
        self.data = [item for item in self.data if item['site'] in site_filter]
        if not date_filter:
            return
        date_filter = date_filter.strftime('%Y-%m-%d') + " 23:59:59"
        for item in self.data:
            item['data'] = [
                row for row in item['data'] if row['date'] <= date_filter
            ]

    def usable_item(self, item):
        item = item.copy()
        item['data'] = TimeSeriesIO(data=item['data'])
        return super(EnsembleForecastIO, self).usable_item(item)
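
A hedged usage sketch; the module path, basin code, and site ID are assumptions (note that the basin value names a CNRFC zip-file group, not a HUC8):

# Hypothetical usage; argument values are illustrative.
from climata.cnrfc import EnsembleForecastIO

forecasts = EnsembleForecastIO(
    start_date='2014-07-01',
    basin='klamath',
    station=['SOMESITE'],
)
for item in forecasts:
    print(item.site)
    for row in item.data:
        print('   ', row.date)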
Example #14
class SnotelIO(WebserviceLoader, BaseParser, TupleMapper, BaseIO):
    """
    Base class for accessing SNOTEL AWDB SOAP web services.
    """

    webservice_name = "awdbWebService"
    data_function = None

    # Override Default WebserviceLoader options
    start_date = DateOpt(ignored=True)
    end_date = DateOpt(ignored=True)
    state = FilterOpt(ignored=True)
    basin = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)
    station = FilterOpt(ignored=True)
    parameter = FilterOpt(ignored=True)

    def load(self):
        if self.debug:
            self.print_debug()
        params = self.params
        fn = getattr(get_server(), self.data_function)
        self.data = fn(**params)
        if len(self.data) == 0:
            self.data = []
        else:
            self.data = as_list(self.data)
            if isinstance(self.data[0], SudsObject):
                parse = asdict
            else:
                parse = str
            self.data = [parse(row) for row in self.data]

    def serialize_params(self, params, complex_type):
        """
        The AWDB NRCS webservice allows for multiple parameters, need
        to have the ability to query multiple stations at a time for
        effeciency. If one of the parameters is a list, the client will
        be able to make multiple types for the list.
        
        Therefore, overwrite the `serialize_params` to not join vals if
        it's a list
        """

        if complex_type:
            raise NotImplementedError("Cannot serialize %s!" % params)
        else:
            return {
                self.get_url_param(key): val
                for key, val in params.items()
            }

    # Some records may have additional fields; loop through entire
    # array to ensure all field names are accounted for.  (Otherwise BaseIO
    # will guess field names using only the first record.)
    scan_fields = True

    def print_debug(self):
        print('%s.%s(%s)' % (self.webservice_name, self.data_function,
                             ','.join('%s=%s' % (key, val)
                                      for key, val in self.params.items())))
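
A standalone sketch of the pass-through behavior described above; the option-to-URL-parameter mapping and values are illustrative:

# Mirrors the non-complex branch of serialize_params(): map option names to
# their url_param equivalents and keep list values intact.
def serialize_params(params, url_params):
    return {url_params[key]: val for key, val in params.items()}

print(serialize_params(
    {'station': ['302:OR:SNTL', '978:ID:SNTL'], 'parameter': 'WTEQ'},
    {'station': 'stationTriplets', 'parameter': 'elementCd'},
))
# {'stationTriplets': ['302:OR:SNTL', '978:ID:SNTL'], 'elementCd': 'WTEQ'}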
Example #15
class StationDataIO(StationMetaIO):
    """
    Retrieve daily time series data from the climate stations in a region.
    See http://data.rcc-acis.org/doc/#title19
    """

    nested = True

    namespace = "data"  # For wq.io.parsers.text.JsonParser
    path = "MultiStnData"

    # Specify ACIS-defined URL parameters for start/end date
    start_date = DateOpt(required=True, url_param='sdate')
    end_date = DateOpt(required=True, url_param='edate')

    parameter = ParameterOpt(required=True)

    # Additional information for daily results
    add = ChoiceOpt(multi=True, choices=ADD_IDS)

    def get_field_names(self):
        """
        ACIS web service returns "meta" and "data" for each station;
        Use meta attributes as field names
        """
        field_names = super(StationDataIO, self).get_field_names()
        if field_names == ['meta', 'data']:
            meta_fields = self.data[0]['meta'].keys()
            if set(meta_fields) < set(self.getvalue('meta')):
                meta_fields = self.getvalue('meta')
            field_names = list(meta_fields) + ['data']
        return field_names

    def serialize_params(self, params, complex):
        # If set, apply "add" option to each requested element / parameter
        # (Rather than as a top-level URL param)
        if 'add' in params:
            complex = True
            elems = []
            for elem in params.get('parameter', []):
                if not isinstance(elem, dict):
                    elem = {'name': elem}
                elem['add'] = ",".join(params['add'])
                elems.append(elem)
            params['parameter'] = elems
            del params['add']
        return super(StationDataIO, self).serialize_params(params, complex)

    def usable_item(self, data):
        """
        ACIS web service returns "meta" and "data" for each station; use meta
        attributes as item values, and add an IO for iterating over "data"
        """

        # Use metadata as item
        item = data['meta']

        # Add nested IO for data
        elems, elems_is_complex = self.getlist('parameter')
        if elems_is_complex:
            elems = [elem['name'] for elem in elems]

        add, add_is_complex = self.getlist('add')
        item['data'] = DataIO(
            data=data['data'],
            parameter=elems,
            add=add,
            start_date=self.getvalue('start_date'),
            end_date=self.getvalue('end_date'),
        )

        # TupleMapper will convert item to namedtuple
        return super(StationDataIO, self).usable_item(item)
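
A hedged usage sketch for this region-level IO; the module path, county FIPS code, ACIS element code, and row attribute names are assumptions:

# Hypothetical usage; argument values are illustrative.
from climata.acis import StationDataIO

sites = StationDataIO(
    county='27053',
    start_date='2014-07-01',
    end_date='2014-07-31',
    parameter='avgt',
)
for site in sites:
    print(site.name)
    for row in site.data:
        print('   ', row.date, row.avgt)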