Ejemplo n.º 1
0
class NwisLoader(WebserviceLoader):
    """
    Common foundation for IO classes that read from the USGS NWIS REST
    services.  The ``service`` attribute supplies the path segment of the
    specific web service to call.
    """

    # Date range options, renamed to the NWIS query parameter names
    start_date = DateOpt(url_param='startDT')
    end_date = DateOpt(url_param='endDT')

    # Region filters (county and basin accept multiple values)
    state = FilterOpt(url_param='stateCd')
    county = FilterOpt(multi=True, url_param='countyCd')
    basin = FilterOpt(multi=True, url_param='huc')

    # Site and parameter code filters (multiple values allowed)
    station = FilterOpt(multi=True, url_param='site')
    parameter = FilterOpt(multi=True, url_param='parameterCd')

    # NWIS-only option: one or more of the known site type codes
    sitetype = ChoiceOpt(
        choices=list(SITE_TYPES.keys()),
        multi=True,
        url_param='siteType',
    )

    # All NWIS web services share one base URL; this is the service path
    service = None

    @property
    def url(self):
        """Return the endpoint URL for the configured ``service``."""
        template = "http://waterservices.usgs.gov/nwis/%s/"
        return template % self.service
Ejemplo n.º 2
0
class HydroForecastIO(WebserviceLoader, XmlParser, TimeSeriesMapper, BaseIO):
    """
    Hydrograph forecast loader for weather.gov (next 3 days).

    Each forecast item is read from three child elements:
      - valid: the time of the forecast, in UTC
      - primary: the stage, in ft
      - secondary: the flow, in kcfs

    The service appears to return 3 days of forecast into the future.
    """

    # Only the station (sent as "gage") is used; all other default
    # WebserviceLoader options are ignored.
    start_date = DateOpt(ignored=True)
    end_date = DateOpt(ignored=True)
    state = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)
    station = FilterOpt(url_param='gage')
    parameter = FilterOpt(ignored=True)
    basin = FilterOpt(ignored=True)

    # Parser / mapper configuration
    root_tag = "forecast"
    date_formats = ['%Y-%m-%dT%H:%M:%S']
    url = 'http://water.weather.gov/ahps2/hydrograph_to_xml.php'

    def parse_item(self, elem):
        """
        Turn one forecast element into a dict with a 'date' entry plus the
        primary and secondary readings, each keyed by its ``name`` attribute.
        """
        when = elem.find('valid')
        first = elem.find('primary')
        second = elem.find('secondary')

        # Drop the "-00:00" UTC offset so the text matches date_formats
        record = {'date': when.text.replace('-00:00', '')}
        record[first.attrib['name']] = first.text
        record[second.attrib['name']] = second.text
        return record
Ejemplo n.º 3
0
class ForecastIO(SnotelIO):
    """
    Wrapper for getForecasts().  Station, parameter and forecast period
    are all required.
    """
    data_function = 'getForecasts'

    # Required filters, renamed to the web service argument names
    station = FilterOpt(url_param='stationTriplet', required=True)
    parameter = FilterOpt(url_param='elementCd', required=True)

    forecast_period = FilterOpt(url_param='forecastPeriod', required=True)
Ejemplo n.º 4
0
class HourlyDataIO(TimeSeriesMapper, SnotelIO):
    """
    Thin wrapper around getHourlyData(); StationHourlyDataIO uses it
    internally.
    """
    data_function = 'getHourlyData'

    # Timestamp layouts tried by TimeSeriesMapper
    date_formats = ['%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M']

    # Default WebserviceLoader options that apply here (all required)
    station = FilterOpt(url_param='stationTriplets', required=True)
    parameter = FilterOpt(url_param='elementCd', required=True)
    start_date = DateOpt(url_param='beginDate', required=True)
    end_date = DateOpt(url_param='endDate', required=True)

    # Optional hour-of-day bounds
    begin_hour = FilterOpt(url_param='beginHour')
    end_hour = FilterOpt(url_param='endHour')

    # HeightDepth parameters don't seem to be necessary.

    default_params = {'ordinal': 1}

    def load(self):
        """
        Run the web service call, then replace ``self.data`` with the list
        of values found on the first result.

        Raises NoData when nothing came back or the first result has no
        'values' entry.
        """
        super(HourlyDataIO, self).load()
        if not self.data or 'values' not in self.data[0]:
            raise NoData
        self.data = as_list(self.data[0]['values'])
Ejemplo n.º 5
0
class ForecastDataIO(SnotelIO):
    """
    Wrapper for getForecast().  Station, parameter, forecast period and
    publication date are all required.
    """
    data_function = 'getForecast'

    # Required filters, renamed to the web service argument names
    station = FilterOpt(url_param='stationTriplets', required=True)
    parameter = FilterOpt(url_param='elementCd', required=True)

    forecast_period = FilterOpt(url_param='forecastPeriod', required=True)
    publication_date = DateOpt(url_param='publicationDate', required=True)
Ejemplo n.º 6
0
class MultiStationDailyIO(HydrometLoader, TupleMapper, BaseIO):
    """
    Daily values for one or more USBR Hydromet/Agrimet sites.  One
    DailyDataIO is created internally per requested site.

    Usage:

    data = MultiStationDailyIO(station=['ACAO'], parameter=['GD','QD'])
    for s in data:
        print s.station
        for row in s.data:
            print row.date, row.gd, row.qd
    """

    # Each item carries its own inner IO under "data"
    nested = True

    # Unlike the inherited option, several stations may be given
    station = FilterOpt(multi=True, required=True)

    def load(self):
        """
        Build ``self.data`` as one ``{'station', 'data'}`` dict per requested
        station, where 'data' is a DailyDataIO sharing this IO's parameter,
        date range and debug setting.
        """
        entries = []
        for site in self.getvalue('station'):
            series = DailyDataIO(
                station=site,
                parameter=self.getvalue('parameter'),
                start_date=self.getvalue('start_date'),
                end_date=self.getvalue('end_date'),
                debug=self.debug,
            )
            entries.append({'station': site, 'data': series})
        self.data = entries

    def parse(self):
        """No-op: load() already produces the final data structure."""
        pass
Ejemplo n.º 7
0
class SnotelIO(WebserviceLoader, BaseParser, TupleMapper, BaseIO):
    """
    Shared base for IO classes backed by the SNOTEL AWDB SOAP web services.
    """

    webservice_name = "awdbWebService"
    # Name of the server function that load() calls
    data_function = None

    # Every default WebserviceLoader option is ignored unless a subclass
    # redefines it
    start_date = DateOpt(ignored=True)
    end_date = DateOpt(ignored=True)
    state = FilterOpt(ignored=True)
    basin = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)
    station = FilterOpt(ignored=True)
    parameter = FilterOpt(ignored=True)

    def load(self):
        """
        Call ``data_function`` on the server with ``self.params`` and store
        the result in ``self.data`` as a list: dicts when the rows are
        SudsObject instances, strings otherwise.
        """
        if self.debug:
            self.print_debug()
        kwargs = self.params
        call = getattr(get_server(), self.data_function)
        self.data = call(**kwargs)
        if len(self.data) == 0:
            self.data = []
            return

        self.data = as_list(self.data)
        # The first row decides how every row is converted
        convert = asdict if isinstance(self.data[0], SudsObject) else str
        self.data = [convert(entry) for entry in self.data]

    # Records can differ in which fields they carry, so every record is
    # scanned for field names.  (Otherwise BaseIO would guess the field
    # names from the first record only.)
    scan_fields = True

    def print_debug(self):
        """Print the call about to be made as ``service.function(k=v,...)``."""
        service, function = self.webservice_name, self.data_function
        arguments = ','.join(
            '%s=%s' % pair for pair in self.params.items()
        )
        print('%s.%s(%s)' % (service, function, arguments))
Ejemplo n.º 8
0
class HydrometLoader(WebserviceLoader):
    """
    Option definitions shared by the Hydromet IO classes.
    """

    # start_date / end_date: inherited unchanged from WebserviceLoader

    # The default region filters do not work for Hydromet
    state = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)
    basin = FilterOpt(ignored=True)

    # A four-letter Hydromet station code is mandatory (one per request)
    station = FilterOpt(required=True)

    # Hydromet parameter codes are mandatory too; several may be given
    parameter = FilterOpt(multi=True, required=True)
Ejemplo n.º 9
0
class StationMetaIO(SnotelIO):
    """
    Thin wrapper around getStationMetadata(); StationIO uses it internally.
    """

    data_function = 'getStationMetadata'

    # The station triplet is the only (and mandatory) argument
    station = FilterOpt(url_param='stationTriplet', required=True)
Ejemplo n.º 10
0
class RegionDailyDataIO(StationIO):
    """
    All-in-one IO that loads site metadata and daily data for a region
    (i.e. a state, county, or basin).  Internally calls:
      - getStations()
      - getStationMetadata()
      - getStationElements()
      - getData()

    The outer IO is the list of sites in the region, as produced by
    StationIO, with an extra "data" property on each station that points to
    an inner time series IO for that site.  The inner IO is based on
    StationDailyDataIO, flattened so there is only one level of nesting.
    parameter is optional but recommended (otherwise all available data for
    all sites will be returned).

    Usage:

    sites = RegionDailyDataIO(
        basin='17060105',
        start_date='2014-07-01',
        end_date='2014-07-31',
        parameter='TAVG',
    )
    for site in sites:
        print site.name
        for row in site.data:
            print "   ", row.date, row.value, row.storedunitcd

    """

    nested = True

    # Default WebserviceLoader options that apply here
    start_date = DateOpt(required=True)
    end_date = DateOpt(required=True)
    parameter = FilterOpt(url_param='elementCds')

    @property
    def params(self):
        """
        Parameters for the outer station request: the inherited parameters
        minus the date range, which only the inner IOs use.
        """
        outer = super(RegionDailyDataIO, self).params
        for inner_only in ('start_date', 'end_date'):
            del outer[inner_only]
        return outer

    def load(self):
        """
        Load the station list, then attach a flattened StationDailyDataIO
        to each station under the 'data' key.
        """
        super(RegionDailyDataIO, self).load()
        for site in self.data:
            inner_options = {
                'station': site['stationTriplet'],
                'start_date': self.getvalue('start_date'),
                'end_date': self.getvalue('end_date'),
                'parameter': self.getvalue('parameter'),
                'debug': self.debug,
            }
            site['data'] = flattened(StationDailyDataIO, **inner_options)
Ejemplo n.º 11
0
class StationMetaMultipleIO(SnotelIO):
    """
    Thin wrapper around getStationMetadataMultiple(); StationIO uses it
    internally.
    """

    data_function = 'getStationMetadataMultiple'

    # One or more station triplets (mandatory)
    stations = FilterOpt(
        url_param='stationTriplets',
        multi=True,
        required=True,
    )
Ejemplo n.º 12
0
class AcisIO(WebserviceLoader, JsonParser, TupleMapper, BaseIO):
    """
    Base class for loading data from ACIS web services
    See http://data.rcc-acis.org/doc/
    """

    path = None  # ACIS web service path

    # (Re-)define some default WebserviceLoader options
    state = FilterOpt(multi=True)
    county = FilterOpt(multi=True)
    basin = FilterOpt(multi=True)
    station = FilterOpt(ignored=True)

    # Additional ACIS-specific option
    meta = ChoiceOpt(
        multi=True,
        choices=ALL_META_FIELDS,
        default=DEFAULT_META_FIELDS,
    )

    @property
    def url(self):
        """
        URL for wq.io.loaders.NetLoader
        """
        return "http://data.rcc-acis.org/%s" % self.path

    def serialize_params(self, params, complex):
        """
        Convert option values into the parameters sent to ACIS.

        :param params: mapping of option name to value(s)
        :param complex: truthy when the request must be sent as a single
            JSON object instead of plain URL parameters
        :return: ``{'params': <JSON string>}`` for complex requests,
            otherwise whatever the parent serializer returns
        """
        if complex:
            # ACIS web service supports JSON object as "params" parameter
            nparams = {}
            for key, val in params.items():
                url_param = self.get_url_param(key)
                # Unwrap single-string lists so they are sent as a scalar.
                # NOTE(review): ``basestring`` only exists on Python 2 -
                # presumably provided by a compatibility import; confirm.
                if len(val) == 1 and isinstance(val[0], basestring):
                    val = val[0]
                nparams[url_param] = val
            return {'params': json.dumps(nparams)}
        else:
            # Simpler queries can use traditional URL parameters.  The
            # parent method takes the same (params, complex) arguments, so
            # ``complex`` must be forwarded; omitting it raises TypeError.
            return super(AcisIO, self).serialize_params(params, complex)
Ejemplo n.º 13
0
class StationDataIO(StationElementIO):
    """
    Common base for StationDailyDataIO and StationHourlyDataIO.  For one
    station, fetches data for every element whose duration matches
    ``duration`` by instantiating ``inner_io_class`` per element.
    """
    nested = True

    # Default WebserviceLoader options that apply here
    start_date = DateOpt(required=True, url_param='beginDate')
    end_date = DateOpt(required=True, url_param='endDate')
    parameter = FilterOpt()

    # Set by subclasses: the per-element IO class and the duration to keep
    inner_io_class = None
    duration = None

    @property
    def params(self):
        """
        Inherited request parameters without 'parameter'; that filter (if
        any) is applied to the results *after* the initial request.
        """
        request = super(StationDataIO, self).params
        request.pop('parameter', None)
        return request

    def load(self):
        """
        Load the station's elements, keep those matching the duration and
        optional parameter filter, and attach an inner IO to each as 'data'.
        """
        super(StationDataIO, self).load()
        kept = []
        for record in self.data:

            # Skip records whose duration does not match
            if record['duration'] != self.duration:
                continue

            # Skip records for other parameters when a filter was given
            wanted = self.getvalue('parameter')
            if wanted and record['elementCd'] != wanted:
                continue

            # getStationElements() sometimes lists parameters that have no
            # data for the requested timeframe; those raise NoData and are
            # silently left out of the results.
            try:
                record['data'] = self.inner_io_class(
                    station=record['stationTriplet'],
                    parameter=record['elementCd'],
                    start_date=self.getvalue('start_date'),
                    end_date=self.getvalue('end_date'),
                    debug=self.debug,
                )
            except NoData:
                continue
            else:
                kept.append(record)

        self.data = kept
Ejemplo n.º 14
0
class AgrimetRecentIO(InstantDataIO):
    """
    Recent Agrimet data for one station (all available parameters).

    Usage:

    data = AgrimetRecentIO(station='ABEI')
    for row in data:
        print row.datetime, row.ob, row.wd
    """
    script = "agrimet.pl"

    # Date range and parameter options do not apply to this script
    start_date = FilterOpt(ignored=True)
    end_date = FilterOpt(ignored=True)
    parameter = FilterOpt(ignored=True)

    @property
    def params(self):
        """
        Return the URL-encoded query string for the request; the keys are
        added in a fixed order (cbtt, interval, format, back).
        """
        query = OrderedDict()
        query['cbtt'] = self.getvalue('station')
        query['interval'] = 'instant'
        query['format'] = 2
        query['back'] = 360
        return urlencode(query)
Ejemplo n.º 15
0
class StationIO(SnotelIO):
    """
    Metadata for all stations in a region.  Combines getStations() (to
    list the stations) with getStationMetadata() (one call per station).
    """

    data_function = 'getStations'

    # Default WebserviceLoader options that apply here
    state = FilterOpt(multi=True, url_param='stateCds')
    county = FilterOpt(multi=True, url_param='countyNames')
    basin = FilterOpt(multi=True, url_param='hucs')
    parameter = FilterOpt(multi=True, url_param='elementCds')

    # Extra getStations() filters
    min_latitude = FilterOpt(url_param='minLatitude')
    max_latitude = FilterOpt(url_param='maxLatitude')
    min_elevation = FilterOpt(url_param='minElevation')
    max_elevation = FilterOpt(url_param='maxElevation')
    ordinals = FilterOpt(url_param='ordinals')

    # Not the same thing as station (stationTriplet)
    station_ids = FilterOpt(multi=True, url_param='stationIds')

    # heightDepths = FilterOpt(url_param='heightDepths')
    # This parameter is submitted as
    # <heightDepths><value>value</value><unitCd>unit</unitCd></heightDepths>
    # Left out since it doesn't seem important and isn't well-documented

    default_params = {'logicalAnd': 'true'}

    def load(self):
        """
        Load the station list, then replace each entry with the first
        record returned by StationMetaIO for that station.
        """
        super(StationIO, self).load()
        detailed = []
        for triplet in self.data:
            meta = StationMetaIO(station=triplet, debug=self.debug)
            detailed.append(meta.data[0])
        self.data = detailed
Ejemplo n.º 16
0
class HourlyDataIO(TimeSeriesMapper, SnotelIO):
    """
    Thin wrapper around getHourlyData(); StationHourlyDataIO uses it
    internally.  Accepts several station triplets in one request.
    """
    data_function = 'getHourlyData'

    # Timestamp layouts tried by TimeSeriesMapper
    date_formats = ['%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M']

    # Default WebserviceLoader options that apply here (all required)
    station = FilterOpt(multi=True, url_param='stationTriplets', required=True)
    parameter = FilterOpt(url_param='elementCd', required=True)
    start_date = DateOpt(url_param='beginDate', required=True)
    end_date = DateOpt(url_param='endDate', required=True)

    # Optional hour-of-day bounds
    begin_hour = FilterOpt(url_param='beginHour')
    end_hour = FilterOpt(url_param='endHour')

    # HeightDepth parameters don't seem to be necessary.

    default_params = {'ordinal': 1}

    def load(self):
        """
        Run the web service call, then replace ``self.data`` with a dict
        mapping each station triplet to its list of value dicts (an empty
        list when the station's result has no 'values').
        """
        super(HourlyDataIO, self).load()
        # Multiple triplets can be requested, so collect rows per station
        by_station = {}
        for record in self.data:
            triplet = record['stationTriplet']
            has_values = record and 'values' in record
            by_station[triplet] = (
                [asdict(value) for value in as_list(record['values'])]
                if has_values else []
            )
        self.data = by_station
Ejemplo n.º 17
0
class DailyDataIO(SnotelIO):
    """
    Wrapper for getData(), used internally by StationDailyDataIO
    """
    data_function = 'getData'

    # Applicable WebserviceLoader default options
    station = FilterOpt(required=True, url_param='stationTriplets')
    parameter = FilterOpt(required=True, url_param='elementCd')
    start_date = DateOpt(required=True, url_param='beginDate')
    end_date = DateOpt(required=True, url_param='endDate')

    # HeightDepth parameters don't seem to be necessary.

    default_params = {
        'ordinal': 1,
        'duration': 'DAILY',
        'getFlags': 'true',
        'alwaysReturnDailyFeb29': 'false',
    }

    def parse(self):
        """
        Expand the first loaded result into one ``{'date', 'value', 'flag'}``
        dict per entry and store the list in ``self.data``.

        Raises NoData when nothing was loaded, or when the first result is
        empty or has no 'values' entry.
        """
        # An empty response leaves self.data == [], so guard the index and
        # report it as NoData (which callers catch) rather than IndexError.
        if not self.data:
            raise NoData
        data = self.data[0]
        if not data or 'values' not in data:
            raise NoData

        bd = data['beginDate']
        ed = data['endDate']
        # Presumably yields the dates between beginDate and endDate that the
        # values correspond to - TODO confirm against fill_date_range.
        dates = fill_date_range(bd, ed, date_format='%Y-%m-%d %H:%M:%S')
        vals = as_list(data['values'])
        flags = as_list(data['flags'])

        # zip() pairs entries positionally and stops at the shortest input
        self.data = [{
            'date': date,
            'value': val,
            'flag': flag
        } for date, val, flag in zip(dates, vals, flags)]
Ejemplo n.º 18
0
class StationElementIO(SnotelIO):
    """
    Wrapper around getStationElements() that also adds each element's name,
    looked up through ElementIO.get_names().
    """

    data_function = 'getStationElements'

    # Default WebserviceLoader options that apply here
    start_date = DateOpt(url_param='beginDate')
    end_date = DateOpt(url_param='endDate')
    station = FilterOpt(url_param='stationTriplet', required=True)

    def load(self):
        """
        Load the station's elements and set 'element_name' on each record
        from its 'elementCd'.
        """
        super(StationElementIO, self).load()
        name_by_code = ElementIO.get_names()
        for record in self.data:
            code = record['elementCd']
            record['element_name'] = name_by_code[code]
Ejemplo n.º 19
0
class CocorahsIO(WebserviceLoader, XmlParser, TimeSeriesMapper, BaseIO):
    """
    CoCoRaHS observation loader for data.cocorahs.org

    Usage:

    data = CocorahsIO(state='MN', county='HN')
    for row in data:
        print row.stationname, row.observationdate.date(), row.totalprecipamt
    """

    # Date options keep their time component and use CoCoRaHS names
    start_date = DateOpt(
        url_param="StartDate",
        date_only=False,
        required=True,
    )
    end_date = DateOpt(
        url_param="EndDate",
        date_only=False,
    )

    # Supported region filters
    state = FilterOpt(required=True)
    county = FilterOpt()

    # Unsupported filters
    basin = FilterOpt(ignored=True)
    station = FilterOpt(ignored=True)
    parameter = FilterOpt(ignored=True)

    # Options specific to CoCoRaHS
    datetype = ChoiceOpt(
        choices=["reportdate", "timestamp"],
        default="reportdate",
        url_param="ReportDateType",
    )
    reporttype = ChoiceOpt(
        choices=["Daily", "MultiDay"],
        default="Daily",
        url_param="ReportType",
    )

    # Settings consumed by the wq.io base classes
    url = "http://data.cocorahs.org/cocorahs/export/exportreports.aspx"

    root_tag = 'Cocorahs'

    date_formats = [
        '%Y-%m-%d %I:%M %p',
        '%Y-%m-%d',
        '%I:%M %p',
        '%m/%d/%Y %I:%M %p',
    ]

    key_fields = [
        "stationnumber", "stationname",
        "latitude", "longitude",
        "datetimestamp",
        "observationdate", "observationtime",
        "entrydatetime",
    ]

    # Sent with every request
    default_params = {
        'dtf': "1",
        'Format': "XML",
        'TimesInGMT': "False",
        'responsefields': "all",
    }

    @property
    def item_tag(self):
        """Path of the report elements for the selected report type."""
        is_daily = self.getvalue('reporttype') == "Daily"
        # Anything other than "Daily" means reporttype == "MultiDay"
        return (
            'DailyPrecipReports/DailyPrecipReport'
            if is_daily
            else 'MultiDayPrecipReports/MultiDayPrecipReport'
        )

    def serialize_params(self, params, complex):
        """
        Serialize through the parent class, then rewrite the date
        parameters in the names and formats CoCoRaHS expects.
        """
        params = super(CocorahsIO, self).serialize_params(params, complex)
        day_fmt = '%m/%d/%Y'

        if 'EndDate' not in params:
            # Start date only (usually used with datetype=timestamp):
            # send it as "Date", including the time of day
            stamp_fmt = day_fmt + " %I:%M %p"
            params['Date'] = self.getvalue('start_date').strftime(stamp_fmt)
            del params['StartDate']
            return params

        # Date range (usually used with datetype=reportdate)
        params['StartDate'] = self.getvalue('start_date').strftime(day_fmt)
        params['EndDate'] = self.getvalue('end_date').strftime(day_fmt)
        return params

    def map_value(self, field, value):
        """
        Map the value through the parent class, turning CoCoRaHS's empty
        date placeholder (1/1/0001) into None.
        """
        mapped = super(CocorahsIO, self).map_value(field, value)
        is_placeholder = isinstance(mapped, datetime) and mapped.year == 1
        return None if is_placeholder else mapped
Ejemplo n.º 20
0
class EnsembleForecastIO(ZipWebserviceLoader, EnsembleCsvParser, TupleMapper,
                         BaseIO):
    """
    Load ensemble forecast zip files from the CNRFC website.
     - start_date and basin are required to specify the zip file;
     - station and end_date can be used to filter the downloaded data
       (each filter is optional and applied independently of the other).
    """

    nested = True

    start_date = DateOpt(required=True)
    end_date = DateOpt()

    # Region filters
    state = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)

    # FIXME: this isn't actually a HUC8 basin
    basin = FilterOpt(required=True)

    station = FilterOpt(multi=True)
    parameter = FilterOpt(ignored=True)

    region = ChoiceOpt(default="cnrfc", choices=["cnrfc"])

    # URL template per region; {date} and {basin} are filled in by ``url``
    urls = {
        "cnrfc": ("http://www.cnrfc.noaa.gov/csv/" +
                  "{date}12_{basin}_hefs_csv_daily.zip")
    }

    @property
    def params(self):
        """
        Always None: every option is encoded in the URL itself.  The parent
        property is still evaluated so its validation logic runs.
        """
        super(EnsembleForecastIO, self).params
        return None

    @property
    def url(self):
        """Zip file URL for the selected region, start date and basin."""
        url = self.urls[self.getvalue("region")]
        return url.format(
            date=self.getvalue("start_date").strftime("%Y%m%d"),
            basin=self.getvalue("basin"),
        )

    def parse(self):
        """
        Parse the downloaded data, then apply the optional station and
        end_date filters to ``self.data``.
        """
        super(EnsembleForecastIO, self).parse()

        # Optionally filter by station id
        site_filter = self.getvalue('station')
        if site_filter:
            self.data = [
                item for item in self.data if item['site'] in site_filter
            ]

        # Optionally drop rows after the end date.  This runs whether or not
        # a station filter was given, so end_date is never silently ignored.
        date_filter = self.getvalue('end_date')
        if date_filter:
            # Row dates are compared as strings; include the whole end day
            cutoff = date_filter.strftime('%Y-%m-%d') + " 23:59:59"
            for item in self.data:
                item['data'] = [
                    row for row in item['data'] if row['date'] <= cutoff
                ]

    def usable_item(self, item):
        """
        Wrap the item's 'data' rows in a TimeSeriesIO (on a copy, leaving
        the stored item untouched) before the parent class maps it.
        """
        item = item.copy()
        item['data'] = TimeSeriesIO(data=item['data'])
        return super(EnsembleForecastIO, self).usable_item(item)
Ejemplo n.º 21
0
class SnotelIO(WebserviceLoader, BaseParser, TupleMapper, BaseIO):
    """
    Shared base for IO classes backed by the SNOTEL AWDB SOAP web services.
    """

    webservice_name = "awdbWebService"
    # Name of the server function that load() calls
    data_function = None

    # Every default WebserviceLoader option is ignored unless a subclass
    # redefines it
    start_date = DateOpt(ignored=True)
    end_date = DateOpt(ignored=True)
    state = FilterOpt(ignored=True)
    basin = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)
    station = FilterOpt(ignored=True)
    parameter = FilterOpt(ignored=True)

    def load(self):
        """
        Call ``data_function`` on the server with ``self.params`` and store
        the result in ``self.data`` as a list: dicts when the rows are
        SudsObject instances, strings otherwise.
        """
        if self.debug:
            self.print_debug()
        kwargs = self.params
        call = getattr(get_server(), self.data_function)
        self.data = call(**kwargs)
        if len(self.data) == 0:
            self.data = []
            return

        self.data = as_list(self.data)
        # The first row decides how every row is converted
        convert = asdict if isinstance(self.data[0], SudsObject) else str
        self.data = [convert(entry) for entry in self.data]

    def serialize_params(self, params, complex_type):
        """
        Map option names to their web service argument names, leaving the
        values untouched.

        The AWDB NRCS web service accepts multiple values for a parameter,
        which allows several stations to be queried in one call for
        efficiency.  List values are therefore passed through as lists
        instead of being joined into a single string.

        Raises NotImplementedError when ``complex_type`` is truthy.
        """
        if complex_type:
            raise NotImplementedError("Cannot serialize %s!" % params)

        serialized = {}
        for name, value in params.items():
            serialized[self.get_url_param(name)] = value
        return serialized

    # Records can differ in which fields they carry, so every record is
    # scanned for field names.  (Otherwise BaseIO would guess the field
    # names from the first record only.)
    scan_fields = True

    def print_debug(self):
        """Print the call about to be made as ``service.function(k=v,...)``."""
        service, function = self.webservice_name, self.data_function
        arguments = ','.join(
            '%s=%s' % pair for pair in self.params.items()
        )
        print('%s.%s(%s)' % (service, function, arguments))