Ejemplo n.º 1
0
class Hads(Collector):
    def __init__(self, **kwargs):
        """
        Set up the collector.

        Optional keyword arguments override the service endpoints:
        ``states_url``, ``metadata_url`` and ``obs_retrieval_url``.
        Anything not supplied falls back to the hads.ncep.noaa.gov default.
        """
        super(Hads, self).__init__()

        endpoint_defaults = (
            ('states_url',
             "https://hads.ncep.noaa.gov/hads/goog_earth/"),
            ('metadata_url',
             "https://hads.ncep.noaa.gov/nexhads2/servlet/DCPInfo"),
            ('obs_retrieval_url',
             "https://hads.ncep.noaa.gov/nexhads2/servlet/DecodedData"),
        )
        for attr_name, default_url in endpoint_defaults:
            setattr(self, attr_name, kwargs.get(attr_name, default_url))

        # station code cache (filled lazily) and the response parser
        self.station_codes = None
        self.parser = HadsParser()

    def clear(self):
        """
        Reset the collector through the parent's ``clear()`` and drop the
        cached station codes so the next lookup fetches them again.
        """
        super(Hads, self).clear()

        # forget the list cached by _get_station_codes()
        self.station_codes = None

    @Collector.bbox.setter
    def bbox(self, bbox):
        """
        Set the bounding box filter and invalidate the cached station codes.
        """
        # storage is delegated to the parent property's setter
        Collector.bbox.fset(self, bbox)
        # the cached codes were computed for the previous bbox
        self.station_codes = None

    @Collector.features.setter
    def features(self, features):
        """
        Set the features filter and invalidate the cached station codes.
        """
        # storage is delegated to the parent property's setter
        Collector.features.fset(self, features)
        # force _get_station_codes() to rebuild its list on next use
        self.station_codes = None

    def list_variables(self):
        """
        Return the variable codes reported for the selected stations.

        Stations are narrowed by the features filter first; the resulting
        variable set is then intersected with the variables filter when one
        has been set.
        """
        codes = self._apply_features_filter(self._get_station_codes())
        found = self._list_variables(codes)

        wanted = self.variables if hasattr(self, '_variables') else None
        if wanted is not None:
            found &= set(wanted)

        return list(found)

    def _list_variables(self, station_codes):
        """
        Internal helper to list the variables for the given station codes.

        Posts the station codes to the observation retrieval endpoint and
        scrapes the variable codes out of the text response.

        :param station_codes: iterable of station code strings
        :returns: set of variable code strings found in the response
        :raises requests.HTTPError: if the service answers with an error status
        """
        # sample output from obs retrieval:
        #
        # DD9452D0
        #     HP(SRBM5)
        #         2013-07-22 19:30 45.97
        #     HT(SRBM5)
        #         2013-07-22 19:30 44.29
        #     PC(SRBM5)
        #         2013-07-22 19:30 36.19
        #
        # Raw string: "\s", "\(" and "\w" are invalid escapes in a normal
        # string literal (warning today, error in a future Python); the regex
        # engine interprets "\n" as a newline itself, so the pattern matches
        # exactly what it did before.
        rvar = re.compile(r"\n\s([A-Z]{2}[A-Z0-9]{0,1})\(\w+\)")

        resp = requests.post(self.obs_retrieval_url,
                             data={
                                 'state': 'nil',
                                 'hsa': 'nil',
                                 'of': '3',
                                 'extraids': " ".join(station_codes),
                                 'sinceday': -1
                             })
        resp.raise_for_status()

        # findall returns the single capture group (the variable code)
        return set(rvar.findall(resp.text))

    def list_features(self):
        """
        Return the station codes that survive the bbox and features filters.
        """
        return self._apply_features_filter(self._get_station_codes())

    def collect(self):
        """
        Fetch metadata and observations and hand them to the parser.

        The variables filter and the (start_time, end_time) pair are passed
        along to the parser; each is None when the attribute does not exist.
        """
        var_filter = getattr(self, '_variables', None)
        begin = getattr(self, 'start_time', None)
        finish = getattr(self, 'end_time', None)

        metadata, raw_data = self.raw()
        return self.parser.parse(metadata, raw_data, var_filter,
                                 (begin, finish))

    def raw(self, format=None):
        """
        Fetch the unparsed service responses for the selected stations.

        ``format`` is accepted but not used by this implementation.

        Returns a tuple of (metadata, raw data).
        """
        codes = self._get_station_codes()
        codes = self._apply_features_filter(codes)

        # metadata is requested first, then the observations
        return (self._get_metadata(codes), self._get_raw_data(codes))

    def _apply_features_filter(self, station_codes):
        """
        Narrow ``station_codes`` to those also named in the features filter.

        With no features filter set the input is returned untouched;
        otherwise the result is a new list built from a set intersection, so
        its ordering is not that of the input.
        """
        wanted = self.features if hasattr(self, 'features') else None
        if wanted is None:
            return station_codes

        return list(set(station_codes).intersection(set(wanted)))

    def _get_metadata(self, station_codes):
        """
        POST the station codes to the metadata endpoint and return the
        response body as text. Raises on an HTTP error status.
        """
        form = {
            'state': 'nil',
            'hsa': 'nil',
            'of': '1',
            # the service takes the ids as one space-separated string
            'extraids': " ".join(station_codes),
            'data': "Get Meta Data",
        }
        response = requests.post(self.metadata_url, data=form)
        response.raise_for_status()
        return response.text

    def _get_station_codes(self, force=False):
        """
        Gets and caches a list of station codes optionally within a bbox.

        Will return the cached version if it exists unless force is True.

        The cache (``self.station_codes``) is assigned only after the whole
        lookup has succeeded, so a request that fails part-way through can
        never leave a partial list behind to be served as if it were complete.

        :param force: rebuild the list even if a cached one exists
        :returns: list of station code strings
        """
        if not force and self.station_codes is not None:
            return self.station_codes

        state_urls = self._get_state_urls()

        # coarse filter by bounding box against a shapefile; None means
        # "do not filter by state"
        state_matches = None

        if self.bbox:
            shapefile = os.path.join(
                "resources", "ne_50m_admin_1_states_provinces_lakes_shp.shp")
            with collection(shapefile, "r") as c:
                # every Canadian feature is mapped to the single code 'CN'
                state_matches = {
                    'CN' if props['admin'] == 'Canada' else props['postal']
                    for props in (x['properties']
                                  for x in c.filter(bbox=self.bbox))
                }

        station_codes = []

        for state_url in state_urls:
            if state_matches is not None:
                # last path component without its extension
                state_abbr = state_url.split("/")[-1].split(".")[0]
                if state_abbr not in state_matches:
                    continue

            station_codes.extend(self._get_stations_for_state(state_url))

        if self.bbox:
            # retrieve metadata for all stations to properly filter them
            metadata = self._get_metadata(station_codes)
            parsed_metadata = self.parser._parse_metadata(metadata)

            def in_bbox(code):
                lat = parsed_metadata[code]['latitude']
                lon = parsed_metadata[code]['longitude']

                # bbox is indexed as (min lon, min lat, max lon, max lat)
                return (self.bbox[0] <= lon <= self.bbox[2]
                        and self.bbox[1] <= lat <= self.bbox[3])

            station_codes = [code for code in station_codes if in_bbox(code)]

        self.station_codes = station_codes
        return self.station_codes

    def _get_state_urls(self):
        """
        Scrape the states page and return the distinct ``href`` targets of
        its ``<area>`` elements.

        ``<area>`` elements without an ``href`` are ignored; previously they
        put ``None`` into the result, which callers cannot use as a URL.

        :returns: list of unique URL strings (order not defined)
        """
        root = BeautifulSoup(requests.get(self.states_url).text)
        hrefs = {area.attrs.get('href') for area in root.find_all("area")}
        hrefs.discard(None)
        return list(hrefs)

    def _get_stations_for_state(self, state_url):
        """
        Scrape one state page and return the station codes it links to.

        The code is whatever follows ``nesdis_id=`` in each anchor's href.
        Anchors with no href (which used to raise ``KeyError``) or an empty
        code are skipped.

        NOTE(review): an href that does not contain ``nesdis_id=`` is
        returned whole, as before - confirm every link on these pages is a
        station link.

        :param state_url: URL of the state page
        :returns: list of station code strings, in document order
        """
        state_root = BeautifulSoup(requests.get(state_url).text)

        codes = []
        for anchor in state_root.find_all('a'):
            href = anchor.attrs.get('href')
            if not href:
                continue

            code = href.split("nesdis_id=")[-1]
            if code:
                codes.append(code)

        return codes

    def _get_raw_data(self, station_codes):
        """
        Fetch the decoded observations for the given stations as text.

        The look-back window sent as ``sinceday`` defaults to 7 and is
        derived from ``start_time`` when that is set. Per the inline
        comments, positive values are days (capped at 7) and negative values
        are whole hours (at least 1).

        Whole hours are computed with floor division: with ``/`` Python 3
        produced fractional floats such as ``-0.0138`` and made the one-hour
        minimum branch unreachable.

        NOTE(review): a start_time less than a minute in the past leaves the
        7 day default in place, as before - confirm that is intended.

        :param station_codes: iterable of station code strings
        :returns: response body text
        :raises requests.HTTPError: if the service answers with an error status
        """
        since = 7
        if hasattr(self, 'start_time') and self.start_time is not None:
            # calc delta between now and start_time
            timediff = datetime.utcnow().replace(
                tzinfo=pytz.utc) - self.start_time

            if timediff.days == 0:
                whole_hours = timediff.seconds // 3600
                if whole_hours > 0:
                    since = -whole_hours
                elif timediff.seconds // 60 > 0:
                    since = -1  # 1 hour minimum resolution
            else:
                since = min(7, timediff.days)  # max of 7 days

        resp = requests.post(self.obs_retrieval_url,
                             data={
                                 'state': 'nil',
                                 'hsa': 'nil',
                                 'of': '1',
                                 'extraids': " ".join(station_codes),
                                 'sinceday': since
                             })
        resp.raise_for_status()

        return resp.text
Ejemplo n.º 2
0
class HadsParserTest(unittest.TestCase):
    def setUp(self):
        """
        Create the parser under test and load the captured service responses
        used by every test: pipe-delimited, CRLF-terminated records.
        """
        self.hp = HadsParser()

        # captured metadata 26 July 2013 for stations ['DD182264', '17BC752E', 'CE4D0268']
        self.metadata = u'|17BC752E|WKGR1|CHIPUXET RIVER AT WEST KINGSTON|41 28 56|-71 33 06|BOX|RI|USGS01|SI|83  |002840|60|HG|15,-9|0.01,-9|0.0|13|0.0|-0.01|VB|60,-9|0.3124,-9|0.311|28|0.0|0.0|\r\n|CE4D0268|FOXR1|FOXPOINT HURRICANE BARRIER|41 48 57|-71 24 07|BOX|RI|CENED1|SU|161 |000100|30|HM|60,-9|0.01,-9|0.0|1|0.0|0.0|PA|60,-9|0.01,-9|0.0|1|0.0|0.0|TA|60,-9|0.1,-9|0.0|1|0.0|0.0|US|60,-9|1,-9|0.0|1|0.0|0.0|UD|60,-9|1,-9|0.0|1|0.0|0.0|\r\n|DD182264|USQR1|USQUEPAUG RIVER NEAR USQUEPAUG|41 28 36|-71 36 19|BOX|RI|USGS01|SI|83  |005830|60|HG|15,-9|0.01,-9|0.0|13|0.0|0.0|VB|60,-9|0.3124,-9|0.311|58|0.0|0.0|\r\n'

        # captured data 26 July 2013 for stations ['DD182264', '17BC752E', 'CE4D0268']
        # (only CE4D0268 and DD182264 actually have observation rows)
        self.raw_data = u'CE4D0268|FOXR1|HM|2013-07-26 16:30|4.94|  |\r\nCE4D0268|FOXR1|HM|2013-07-26 17:00|4.41|  |\r\nCE4D0268|FOXR1|PA|2013-07-26 16:30|29.93|  |\r\nCE4D0268|FOXR1|PA|2013-07-26 17:00|29.93|  |\r\nCE4D0268|FOXR1|TA|2013-07-26 16:30|66.20|  |\r\nCE4D0268|FOXR1|TA|2013-07-26 17:00|67.30|  |\r\nCE4D0268|FOXR1|US|2013-07-26 16:30|5.00|  |\r\nCE4D0268|FOXR1|US|2013-07-26 17:00|8.00|  |\r\nCE4D0268|FOXR1|UD|2013-07-26 16:30|358.00|  |\r\nCE4D0268|FOXR1|UD|2013-07-26 17:00|353.00|  |\r\nDD182264|USQR1|HG|2013-07-26 16:30|3.07|  |\r\nDD182264|USQR1|HG|2013-07-26 16:45|3.07|  |\r\n'

    def test_parse(self):
        """
        parse() with no variable filter and an open time range should yield
        a StationCollection of size 3 for the captured fixtures.
        """
        no_var_filter = None
        open_time_range = (None, None)

        stations = self.hp.parse(self.metadata, self.raw_data,
                                 no_var_filter, open_time_range)
        assert isinstance(stations, StationCollection)

        stations.calculate_bounds()
        assert stations.size == 3

    def test__parse_metadata(self):
        """
        _parse_metadata() should turn the captured metadata into one dict per
        station, keyed by NESDIS id, with a nested dict per variable.

        NOTE(review): the expected longitudes equal deg + min/60 + sec/3600
        with a negative degree term (e.g. "-71 33 06" -> -70.448...), i.e.
        they pin the parser's current sign handling - confirm that is the
        intended value rather than -71.55.
        """
        def var_entry(data_interval, coefficient, constant, time_offset,
                      gauge_correction=0.0):
            # every variable in the fixture has a base elevation of 0.0
            return {
                'base_elevation': 0.0,
                'coefficient': coefficient,
                'constant': constant,
                'data_interval': data_interval,
                'gauge_correction': gauge_correction,
                'time_offset': time_offset,
            }

        expected = {}

        expected[u'17BC752E'] = {
            'nesdis_id': u'17BC752E',
            'nwsli': u'WKGR1',
            'location_text': u'CHIPUXET RIVER AT WEST KINGSTON',
            'latitude': 41.48222222222222,
            'longitude': -70.44833333333334,
            'hsa': u'BOX',
            'state': u'RI',
            'owner': u'USGS01',
            'manufacturer': u'SI',
            'channel': u'83  ',
            'init_transmit': u'002840',
            'variables': {
                u'HG': var_entry(u'15,-9', u'0.01,-9', u'0.0', u'13',
                                 gauge_correction=-0.01),
                u'VB': var_entry(u'60,-9', u'0.3124,-9', u'0.311', u'28'),
            },
        }

        expected[u'CE4D0268'] = {
            'nesdis_id': u'CE4D0268',
            'nwsli': u'FOXR1',
            'location_text': u'FOXPOINT HURRICANE BARRIER',
            'latitude': 41.81583333333333,
            'longitude': -70.59805555555556,
            'hsa': u'BOX',
            'state': u'RI',
            'owner': u'CENED1',
            'manufacturer': u'SU',
            'channel': u'161 ',
            'init_transmit': u'000100',
            'variables': {
                u'HM': var_entry(u'60,-9', u'0.01,-9', u'0.0', u'1'),
                u'PA': var_entry(u'60,-9', u'0.01,-9', u'0.0', u'1'),
                u'TA': var_entry(u'60,-9', u'0.1,-9', u'0.0', u'1'),
                u'UD': var_entry(u'60,-9', u'1,-9', u'0.0', u'1'),
                u'US': var_entry(u'60,-9', u'1,-9', u'0.0', u'1'),
            },
        }

        expected[u'DD182264'] = {
            'nesdis_id': u'DD182264',
            'nwsli': u'USQR1',
            'location_text': u'USQUEPAUG RIVER NEAR USQUEPAUG',
            'latitude': 41.47666666666667,
            'longitude': -70.39472222222223,
            'hsa': u'BOX',
            'state': u'RI',
            'owner': u'USGS01',
            'manufacturer': u'SI',
            'channel': u'83  ',
            'init_transmit': u'005830',
            'variables': {
                u'HG': var_entry(u'15,-9', u'0.01,-9', u'0.0', u'13'),
                u'VB': var_entry(u'60,-9', u'0.3124,-9', u'0.311', u'58'),
            },
        }

        parsed = self.hp._parse_metadata(self.metadata)
        assert parsed == expected

    def test__parse_data(self):
        """
        _parse_data() without filters should return, per station id, the
        (variable, naive datetime, value string) rows in fixture order.
        """
        def stamp(hour, minute):
            # all fixture rows are from 26 July 2013
            return datetime.datetime(2013, 7, 26, hour, minute)

        foxpoint_rows = []
        for code, first, second in ((u'HM', u'4.94', u'4.41'),
                                    (u'PA', u'29.93', u'29.93'),
                                    (u'TA', u'66.20', u'67.30'),
                                    (u'US', u'5.00', u'8.00'),
                                    (u'UD', u'358.00', u'353.00')):
            # each variable has a 16:30 reading followed by a 17:00 one
            foxpoint_rows.append((code, stamp(16, 30), first))
            foxpoint_rows.append((code, stamp(17, 0), second))

        expected = {
            u'CE4D0268': foxpoint_rows,
            u'DD182264': [(u'HG', stamp(16, 30), u'3.07'),
                          (u'HG', stamp(16, 45), u'3.07')],
        }

        parsed = self.hp._parse_data(self.raw_data, None, (None, None))
        assert parsed == expected

    def test__parse_data_with_var_filter(self):
        """
        _parse_data() with a variable filter of HM and UD should keep only
        those rows; the station with only HG readings drops out entirely.
        """
        res = {
            u'CE4D0268':
            [(u'HM', datetime.datetime(2013, 7, 26, 16, 30), u'4.94'),
             (u'HM', datetime.datetime(2013, 7, 26, 17, 0), u'4.41'),
             (u'UD', datetime.datetime(2013, 7, 26, 16, 30), u'358.00'),
             (u'UD', datetime.datetime(2013, 7, 26, 17, 0), u'353.00')]
        }

        parsed = self.hp._parse_data(self.raw_data, [u'HM', u'UD'],
                                     (None, None))
        # without this comparison the test could never fail
        assert parsed == res
Ejemplo n.º 3
0
class Hads(Collector):
    def __init__(self, **kwargs):
        """
        Set up the collector.

        ``states_url``, ``metadata_url`` and ``obs_retrieval_url`` may be
        passed as keyword arguments to override the default endpoints.

        NOTE(review): the defaults are plain-HTTP amazon.nws.noaa.gov URLs -
        confirm that host is still the current one.
        """
        super(Hads, self).__init__()

        option = kwargs.get
        self.states_url = option(
            'states_url', "http://amazon.nws.noaa.gov/hads/goog_earth/")
        self.metadata_url = option(
            'metadata_url',
            "http://amazon.nws.noaa.gov/nexhads2/servlet/DCPInfo")
        self.obs_retrieval_url = option(
            'obs_retrieval_url',
            "http://amazon.nws.noaa.gov/nexhads2/servlet/DecodedData")

        # lazily filled station code cache, and the response parser
        self.station_codes = None
        self.parser = HadsParser()

    def clear(self):
        """
        Reset the collector through the parent's ``clear()`` and drop the
        cached station codes so the next lookup fetches them again.
        """
        super(Hads, self).clear()

        # forget the list cached by _get_station_codes()
        self.station_codes      = None

    @Collector.bbox.setter
    def bbox(self, bbox):
        """
        Set the bounding box filter and invalidate the cached station codes.
        """
        # storage is delegated to the parent property's setter
        Collector.bbox.fset(self, bbox)
        # the cached codes were computed for the previous bbox
        self.station_codes      = None

    @Collector.features.setter
    def features(self, features):
        """
        Set the features filter and invalidate the cached station codes.
        """
        # storage is delegated to the parent property's setter
        Collector.features.fset(self, features)
        # force _get_station_codes() to rebuild its list on next use
        self.station_codes      = None

    def list_variables(self):
        """
        List the variables available for the selected stations, restricted
        to the variables filter when one is set.
        """
        codes = self._get_station_codes()
        codes = self._apply_features_filter(codes)
        available = self._list_variables(codes)

        # no variables filter: everything the service reported
        if not hasattr(self, '_variables') or self.variables is None:
            return list(available)

        available.intersection_update(set(self.variables))
        return list(available)

    def _list_variables(self, station_codes):
        """
        Internal helper to list the variables for the given station codes.

        Posts the station codes to the observation retrieval endpoint and
        scrapes the variable codes out of the text response.

        The set is built directly from the regex matches. The previous bare
        ``map(variables.add, ...)`` is lazy on Python 3, so it never ran and
        the method always returned an empty set there.

        :param station_codes: iterable of station code strings
        :returns: set of variable code strings found in the response
        :raises requests.HTTPError: if the service answers with an error status
        """
        # sample output from obs retrieval:
        #
        # DD9452D0
        #     HP(SRBM5)
        #         2013-07-22 19:30 45.97
        #     HT(SRBM5)
        #         2013-07-22 19:30 44.29
        #     PC(SRBM5)
        #         2013-07-22 19:30 36.19
        #
        # Raw string so "\s", "\(" and "\w" reach the regex engine without
        # relying on invalid string escapes; the pattern itself is unchanged.
        rvar = re.compile(r"\n\s([A-Z]{2}[A-Z0-9]{0,1})\(\w+\)")

        resp = requests.post(self.obs_retrieval_url, data={'state' : 'nil',
                                                           'hsa'   : 'nil',
                                                           'of'    : '3',
                                                           'extraids' : " ".join(station_codes),
                                                           'sinceday' : -1})
        resp.raise_for_status()

        return set(rvar.findall(resp.text))

    def list_features(self):
        """
        List the station codes left after the bbox and features filters.
        """
        all_codes = self._get_station_codes()
        return self._apply_features_filter(all_codes)

    def collect(self):
        """
        Fetch metadata and observations and return the parser's result.

        The variables filter and the (start_time, end_time) pair are handed
        to the parser; each is None when the attribute does not exist.
        """
        var_filter = self._variables if hasattr(self, '_variables') else None

        extents = []
        for attr in ('start_time', 'end_time'):
            extents.append(getattr(self, attr) if hasattr(self, attr) else None)

        metadata, raw_data = self.raw()
        return self.parser.parse(metadata, raw_data, var_filter, tuple(extents))

    def raw(self, format=None):
        """
        Fetch the unparsed responses for the selected stations.

        ``format`` is accepted but ignored here.

        Returns a tuple of (metadata, raw data).
        """
        selected = self._apply_features_filter(self._get_station_codes())

        # the metadata request is issued before the observation request
        metadata = self._get_metadata(selected)
        return (metadata, self._get_raw_data(selected))

    def _apply_features_filter(self, station_codes):
        """
        Intersect ``station_codes`` with the features filter, if one is set.

        Without a filter the argument is returned as is; with one, a new
        list is built from a set intersection (input order is not kept).
        """
        if not hasattr(self, 'features') or self.features is None:
            return station_codes

        allowed = set(self.features)
        return list(set(station_codes).intersection(allowed))

    def _get_metadata(self, station_codes):
        """
        POST the station codes to the metadata endpoint and return the
        response text; an HTTP error status raises.
        """
        # ids travel as a single space-separated form field
        payload = dict(state='nil',
                       hsa='nil',
                       of='1',
                       extraids=" ".join(station_codes),
                       data="Get Meta Data")

        reply = requests.post(self.metadata_url, data=payload)
        reply.raise_for_status()
        return reply.text

    def _get_station_codes(self, force=False):
        """
        Gets and caches a list of station codes optionally within a bbox.

        Will return the cached version if it exists unless force is True.

        Two things are guaranteed regardless of Python version:

        * the state matches and the final result are concrete collections
          (lazy ``map``/``filter`` objects would be exhausted after their
          first use on Python 3, breaking the state check and the cache);
        * the cache is assigned only once the whole lookup succeeded, so a
          failed request never leaves a partial list cached.

        :param force: rebuild the list even if a cached one exists
        :returns: list of station code strings
        """
        if not force and self.station_codes is not None:
            return self.station_codes

        state_urls = self._get_state_urls()

        # filter by bounding box against a shapefile; None disables the
        # per-state pre-filter
        state_matches = None

        if self.bbox:
            with collection(os.path.join("resources", "ne_50m_admin_1_states_provinces_lakes_shp.shp"), "r") as c:
                # every Canadian feature is mapped to the single code 'CN'
                state_matches = set(
                    x['properties']['postal'] if x['properties']['admin'] != 'Canada' else u'CN'
                    for x in c.filter(bbox=self.bbox)
                )

        station_codes = []

        for state_url in state_urls:
            if state_matches is not None:
                # last path component without its extension
                state_abbr = state_url.split("/")[-1].split(".")[0]
                if state_abbr not in state_matches:
                    continue

            station_codes.extend(self._get_stations_for_state(state_url))

        if self.bbox:
            # retrieve metadata for all stations to properly filter them
            metadata        = self._get_metadata(station_codes)
            parsed_metadata = self.parser._parse_metadata(metadata)

            def in_bbox(code):
                lat = parsed_metadata[code]['latitude']
                lon = parsed_metadata[code]['longitude']

                # bbox is indexed as (min lon, min lat, max lon, max lat)
                return lon >= self.bbox[0] and lon <= self.bbox[2] and lat >= self.bbox[1] and lat <= self.bbox[3]

            station_codes = [code for code in station_codes if in_bbox(code)]

        self.station_codes = station_codes
        return self.station_codes

    def _get_state_urls(self):
        """
        Scrape the states page and return the distinct ``href`` values of
        its ``<area>`` elements.

        Elements without an ``href`` are skipped; previously they added
        ``None`` to the result, which callers cannot use as a URL.

        :returns: list of unique URL strings (order not defined)
        """
        root = BeautifulSoup(requests.get(self.states_url).text)
        hrefs = set(area.attrs.get('href') for area in root.find_all("area"))
        hrefs.discard(None)
        return list(hrefs)

    def _get_stations_for_state(self, state_url):
        """
        Scrape one state page and return the station codes it links to.

        The code is whatever follows ``nesdis_id=`` in each anchor's href.
        Always returns a real list (a lazy ``filter`` object on Python 3
        could only be consumed once), and skips anchors that have no href
        instead of raising ``KeyError``.

        NOTE(review): an href without ``nesdis_id=`` is returned whole, as
        before - confirm every link on these pages is a station link.

        :param state_url: URL of the state page
        :returns: list of non-empty station code strings, in document order
        """
        state_root = BeautifulSoup(requests.get(state_url).text)

        hrefs = (a.attrs.get('href') for a in state_root.find_all('a'))
        codes = (href.split("nesdis_id=")[-1] for href in hrefs if href)
        return [code for code in codes if len(code) > 0]

    def _get_raw_data(self, station_codes):
        """
        Fetch the decoded observations for the given stations as text.

        The ``sinceday`` look-back defaults to 7 and is derived from
        ``start_time`` when that is set. Per the inline comments, positive
        values are days (capped at 7) and negative values are whole hours
        (at least 1).

        Hours and minutes use explicit floor division so the value sent is
        an integer on Python 2 and Python 3 alike. With ``/`` Python 3 would
        send fractional floats and never reach the one-hour minimum branch.

        NOTE(review): a start_time less than a minute in the past leaves the
        7 day default in place, as before - confirm that is intended.

        :param station_codes: iterable of station code strings
        :returns: response body text
        :raises requests.HTTPError: if the service answers with an error status
        """
        since = 7
        if hasattr(self, 'start_time') and self.start_time is not None:
            # calc delta between now and start_time
            timediff = datetime.utcnow().replace(tzinfo=pytz.utc) - self.start_time

            if timediff.days == 0:
                hours_back = timediff.seconds // 60 // 60
                minutes_back = timediff.seconds // 60
                if hours_back > 0:
                    since = -hours_back
                elif minutes_back > 0:
                    since = -1  # 1 hour minimum resolution
            else:
                since = min(7, timediff.days)       # max of 7 days

        resp = requests.post(self.obs_retrieval_url, data={'state'    : 'nil',
                                                           'hsa'      : 'nil',
                                                           'of'       : '1',
                                                           'extraids' : " ".join(station_codes),
                                                           'sinceday' : since})
        resp.raise_for_status()

        return resp.text
Ejemplo n.º 4
0
class HadsParserTest(unittest.TestCase):
    def setUp(self):
        """
        Create the parser under test and load the captured service responses
        used by every test: pipe-delimited, CRLF-terminated records.
        """
        self.hp = HadsParser()

        # captured metadata 26 July 2013 for stations ['DD182264', '17BC752E', 'CE4D0268']
        self.metadata = u"|17BC752E|WKGR1|CHIPUXET RIVER AT WEST KINGSTON|41 28 56|-71 33 06|BOX|RI|USGS01|SI|83  |002840|60|HG|15,-9|0.01,-9|0.0|13|0.0|-0.01|VB|60,-9|0.3124,-9|0.311|28|0.0|0.0|\r\n|CE4D0268|FOXR1|FOXPOINT HURRICANE BARRIER|41 48 57|-71 24 07|BOX|RI|CENED1|SU|161 |000100|30|HM|60,-9|0.01,-9|0.0|1|0.0|0.0|PA|60,-9|0.01,-9|0.0|1|0.0|0.0|TA|60,-9|0.1,-9|0.0|1|0.0|0.0|US|60,-9|1,-9|0.0|1|0.0|0.0|UD|60,-9|1,-9|0.0|1|0.0|0.0|\r\n|DD182264|USQR1|USQUEPAUG RIVER NEAR USQUEPAUG|41 28 36|-71 36 19|BOX|RI|USGS01|SI|83  |005830|60|HG|15,-9|0.01,-9|0.0|13|0.0|0.0|VB|60,-9|0.3124,-9|0.311|58|0.0|0.0|\r\n"  # noqa

        # captured data 26 July 2013 for stations ['DD182264', '17BC752E', 'CE4D0268']
        # (only CE4D0268 and DD182264 actually have observation rows)
        self.raw_data = u"CE4D0268|FOXR1|HM|2013-07-26 16:30|4.94|  |\r\nCE4D0268|FOXR1|HM|2013-07-26 17:00|4.41|  |\r\nCE4D0268|FOXR1|PA|2013-07-26 16:30|29.93|  |\r\nCE4D0268|FOXR1|PA|2013-07-26 17:00|29.93|  |\r\nCE4D0268|FOXR1|TA|2013-07-26 16:30|66.20|  |\r\nCE4D0268|FOXR1|TA|2013-07-26 17:00|67.30|  |\r\nCE4D0268|FOXR1|US|2013-07-26 16:30|5.00|  |\r\nCE4D0268|FOXR1|US|2013-07-26 17:00|8.00|  |\r\nCE4D0268|FOXR1|UD|2013-07-26 16:30|358.00|  |\r\nCE4D0268|FOXR1|UD|2013-07-26 17:00|353.00|  |\r\nDD182264|USQR1|HG|2013-07-26 16:30|3.07|  |\r\nDD182264|USQR1|HG|2013-07-26 16:45|3.07|  |\r\n"  # noqa

    def test_parse(self):
        """
        parse() with no variable filter and an open time range should give
        a StationCollection of size 3 for the captured fixtures.
        """
        unfiltered = None
        any_time = (None, None)

        result = self.hp.parse(
            self.metadata, self.raw_data, unfiltered, any_time
        )
        assert isinstance(result, StationCollection)

        result.calculate_bounds()
        assert result.size == 3

    def test__parse_metadata(self):
        """
        _parse_metadata() should turn the captured metadata into one dict per
        station, keyed by NESDIS id, with a nested dict per variable.

        NOTE(review): the expected longitudes equal deg + min/60 + sec/3600
        with a negative degree term (e.g. "-71 33 06" -> -70.448...), i.e.
        they pin the parser's current sign handling - confirm that is the
        intended value rather than -71.55.
        """

        def var_entry(
            data_interval,
            coefficient,
            constant,
            time_offset,
            gauge_correction=0.0,
        ):
            # every variable in the fixture has a base elevation of 0.0
            return {
                "base_elevation": 0.0,
                "coefficient": coefficient,
                "constant": constant,
                "data_interval": data_interval,
                "gauge_correction": gauge_correction,
                "time_offset": time_offset,
            }

        expected = {}

        expected[u"17BC752E"] = {
            "nesdis_id": u"17BC752E",
            "nwsli": u"WKGR1",
            "location_text": u"CHIPUXET RIVER AT WEST KINGSTON",
            "latitude": 41.48222222222222,
            "longitude": -70.44833333333334,
            "hsa": u"BOX",
            "state": u"RI",
            "owner": u"USGS01",
            "manufacturer": u"SI",
            "channel": u"83  ",
            "init_transmit": u"002840",
            "variables": {
                u"HG": var_entry(
                    u"15,-9",
                    u"0.01,-9",
                    u"0.0",
                    u"13",
                    gauge_correction=-0.01,
                ),
                u"VB": var_entry(u"60,-9", u"0.3124,-9", u"0.311", u"28"),
            },
        }

        expected[u"CE4D0268"] = {
            "nesdis_id": u"CE4D0268",
            "nwsli": u"FOXR1",
            "location_text": u"FOXPOINT HURRICANE BARRIER",
            "latitude": 41.81583333333333,
            "longitude": -70.59805555555556,
            "hsa": u"BOX",
            "state": u"RI",
            "owner": u"CENED1",
            "manufacturer": u"SU",
            "channel": u"161 ",
            "init_transmit": u"000100",
            "variables": {
                u"HM": var_entry(u"60,-9", u"0.01,-9", u"0.0", u"1"),
                u"PA": var_entry(u"60,-9", u"0.01,-9", u"0.0", u"1"),
                u"TA": var_entry(u"60,-9", u"0.1,-9", u"0.0", u"1"),
                u"UD": var_entry(u"60,-9", u"1,-9", u"0.0", u"1"),
                u"US": var_entry(u"60,-9", u"1,-9", u"0.0", u"1"),
            },
        }

        expected[u"DD182264"] = {
            "nesdis_id": u"DD182264",
            "nwsli": u"USQR1",
            "location_text": u"USQUEPAUG RIVER NEAR USQUEPAUG",
            "latitude": 41.47666666666667,
            "longitude": -70.39472222222223,
            "hsa": u"BOX",
            "state": u"RI",
            "owner": u"USGS01",
            "manufacturer": u"SI",
            "channel": u"83  ",
            "init_transmit": u"005830",
            "variables": {
                u"HG": var_entry(u"15,-9", u"0.01,-9", u"0.0", u"13"),
                u"VB": var_entry(u"60,-9", u"0.3124,-9", u"0.311", u"58"),
            },
        }

        parsed = self.hp._parse_metadata(self.metadata)
        assert parsed == expected

    def test__parse_data(self):
        """
        _parse_data() without filters should return, per station id, the
        (variable, UTC-aware datetime, float value) rows in fixture order.
        """

        def utc(hour, minute):
            # all fixture rows are from 26 July 2013
            naive = datetime.datetime(2013, 7, 26, hour, minute)
            return naive.replace(tzinfo=pytz.utc)

        readings = (
            (u"HM", 4.94, 4.41),
            (u"PA", 29.93, 29.93),
            (u"TA", 66.20, 67.30),
            (u"US", 5.00, 8.00),
            (u"UD", 358.00, 353.00),
        )
        foxpoint_rows = []
        for code, first, second in readings:
            # each variable has a 16:30 reading followed by a 17:00 one
            foxpoint_rows.append((code, utc(16, 30), first))
            foxpoint_rows.append((code, utc(17, 0), second))

        expected = {
            u"CE4D0268": foxpoint_rows,
            u"DD182264": [
                (u"HG", utc(16, 30), 3.07),
                (u"HG", utc(16, 45), 3.07),
            ],
        }

        parsed = self.hp._parse_data(self.raw_data, None, (None, None))
        assert parsed == expected

    def test__parse_data_with_var_filter(self):
        """
        _parse_data() with a variable filter of HM and UD should keep only
        those rows; the station with only HG readings drops out entirely.
        """

        def utc(hour, minute):
            # all fixture rows are from 26 July 2013
            naive = datetime.datetime(2013, 7, 26, hour, minute)
            return naive.replace(tzinfo=pytz.utc)

        expected_rows = [
            (u"HM", utc(16, 30), 4.94),
            (u"HM", utc(17, 0), 4.41),
            (u"UD", utc(16, 30), 358.00),
            (u"UD", utc(17, 0), 353.00),
        ]
        wanted_vars = [u"HM", u"UD"]

        parsed = self.hp._parse_data(self.raw_data, wanted_vars, (None, None))
        assert parsed == {u"CE4D0268": expected_rows}
Ejemplo n.º 5
0
class HadsParserTest(unittest.TestCase):
    """
    Exercises HadsParser against metadata and observation text captured on
    26 July 2013 for stations DD182264, 17BC752E and CE4D0268.
    """

    @staticmethod
    def _utc(hour, minute):
        # every captured observation is dated 26 July 2013 and tagged as UTC
        stamp = datetime.datetime(2013, 7, 26, hour, minute)
        return stamp.replace(tzinfo=pytz.utc)

    def setUp(self):
        self.hp = HadsParser()

        # captured metadata 26 July 2013 for stations
        # ['DD182264', '17BC752E', 'CE4D0268']; one literal per station
        # record, joined by implicit concatenation into a single string
        self.metadata = (
            u"|17BC752E|WKGR1|CHIPUXET RIVER AT WEST KINGSTON|41 28 56|-71 33 06|BOX|RI|USGS01|SI|83  |002840|60|HG|15,-9|0.01,-9|0.0|13|0.0|-0.01|VB|60,-9|0.3124,-9|0.311|28|0.0|0.0|\r\n"
            u"|CE4D0268|FOXR1|FOXPOINT HURRICANE BARRIER|41 48 57|-71 24 07|BOX|RI|CENED1|SU|161 |000100|30|HM|60,-9|0.01,-9|0.0|1|0.0|0.0|PA|60,-9|0.01,-9|0.0|1|0.0|0.0|TA|60,-9|0.1,-9|0.0|1|0.0|0.0|US|60,-9|1,-9|0.0|1|0.0|0.0|UD|60,-9|1,-9|0.0|1|0.0|0.0|\r\n"
            u"|DD182264|USQR1|USQUEPAUG RIVER NEAR USQUEPAUG|41 28 36|-71 36 19|BOX|RI|USGS01|SI|83  |005830|60|HG|15,-9|0.01,-9|0.0|13|0.0|0.0|VB|60,-9|0.3124,-9|0.311|58|0.0|0.0|\r\n"
        )

        # captured data 26 July 2013 for stations
        # ['DD182264', '17BC752E', 'CE4D0268']; one literal per observation
        self.raw_data = (
            u"CE4D0268|FOXR1|HM|2013-07-26 16:30|4.94|  |\r\n"
            u"CE4D0268|FOXR1|HM|2013-07-26 17:00|4.41|  |\r\n"
            u"CE4D0268|FOXR1|PA|2013-07-26 16:30|29.93|  |\r\n"
            u"CE4D0268|FOXR1|PA|2013-07-26 17:00|29.93|  |\r\n"
            u"CE4D0268|FOXR1|TA|2013-07-26 16:30|66.20|  |\r\n"
            u"CE4D0268|FOXR1|TA|2013-07-26 17:00|67.30|  |\r\n"
            u"CE4D0268|FOXR1|US|2013-07-26 16:30|5.00|  |\r\n"
            u"CE4D0268|FOXR1|US|2013-07-26 17:00|8.00|  |\r\n"
            u"CE4D0268|FOXR1|UD|2013-07-26 16:30|358.00|  |\r\n"
            u"CE4D0268|FOXR1|UD|2013-07-26 17:00|353.00|  |\r\n"
            u"DD182264|USQR1|HG|2013-07-26 16:30|3.07|  |\r\n"
            u"DD182264|USQR1|HG|2013-07-26 16:45|3.07|  |\r\n"
        )

    def test_parse(self):
        # a full parse yields a StationCollection holding the three stations
        stations = self.hp.parse(
            self.metadata, self.raw_data, None, (None, None)
        )
        assert isinstance(stations, StationCollection)

        stations.calculate_bounds()
        assert stations.size == 3

    def test__parse_metadata(self):
        def variable(interval, coefficient, constant, offset, gauge=0.0):
            # per-variable record; base_elevation is 0.0 for every variable
            # in the captured metadata
            return {
                "data_interval": interval,
                "coefficient": coefficient,
                "constant": constant,
                "time_offset": offset,
                "base_elevation": 0.0,
                "gauge_correction": gauge,
            }

        # NOTE(review): each expected longitude equals deg + min/60 + sec/3600
        # with a negative degree (e.g. "-71 33 06" -> -70.448...), which lies
        # east of the conventional reading -71.5517 -- confirm this is the
        # intended parser behaviour before relying on these values.
        expected = {
            u"17BC752E": {
                "nesdis_id": u"17BC752E",
                "nwsli": u"WKGR1",
                "location_text": u"CHIPUXET RIVER AT WEST KINGSTON",
                "latitude": 41.48222222222222,
                "longitude": -70.44833333333334,
                "hsa": u"BOX",
                "state": u"RI",
                "owner": u"USGS01",
                "manufacturer": u"SI",
                "channel": u"83  ",
                "init_transmit": u"002840",
                "variables": {
                    u"HG": variable(
                        u"15,-9", u"0.01,-9", u"0.0", u"13", gauge=-0.01
                    ),
                    u"VB": variable(u"60,-9", u"0.3124,-9", u"0.311", u"28"),
                },
            },
            u"CE4D0268": {
                "nesdis_id": u"CE4D0268",
                "nwsli": u"FOXR1",
                "location_text": u"FOXPOINT HURRICANE BARRIER",
                "latitude": 41.81583333333333,
                "longitude": -70.59805555555556,
                "hsa": u"BOX",
                "state": u"RI",
                "owner": u"CENED1",
                "manufacturer": u"SU",
                "channel": u"161 ",
                "init_transmit": u"000100",
                "variables": {
                    u"HM": variable(u"60,-9", u"0.01,-9", u"0.0", u"1"),
                    u"PA": variable(u"60,-9", u"0.01,-9", u"0.0", u"1"),
                    u"TA": variable(u"60,-9", u"0.1,-9", u"0.0", u"1"),
                    u"UD": variable(u"60,-9", u"1,-9", u"0.0", u"1"),
                    u"US": variable(u"60,-9", u"1,-9", u"0.0", u"1"),
                },
            },
            u"DD182264": {
                "nesdis_id": u"DD182264",
                "nwsli": u"USQR1",
                "location_text": u"USQUEPAUG RIVER NEAR USQUEPAUG",
                "latitude": 41.47666666666667,
                "longitude": -70.39472222222223,
                "hsa": u"BOX",
                "state": u"RI",
                "owner": u"USGS01",
                "manufacturer": u"SI",
                "channel": u"83  ",
                "init_transmit": u"005830",
                "variables": {
                    u"HG": variable(u"15,-9", u"0.01,-9", u"0.0", u"13"),
                    u"VB": variable(u"60,-9", u"0.3124,-9", u"0.311", u"58"),
                },
            },
        }

        actual = self.hp._parse_metadata(self.metadata)
        assert actual == expected

    def test__parse_data(self):
        # no variable filter and open time extents: every reading is expected
        utc = self._utc
        expected = {
            u"CE4D0268": [
                (u"HM", utc(16, 30), 4.94),
                (u"HM", utc(17, 0), 4.41),
                (u"PA", utc(16, 30), 29.93),
                (u"PA", utc(17, 0), 29.93),
                (u"TA", utc(16, 30), 66.20),
                (u"TA", utc(17, 0), 67.30),
                (u"US", utc(16, 30), 5.00),
                (u"US", utc(17, 0), 8.00),
                (u"UD", utc(16, 30), 358.00),
                (u"UD", utc(17, 0), 353.00),
            ],
            u"DD182264": [
                (u"HG", utc(16, 30), 3.07),
                (u"HG", utc(16, 45), 3.07),
            ],
        }

        actual = self.hp._parse_data(self.raw_data, None, (None, None))
        assert actual == expected

    def test__parse_data_with_var_filter(self):
        # with HM and UD as the variable filter only those readings remain
        utc = self._utc
        wanted = [u"HM", u"UD"]
        expected = {
            u"CE4D0268": [
                (u"HM", utc(16, 30), 4.94),
                (u"HM", utc(17, 0), 4.41),
                (u"UD", utc(16, 30), 358.00),
                (u"UD", utc(17, 0), 353.00),
            ]
        }

        actual = self.hp._parse_data(self.raw_data, wanted, (None, None))
        assert actual == expected
Ejemplo n.º 6
0
Archivo: hads.py Proyecto: ioos/pyoos
class Hads(Collector):
    def __init__(self, **kwargs):
        """
        Set up the service endpoints, an empty station-code cache and the
        parser.

        Each of ``states_url``, ``metadata_url`` and ``obs_retrieval_url``
        may be overridden through a keyword argument of the same name;
        otherwise the hads.ncep.noaa.gov default listed below is used.
        """
        super(Hads, self).__init__()

        endpoint_defaults = (
            ("states_url", "https://hads.ncep.noaa.gov/hads/goog_earth/"),
            (
                "metadata_url",
                "https://hads.ncep.noaa.gov/nexhads2/servlet/DCPInfo",
            ),
            (
                "obs_retrieval_url",
                "https://hads.ncep.noaa.gov/nexhads2/servlet/DecodedData",
            ),
        )
        for attr_name, default_url in endpoint_defaults:
            setattr(self, attr_name, kwargs.get(attr_name, default_url))

        # None means "not fetched yet"; filled in by _get_station_codes
        self.station_codes = None
        self.parser = HadsParser()

    def clear(self):
        """
        Run the base collector's ``clear`` and forget the cached station
        codes so the next lookup fetches them again.
        """
        super(Hads, self).clear()
        self.station_codes = None

    @Collector.bbox.setter
    def bbox(self, bbox):
        """
        Store the bounding box through the base-class setter and drop the
        cached station codes, which were computed for the previous value.
        """
        Collector.bbox.fset(self, bbox)
        # invalidate the cache: _get_station_codes filters by bbox
        self.station_codes = None

    @Collector.features.setter
    def features(self, features):
        """
        Store the features filter through the base-class setter and drop the
        cached station codes.
        """
        Collector.features.fset(self, features)
        # reset the cache whenever the filter changes
        self.station_codes = None

    def list_variables(self):
        """
        List available variables and applies any filters.

        The station codes are narrowed by the features filter first; when a
        variables filter is set, only variables present in both the service
        response and that filter are returned.
        """
        codes = self._apply_features_filter(self._get_station_codes())
        found = self._list_variables(codes)

        if hasattr(self, "_variables") and self.variables is not None:
            # in-place intersection with the configured variable filter
            found &= set(self.variables)

        return list(found)

    def _list_variables(self, station_codes):
        """
        Internal helper to list the variables for the given station codes.

        Posts the codes to ``self.obs_retrieval_url`` and scans the text of
        the response for variable codes.

        :param station_codes: iterable of station code strings; they are
            sent space-separated in the ``extraids`` form field.
        :returns: set of the distinct variable codes matched in the response.
        :raises requests.HTTPError: when the service answers with an error
            status (raised by ``raise_for_status``).
        """
        # sample output from obs retrieval:
        #
        # DD9452D0
        #     HP(SRBM5)
        #         2013-07-22 19:30 45.97
        #     HT(SRBM5)
        #         2013-07-22 19:30 44.29
        #     PC(SRBM5)
        #         2013-07-22 19:30 36.19
        #
        rvar = re.compile(r"\n\s([A-Z]{2}[A-Z0-9]{0,1})\(\w+\)")

        resp = requests.post(
            self.obs_retrieval_url,
            data={
                "state": "nil",
                "hsa": "nil",
                "of": "3",
                "extraids": " ".join(station_codes),
                "sinceday": -1,
            },
        )
        resp.raise_for_status()

        # findall yields the captured code of every match; building the set
        # directly removes duplicates without a throwaway list of None values
        return set(rvar.findall(resp.text))

    def list_features(self):
        """
        Return the station codes that remain after the features filter is
        applied to the (possibly cached) station code list.
        """
        return self._apply_features_filter(self._get_station_codes())

    def collect(self, **kwargs):
        """
        Fetch metadata and observations and hand them to the parser.

        ``kwargs`` are forwarded unchanged to ``raw``. The variable filter
        and the (start, end) time extents are passed to the parser; each is
        ``None`` when the corresponding attribute does not exist.
        """
        var_filter = getattr(self, "_variables", None)
        time_extents = (
            getattr(self, "start_time", None),
            getattr(self, "end_time", None),
        )

        metadata, raw_data = self.raw(**kwargs)
        return self.parser.parse(metadata, raw_data, var_filter, time_extents)

    def raw(self, format=None, **kwargs):
        """
        Returns a tuple of (metadata, raw data)

        ``format`` is accepted but not used in this method; ``kwargs`` are
        forwarded to both retrieval helpers.
        """
        codes = self._get_station_codes()
        codes = self._apply_features_filter(codes)

        # metadata is requested first, then the observations
        return (
            self._get_metadata(codes, **kwargs),
            self._get_raw_data(codes, **kwargs),
        )

    def _apply_features_filter(self, station_codes):
        """
        If the features filter is set, this will return the intersection of
        those filter items and the given station codes.

        Without a filter the given station codes are returned unchanged; with
        one, the result is a new list built from a set intersection, so the
        input order is not kept.
        """
        # nothing to filter by: hand the input straight back
        if not hasattr(self, "features") or self.features is None:
            return station_codes

        wanted = set(self.features)
        return list(set(station_codes).intersection(wanted))

    def _get_metadata(self, station_codes, **kwargs):
        """
        Post the station codes to ``self.metadata_url`` and return the text
        of the response.

        :param station_codes: iterable of station code strings, sent
            space-separated in the ``extraids`` form field.
        :param kwargs: only ``verify`` is read; it is passed to ``requests``
            as the certificate-verification setting and defaults to True.
        :raises requests.HTTPError: when the service answers with an error
            status (raised by ``raise_for_status``).
        """
        form = {
            "state": "nil",
            "hsa": "nil",
            "of": "1",
            "extraids": " ".join(station_codes),
            "data": "Get Meta Data",
        }

        response = requests.post(
            self.metadata_url,
            data=form,
            verify=kwargs.get("verify", True),  # True is requests' default
        )
        response.raise_for_status()
        return response.text

    def _get_station_codes(self, force=False):
        """
        Gets and caches a list of station codes optionally within a bbox.

        Will return the cached version if it exists unless force is True.

        The list is assembled in a local variable and only stored on
        ``self.station_codes`` once every request and the bbox filtering
        have succeeded, so a failure part-way through never leaves a partial
        or unfiltered list behind in the cache.

        :param force: when True, ignore any cached list and fetch again.
        :returns: list of station code strings (also stored on
            ``self.station_codes``).
        """
        if not force and self.station_codes is not None:
            return self.station_codes

        state_urls = self._get_state_urls()

        # filter by bounding box against a shapefile
        state_matches = None

        if self.bbox:
            # NOTE(review): this path is relative, so it is resolved against
            # the current working directory -- confirm that is intended.
            shapefile = os.path.join(
                "resources",
                "ne_50m_admin_1_states_provinces_lakes_shp.shp",
            )
            with collection(shapefile, "r") as c:
                geom_matches = [
                    x["properties"] for x in c.filter(bbox=self.bbox)
                ]
                # a set, since it is only used for membership tests below;
                # every Canadian match is mapped to the single code "CN"
                state_matches = {
                    x["postal"] if x["admin"] != "Canada" else "CN"
                    for x in geom_matches
                }

        station_codes = []

        for state_url in state_urls:
            if not state_url:
                # an <area> without an href gives no page to fetch
                continue

            if state_matches is not None:
                # the state abbreviation is the URL's file name minus its
                # extension
                state_abbr = state_url.split("/")[-1].split(".")[0]
                if state_abbr not in state_matches:
                    continue

            station_codes.extend(self._get_stations_for_state(state_url))

        if self.bbox:
            # retrieve metadata for all stations to properly filter them
            metadata = self._get_metadata(station_codes)
            parsed_metadata = self.parser._parse_metadata(metadata)
            bbox = self.bbox

            def in_bbox(code):
                lat = parsed_metadata[code]["latitude"]
                lon = parsed_metadata[code]["longitude"]

                # bbox is indexed as (min lon, min lat, max lon, max lat)
                return (
                    bbox[0] <= lon <= bbox[2] and bbox[1] <= lat <= bbox[3]
                )

            station_codes = [code for code in station_codes if in_bbox(code)]

        # publish to the cache only now that everything succeeded
        self.station_codes = station_codes
        return self.station_codes

    def _get_state_urls(self):
        """
        Fetch ``self.states_url`` and return the distinct ``href`` values of
        its ``<area>`` elements.

        :returns: list of URL strings in no particular order; ``<area>``
            elements without an ``href`` are left out.
        :raises requests.HTTPError: when the page request answers with an
            error status, matching the other request helpers in this class.
        """
        resp = requests.get(self.states_url)
        resp.raise_for_status()

        # NOTE(review): no parser is named, so BeautifulSoup picks whichever
        # one is installed -- consider pinning one for reproducible results.
        root = BeautifulSoup(resp.text)

        hrefs = {area.attrs.get("href") for area in root.find_all("area")}
        # an <area> without href would otherwise surface as a None "URL"
        hrefs.discard(None)
        return list(hrefs)

    def _get_stations_for_state(self, state_url):
        """
        Fetch a state page and return the station codes found in its links.

        For every ``<a>`` element, the text after the last ``nesdis_id=`` in
        its ``href`` is taken as the code; an href that does not contain
        ``nesdis_id=`` is kept whole. Empty results are dropped.

        :param state_url: URL of the state page to fetch.
        :returns: list of non-empty code strings, in document order.
        :raises requests.HTTPError: when the page request answers with an
            error status, matching the other request helpers in this class.
        """
        resp = requests.get(state_url)
        resp.raise_for_status()
        state_root = BeautifulSoup(resp.text)

        # anchors without an href yield "" here and are filtered out below,
        # instead of raising KeyError
        candidates = (
            anchor.attrs.get("href", "").split("nesdis_id=")[-1]
            for anchor in state_root.find_all("a")
        )
        return [code for code in candidates if code]

    def _get_raw_data(self, station_codes, **kwargs):
        """
        Post the station codes to ``self.obs_retrieval_url`` and return the
        text of the response.

        The ``sinceday`` form field defaults to 7. When ``start_time`` is
        set, it is derived from the time elapsed since then: a whole number
        of hours is sent as a negative value (at least -1 once a full minute
        has passed), a whole number of days as a positive value capped at 7.

        :param station_codes: iterable of station code strings, sent
            space-separated in the ``extraids`` form field.
        :param kwargs: only ``verify`` is read; it is passed to ``requests``
            as the certificate-verification setting and defaults to True.
        :raises requests.HTTPError: when the service answers with an error
            status (raised by ``raise_for_status``).
        """
        verify_cert = kwargs.get("verify", True)  # True is requests' default

        since = 7
        if hasattr(self, "start_time") and self.start_time is not None:
            # calc delta between now and start_time (both timezone-aware)
            timediff = datetime.now(pytz.utc) - self.start_time

            if timediff.days == 0:
                # floor division keeps these integers; with true division
                # the hour count becomes a float and the minute branch can
                # never be reached
                # NOTE(review): hours and days are truncated, so the window
                # requested can begin later than start_time -- confirm
                # whether rounding up is wanted.
                whole_hours = timediff.seconds // 3600
                if whole_hours > 0:
                    since = -whole_hours
                elif timediff.seconds // 60 > 0:
                    since = -1  # 1 hour minimum resolution
            else:
                since = min(7, timediff.days)  # max of 7 days

        resp = requests.post(
            self.obs_retrieval_url,
            data={
                "state": "nil",
                "hsa": "nil",
                "of": "1",
                "extraids": " ".join(station_codes),
                "sinceday": since,
            },
            verify=verify_cert,
        )
        resp.raise_for_status()

        return resp.text