# Tests for the HADS metadata/observation parser (a separate module from the
# collector that follows).
import datetime
import unittest

import pytz

# Project-internal imports; the exact module paths are not shown in this
# excerpt and are assumed from the package layout.
from pyoos.parsers.hads import HadsParser
from paegan.cdm.dsg.collections.station_collection import StationCollection


class HadsParserTest(unittest.TestCase):
    def setUp(self):
        self.hp = HadsParser()

        # captured metadata 26 July 2013 for stations ['DD182264', '17BC752E', 'CE4D0268']
        self.metadata = u"|17BC752E|WKGR1|CHIPUXET RIVER AT WEST KINGSTON|41 28 56|-71 33 06|BOX|RI|USGS01|SI|83 |002840|60|HG|15,-9|0.01,-9|0.0|13|0.0|-0.01|VB|60,-9|0.3124,-9|0.311|28|0.0|0.0|\r\n|CE4D0268|FOXR1|FOXPOINT HURRICANE BARRIER|41 48 57|-71 24 07|BOX|RI|CENED1|SU|161 |000100|30|HM|60,-9|0.01,-9|0.0|1|0.0|0.0|PA|60,-9|0.01,-9|0.0|1|0.0|0.0|TA|60,-9|0.1,-9|0.0|1|0.0|0.0|US|60,-9|1,-9|0.0|1|0.0|0.0|UD|60,-9|1,-9|0.0|1|0.0|0.0|\r\n|DD182264|USQR1|USQUEPAUG RIVER NEAR USQUEPAUG|41 28 36|-71 36 19|BOX|RI|USGS01|SI|83 |005830|60|HG|15,-9|0.01,-9|0.0|13|0.0|0.0|VB|60,-9|0.3124,-9|0.311|58|0.0|0.0|\r\n"  # noqa

        # captured data 26 July 2013 for stations ['DD182264', '17BC752E', 'CE4D0268']
        self.raw_data = u"CE4D0268|FOXR1|HM|2013-07-26 16:30|4.94| |\r\nCE4D0268|FOXR1|HM|2013-07-26 17:00|4.41| |\r\nCE4D0268|FOXR1|PA|2013-07-26 16:30|29.93| |\r\nCE4D0268|FOXR1|PA|2013-07-26 17:00|29.93| |\r\nCE4D0268|FOXR1|TA|2013-07-26 16:30|66.20| |\r\nCE4D0268|FOXR1|TA|2013-07-26 17:00|67.30| |\r\nCE4D0268|FOXR1|US|2013-07-26 16:30|5.00| |\r\nCE4D0268|FOXR1|US|2013-07-26 17:00|8.00| |\r\nCE4D0268|FOXR1|UD|2013-07-26 16:30|358.00| |\r\nCE4D0268|FOXR1|UD|2013-07-26 17:00|353.00| |\r\nDD182264|USQR1|HG|2013-07-26 16:30|3.07| |\r\nDD182264|USQR1|HG|2013-07-26 16:45|3.07| |\r\n"  # noqa

    def test_parse(self):
        station_collection = self.hp.parse(
            self.metadata, self.raw_data, None, (None, None)
        )
        assert isinstance(station_collection, StationCollection)

        station_collection.calculate_bounds()
        assert station_collection.size == 3

    def test__parse_metadata(self):
        res = {
            u"17BC752E": {
                "channel": u"83 ",
                "hsa": u"BOX",
                "init_transmit": u"002840",
                "latitude": 41.48222222222222,
                "location_text": u"CHIPUXET RIVER AT WEST KINGSTON",
                "longitude": -70.44833333333334,
                "manufacturer": u"SI",
                "nesdis_id": u"17BC752E",
                "nwsli": u"WKGR1",
                "owner": u"USGS01",
                "state": u"RI",
                "variables": {
                    u"HG": {
                        "base_elevation": 0.0, "coefficient": u"0.01,-9", "constant": u"0.0",
                        "data_interval": u"15,-9", "gauge_correction": -0.01, "time_offset": u"13",
                    },
                    u"VB": {
                        "base_elevation": 0.0, "coefficient": u"0.3124,-9", "constant": u"0.311",
                        "data_interval": u"60,-9", "gauge_correction": 0.0, "time_offset": u"28",
                    },
                },
            },
            u"CE4D0268": {
                "channel": u"161 ",
                "hsa": u"BOX",
                "init_transmit": u"000100",
                "latitude": 41.81583333333333,
                "location_text": u"FOXPOINT HURRICANE BARRIER",
                "longitude": -70.59805555555556,
                "manufacturer": u"SU",
                "nesdis_id": u"CE4D0268",
                "nwsli": u"FOXR1",
                "owner": u"CENED1",
                "state": u"RI",
                "variables": {
                    u"HM": {
                        "base_elevation": 0.0, "coefficient": u"0.01,-9", "constant": u"0.0",
                        "data_interval": u"60,-9", "gauge_correction": 0.0, "time_offset": u"1",
                    },
                    u"PA": {
                        "base_elevation": 0.0, "coefficient": u"0.01,-9", "constant": u"0.0",
                        "data_interval": u"60,-9", "gauge_correction": 0.0, "time_offset": u"1",
                    },
                    u"TA": {
                        "base_elevation": 0.0, "coefficient": u"0.1,-9", "constant": u"0.0",
                        "data_interval": u"60,-9", "gauge_correction": 0.0, "time_offset": u"1",
                    },
                    u"UD": {
                        "base_elevation": 0.0, "coefficient": u"1,-9", "constant": u"0.0",
                        "data_interval": u"60,-9", "gauge_correction": 0.0, "time_offset": u"1",
                    },
                    u"US": {
                        "base_elevation": 0.0, "coefficient": u"1,-9", "constant": u"0.0",
                        "data_interval": u"60,-9", "gauge_correction": 0.0, "time_offset": u"1",
                    },
                },
            },
            u"DD182264": {
                "channel": u"83 ",
                "hsa": u"BOX",
                "init_transmit": u"005830",
                "latitude": 41.47666666666667,
                "location_text": u"USQUEPAUG RIVER NEAR USQUEPAUG",
                "longitude": -70.39472222222223,
                "manufacturer": u"SI",
                "nesdis_id": u"DD182264",
                "nwsli": u"USQR1",
                "owner": u"USGS01",
                "state": u"RI",
                "variables": {
                    u"HG": {
                        "base_elevation": 0.0, "coefficient": u"0.01,-9", "constant": u"0.0",
                        "data_interval": u"15,-9", "gauge_correction": 0.0, "time_offset": u"13",
                    },
                    u"VB": {
                        "base_elevation": 0.0, "coefficient": u"0.3124,-9", "constant": u"0.311",
                        "data_interval": u"60,-9", "gauge_correction": 0.0, "time_offset": u"58",
                    },
                },
            },
        }
        parsed = self.hp._parse_metadata(self.metadata)
        assert parsed == res

    def test__parse_data(self):
        res = {
            u"CE4D0268": [
                (u"HM", datetime.datetime(2013, 7, 26, 16, 30).replace(tzinfo=pytz.utc), 4.94),
                (u"HM", datetime.datetime(2013, 7, 26, 17, 0).replace(tzinfo=pytz.utc), 4.41),
                (u"PA", datetime.datetime(2013, 7, 26, 16, 30).replace(tzinfo=pytz.utc), 29.93),
                (u"PA", datetime.datetime(2013, 7, 26, 17, 0).replace(tzinfo=pytz.utc), 29.93),
                (u"TA", datetime.datetime(2013, 7, 26, 16, 30).replace(tzinfo=pytz.utc), 66.20),
                (u"TA", datetime.datetime(2013, 7, 26, 17, 0).replace(tzinfo=pytz.utc), 67.30),
                (u"US", datetime.datetime(2013, 7, 26, 16, 30).replace(tzinfo=pytz.utc), 5.00),
                (u"US", datetime.datetime(2013, 7, 26, 17, 0).replace(tzinfo=pytz.utc), 8.00),
                (u"UD", datetime.datetime(2013, 7, 26, 16, 30).replace(tzinfo=pytz.utc), 358.00),
                (u"UD", datetime.datetime(2013, 7, 26, 17, 0).replace(tzinfo=pytz.utc), 353.00),
            ],
            u"DD182264": [
                (u"HG", datetime.datetime(2013, 7, 26, 16, 30).replace(tzinfo=pytz.utc), 3.07),
                (u"HG", datetime.datetime(2013, 7, 26, 16, 45).replace(tzinfo=pytz.utc), 3.07),
            ],
        }
        parsed = self.hp._parse_data(self.raw_data, None, (None, None))
        assert parsed == res

    def test__parse_data_with_var_filter(self):
        res = {
            u"CE4D0268": [
                (u"HM", datetime.datetime(2013, 7, 26, 16, 30).replace(tzinfo=pytz.utc), 4.94),
                (u"HM", datetime.datetime(2013, 7, 26, 17, 0).replace(tzinfo=pytz.utc), 4.41),
                (u"UD", datetime.datetime(2013, 7, 26, 16, 30).replace(tzinfo=pytz.utc), 358.00),
                (u"UD", datetime.datetime(2013, 7, 26, 17, 0).replace(tzinfo=pytz.utc), 353.00),
            ]
        }
        parsed = self.hp._parse_data(self.raw_data, [u"HM", u"UD"], (None, None))
        assert parsed == res
# HADS collector built on the shared Collector base class (a separate module
# from the tests above).
import os
import re
from datetime import datetime

import pytz
import requests
from bs4 import BeautifulSoup
from fiona import collection

# Project-internal imports; the exact module paths are not shown in this
# excerpt and are assumed from the package layout.
from pyoos.collectors.collector import Collector
from pyoos.parsers.hads import HadsParser


class Hads(Collector):
    def __init__(self, **kwargs):
        super(Hads, self).__init__()
        self.states_url = kwargs.get(
            "states_url", "https://hads.ncep.noaa.gov/hads/goog_earth/"
        )
        self.metadata_url = kwargs.get(
            "metadata_url", "https://hads.ncep.noaa.gov/nexhads2/servlet/DCPInfo"
        )
        self.obs_retrieval_url = kwargs.get(
            "obs_retrieval_url",
            "https://hads.ncep.noaa.gov/nexhads2/servlet/DecodedData",
        )
        self.station_codes = None
        self.parser = HadsParser()

    def clear(self):
        super(Hads, self).clear()
        self.station_codes = None

    @Collector.bbox.setter
    def bbox(self, bbox):
        Collector.bbox.fset(self, bbox)
        self.station_codes = None

    @Collector.features.setter
    def features(self, features):
        Collector.features.fset(self, features)
        self.station_codes = None

    def list_variables(self):
        """
        Lists available variables and applies any filters.
        """
        station_codes = self._get_station_codes()
        station_codes = self._apply_features_filter(station_codes)
        variables = self._list_variables(station_codes)

        if hasattr(self, "_variables") and self.variables is not None:
            variables.intersection_update(set(self.variables))

        return list(variables)

    def _list_variables(self, station_codes):
        """
        Internal helper to list the variables for the given station codes.
        """
        # sample output from obs retrieval:
        #
        # DD9452D0
        #     HP(SRBM5)
        #         2013-07-22 19:30   45.97
        #     HT(SRBM5)
        #         2013-07-22 19:30   44.29
        #     PC(SRBM5)
        #         2013-07-22 19:30   36.19
        #
        rvar = re.compile(r"\n\s([A-Z]{2}[A-Z0-9]{0,1})\(\w+\)")

        variables = set()
        resp = requests.post(
            self.obs_retrieval_url,
            data={
                "state": "nil",
                "hsa": "nil",
                "of": "3",
                "extraids": " ".join(station_codes),
                "sinceday": -1,
            },
        )
        resp.raise_for_status()

        list(map(variables.add, rvar.findall(resp.text)))
        return variables

    def list_features(self):
        station_codes = self._get_station_codes()
        station_codes = self._apply_features_filter(station_codes)
        return station_codes

    def collect(self, **kwargs):
        var_filter = None
        if hasattr(self, "_variables"):
            var_filter = self._variables

        time_extents = (
            self.start_time if hasattr(self, "start_time") else None,
            self.end_time if hasattr(self, "end_time") else None,
        )

        metadata, raw_data = self.raw(**kwargs)
        return self.parser.parse(metadata, raw_data, var_filter, time_extents)

    def raw(self, format=None, **kwargs):
        """
        Returns a tuple of (metadata, raw data).
        """
        station_codes = self._apply_features_filter(self._get_station_codes())
        metadata = self._get_metadata(station_codes, **kwargs)
        raw_data = self._get_raw_data(station_codes, **kwargs)
        return (metadata, raw_data)

    def _apply_features_filter(self, station_codes):
        """
        If the features filter is set, return the intersection of those
        filter items and the given station codes.
        """
        # apply features filter
        if hasattr(self, "features") and self.features is not None:
            station_codes = set(station_codes)
            station_codes = list(station_codes.intersection(set(self.features)))
        return station_codes

    def _get_metadata(self, station_codes, **kwargs):
        verify_cert = kwargs.get("verify", True)  # True is the default for requests
        resp = requests.post(
            self.metadata_url,
            data={
                "state": "nil",
                "hsa": "nil",
                "of": "1",
                "extraids": " ".join(station_codes),
                "data": "Get Meta Data",
            },
            verify=verify_cert,
        )
        resp.raise_for_status()
        return resp.text

    def _get_station_codes(self, force=False):
        """
        Gets and caches a list of station codes, optionally within a bbox.

        Returns the cached version if it exists unless force is True.
        """
        if not force and self.station_codes is not None:
            return self.station_codes

        state_urls = self._get_state_urls()

        # filter by bounding box against a shapefile
        state_matches = None
        if self.bbox:
            with collection(
                os.path.join(
                    "resources", "ne_50m_admin_1_states_provinces_lakes_shp.shp"
                ),
                "r",
            ) as c:
                geom_matches = [x["properties"] for x in c.filter(bbox=self.bbox)]
                state_matches = [
                    x["postal"] if x["admin"] != "Canada" else "CN"
                    for x in geom_matches
                ]

        self.station_codes = []
        for state_url in state_urls:
            if state_matches is not None:
                state_abbr = state_url.split("/")[-1].split(".")[0]
                if state_abbr not in state_matches:
                    continue
            self.station_codes.extend(self._get_stations_for_state(state_url))

        if self.bbox:
            # retrieve metadata for all stations to properly filter them
            metadata = self._get_metadata(self.station_codes)
            parsed_metadata = self.parser._parse_metadata(metadata)

            def in_bbox(code):
                lat = parsed_metadata[code]["latitude"]
                lon = parsed_metadata[code]["longitude"]
                return (
                    lon >= self.bbox[0]
                    and lon <= self.bbox[2]
                    and lat >= self.bbox[1]
                    and lat <= self.bbox[3]
                )

            self.station_codes = list(filter(in_bbox, self.station_codes))

        return self.station_codes

    def _get_state_urls(self):
        root = BeautifulSoup(requests.get(self.states_url).text)
        areas = root.find_all("area")
        return list({x.attrs.get("href", None) for x in areas})

    def _get_stations_for_state(self, state_url):
        state_root = BeautifulSoup(requests.get(state_url).text)
        return [
            x
            for x in [
                x.attrs["href"].split("nesdis_id=")[-1]
                for x in state_root.find_all("a")
            ]
            if len(x) > 0
        ]

    def _get_raw_data(self, station_codes, **kwargs):
        verify_cert = kwargs.get("verify", True)  # True is the default for requests

        since = 7
        if hasattr(self, "start_time") and self.start_time is not None:
            # calc delta between now and start_time
            timediff = datetime.utcnow().replace(tzinfo=pytz.utc) - self.start_time
            if timediff.days == 0:
                # floor division: Python 3's "/" would yield a fractional
                # number of hours here
                if timediff.seconds // (60 * 60) > 0:
                    since = -(timediff.seconds // (60 * 60))
                elif timediff.seconds // 60 > 0:
                    since = -1  # 1 hour minimum resolution
            else:
                since = min(7, timediff.days)  # max of 7 days

        resp = requests.post(
            self.obs_retrieval_url,
            data={
                "state": "nil",
                "hsa": "nil",
                "of": "1",
                "extraids": " ".join(station_codes),
                "sinceday": since,
            },
            verify=verify_cert,
        )
        resp.raise_for_status()
        return resp.text