def get(self, url_part, params=None, headers=None):
    """Issue an authenticated GET request against the data source.

    Validates the access token first (logging in if needed), then sends
    the request with an ``Authorization: {token_type} {access_token}``
    header built from the stored token.

    :param url_part: The url part to request
    :param params: additional parameters for the request
    :type params: dict
    :param headers: extra request headers, merged over the auth header
    :return: the response from :func:`get_url`
    :raises: PermissionDenied
    """
    self._validate_token()

    # Build the Authorization header from the stored token; any
    # caller-supplied headers are layered on top of it.
    request_headers = {
        "Authorization": "{token_type} {access_token}".format(**self.token)
    }
    request_headers.update(headers or {})

    return get_url(url_part, params=params, headers=request_headers,
                   verify=self.verify_ssl)
def get_hydrological_unit_codes(self):
    """Get the hydrological unit codes for USGS"""
    # Try the live USGS service first; the short timeout keeps the
    # failover to locally stored codes fast.
    try:
        response = get_url(URL_USGS_HUC, timeout=0.5)
    except requests.exceptions.ReadTimeout:
        logger.warning(
            f"Read Timeout for {URL_USGS_HUC} - Failing over to stored HUC codes"
        )
    except requests.exceptions.ConnectTimeout:
        logger.warning(
            f"Connection Timeout for {URL_USGS_HUC} - Failing over to stored HUC codes"
        )
    else:
        if response.status_code == 200:
            return response.text

    # Timeout or non-200 response: fall back to the stored copy.
    return usgs_huc_codes.CONTENT
def get_observed_properties_variables(self, usgs_sites):
    """
    Get a dictionary of location variables for the given location results

    :param usgs_sites: iterable of USGS HUC codes for the locations
    :return: dict mapping location id -> list of available parameter codes
    """
    # Gather the location ids and get the parameters available
    # Only search for the mean data statCd=00003.
    url = '{}dv?huc={}&format=json&statCd=00003'.format(
        self.datasource.location, ",".join(usgs_sites))
    response_variables = get_url(url)

    observed_properties_variables = {}
    if response_variables and response_variables.status_code == 200:
        for location in response_variables.json()['value']['timeSeries']:
            # name format: "<agency>:<location id>:<parameter>:<statistic>"
            _, location_id, parameter, statistic = location['name'].split(":")
            # We only want the mean
            observed_properties_variables.setdefault(location_id, []).append(parameter)
        # Fix: use the module logger instead of the root logger (logging.debug),
        # consistent with the rest of this file.
        logger.debug(
            "Location DataTypes: {}".format(observed_properties_variables))
    return observed_properties_variables
def test_get_url():
    """Verify that get_url fetches a well-known public page."""
    resp = get_url("http://www.google.com")
    # A truthy response with HTTP 200 means the fetch succeeded.
    assert resp
    assert resp.status_code == 200
def generator_usgs_measurement_timeseries_tvp_observation(
        view, query: QueryMeasurementTimeseriesTVP):
    """
    Get the Data Points for USGS Daily Values

    =================== === ===================
    USGS NWIS               Broker
    =================== === ===================
    ``nwis/dv``          >> ``data_points/``
    =================== === ===================

    :param view: The request object ( Please refer to the Django documentation ).
    :param query: Query information for this request
    :returns: a generator object that yields
        :class:`~basin3d.synthesis.models.field.DataPoint` objects
    """
    # Temporal resolution is always daily.
    search_params: List[Tuple[str, Any]] = [("startDT", query.start_date)]
    if query.end_date:
        search_params.append(("endDT", query.end_date))
    search_params.append(
        ("parameterCd",
         ",".join(str(opv) for opv in query.observed_property_variables)))

    if query.statistic:
        # Translate BASIN-3D statistics to USGS stat codes, skipping
        # (and logging) any the service does not support.
        supported_stats: List[str] = []
        for stat in query.statistic:
            mapped_stat = USGS_STATISTIC_MAP.get(stat)
            if mapped_stat:
                supported_stats.append(mapped_stat)
            else:
                logger.info(
                    f"USGS Daily Values service does not support statistic {stat}"
                )
        search_params.append(("statCd", ",".join(supported_stats)))
    else:
        search_params.append(("siteStatus", "all"))

    # Identifiers longer than two characters are site codes; otherwise
    # they are treated as HUC prefixes.
    if len(query.monitoring_features[0]) > 2:
        search_params.append(("sites", ",".join(query.monitoring_features)))
    else:
        search_params.append(("huc", ",".join(query.monitoring_features)))

    # Station locations only, returned as JSON.
    search_params.append(("siteType", "ST"))
    search_params.append(("format", "json"))

    # Request the data points
    response = get_url('{}dv'.format(view.datasource.location),
                       params=search_params)

    if response.status_code == 200:
        try:
            json_obj = response.json()
            # There is a valid json response
            if json_obj:
                # Iterate over monitoring_features
                for data_json in json_obj['value']['timeSeries']:
                    yield data_json
        except json.decoder.JSONDecodeError:
            logger.error("JSON Not Returned: {}".format(response.content))
    else:
        # Strip HTML tags out of the error body before logging it.
        import re
        markup = re.compile(r'<.*?>')
        logger.error("HTTP {}: {}".format(response.status_code,
                                          markup.sub(' ', response.text)))
def list(self, query: QueryMeasurementTimeseriesTVP):
    """
    Get the Data Points for USGS Daily Values

    =================== === ======================
    USGS NWIS               Broker
    =================== === ======================
    ``nwis/dv``          >> ``measurement_tvp_timeseries/``
    =================== === ======================

    :returns: a generator object that yields
        :class:`~basin3d.synthesis.models.measurement.MeasurementTimeseriesTVPObservation` objects
    """
    feature_obj_dict = {}
    if not query.monitoring_features:
        return None

    search_params = ",".join(query.monitoring_features)
    url = '{}site/?sites={}'.format(self.datasource.location, search_params)
    # Identifiers shorter than 3 characters are HUC prefixes, not site codes
    if len(search_params) < 3:
        url = '{}site/?huc={}'.format(self.datasource.location, search_params)

    usgs_site_response = None
    try:
        usgs_site_response = get_url(url)
        # Fix: stdlib logging.debug() does not accept a url= keyword argument
        # (it raised TypeError, which the except below then mislogged as a
        # connection failure). Fold the url into the message and use the
        # module logger.
        logger.debug("{}.{} url: {}".format(self.__class__.__name__, "list", url))
    except Exception as e:
        # Fix: use the module logger, not the root logger (logging.warning)
        logger.warning("Could not connect to USGS site info: {}".format(e))

    if usgs_site_response:
        # Index the site records by site number for lookup below
        for v in iter_rdb_to_json(usgs_site_response.text):
            if v["site_no"]:
                feature_obj_dict[v["site_no"]] = v

    # Iterate over data objects returned
    for data_json in generator_usgs_measurement_timeseries_tvp_observation(
            self, query):
        unit_of_measurement = data_json["variable"]["unit"]['unitCode']
        timezone_offset = data_json["sourceInfo"]["timeZoneInfo"][
            "defaultTimeZone"]["zoneOffset"]

        # name has agency, sitecode, parameter id and stat code
        # e.g. "USGS:385106106571000:00060:00003"
        _, feature_id, parameter, statistic = data_json["name"].split(":")

        if feature_id in feature_obj_dict:
            monitoring_feature = _load_point_obj(
                datasource=self,
                json_obj=feature_obj_dict[feature_id],
                feature_observed_properties=dict(),
                observed_property_variables=
                "Find observed property variables at monitoring feature url")
        else:
            # ToDo: expand this to use the info in the data return
            # ToDo: log message
            monitoring_feature = None

        # deal with statistic
        basin3d_statistic = "NOT SET"
        if statistic:
            basin3d_statistic = map_statistic_code(statistic)

        result_points = []
        result_qualifiers = set()

        for values in data_json["values"]:
            result_qualifiers.update(
                self.get_result_qualifiers(values["qualifier"]))

            for value in values["value"]:
                # VAL: with result quality here, the quality of last value in
                # the timeseries will be used. Is the value the same
                # throughout the time series?
                result_quality = self.result_quality(value['qualifiers'])

                # TODO: write some tests for this which will require mocking
                # a data return. Only filter if quality_checked is True.
                if not query.result_quality or query.result_quality == result_quality:
                    # Get the broker parameter
                    try:
                        try:
                            data: Optional[float] = float(value['value'])
                            # Hardcoded unit conversion for river discharge parameters
                            data, unit_of_measurement = convert_discharge(
                                data, parameter, unit_of_measurement)
                        except Exception as e:
                            logger.error(str(e))
                            data = None  # What to do with bad values?
                        result_points.append(
                            TimeValuePair(timestamp=value['dateTime'],
                                          value=data))
                    except Exception as e:
                        logger.error(e)

        # When the query did not pin a quality, derive it from the
        # qualifiers seen across the whole series.
        timeseries_result_quality = query.result_quality
        if not timeseries_result_quality:
            if ResultQualityEnum.CHECKED in result_qualifiers:
                timeseries_result_quality = ResultQualityEnum.CHECKED
            if ResultQualityEnum.UNCHECKED in result_qualifiers:
                timeseries_result_quality = ResultQualityEnum.UNCHECKED
            # NOTE(review): a "partially checked" series would usually be one
            # with BOTH checked and unchecked values; this tests
            # PARTIALLY_CHECKED + CHECKED instead — confirm intended logic.
            if (ResultQualityEnum.PARTIALLY_CHECKED in result_qualifiers
                    and ResultQualityEnum.CHECKED in result_qualifiers):
                timeseries_result_quality = ResultQualityEnum.PARTIALLY_CHECKED

        measurement_timeseries_tvp_observation = MeasurementTimeseriesTVPObservation(
            self,
            id=feature_id,  # FYI: this field is not unique and thus kinda useless
            unit_of_measurement=unit_of_measurement,
            feature_of_interest_type=FeatureTypeEnum.POINT,
            feature_of_interest=monitoring_feature,
            utc_offset=int(timezone_offset.split(":")[0]),
            result_points=result_points,
            observed_property_variable=parameter,
            result_quality=timeseries_result_quality,
            aggregation_duration=query.aggregation_duration,
            time_reference_position=TimeMetadataMixin.TIME_REFERENCE_MIDDLE,
            statistic=basin3d_statistic)

        yield measurement_timeseries_tvp_observation
def list(self, query: QueryMonitoringFeature):
    """
    Get the Regions

    =================== === ===================
    USGS NWIS               Broker
    =================== === ===================
    ``new_huc_rdb.txt``  >> ``MonitoringFeature/``
    =================== === ===================

    :param query: The query information object
    :returns: a generator object that yields
        :class:`~basin3d.synthesis.models.field.MonitoringFeature` objects
    """
    # Normalize the feature type to its string value when an enum was given
    feature_type = isinstance(
        query.feature_type,
        FeatureTypeEnum) and query.feature_type.value or query.feature_type

    if feature_type in USGSDataSourcePlugin.feature_types or feature_type is None:

        # Convert parent_features, bucketing by HUC code length
        # (< 4 chars -> region, 8 chars -> subbasin)
        usgs_regions = []
        usgs_subbasins = []
        parent_features = []
        if query.parent_features:
            for value in query.parent_features:
                parent_features.append(value)
                if len(value) < 4:
                    usgs_regions.append(value)
                elif len(value) == 8:
                    usgs_subbasins.append(value)

        if not feature_type or feature_type != FeatureTypeEnum.POINT:
            huc_text = self.get_hydrological_unit_codes()
            # Fix: stdlib logging.debug() does not accept a url= keyword
            # argument (it raised TypeError). Fold the url into the message
            # and use the module logger.
            logger.debug("{}.{} url: {}".format(self.__class__.__name__,
                                                "list", URL_USGS_HUC))

            # Keep only HUC records under one of the requested parents
            for json_obj in [
                    o for o in iter_rdb_to_json(huc_text) if not parent_features
                    or [p for p in parent_features if o["huc"].startswith(p)]
            ]:
                monitoring_feature = None

                if (feature_type is None
                        or feature_type == FeatureTypeEnum.REGION) and len(
                            json_obj["huc"]) < 4:
                    monitoring_feature = self._load_huc_obj(
                        json_obj, feature_type=FeatureTypeEnum.REGION)

                elif (feature_type is None
                      or feature_type == FeatureTypeEnum.SUBREGION) and len(
                          json_obj["huc"]) == 4:
                    monitoring_feature = self._load_huc_obj(
                        json_obj,
                        feature_type=FeatureTypeEnum.SUBREGION,
                        related_sampling_feature=json_obj["huc"][0:2],
                        related_sampling_feature_type=FeatureTypeEnum.REGION)

                elif (feature_type is None
                      or feature_type == FeatureTypeEnum.BASIN) and len(
                          json_obj["huc"]) == 6:
                    monitoring_feature = self._load_huc_obj(
                        json_obj,
                        feature_type=FeatureTypeEnum.BASIN,
                        related_sampling_feature=json_obj["huc"][0:4],
                        related_sampling_feature_type=FeatureTypeEnum.SUBREGION)

                elif (feature_type is None
                      or feature_type == FeatureTypeEnum.SUBBASIN) and len(
                          json_obj["huc"]) == 8:
                    # All ancestor HUC prefixes (2, 4, 6 chars) of this code
                    hucs = {json_obj["huc"][0:i] for i in range(2, 8, 2)}

                    # Filter by regions if it is set
                    if not usgs_regions or not hucs.isdisjoint(usgs_regions):
                        # This is a Cataloging Unit
                        # (See https://water.usgs.gov/GIS/huc_name.html)
                        monitoring_feature = self._load_huc_obj(
                            json_obj=json_obj,
                            feature_type=FeatureTypeEnum.SUBBASIN,
                            description=
                            "{} Watershed: Drainage basin code is defined by the USGS State "
                            "Office where the site is located.".format(
                                json_obj["basin"]),
                            related_sampling_feature=json_obj["huc"][0:6],
                            related_sampling_feature_type=FeatureTypeEnum.BASIN)
                else:
                    logger.debug("Ignoring HUC {}".format(json_obj["huc"]))

                # Determine whether to yield the monitoring feature object
                if monitoring_feature:
                    if query.monitoring_features and json_obj[
                            'huc'] in query.monitoring_features:
                        yield monitoring_feature
                    elif not query.monitoring_features:
                        yield monitoring_feature

        # points: USGS calls these sites
        else:
            if query.monitoring_features:
                usgs_sites = ",".join(query.monitoring_features)
                feature_observed_properties = self.get_observed_properties_variables(
                    query.monitoring_features)
            else:
                # Get the variables with data
                feature_observed_properties = self.get_observed_properties_variables(
                    usgs_subbasins)
                usgs_sites = ",".join(feature_observed_properties.keys())

            # Filter by locations with data
            url = '{}site/?sites={}'.format(self.datasource.location,
                                            usgs_sites)
            usgs_site_response = get_url(url)
            # Fix: same logging.debug(url=...) unsupported-keyword bug
            logger.debug("{}.{} url: {}".format(self.__class__.__name__,
                                                "list", url))

            if usgs_site_response and usgs_site_response.status_code == 200:
                for v in iter_rdb_to_json(usgs_site_response.text):
                    yield _load_point_obj(
                        datasource=self,
                        json_obj=v,
                        feature_observed_properties=feature_observed_properties)
    else:
        logger.warning(
            f"Feature type {feature_type} not supported by {self.datasource.name}."
        )