def elevation_byloc(coord: Tuple[float, float], crs: str = DEF_CRS) -> float:
    """Get elevation from the USGS 3DEP service for a coordinate.

    Parameters
    ----------
    coord : tuple
        Coordinates of the location as a tuple (x, y).
    crs : str, optional
        The spatial reference of the input coord, defaults to epsg:4326 (lon, lat).

    Returns
    -------
    float
        Elevation in meters.
    """
    if not isinstance(coord, tuple) or len(coord) != 2:
        raise InvalidInputType("coord", "tuple of length 2", "(x, y)")

    lon, lat = MatchCRS.coords(([coord[0]], [coord[1]]), crs, DEF_CRS)
    url = "https://nationalmap.gov/epqs/pqs.php"
    payload = {"output": "json", "x": lon[0], "y": lat[0], "units": "Meters"}
    r = RetrySession().get(url, payload)
    root = r.json()["USGS_Elevation_Point_Query_Service"]
    elevation = float(root["Elevation_Query"]["Elevation"])
    if abs(elevation - (-1000000)) < 1e-3:
        raise ValueError(
            f"The elevation of the requested coordinate ({coord[0]}, {coord[1]}) cannot be found."
        )
    return elevation
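
# Usage sketch (added for illustration, not part of the original source):
# queries the live EPQS endpoint, so network access is required. The
# coordinate below is an arbitrary example point in EPSG:4326.
def _demo_elevation_byloc() -> None:
    elev = elevation_byloc((-89.4, 43.07))  # (lon, lat)
    print(f"Elevation: {elev:.1f} m")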
def ssebopeta_bygeom(
    geometry: Union[Polygon, Tuple[float, float, float, float]],
    dates: Union[Tuple[str, str], Union[int, List[int]]],
    geo_crs: str = DEF_CRS,
    fill_holes: bool = False,
) -> xr.DataArray:
    """Get daily actual ET for a region from the SSEBop database.

    Notes
    -----
    Since there is no web service for subsetting SSEBop yet, the data first
    need to be downloaded for the requested period and then masked by the
    region of interest locally. Therefore, this function is not as fast as
    the others, and the bottleneck could be the download speed.

    Parameters
    ----------
    geometry : shapely.geometry.Polygon or tuple
        The geometry for downloading and clipping the data. For a tuple bbox,
        the order should be (west, south, east, north).
    dates : tuple or list
        Start and end dates as a tuple (start, end) or a list of years [2001, 2010, ...].
    geo_crs : str, optional
        The CRS of the input geometry, defaults to epsg:4326.
    fill_holes : bool, optional
        Whether to fill the holes in the geometry's interior (Polygon type),
        defaults to False.

    Returns
    -------
    xarray.DataArray
        Daily actual ET within the geometry in mm/day at 1 km resolution.
    """
    _geometry = geoutils.geo2polygon(geometry, geo_crs, DEF_CRS)
    _geometry = Polygon(_geometry.exterior) if fill_holes else _geometry

    f_list = _get_ssebopeta_urls(dates)
    session = RetrySession()

    with session.onlyipv4():

        def _ssebop(url_stamped):
            dt, url = url_stamped
            resp = session.get(url)
            zfile = zipfile.ZipFile(io.BytesIO(resp.content))
            content = zfile.read(zfile.filelist[0].filename)
            ds = geoutils.gtiff2xarray({"eta": content}, _geometry, DEF_CRS)
            return dt, ds.expand_dims({"time": [dt]})

        resp_list = ogc.utils.threading(_ssebop, f_list, max_workers=4)
        data = xr.merge(OrderedDict(sorted(resp_list, key=lambda x: x[0])).values())

    eta = data.eta.copy()
    eta *= 1e-3
    eta.attrs.update({"units": "mm/day", "nodatavals": (np.nan,)})
    return eta
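
# Usage sketch (added for illustration): clips SSEBop ET to a small bounding
# box over a short period. Downloading many daily national rasters is slow,
# so short date ranges are preferable for experimentation.
def _demo_ssebopeta_bygeom() -> None:
    bbox = (-69.77, 45.07, -69.31, 45.45)  # (west, south, east, north)
    eta = ssebopeta_bygeom(bbox, dates=("2005-10-01", "2005-10-05"))
    print(eta.dims, eta.attrs["units"])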
def test_ipv4():
    url = (
        "https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/uswem/web/conus"
        + "/eta/modis_eta/daily/downloads/det2004003.modisSSEBopETactual.zip"
    )
    session = RetrySession()
    with session.onlyipv4():
        r = session.get(url)
        z = zipfile.ZipFile(io.BytesIO(r.content))
        # z.read returns the file's bytes, not its name
        content = z.read(z.filelist[0].filename)

    assert sys.getsizeof(content) == 4361682
def ssebopeta_byloc(
    coords: Tuple[float, float],
    dates: Union[Tuple[str, str], Union[int, List[int]]],
) -> pd.DataFrame:
    """Get daily actual ET for a location from the SSEBop database in mm/day.

    Parameters
    ----------
    coords : tuple
        Longitude and latitude of the location of interest as a tuple (lon, lat).
    dates : tuple or list
        Start and end dates as a tuple (start, end) or a list of years [2001, 2010, ...].

    Returns
    -------
    pandas.DataFrame
        Daily actual ET for the location.
    """
    if isinstance(coords, tuple) and len(coords) == 2:
        lon, lat = coords
    else:
        raise InvalidInputType("coords", "tuple", "(lon, lat)")

    f_list = _get_ssebopeta_urls(dates)

    session = RetrySession()
    with session.onlyipv4():

        def _ssebop(urls):
            dt, url = urls
            r = session.get(url)
            z = zipfile.ZipFile(io.BytesIO(r.content))

            with rio.MemoryFile() as memfile:
                memfile.write(z.read(z.filelist[0].filename))
                with memfile.open() as src:
                    return {
                        "dt": dt,
                        "eta": [e[0] for e in src.sample([(lon, lat)])][0],
                    }

        eta_list = ogc.utils.threading(_ssebop, f_list, max_workers=4)

    eta = pd.DataFrame.from_records(eta_list)
    eta.columns = ["datetime", "eta (mm/day)"]
    eta = eta.set_index("datetime")
    return eta * 1e-3
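
# Usage sketch (added for illustration): point sampling for a short period;
# the returned dataframe is indexed by datetime with a single
# "eta (mm/day)" column.
def _demo_ssebopeta_byloc() -> None:
    eta = ssebopeta_byloc((-69.77, 45.07), dates=("2005-10-01", "2005-10-05"))
    print(eta.head())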
def ssebopeta_bygeom(
    geometry: GTYPE,
    dates: Union[Tuple[str, str], Union[int, List[int]]],
    geo_crs: str = DEF_CRS,
) -> xr.DataArray:
    """Get daily actual ET for a region from the SSEBop database.

    Notes
    -----
    Since there is no web service for subsetting SSEBop yet, the data first
    need to be downloaded for the requested period and then masked by the
    region of interest locally. Therefore, this function is not as fast as
    the others, and the bottleneck could be the download speed.

    Parameters
    ----------
    geometry : shapely.geometry.Polygon or tuple
        The geometry for downloading and clipping the data. For a tuple bbox,
        the order should be (west, south, east, north).
    dates : tuple or list
        Start and end dates as a tuple (start, end) or a list of years [2001, 2010, ...].
    geo_crs : str, optional
        The CRS of the input geometry, defaults to epsg:4326.

    Returns
    -------
    xarray.DataArray
        Daily actual ET within the geometry in mm/day at 1 km resolution.
    """
    f_list = helpers.get_ssebopeta_urls(dates)

    if isinstance(geometry, (Polygon, MultiPolygon)):
        gtiff2xarray = tlz.partial(geoutils.gtiff2xarray, geometry=geometry, geo_crs=geo_crs)
    else:
        gtiff2xarray = tlz.partial(geoutils.gtiff2xarray)

    session = RetrySession()

    with patch("socket.has_ipv6", False):

        def _ssebop(t: pd.Timestamp, url: str) -> xr.DataArray:
            resp = session.get(url)
            zfile = zipfile.ZipFile(io.BytesIO(resp.content))
            content = zfile.read(zfile.filelist[0].filename)
            ds: xr.DataArray = gtiff2xarray(r_dict={"eta": content})
            return ds.expand_dims({"time": [t]})

        data = xr.merge(_ssebop(t, url) for t, url in f_list)

    eta: xr.DataArray = data.where(data.eta < data.eta.nodatavals[0]).eta.copy() * 1e-3
    eta.attrs.update(
        {
            "units": "mm/day",
            "nodatavals": (np.nan,),
            "crs": DEF_CRS,
            "long_name": "Actual ET",
        }
    )
    return eta
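
# Usage sketch (added for illustration) for this newer signature, which also
# accepts (Multi)Polygon input; a shapely box stands in for a real watershed.
def _demo_ssebopeta_bygeom_polygon() -> None:
    from shapely.geometry import box  # local import to keep the sketch self-contained

    geom = box(-69.77, 45.07, -69.31, 45.45)
    eta = ssebopeta_bygeom(geom, dates=("2005-06-01", "2005-06-03"))
    print(float(eta.mean()))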
class NID:
    """Retrieve data from the National Inventory of Dams."""

    def __init__(self) -> None:
        self.session = RetrySession()
        self.base_url = "https://nid.sec.usace.army.mil/ords"
        self.headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "DNT": "1",
        }

    def get_xlsx(self) -> io.BytesIO:
        """Get the excel file that contains the dam data."""
        self.headers.update({"Cookie": "ORA_WWV_APP_105=ORA_WWV-QM2rrHvxwzROBqNYVD0WIlg2"})
        payload = {"InFileName": "NID2019_U.xlsx"}
        r = self.session.get(
            f"{self.base_url}/NID_R.DOWNLOADFILE", payload=payload, headers=self.headers
        )
        return io.BytesIO(r.content)

    def get_attrs(self, variables: List[str]) -> Dict[str, str]:
        """Get descriptions of the NID variables."""
        self.headers.update({"Cookie": "ORA_WWV_APP_105=ORA_WWV-iaBJjzLW1v3a1s1mXEub0S7R"})
        desc: Dict[str, str] = {}
        for v in variables:
            payload = {"p": f"105:10:10326760693796::NO::P10_COLUMN_NAME:{v}"}
            page = self.session.get(f"{self.base_url}/f", payload=payload, headers=self.headers)
            tables = pd.read_html(page.text)
            desc[v] = tables[0]["Field Definition"].values[0]

        return desc

    def get_codes(self) -> str:
        """Get the definitions of letter codes in the NID database."""
        self.headers.update({"Cookie": "ORA_WWV_APP_105=ORA_WWV-Bk16kg_4BwSK2anC36B4XBQn"})
        payload = {"p": "105:21:16137342922753::NO:::"}
        page = self.session.get(f"{self.base_url}/f", payload=payload, headers=self.headers)
        return page.text
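
# Usage sketch (added for illustration): the xlsx payload can be read with
# pandas, which needs an Excel engine such as openpyxl installed. The
# "DAM_HEIGHT" variable name is an assumption made for this example.
def _demo_nid() -> None:
    nid = NID()
    dams = pd.read_excel(nid.get_xlsx())
    print(dams.shape)
    print(nid.get_attrs(["DAM_HEIGHT"]))  # hypothetical variable name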
def elevation_bycoords(coords: List[Tuple[float, float]], crs: str = DEF_CRS) -> List[int]:
    """Get elevation from Airmap for a list of coordinates.

    Parameters
    ----------
    coords : list of tuples
        Coordinates of the locations as a list of (x, y) tuples.
    crs : str, optional
        The spatial reference of the input coords, defaults to epsg:4326 (lon, lat).

    Returns
    -------
    list of int
        Elevations in meters.
    """
    if not isinstance(coords, (list, Iterator)):
        raise InvalidInputType("coords", "list (or iterator) of tuples of length 2", "[(x, y), ...]")

    if isinstance(coords, list) and any(len(c) != 2 for c in coords):
        raise InvalidInputType("coords", "list of tuples of length 2", "[(x, y), ...]")

    coords_reproj = zip(*MatchCRS.coords(tuple(zip(*coords)), crs, DEF_CRS))
    coords_chunks = tlz.partition_all(100, coords_reproj)

    headers = {"Content-Type": "application/json", "charset": "utf-8"}
    elevations = []
    for chunk in coords_chunks:
        payload = {"points": ",".join(f"{lat},{lon}" for lon, lat in chunk)}
        resp = RetrySession().get(ServiceURL().restful.airmap, payload=payload, headers=headers)
        elevations.append(resp.json()["data"])

    return list(tlz.concat(elevations))
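
# Usage sketch (added for illustration): Airmap returns integer elevations;
# a live network connection is required, and the points are arbitrary.
def _demo_elevation_bycoords() -> None:
    elevations = elevation_bycoords([(-89.4, 43.07), (-104.9, 39.7)])
    print(elevations)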
def __init__(
    self,
    variables: Optional[Union[List[str], str]] = None,
    pet: bool = False,
) -> None:
    self.session = RetrySession()

    vars_table = pd.read_html("https://daymet.ornl.gov/overview")[1]
    self.units = dict(zip(vars_table["Abbr"], vars_table["Units"]))

    valid_variables = vars_table.Abbr.to_list()
    if variables is None:
        self.variables = valid_variables
    else:
        self.variables = variables if isinstance(variables, list) else [variables]
        if not set(self.variables).issubset(set(valid_variables)):
            raise InvalidInputValue("variables", valid_variables)

    if pet:
        reqs = ("tmin", "tmax", "vp", "srad", "dayl")
        self.variables = list(set(reqs) | set(self.variables))
class NLDI:
    """Access the Hydro Network-Linked Data Index (NLDI) service."""

    def __init__(self) -> None:
        self.base_url = ServiceURL().restful.nldi
        self.session = RetrySession()

        resp = self.session.get("/".join([self.base_url, "linked-data"])).json()
        self.valid_fsources = {r["source"]: r["sourceName"] for r in resp}

        resp = self.session.get("/".join([self.base_url, "lookups"])).json()
        self.valid_chartypes = {r["type"]: r["typeName"] for r in resp}

    @staticmethod
    def _missing_warning(n_miss: int, n_tot: int) -> None:
        """Show a warning if there are missing features."""
        logger.warning(
            " ".join(
                [
                    f"{n_miss} of {n_tot} inputs didn't return any features.",
                    "They are returned as a list.",
                ]
            )
        )

    def getfeature_byid(
        self, fsource: str, fid: Union[str, List[str]]
    ) -> Union[gpd.GeoDataFrame, Tuple[gpd.GeoDataFrame, List[str]]]:
        """Get feature(s) based on ID(s).

        Parameters
        ----------
        fsource : str
            The name of feature(s) source. The valid sources are:
            comid, huc12pp, nwissite, wade, wqp
        fid : str or list
            Feature ID(s).

        Returns
        -------
        geopandas.GeoDataFrame or (geopandas.GeoDataFrame, list)
            NLDI indexed features in EPSG:4326. If some IDs don't return
            any features, a list of the missing ID(s) is returned as well.
        """
        self._validate_fsource(fsource)

        fid = fid if isinstance(fid, list) else [fid]
        urls = {f: "/".join([self.base_url, "linked-data", fsource, f]) for f in fid}
        features, not_found = self._get_urls(urls)

        if len(not_found) > 0:
            self._missing_warning(len(not_found), len(fid))
            return features, not_found

        return features

    def comid_byloc(
        self,
        coords: Union[Tuple[float, float], List[Tuple[float, float]]],
        loc_crs: str = DEF_CRS,
    ) -> Union[gpd.GeoDataFrame, Tuple[gpd.GeoDataFrame, List[Tuple[float, float]]]]:
        """Get the closest ComID(s) based on coordinates.

        Parameters
        ----------
        coords : tuple or list
            A tuple of length two (x, y) or a list of them.
        loc_crs : str, optional
            The spatial reference of the input coordinate, defaults to EPSG:4326.

        Returns
        -------
        geopandas.GeoDataFrame or (geopandas.GeoDataFrame, list)
            NLDI indexed ComID(s) in EPSG:4326. If some coords don't return
            any ComID, a list of the missing coords is returned as well.
        """
        coords = coords if isinstance(coords, list) else [coords]
        coords_4326 = list(zip(*MatchCRS.coords(tuple(zip(*coords)), loc_crs, DEF_CRS)))

        base_url = "/".join([self.base_url, "linked-data", "comid", "position"])
        urls = {
            (coords[i][0], coords[i][1]): f"{base_url}?coords=POINT({lon} {lat})"
            for i, (lon, lat) in enumerate(coords_4326)
        }
        comids, not_found = self._get_urls(urls)
        comids = comids.reset_index(level=2, drop=True)

        if len(not_found) > 0:
            self._missing_warning(len(not_found), len(coords))
            return comids, not_found

        return comids

    def get_basins(
        self, station_ids: Union[str, List[str]]
    ) -> Union[gpd.GeoDataFrame, Tuple[gpd.GeoDataFrame, List[str]]]:
        """Get basins for a list of station IDs.

        Parameters
        ----------
        station_ids : str or list
            USGS station ID(s).

        Returns
        -------
        geopandas.GeoDataFrame or (geopandas.GeoDataFrame, list)
            NLDI indexed basins in EPSG:4326. If some IDs don't return
            any features, a list of the missing ID(s) is returned as well.
        """
        station_ids = station_ids if isinstance(station_ids, list) else [station_ids]
        urls = {s: f"{self.base_url}/linked-data/nwissite/USGS-{s}/basin" for s in station_ids}
        basins, not_found = self._get_urls(urls)
        basins = basins.reset_index(level=1, drop=True)
        basins.index.rename("identifier", inplace=True)

        if len(not_found) > 0:
            self._missing_warning(len(not_found), len(station_ids))
            return basins, not_found

        return basins

    def getcharacteristic_byid(
        self,
        comids: Union[List[str], str],
        char_type: str,
        char_ids: Union[str, List[str]] = "all",
        values_only: bool = True,
    ) -> Union[pd.DataFrame, Tuple[pd.DataFrame, pd.DataFrame]]:
        """Get characteristics using a list of ComIDs.

        Parameters
        ----------
        comids : str or list
            The ID of the feature.
        char_type : str
            Type of the characteristic. Valid values are ``local`` for
            individual reach catchments, ``tot`` for network-accumulated values
            using total cumulative drainage area, and ``div`` for
            network-accumulated values using divergence-routed drainage area.
        char_ids : str or list, optional
            Name(s) of the target characteristics, defaults to all.
        values_only : bool, optional
            Whether to return only ``characteristic_value`` as a series,
            defaults to True. If set to False, ``percent_nodata`` is
            returned as well.

        Returns
        -------
        pandas.DataFrame or tuple of pandas.DataFrame
            Either only ``characteristic_value`` as a dataframe or,
            if ``values_only`` is False, ``percent_nodata`` as well.
        """
        if char_type not in self.valid_chartypes:
            valids = [f'"{s}" for {d}' for s, d in self.valid_chartypes.items()]
            raise InvalidInputValue("char", valids)

        comids = comids if isinstance(comids, list) else [comids]
        v_dict, nd_dict = {}, {}

        if char_ids == "all":
            payload = None
        else:
            _char_ids = char_ids if isinstance(char_ids, list) else [char_ids]
            valid_charids = self.get_validchars(char_type)

            idx = valid_charids.index
            if any(c not in idx for c in _char_ids):
                vids = valid_charids["characteristic_description"]
                raise InvalidInputValue("char_id", [f'"{s}" for {d}' for s, d in vids.items()])
            payload = {"characteristicId": ",".join(_char_ids)}

        for comid in comids:
            url = "/".join([self.base_url, "linked-data", "comid", comid, char_type])
            rjson = self._get_url(url, payload)
            char = pd.DataFrame.from_dict(rjson["characteristics"], orient="columns").T
            char.columns = char.iloc[0]
            char = char.drop(index="characteristic_id")

            v_dict[comid] = char.loc["characteristic_value"]
            if values_only:
                continue

            nd_dict[comid] = char.loc["percent_nodata"]

        def todf(df_dict: Dict[str, pd.DataFrame]) -> pd.DataFrame:
            df = pd.DataFrame.from_dict(df_dict, orient="index")
            df[df == ""] = np.nan
            df.index = df.index.astype("int64")
            return df.astype("f4")

        chars = todf(v_dict)
        if values_only:
            return chars

        return chars, todf(nd_dict)

    def get_validchars(self, char_type: str) -> pd.DataFrame:
        """Get all the available characteristic IDs for a given characteristic type."""
        resp = self.session.get("/".join([self.base_url, "lookups", char_type, "characteristics"]))
        c_list = ogc.utils.traverse_json(resp.json(), ["characteristicMetadata", "characteristic"])
        return pd.DataFrame.from_dict(
            {c.pop("characteristic_id"): c for c in c_list}, orient="index"
        )

    def navigate_byid(
        self,
        fsource: str,
        fid: str,
        navigation: str,
        source: str,
        distance: int = 500,
    ) -> gpd.GeoDataFrame:
        """Navigate the NHDPlus database from a single feature ID up to a distance.

        Parameters
        ----------
        fsource : str
            The name of feature source. The valid sources are:
            comid, huc12pp, nwissite, wade, WQP.
        fid : str
            The ID of the feature.
        navigation : str
            The navigation method.
        source : str, optional
            Return the data from another source after navigating
            the features using fsource, defaults to None.
        distance : int, optional
            Limit the search for navigation up to a distance in km,
            defaults to 500 km. Note that this is an expensive request,
            so you should be mindful of the value that you provide.

        Returns
        -------
        geopandas.GeoDataFrame
            NLDI indexed features in EPSG:4326.
        """
        self._validate_fsource(fsource)

        url = "/".join([self.base_url, "linked-data", fsource, fid, "navigation"])

        valid_navigations = self._get_url(url)
        if navigation not in valid_navigations.keys():
            raise InvalidInputValue("navigation", list(valid_navigations.keys()))

        url = valid_navigations[navigation]

        r_json = self._get_url(url)
        valid_sources = {s["source"].lower(): s["features"] for s in r_json}  # type: ignore
        if source not in valid_sources:
            raise InvalidInputValue("source", list(valid_sources.keys()))

        url = f"{valid_sources[source]}?distance={int(distance)}"

        return geoutils.json2geodf(self._get_url(url), ALT_CRS, DEF_CRS)

    def navigate_byloc(
        self,
        coords: Tuple[float, float],
        navigation: Optional[str] = None,
        source: Optional[str] = None,
        loc_crs: str = DEF_CRS,
        distance: int = 500,
    ) -> gpd.GeoDataFrame:
        """Navigate the NHDPlus database from a coordinate.

        Parameters
        ----------
        coords : tuple
            A tuple of length two (x, y).
        navigation : str, optional
            The navigation method, defaults to None. Both ``navigation``
            and ``source`` must be provided, otherwise an exception is raised.
        source : str, optional
            Return the data from another source after navigating
            the features using fsource, defaults to None.
        loc_crs : str, optional
            The spatial reference of the input coordinate, defaults to EPSG:4326.
        distance : int, optional
            Limit the search for navigation up to a distance in km,
            defaults to 500 km. Note that this is an expensive request,
            so you should be mindful of the value that you provide. If you
            want to get all the available features, you can pass a large
            distance like 9999999.

        Returns
        -------
        geopandas.GeoDataFrame
            NLDI indexed features in EPSG:4326.
        """
        _coords = MatchCRS().coords(((coords[0],), (coords[1],)), loc_crs, DEF_CRS)
        lon, lat = _coords[0][0], _coords[1][0]

        url = "/".join([self.base_url, "linked-data", "comid", "position"])
        payload = {"coords": f"POINT({lon} {lat})"}
        rjson = self._get_url(url, payload)
        comid = geoutils.json2geodf(rjson, ALT_CRS, DEF_CRS).comid.iloc[0]

        if navigation is None or source is None:
            raise MissingItems(["navigation", "source"])

        return self.navigate_byid("comid", comid, navigation, source, distance)

    def _validate_fsource(self, fsource: str) -> None:
        """Check if the given feature source is valid."""
        if fsource not in self.valid_fsources:
            valids = [f'"{s}" for {d}' for s, d in self.valid_fsources.items()]
            raise InvalidInputValue("feature source", valids)

    def _get_urls(self, urls: Dict[Any, str]) -> Tuple[gpd.GeoDataFrame, List[str]]:
        """Send GET requests for a dict of URLs and gather the responses.

        Parameters
        ----------
        urls : dict
            A dict with keys as feature IDs and values as the corresponding URLs.

        Returns
        -------
        (geopandas.GeoDataFrame, list)
            NLDI indexed features in EPSG:4326 and a list of ID(s) for
            which no feature was found.
        """
        not_found = []
        resp = []
        for f, u in urls.items():
            try:
                rjson = self._get_url(u)
                resp.append((f, geoutils.json2geodf(rjson, ALT_CRS, DEF_CRS)))
            except (ZeroMatched, JSONDecodeError, ConnectionError):
                not_found.append(f)

        if len(resp) == 0:
            raise ZeroMatched("No feature was found with the provided inputs.")

        resp_df = gpd.GeoDataFrame(pd.concat(dict(resp)))

        return resp_df, not_found

    def _get_url(self, url: str, payload: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
        """Send a request to the service using the GET method."""
        if payload:
            payload.update({"f": "json"})
        else:
            payload = {"f": "json"}

        try:
            return self.session.get(url, payload).json()
        except JSONDecodeError:
            raise ZeroMatched("No feature was found with the provided inputs.")
        except ConnectionError:
            raise ConnectionError("NLDI server cannot be reached at the moment.")
def nlcd_helper() -> Dict[str, Any]:
    """Get legends and properties of the NLCD cover dataset.

    Notes
    -----
    The following references have been used:
        - https://github.com/jzmiller1/nlcd
        - https://www.mrlc.gov/data-services-page
        - https://www.mrlc.gov/data/legends/national-land-cover-database-2016-nlcd2016-legend
    """
    url = (
        "https://www.mrlc.gov/downloads/sciweb1/shared/mrlc/metadata/"
        + "NLCD_2016_Land_Cover_Science_product_L48.xml"
    )
    r = RetrySession().get(url)
    root = ET.fromstring(r.content)

    clist = root[4][1][1].text.split("\n")[2:]
    _colors = [i.split() for i in clist]
    colors = {int(c): (float(r), float(g), float(b)) for c, r, g, b in _colors}

    classes = {
        root[4][0][3][i][0][0].text: root[4][0][3][i][0][1].text.split("-")[0].strip()
        for i in range(3, len(root[4][0][3]))
    }

    nlcd_meta = {
        "impervious_years": [2016, 2011, 2006, 2001],
        "canopy_years": [2016, 2011],
        "cover_years": [2016, 2013, 2011, 2008, 2006, 2004, 2001],
        "classes": classes,
        "categories": {
            "Unclassified": ("0",),
            "Water": ("11", "12"),
            "Developed": ("21", "22", "23", "24"),
            "Barren": ("31",),
            "Forest": ("41", "42", "43", "45", "46"),
            "Shrubland": ("51", "52"),
            "Herbaceous": ("71", "72", "73", "74"),
            "Planted/Cultivated": ("81", "82"),
            "Wetlands": ("90", "95"),
        },
        "roughness": {
            "11": 0.001,
            "12": 0.022,
            "21": 0.0404,
            "22": 0.0678,
            "23": 0.0678,
            "24": 0.0404,
            "31": 0.0113,
            "41": 0.36,
            "42": 0.32,
            "43": 0.4,
            "45": 0.4,
            "46": 0.24,
            "51": 0.24,
            "52": 0.4,
            "71": 0.368,
            "72": np.nan,
            "81": 0.325,
            "82": 0.16,
            "90": 0.086,
            "95": 0.1825,
        },
        "colors": colors,
    }

    return nlcd_meta
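
# Usage sketch (added for illustration): look up the roughness values assigned
# to the "Developed" NLCD classes. This assumes the parsed class codes are
# string keys, matching the "roughness" table above; .get avoids KeyErrors.
def _demo_nlcd_helper() -> None:
    meta = nlcd_helper()
    for code in meta["categories"]["Developed"]:
        print(code, meta["classes"].get(code), meta["roughness"].get(code))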
class NLDI:
    """Access the Hydro Network-Linked Data Index (NLDI) service."""

    def __init__(self) -> None:
        self.base_url = ServiceURL().restful.nldi
        self.session = RetrySession()

        resp = self.session.get("/".join([self.base_url, "linked-data"])).json()
        self.valid_fsources = {r["source"]: r["sourceName"] for r in resp}

        resp = self.session.get("/".join([self.base_url, "lookups"])).json()
        self.valid_chartypes = {r["type"]: r["typeName"] for r in resp}

    def getfeature_byid(self, fsource: str, fid: str, basin: bool = False) -> gpd.GeoDataFrame:
        """Get features of a single ID.

        Parameters
        ----------
        fsource : str
            The name of feature source. The valid sources are:
            comid, huc12pp, nwissite, wade, wqp
        fid : str
            The ID of the feature.
        basin : bool
            Whether to return the basin containing the feature.

        Returns
        -------
        geopandas.GeoDataFrame
            NLDI indexed features in EPSG:4326.
        """
        self._validate_fsource(fsource)

        url = "/".join([self.base_url, "linked-data", fsource, fid])
        if basin:
            url += "/basin"

        return geoutils.json2geodf(self._geturl(url), ALT_CRS, DEF_CRS)

    def getcharacteristic_byid(
        self,
        comids: Union[List[str], str],
        char_type: str,
        char_ids: Union[str, List[str]] = "all",
        values_only: bool = True,
    ) -> Union[pd.DataFrame, Tuple[pd.DataFrame, pd.DataFrame]]:
        """Get characteristics using a list of ComIDs.

        Parameters
        ----------
        comids : str or list
            The ID of the feature.
        char_type : str
            Type of the characteristic. Valid values are ``local`` for
            individual reach catchments, ``tot`` for network-accumulated values
            using total cumulative drainage area, and ``div`` for
            network-accumulated values using divergence-routed drainage area.
        char_ids : str or list, optional
            Name(s) of the target characteristics, defaults to all.
        values_only : bool, optional
            Whether to return only ``characteristic_value`` as a series,
            defaults to True. If set to False, ``percent_nodata`` is
            returned as well.

        Returns
        -------
        pandas.DataFrame or tuple of pandas.DataFrame
            Either only ``characteristic_value`` as a dataframe or,
            if ``values_only`` is False, ``percent_nodata`` as well.
        """
        if char_type not in self.valid_chartypes:
            valids = [f'"{s}" for {d}' for s, d in self.valid_chartypes.items()]
            raise InvalidInputValue("char", valids)

        comids = comids if isinstance(comids, list) else [comids]
        v_dict, nd_dict = {}, {}

        if char_ids == "all":
            payload = None
        else:
            _char_ids = char_ids if isinstance(char_ids, list) else [char_ids]
            valid_charids = self.get_validchars(char_type)

            idx = valid_charids.index
            if any(c not in idx for c in _char_ids):
                vids = valid_charids["characteristic_description"]
                raise InvalidInputValue("char_id", [f'"{s}" for {d}' for s, d in vids.items()])
            payload = {"characteristicId": ",".join(_char_ids)}

        for comid in comids:
            url = "/".join([self.base_url, "linked-data", "comid", comid, char_type])
            rjson = self._geturl(url, payload)
            char = pd.DataFrame.from_dict(rjson["characteristics"], orient="columns").T
            char.columns = char.iloc[0]
            char = char.drop(index="characteristic_id")

            v_dict[comid] = char.loc["characteristic_value"]
            if values_only:
                continue

            nd_dict[comid] = char.loc["percent_nodata"]

        def todf(df_dict: Dict[str, pd.DataFrame]) -> pd.DataFrame:
            df = pd.DataFrame.from_dict(df_dict, orient="index")
            df[df == ""] = np.nan
            df.index = df.index.astype("int64")
            return df.astype("f4")

        chars = todf(v_dict)
        if values_only:
            return chars

        return chars, todf(nd_dict)

    def get_validchars(self, char_type: str) -> pd.DataFrame:
        """Get all the available characteristic IDs for a given characteristic type."""
        resp = self.session.get("/".join([self.base_url, "lookups", char_type, "characteristics"]))
        c_list = ogc.utils.traverse_json(resp.json(), ["characteristicMetadata", "characteristic"])
        return pd.DataFrame.from_dict(
            {c.pop("characteristic_id"): c for c in c_list}, orient="index"
        )

    def navigate_byid(
        self,
        fsource: str,
        fid: str,
        navigation: str,
        source: str,
        distance: int = 500,
    ) -> gpd.GeoDataFrame:
        """Navigate the NHDPlus database from a single feature ID up to a distance.

        Parameters
        ----------
        fsource : str
            The name of feature source. The valid sources are:
            comid, huc12pp, nwissite, wade, WQP.
        fid : str
            The ID of the feature.
        navigation : str
            The navigation method.
        source : str, optional
            Return the data from another source after navigating
            the features using fsource, defaults to None.
        distance : int, optional
            Limit the search for navigation up to a distance in km,
            defaults to 500 km. Note that this is an expensive request,
            so you should be mindful of the value that you provide.

        Returns
        -------
        geopandas.GeoDataFrame
            NLDI indexed features in EPSG:4326.
        """
        self._validate_fsource(fsource)

        url = "/".join([self.base_url, "linked-data", fsource, fid, "navigation"])

        valid_navigations = self._geturl(url)
        if navigation not in valid_navigations.keys():
            raise InvalidInputValue("navigation", list(valid_navigations.keys()))

        url = valid_navigations[navigation]

        r_json = self._geturl(url)
        valid_sources = {s["source"].lower(): s["features"] for s in r_json}
        if source not in valid_sources:
            raise InvalidInputValue("source", list(valid_sources.keys()))

        url = f"{valid_sources[source]}?distance={int(distance)}"

        return geoutils.json2geodf(self._geturl(url), ALT_CRS, DEF_CRS)

    def navigate_byloc(
        self,
        coords: Tuple[float, float],
        navigation: Optional[str] = None,
        source: Optional[str] = None,
        loc_crs: str = DEF_CRS,
        distance: int = 500,
        comid_only: bool = False,
    ) -> gpd.GeoDataFrame:
        """Navigate the NHDPlus database from a coordinate.

        Parameters
        ----------
        coords : tuple
            A tuple of length two (x, y).
        navigation : str, optional
            The navigation method, defaults to None, which throws an exception
            if comid_only is False.
        source : str, optional
            Return the data from another source after navigating the features
            using fsource, defaults to None, which throws an exception
            if comid_only is False.
        loc_crs : str, optional
            The spatial reference of the input coordinate, defaults to EPSG:4326.
        distance : int, optional
            Limit the search for navigation up to a distance in km,
            defaults to 500 km. Note that this is an expensive request,
            so you should be mindful of the value that you provide. If you
            want to get all the available features, you can pass a large
            distance like 9999999.
        comid_only : bool, optional
            Whether to return the nearest comid without navigation.

        Returns
        -------
        geopandas.GeoDataFrame
            NLDI indexed features in EPSG:4326.
        """
        _coords = MatchCRS().coords(((coords[0],), (coords[1],)), loc_crs, DEF_CRS)
        lon, lat = _coords[0][0], _coords[1][0]

        url = "/".join([self.base_url, "linked-data", "comid", "position"])
        payload = {"coords": f"POINT({lon} {lat})"}
        rjson = self._geturl(url, payload)
        comid = geoutils.json2geodf(rjson, ALT_CRS, DEF_CRS).comid.iloc[0]

        if comid_only:
            return comid

        if navigation is None or source is None:
            raise MissingItems(["navigation", "source"])

        return self.navigate_byid("comid", comid, navigation, source, distance)

    def characteristics_dataframe(
        self,
        char_type: str,
        char_id: str,
        filename: Optional[str] = None,
        metadata: bool = False,
    ) -> Union[Dict[str, Any], pd.DataFrame]:
        """Get a NHDPlus-based characteristic from sciencebase.gov as a dataframe.

        Parameters
        ----------
        char_type : str
            Characteristic type. Valid values are ``local`` for individual
            reach catchments, ``tot`` for network-accumulated values using
            total cumulative drainage area, and ``div`` for network-accumulated
            values using divergence-routed drainage area.
        char_id : str
            Characteristic ID.
        filename : str, optional
            File name, defaults to None, which throws an error and shows
            a list of available files.
        metadata : bool
            Whether to only return the metadata for the selected
            characteristic, defaults to False. Useful for getting information
            about the dataset such as citation, units, column names, etc.

        Returns
        -------
        pandas.DataFrame or dict
            The requested characteristic as a dataframe or, if ``metadata``
            is True, the metadata as a dictionary.
        """
        if char_type not in self.valid_chartypes:
            valids = [f'"{s}" for {d}' for s, d in self.valid_chartypes.items()]
            raise InvalidInputValue("char", valids)

        valid_charids = self.get_validchars(char_type)

        if char_id not in valid_charids.index:
            vids = valid_charids["characteristic_description"]
            raise InvalidInputValue("char_id", [f'"{s}" for {d}' for s, d in vids.items()])

        meta = self.session.get(
            valid_charids.loc[char_id, "dataset_url"], {"format": "json"}
        ).json()
        if metadata:
            return meta

        flist = {
            f["name"]: f["downloadUri"]
            for f in meta["files"]
            if f["name"].split(".")[-1] == "zip"
        }
        if filename not in flist:
            raise InvalidInputValue("filename", list(flist.keys()))

        return pd.read_csv(flist[filename], compression="zip")

    def _validate_fsource(self, fsource: str) -> None:
        """Check if the given feature source is valid."""
        if fsource not in self.valid_fsources:
            valids = [f'"{s}" for {d}' for s, d in self.valid_fsources.items()]
            raise InvalidInputValue("feature source", valids)

    def _geturl(self, url: str, payload: Optional[Dict[str, str]] = None):
        """Send a request to the service using the GET method."""
        if payload is None:
            payload = {"f": "json"}
        else:
            payload.update({"f": "json"})

        try:
            return self.session.get(url, payload).json()
        except JSONDecodeError:
            raise ZeroMatched("No feature was found with the provided inputs.")
def post_connection_error():
    url = "https://somefailedurl.com"
    s = RetrySession(retries=2)
    s.post(url)
def ssebopeta_bycoords(
    coords: pd.DataFrame,
    dates: Union[Tuple[str, str], Union[int, List[int]]],
    crs: str = DEF_CRS,
) -> xr.Dataset:
    """Get daily actual ET for a dataframe of coords from the SSEBop database in mm/day.

    Parameters
    ----------
    coords : pandas.DataFrame
        A dataframe with ``id``, ``x``, ``y`` columns.
    dates : tuple or list
        Start and end dates as a tuple (start, end) or a list of years [2001, 2010, ...].
    crs : str, optional
        The CRS of the input coordinates, defaults to epsg:4326.

    Returns
    -------
    xarray.Dataset
        Daily actual ET in mm/day as a dataset with ``time`` and
        ``location_id`` dimensions. The ``location_id`` dimension is the
        same as the ``id`` column in the input dataframe.
    """
    if not isinstance(coords, pd.DataFrame):
        raise InvalidInputType("coords", "pandas.DataFrame")

    req_cols = ["id", "x", "y"]
    if not set(req_cols).issubset(coords.columns):
        raise MissingColumns(req_cols)

    _coords = gpd.GeoSeries(
        gpd.points_from_xy(coords["x"], coords["y"]), index=coords["id"], crs=crs
    )
    _coords = _coords.to_crs(DEF_CRS)
    co_list = list(zip(_coords.x, _coords.y))

    f_list = helpers.get_ssebopeta_urls(dates)
    session = RetrySession()

    with patch("socket.has_ipv6", False):

        def _ssebop(url: str) -> List[np.ndarray]:  # type: ignore
            r = session.get(url)
            z = zipfile.ZipFile(io.BytesIO(r.content))

            with rio.MemoryFile() as memfile:
                memfile.write(z.read(z.filelist[0].filename))
                with memfile.open() as src:
                    return list(src.sample(co_list))

        time, eta = zip(*[(t, _ssebop(url)) for t, url in f_list])

    eta_arr = np.array(eta).reshape(len(time), -1)  # type: ignore
    ds = xr.Dataset(
        data_vars={
            "eta": (["time", "location_id"], eta_arr),
            "x": (["location_id"], coords["x"].to_numpy()),
            "y": (["location_id"], coords["y"].to_numpy()),
        },
        coords={
            "time": np.array(time, dtype="datetime64[ns]"),
            "location_id": coords["id"].to_numpy(),
        },
    )
    ds["eta"] = ds["eta"].where(ds["eta"] != 9999, np.nan) * 1e-3
    ds.eta.attrs = {
        "units": "mm/day",
        "long_name": "Actual ET",
        "nodatavals": (np.nan,),
    }
    ds.x.attrs = {"crs": pyproj.CRS(crs).to_string()}
    ds.y.attrs = {"crs": pyproj.CRS(crs).to_string()}
    return ds
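
# Usage sketch (added for illustration): sample ET at two labeled points; the
# ids are arbitrary strings carried through to the output's location_id dimension.
def _demo_ssebopeta_bycoords() -> None:
    coords = pd.DataFrame(
        {"id": ["site_a", "site_b"], "x": [-69.77, -69.31], "y": [45.07, 45.45]}
    )
    ds = ssebopeta_bycoords(coords, dates=("2005-10-01", "2005-10-05"))
    print(ds.eta.sel(location_id="site_a").values)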
class NWIS:
    """Access the NWIS web service."""

    def __init__(self):
        self.session = RetrySession()
        self.url = ServiceURL().restful.nwis

    @staticmethod
    def query_byid(ids: Union[str, List[str]]) -> Dict[str, str]:
        """Generate the geometry keys and values of an ArcGISRESTful query."""
        if not isinstance(ids, (str, list)):
            raise InvalidInputType("ids", "str or list")

        ids = [str(i) for i in ids] if isinstance(ids, list) else [str(ids)]
        query = {"sites": ",".join(ids)}

        return query

    @staticmethod
    def query_bybox(bbox: Tuple[float, float, float, float]) -> Dict[str, str]:
        """Generate the geometry keys and values of an ArcGISRESTful query."""
        geoutils.check_bbox(bbox)
        query = {"bBox": ",".join(f"{b:.06f}" for b in bbox)}

        return query

    def get_info(self, query: Dict[str, str], expanded: bool = False) -> pd.DataFrame:
        """Get NWIS stations by a list of IDs or within a bounding box.

        Only stations that record(ed) daily streamflow data are returned.
        The following columns are included in the dataframe when ``expanded``
        is set to False:

        ================== ==================================
        Name               Description
        ================== ==================================
        site_no            Site identification number
        station_nm         Site name
        site_tp_cd         Site type
        dec_lat_va         Decimal latitude
        dec_long_va        Decimal longitude
        coord_acy_cd       Latitude-longitude accuracy
        dec_coord_datum_cd Decimal Latitude-longitude datum
        alt_va             Altitude of Gage/land surface
        alt_acy_va         Altitude accuracy
        alt_datum_cd       Altitude datum
        huc_cd             Hydrologic unit code
        parm_cd            Parameter code
        stat_cd            Statistical code
        ts_id              Internal timeseries ID
        loc_web_ds         Additional measurement description
        medium_grp_cd      Medium group code
        parm_grp_cd        Parameter group code
        srs_id             SRS ID
        access_cd          Access code
        begin_date         Begin date
        end_date           End date
        count_nu           Record count
        hcdn_2009          Whether the site is in HCDN-2009
        ================== ==================================

        Parameters
        ----------
        query : dict
            A dictionary containing query by IDs or BBOX. Use ``query_byid``
            or ``query_bybox`` class methods to generate the queries.
        expanded : bool, optional
            Whether to get expanded site information, for example drainage area.

        Returns
        -------
        pandas.DataFrame
            NWIS stations
        """
        if not isinstance(query, dict):
            raise InvalidInputType("query", "dict")

        output_type = [{"outputDataTypeCd": "dv"}]

        if expanded:
            output_type.append({"siteOutput": "expanded"})

        site_list = []
        for t in output_type:
            payload = {
                **query,
                **t,
                "format": "rdb",
                "parameterCd": "00060",
                "siteStatus": "all",
                "hasDataTypeCd": "dv",
            }

            resp = self.session.post(f"{self.url}/site", payload).text.split("\n")

            r_list = [txt.split("\t") for txt in resp if "#" not in txt]
            r_dict = [dict(zip(r_list[0], st)) for st in r_list[2:]]

            site_list.append(pd.DataFrame.from_dict(r_dict).dropna())

        if expanded:
            sites = pd.merge(
                *site_list, on="site_no", how="outer", suffixes=("", "_overlap")
            ).filter(regex="^(?!.*_overlap)")
        else:
            sites = site_list[0]

        sites = sites.drop(sites[sites.alt_va == ""].index)

        try:
            sites = sites[sites.parm_cd == "00060"]
            sites["begin_date"] = pd.to_datetime(sites["begin_date"])
            sites["end_date"] = pd.to_datetime(sites["end_date"])
        except AttributeError:
            pass

        float_cols = ["dec_lat_va", "dec_long_va", "alt_va", "alt_acy_va"]
        if expanded:
            float_cols += ["drain_area_va", "contrib_drain_area_va"]

        sites[float_cols] = sites[float_cols].apply(lambda x: pd.to_numeric(x, errors="coerce"))

        sites = sites[sites.site_no.apply(len) == 8]

        gii = WaterData("gagesii", DEF_CRS)
        hcdn = gii.byid("staid", sites.site_no.tolist())
        hcdn_dict = hcdn[["staid", "hcdn_2009"]].set_index("staid").hcdn_2009.to_dict()
        sites["hcdn_2009"] = sites.site_no.apply(
            lambda x: len(hcdn_dict[x]) > 0 if x in hcdn_dict.keys() else False
        )

        return sites

    def get_streamflow(
        self, station_ids: Union[List[str], str], dates: Tuple[str, str], mmd: bool = False
    ) -> pd.DataFrame:
        """Get daily streamflow observations from USGS.

        Parameters
        ----------
        station_ids : str, list
            The gage ID(s) of the USGS station.
        dates : tuple
            Start and end dates as a tuple (start, end).
        mmd : bool
            Convert cms to mm/day based on the contributing drainage area
            of the stations.

        Returns
        -------
        pandas.DataFrame
            Streamflow data observations in cubic meters per second (cms)
        """
        if not isinstance(station_ids, (str, list)):
            raise InvalidInputType("ids", "str or list")

        station_ids = station_ids if isinstance(station_ids, list) else [station_ids]

        if not isinstance(dates, tuple) or len(dates) != 2:
            raise InvalidInputType("dates", "tuple", "(start, end)")

        start = pd.to_datetime(dates[0])
        end = pd.to_datetime(dates[1])

        siteinfo = self.get_info(self.query_byid(station_ids))
        check_dates = siteinfo.loc[
            (
                (siteinfo.stat_cd == "00003")
                & (start < siteinfo.begin_date)
                & (end > siteinfo.end_date)
            ),
            "site_no",
        ].tolist()
        nas = [s for s in station_ids if s in check_dates]
        if len(nas) > 0:
            raise InvalidInputRange(
                "Daily Mean data unavailable for the specified time "
                + "period for the following stations:\n"
                + ", ".join(nas)
            )

        payload = {
            "format": "json",
            "sites": ",".join(station_ids),
            "startDT": start.strftime("%Y-%m-%d"),
            "endDT": end.strftime("%Y-%m-%d"),
            "parameterCd": "00060",
            "statCd": "00003",
            "siteStatus": "all",
        }
        resp = self.session.post(f"{self.url}/dv", payload)

        time_series = resp.json()["value"]["timeSeries"]
        r_ts = {
            t["sourceInfo"]["siteCode"][0]["value"]: t["values"][0]["value"] for t in time_series
        }

        def to_df(col, dic):
            discharge = pd.DataFrame.from_records(dic, exclude=["qualifiers"], index=["dateTime"])
            discharge.index = pd.to_datetime(discharge.index)
            discharge.columns = [col]
            return discharge

        qobs = pd.concat([to_df(f"USGS-{s}", t) for s, t in r_ts.items()], axis=1)

        # Convert cfs to cms
        qobs = qobs.astype("float64") * 0.028316846592

        if mmd:
            nldi = NLDI()
            basins_dict = {
                f"USGS-{s}": nldi.getfeature_byid("nwissite", f"USGS-{s}", basin=True).geometry
                for s in station_ids
            }
            basins = gpd.GeoDataFrame.from_dict(basins_dict, orient="index")
            basins.columns = ["geometry"]
            basins = basins.set_crs(DEF_CRS)
            eck4 = "+proj=eck4 +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"
            area = basins.to_crs(eck4).area
            ms2mmd = 1000.0 * 24.0 * 3600.0
            qobs = qobs.apply(lambda x: x / area.loc[x.name] * ms2mmd)

        return qobs
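
# Usage sketch (added for illustration): fetch one year of daily discharge for
# a gauge and normalize it to mm/day over the contributing basin area. The
# station ID is an arbitrary example; network access is required.
def _demo_nwis() -> None:
    nwis = NWIS()
    qobs = nwis.get_streamflow("01031500", ("2000-01-01", "2000-12-31"), mmd=True)
    print(qobs.head())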
class ScienceBase:
    """Access NHDPlus V2.1 Attributes from ScienceBase over CONUS.

    More info can be found
    `here <https://www.sciencebase.gov/catalog/item/5669a79ee4b08895842a1d47>`_.

    Parameters
    ----------
    save_dir : str
        Directory to save the staged data frame containing metadata for the
        database, defaults to the system's temp directory. The metadata
        dataframe is saved as a feather file, nhdplus_attrs.feather, in
        save_dir that can be loaded with Pandas.
    """

    def __init__(self, save_dir: Optional[str] = None) -> None:
        self.save_dir = Path(save_dir) if save_dir else Path(tempfile.gettempdir())
        if not self.save_dir.exists():
            os.makedirs(self.save_dir)
        self.session = RetrySession()
        self.nhd_attr_item = "5669a79ee4b08895842a1d47"
        self.char_feather = Path(self.save_dir, "nhdplus_attrs.feather")

    def get_children(self, item: str) -> Dict[str, Any]:
        """Get children items of an item."""
        url = "https://www.sciencebase.gov/catalog/items"
        payload = {
            "filter": f"parentIdExcludingLinks={item}",
            "fields": "title,id",
            "format": "json",
        }
        return self.session.get(url, payload=payload).json()

    def get_files(self, item: str) -> Dict[str, Tuple[str, str]]:
        """Get all the available zip files in an item."""
        url = "https://www.sciencebase.gov/catalog/item"
        payload = {"fields": "files,downloadUri", "format": "json"}
        r = self.session.get(f"{url}/{item}", payload=payload).json()
        files_url = zip(tlz.pluck("name", r["files"]), tlz.pluck("url", r["files"]))
        # TODO: Add units
        meta = "".join(tlz.pluck("metadataHtmlViewUri", r["files"], default=""))
        return {f.replace("_CONUS.zip", ""): (u, meta) for f, u in files_url if ".zip" in f}

    def stage_data(self) -> pd.DataFrame:
        """Stage the NHDPlus Attributes database and save to nhdplus_attrs.feather."""
        r = self.get_children(self.nhd_attr_item)

        titles = tlz.pluck("title", r["items"])
        titles = tlz.concat(tlz.map(tlz.partial(re.findall, "Select(.*?)Attributes"), titles))
        titles = tlz.map(str.strip, titles)

        main_items = dict(zip(titles, tlz.pluck("id", r["items"])))

        files = {}
        soil = main_items.pop("Soil")
        for i, item in main_items.items():
            r = self.get_children(item)

            titles = tlz.pluck("title", r["items"])
            titles = tlz.map(lambda s: s.split(":")[1].strip() if ":" in s else s, titles)

            child_items = dict(zip(titles, tlz.pluck("id", r["items"])))
            files[i] = {t: self.get_files(c) for t, c in child_items.items()}

        r = self.get_children(soil)
        titles = tlz.pluck("title", r["items"])
        titles = tlz.map(lambda s: s.split(":")[1].strip() if ":" in s else s, titles)

        child_items = dict(zip(titles, tlz.pluck("id", r["items"])))
        stat = child_items.pop("STATSGO Soil Characteristics")
        ssur = child_items.pop("SSURGO Soil Characteristics")
        files["Soil"] = {t: self.get_files(c) for t, c in child_items.items()}

        r = self.get_children(stat)
        titles = tlz.pluck("title", r["items"])
        titles = tlz.map(lambda s: s.split(":")[1].split(",")[1].strip(), titles)

        child_items = dict(zip(titles, tlz.pluck("id", r["items"])))
        files["STATSGO"] = {t: self.get_files(c) for t, c in child_items.items()}

        r = self.get_children(ssur)
        titles = tlz.pluck("title", r["items"])
        titles = tlz.map(lambda s: s.split(":")[1].strip(), titles)

        child_items = dict(zip(titles, tlz.pluck("id", r["items"])))
        files["SSURGO"] = {t: self.get_files(c) for t, c in child_items.items()}

        chars = []
        types = {"CAT": "local", "TOT": "upstream_acc", "ACC": "div_routing"}
        for t, dd in files.items():
            for d, fd in dd.items():
                for f, u in fd.items():
                    chars.append(
                        {
                            "name": f,
                            "type": types.get(f[-3:], "other"),
                            "theme": t,
                            "description": d,
                            "url": u[0],
                            "meta": u[1],
                        }
                    )
        char_df = pd.DataFrame(chars, dtype="category")
        char_df.to_feather(self.char_feather)
        return char_df