def test_waterdata_byfilter():
    """Query HUC12 boundaries by a CQL filter, then by distance from a point."""
    crs = "epsg:3857"
    water = WaterData("huc12", crs)
    basins = water.byfilter(f"{water.layer} LIKE '17030001%'")
    centroid = basins.iloc[0].geometry.centroid
    nearby = water.bydistance((centroid.x, centroid.y), 100, crs)
    assert basins.shape[0] == 52 and nearby.huc12.values[0] == "170300010602"
def test_waterdata_bybox():
    """Waterbody area totals must agree between bygeom and bybox queries."""
    water = WaterData("nhdwaterbody")
    print(water)
    bounds = (-69.7718, 45.0742, -69.3141, 45.4534)
    by_geom = water.bygeom(box(*bounds), predicate="INTERSECTS", xy=True)
    by_box = water.bybox(bounds)
    assert abs(by_box.areasqkm.sum() - by_geom.areasqkm.sum()) < 1e-3
def test_acc():
    """Accumulated reach length along the network must match the arbolate sum."""
    water = WaterData("nhdflowline_network")
    comids = nldi.navigate_byid("nwissite", "USGS-11092450", UT, "flowlines")
    tributaries = water.byid("comid", comids.nhdplus_comid.tolist())

    nhd.prepare_nhdplus(tributaries, 0, 0, 0, False, False)
    flw = nhd.prepare_nhdplus(tributaries, 1, 1, 1, True, True)

    def accumulate(qin, q):
        # Simple additive routing: inflow plus local contribution.
        return qin + q

    qsim = nhd.vector_accumulation(
        flw[["comid", "tocomid", "lengthkm"]],
        accumulate,
        "lengthkm",
        ["lengthkm"],
    )
    flw = flw.merge(qsim, on="comid")
    assert (flw.arbolatesu - flw.acc_lengthkm).abs().sum() < 1e-5
def test_waterdata_byid():
    """Retrieve flowlines and catchments by COMID and check counts and totals."""
    comids = nldi.navigate_byid(site, station_id, UT, "flowlines")
    comid_list = comids.nhdplus_comid.tolist()

    flowlines = WaterData("nhdflowline_network").byid("comid", comid_list)
    catchments = WaterData("catchmentsp").byid("featureid", comid_list)

    assert (
        flowlines.shape[0] == 432
        and abs(flowlines.lengthkm.sum() - 565.755) < 1e-3
        and abs(catchments.areasqkm.sum() - 773.954) < 1e-3
    )
def get_info(self, query: Dict[str, str], expanded: bool = False) -> pd.DataFrame:
    """Get NWIS stations by a list of IDs or within a bounding box.

    Only stations that record(ed) daily streamflow data are returned.
    The following columns are included in the dataframe with expanded
    set to False:

    ================== ==================================
    Name               Description
    ================== ==================================
    site_no            Site identification number
    station_nm         Site name
    site_tp_cd         Site type
    dec_lat_va         Decimal latitude
    dec_long_va        Decimal longitude
    coord_acy_cd       Latitude-longitude accuracy
    dec_coord_datum_cd Decimal Latitude-longitude datum
    alt_va             Altitude of Gage/land surface
    alt_acy_va         Altitude accuracy
    alt_datum_cd       Altitude datum
    huc_cd             Hydrologic unit code
    parm_cd            Parameter code
    stat_cd            Statistical code
    ts_id              Internal timeseries ID
    loc_web_ds         Additional measurement description
    medium_grp_cd      Medium group code
    parm_grp_cd        Parameter group code
    srs_id             SRS ID
    access_cd          Access code
    begin_date         Begin date
    end_date           End date
    count_nu           Record count
    hcdn_2009          Whether is in HCDN-2009 stations
    ================== ==================================

    Parameters
    ----------
    query : dict
        A dictionary containing query by IDs or BBOX. Use ``query_byid``
        or ``query_bbox`` class methods to generate the queries.
    expanded : bool, optional
        Whether to get expanded site information, for example drainage area.

    Returns
    -------
    pandas.DataFrame
        NWIS stations
    """
    if not isinstance(query, dict):
        raise InvalidInputType("query", "dict")

    output_type = [{"outputDataTypeCd": "dv"}]
    if expanded:
        output_type.append({"siteOutput": "expanded"})

    site_list = []
    for t in output_type:
        payload = {
            **query,
            **t,
            "format": "rdb",
            "parameterCd": "00060",
            "siteStatus": "all",
            "hasDataTypeCd": "dv",
        }
        resp = self.session.post(f"{self.url}/site", payload).text.split("\n")
        # RDB comment lines *start* with '#'. Checking the prefix (rather
        # than substring membership) avoids silently dropping data rows that
        # merely contain a '#' character, e.g. in a station name.
        r_list = [txt.split("\t") for txt in resp if not txt.startswith("#")]
        # First row is the header, second is the column-width spec; the rest
        # are data rows.
        r_dict = [dict(zip(r_list[0], st)) for st in r_list[2:]]
        site_list.append(pd.DataFrame.from_dict(r_dict).dropna())

    if expanded:
        # Merge the basic and expanded responses, dropping duplicated columns
        # picked up from the second response.
        sites = pd.merge(
            *site_list, on="site_no", how="outer", suffixes=("", "_overlap")
        ).filter(regex="^(?!.*_overlap)")
    else:
        sites = site_list[0]

    # Drop stations without a reported altitude.
    sites = sites.drop(sites[sites.alt_va == ""].index)
    try:
        sites = sites[sites.parm_cd == "00060"]
        sites["begin_date"] = pd.to_datetime(sites["begin_date"])
        sites["end_date"] = pd.to_datetime(sites["end_date"])
    except AttributeError:
        # Expected columns are absent from the response; leave frame as-is.
        pass

    float_cols = ["dec_lat_va", "dec_long_va", "alt_va", "alt_acy_va"]
    if expanded:
        float_cols += ["drain_area_va", "contrib_drain_area_va"]
    sites[float_cols] = sites[float_cols].apply(lambda x: pd.to_numeric(x, errors="coerce"))

    # Keep only 8-digit site numbers.
    sites = sites[sites.site_no.apply(len) == 8]

    # Flag HCDN-2009 membership using the GagesII layer.
    gii = WaterData("gagesii", DEF_CRS)
    hcdn = gii.byid("staid", sites.site_no.tolist())
    hcdn_dict = hcdn[["staid", "hcdn_2009"]].set_index("staid").hcdn_2009.to_dict()
    sites["hcdn_2009"] = sites.site_no.apply(
        lambda x: len(hcdn_dict[x]) > 0 if x in hcdn_dict else False
    )
    return sites
def test_waterdata_byfilter():
    """Filtering HUC12s with a LIKE pattern yields the expected basin count."""
    huc12 = WaterData("huc12", "epsg:3857")
    matches = huc12.byfilter(f"{huc12.layer} LIKE '17030001%'")
    assert matches.shape[0] == 52
def get_info(
    self, queries: Union[Dict[str, str], List[Dict[str, str]]], expanded: bool = False
) -> gpd.GeoDataFrame:
    """Send multiple queries to USGS Site Web Service.

    Parameters
    ----------
    queries : dict or list of dict
        A single or a list of valid queries.
    expanded : bool, optional
        Whether to get expanded site information, for example drainage area,
        default to False.

    Returns
    -------
    geopandas.GeoDataFrame
        A correctly typed ``GeoDataFrame`` containing site(s) information.
    """
    # Normalize a single query dict into a one-element list.
    queries = [queries] if isinstance(queries, dict) else queries
    payloads = self._validate_usgs_queries(queries, False)
    sites = self.retrieve_rdb(f"{self.url}/site", payloads)
    float_cols = ["dec_lat_va", "dec_long_va", "alt_va", "alt_acy_va"]
    if expanded:
        # Issue a second round of queries for the expanded site output and
        # merge it in, dropping columns duplicated by the second response.
        payloads = self._validate_usgs_queries(queries, True)
        sites = sites.merge(
            self.retrieve_rdb(f"{self.url}/site", payloads),
            on="site_no",
            how="outer",
            suffixes=("", "_overlap"),
        )
        sites = sites.filter(regex="^(?!.*_overlap)")
        float_cols += ["drain_area_va", "contrib_drain_area_va"]
    # Date columns may be absent depending on the query; skip quietly if so.
    with contextlib.suppress(KeyError):
        sites["begin_date"] = pd.to_datetime(sites["begin_date"])
        sites["end_date"] = pd.to_datetime(sites["end_date"])

    gii = WaterData("gagesii", DEF_CRS)

    def _get_hcdn(site_no: str) -> Tuple[float, Optional[bool]]:
        # Look up drainage area and HCDN-2009 membership for one station in
        # the GagesII layer; (nan, None) when the station is not found or
        # the expected attributes are missing.
        # NOTE(review): one GagesII request per station — presumably fine for
        # small result sets; confirm for large queries.
        try:
            gage = gii.byid("staid", site_no)
            return gage.drain_sqkm.iloc[0], len(gage.hcdn_2009.iloc[0]) > 0  # noqa: TC300
        except (AttributeError, KeyError, ZeroMatchedOGC):
            return np.nan, None

    sites["drain_sqkm"], sites["hcdn_2009"] = zip(*[_get_hcdn(n) for n in sites.site_no])
    float_cols += ["drain_sqkm"]
    sites[float_cols] = sites[float_cols].apply(pd.to_numeric, errors="coerce")
    return gpd.GeoDataFrame(
        sites,
        geometry=gpd.points_from_xy(sites.dec_long_va, sites.dec_lat_va),
        crs="epsg:4326",
    )