Example #1
def test_waterdata_byfilter():
    crs = "epsg:3857"
    wd = WaterData("huc12", crs)
    wb = wd.byfilter(f"{wd.layer} LIKE '17030001%'")
    coords = (wb.iloc[0].geometry.centroid.x, wb.iloc[0].geometry.centroid.y)
    hucs = wd.bydistance(coords, 100, crs)
    assert wb.shape[0] == 52 and hucs.huc12.values[0] == "170300010602"
Example #2
def test_waterdata_bybox():
    wd = WaterData("nhdwaterbody")
    print(wd)
    wb_g = wd.bygeom(box(-69.7718, 45.0742, -69.3141, 45.4534),
                     predicate="INTERSECTS",
                     xy=True)
    wb_b = wd.bybox((-69.7718, 45.0742, -69.3141, 45.4534))
    assert abs(wb_b.areasqkm.sum() - wb_g.areasqkm.sum()) < 1e-3
Example #3
def test_acc():
    wd = WaterData("nhdflowline_network")
    comids = nldi.navigate_byid("nwissite", "USGS-11092450", UT, "flowlines")
    comid_list = comids.nhdplus_comid.tolist()
    trib = wd.byid("comid", comid_list)

    nhd.prepare_nhdplus(trib, 0, 0, 0, False, False)
    flw = nhd.prepare_nhdplus(trib, 1, 1, 1, True, True)

    def routing(qin, q):
        return qin + q

    qsim = nhd.vector_accumulation(
        flw[["comid", "tocomid", "lengthkm"]],
        routing,
        "lengthkm",
        ["lengthkm"],
    )
    flw = flw.merge(qsim, on="comid")
    diff = flw.arbolatesu - flw.acc_lengthkm

    assert diff.abs().sum() < 1e-5
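For context, ``arbolatesu`` in NHDPlus is the arbolate sum: the total length of all flowlines upstream of, and including, a given reach, which is why it should match a length accumulation. The sketch below spells out the semantics that ``vector_accumulation`` is assumed to have with the ``routing`` function above; ``manual_acc_lengthkm`` is a hypothetical helper for illustration only, and ``nldi``, ``UT``, and ``nhd`` in the test are presumably module-level fixtures/imports from the original test suite.

import pandas as pd


def manual_acc_lengthkm(flw: pd.DataFrame) -> dict:
    """Accumulate lengthkm from upstream to downstream (assumed semantics).

    Each reach's accumulated value is its own lengthkm plus the accumulated
    values of every reach that drains into it (tocomid == comid).
    """
    acc: dict = {}

    def upstream(comid):
        if comid not in acc:
            length = flw.loc[flw.comid == comid, "lengthkm"].iloc[0]
            contributors = flw.loc[flw.tocomid == comid, "comid"]
            acc[comid] = length + sum(upstream(c) for c in contributors)
        return acc[comid]

    return {c: upstream(c) for c in flw.comid}

Under that reading, ``acc_lengthkm`` reproduces ``arbolatesu``, which is exactly what the final assertion checks.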
Example #4
def test_waterdata_byid():
    comids = nldi.navigate_byid(site, station_id, UT, "flowlines")
    comid_list = comids.nhdplus_comid.tolist()

    wd = WaterData("nhdflowline_network")
    trib = wd.byid("comid", comid_list)

    wd = WaterData("catchmentsp")
    ct = wd.byid("featureid", comid_list)

    assert (trib.shape[0] == 432 and abs(trib.lengthkm.sum() - 565.755) < 1e-3
            and abs(ct.areasqkm.sum() - 773.954) < 1e-3)
Example #5
    def get_info(self, query: Dict[str, str], expanded: bool = False) -> pd.DataFrame:
        """Get NWIS stations by a list of IDs or within a bounding box.

        Only stations that record(ed) daily streamflow data are returned.
        The following columns are included in the dataframe when ``expanded``
        is set to ``False``:

        ==================  ==================================
        Name                Description
        ==================  ==================================
        site_no             Site identification number
        station_nm          Site name
        site_tp_cd          Site type
        dec_lat_va          Decimal latitude
        dec_long_va         Decimal longitude
        coord_acy_cd        Latitude-longitude accuracy
        dec_coord_datum_cd  Decimal Latitude-longitude datum
        alt_va              Altitude of Gage/land surface
        alt_acy_va          Altitude accuracy
        alt_datum_cd        Altitude datum
        huc_cd              Hydrologic unit code
        parm_cd             Parameter code
        stat_cd             Statistical code
        ts_id               Internal timeseries ID
        loc_web_ds          Additional measurement description
        medium_grp_cd       Medium group code
        parm_grp_cd         Parameter group code
        srs_id              SRS ID
        access_cd           Access code
        begin_date          Begin date
        end_date            End date
        count_nu            Record count
        hcdn_2009           Whether the site is in HCDN-2009
        ==================  ==================================

        Parameters
        ----------
        query : dict
            A dictionary containing a query by IDs or a bounding box. Use the ``query_byid``
            or ``query_bbox`` class methods to generate the queries.
        expanded : bool, optional
            Whether to get expanded site information, for example drainage area.

        Returns
        -------
        pandas.DataFrame
            NWIS stations
        """
        if not isinstance(query, dict):
            raise InvalidInputType("query", "dict")

        output_type = [{"outputDataTypeCd": "dv"}]
        if expanded:
            output_type.append({"siteOutput": "expanded"})

        site_list = []
        for t in output_type:
            payload = {
                **query,
                **t,
                "format": "rdb",
                "parameterCd": "00060",
                "siteStatus": "all",
                "hasDataTypeCd": "dv",
            }

            resp = self.session.post(f"{self.url}/site", payload).text.split("\n")

            r_list = [txt.split("\t") for txt in resp if "#" not in txt]  # drop RDB comment lines
            r_dict = [dict(zip(r_list[0], st)) for st in r_list[2:]]  # row 0 is the header, row 1 the field sizes

            site_list.append(pd.DataFrame.from_dict(r_dict).dropna())

        if expanded:
            sites = pd.merge(
                *site_list, on="site_no", how="outer", suffixes=("", "_overlap")
            ).filter(regex="^(?!.*_overlap)")
        else:
            sites = site_list[0]

        sites = sites.drop(sites[sites.alt_va == ""].index)
        try:
            sites = sites[sites.parm_cd == "00060"]
            sites["begin_date"] = pd.to_datetime(sites["begin_date"])
            sites["end_date"] = pd.to_datetime(sites["end_date"])
        except AttributeError:
            pass

        float_cols = ["dec_lat_va", "dec_long_va", "alt_va", "alt_acy_va"]
        if expanded:
            float_cols += ["drain_area_va", "contrib_drain_area_va"]

        sites[float_cols] = sites[float_cols].apply(lambda x: pd.to_numeric(x, errors="coerce"))

        sites = sites[sites.site_no.apply(len) == 8]  # keep only 8-digit site numbers

        gii = WaterData("gagesii", DEF_CRS)
        hcdn = gii.byid("staid", sites.site_no.tolist())
        hcdn_dict = hcdn[["staid", "hcdn_2009"]].set_index("staid").hcdn_2009.to_dict()
        sites["hcdn_2009"] = sites.site_no.apply(
            lambda x: len(hcdn_dict[x]) > 0 if x in hcdn_dict else False
        )

        return sites
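A hypothetical usage sketch for the method above. The enclosing class is assumed to be ``NWIS`` (imported from ``pygeohydro``), and ``query_byid`` is assumed to take a list of station IDs, as the docstring suggests; neither is confirmed by this snippet.

from pygeohydro import NWIS  # assumed import location for the class shown above

nwis = NWIS()
query = nwis.query_byid(["01031500", "01031450"])  # assumed helper; see the docstring above
stations = nwis.get_info(query, expanded=True)
print(stations[["site_no", "station_nm", "begin_date", "hcdn_2009"]].head())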
Example #6
def test_waterdata_byfilter():
    wd = WaterData("huc12", "epsg:3857")
    wb = wd.byfilter(f"{wd.layer} LIKE '17030001%'")
    assert wb.shape[0] == 52
Example #7
    def get_info(self,
                 queries: Union[Dict[str, str], List[Dict[str, str]]],
                 expanded: bool = False) -> gpd.GeoDataFrame:
        """Send multiple queries to USGS Site Web Service.

        Parameters
        ----------
        queries : dict or list of dict
            A single valid query or a list of valid queries.
        expanded : bool, optional
            Whether to get expanded site information, for example drainage area;
            defaults to False.

        Returns
        -------
        geopandas.GeoDataFrame
            A correctly typed ``GeoDataFrame`` containing site(s) information.
        """
        queries = [queries] if isinstance(queries, dict) else queries

        payloads = self._validate_usgs_queries(queries, False)
        sites = self.retrieve_rdb(f"{self.url}/site", payloads)

        float_cols = ["dec_lat_va", "dec_long_va", "alt_va", "alt_acy_va"]

        if expanded:
            payloads = self._validate_usgs_queries(queries, True)
            sites = sites.merge(
                self.retrieve_rdb(f"{self.url}/site", payloads),
                on="site_no",
                how="outer",
                suffixes=("", "_overlap"),
            )
            sites = sites.filter(regex="^(?!.*_overlap)")
            float_cols += ["drain_area_va", "contrib_drain_area_va"]

        with contextlib.suppress(KeyError):
            sites["begin_date"] = pd.to_datetime(sites["begin_date"])
            sites["end_date"] = pd.to_datetime(sites["end_date"])

        gii = WaterData("gagesii", DEF_CRS)

        def _get_hcdn(site_no: str) -> Tuple[float, Optional[bool]]:
            try:
                gage = gii.byid("staid", site_no)
                return gage.drain_sqkm.iloc[0], len(
                    gage.hcdn_2009.iloc[0]) > 0  # noqa: TC300
            except (AttributeError, KeyError, ZeroMatchedOGC):
                return np.nan, None

        sites["drain_sqkm"], sites["hcdn_2009"] = zip(
            *[_get_hcdn(n) for n in sites.site_no])

        float_cols += ["drain_sqkm"]
        sites[float_cols] = sites[float_cols].apply(pd.to_numeric,
                                                    errors="coerce")

        return gpd.GeoDataFrame(
            sites,
            geometry=gpd.points_from_xy(sites.dec_long_va, sites.dec_lat_va),
            crs="epsg:4326",
        )
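A hypothetical usage sketch for this list-of-queries variant. The class name ``NWIS``, its import location, and the raw query keys (``sites`` and ``bBox``, following USGS Site Web Service parameter names) are assumptions, since ``_validate_usgs_queries`` and ``retrieve_rdb`` are not shown here.

from pygeohydro import NWIS  # assumed import location

nwis = NWIS()
sites = nwis.get_info(
    [{"sites": "01031500"}, {"bBox": "-69.77,45.07,-69.31,45.45"}],
    expanded=True,
)
# The result is a GeoDataFrame in EPSG:4326 with numeric drainage areas.
print(sites.crs, sites[["site_no", "drain_sqkm", "hcdn_2009"]].head())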