Example #1
0
class NWIS:
    """Access NWIS web service.

    The instance holds a ``RetrySession`` used for all requests and the base
    URL of the NWIS RESTful service taken from ``ServiceURL``.
    """

    def __init__(self):
        self.session = RetrySession()
        self.url = ServiceURL().restful.nwis

    @staticmethod
    def query_byid(ids: Union[str, List[str]]) -> Dict[str, str]:
        """Generate the ``sites`` key and value of an NWIS query from station ID(s).

        Parameters
        ----------
        ids : str or list
            A single station ID or a list of station IDs. List items are
            converted to ``str`` before being joined.

        Returns
        -------
        dict
            ``{"sites": "<id1>,<id2>,..."}``

        Raises
        ------
        InvalidInputType
            If ``ids`` is neither a ``str`` nor a ``list``.
        """
        if not isinstance(ids, (str, list)):
            raise InvalidInputType("ids", "str or list")

        id_list = ids if isinstance(ids, list) else [ids]
        return {"sites": ",".join(str(i) for i in id_list)}

    @staticmethod
    def query_bybox(bbox: Tuple[float, float, float, float]) -> Dict[str, str]:
        """Generate the ``bBox`` key and value of an NWIS query from a bounding box.

        Parameters
        ----------
        bbox : tuple
            Bounding box as four floats; it is validated by ``geoutils.check_bbox``.

        Returns
        -------
        dict
            ``{"bBox": "<b0>,<b1>,<b2>,<b3>"}`` with each value formatted to
            six decimal places.
        """
        geoutils.check_bbox(bbox)
        return {"bBox": ",".join(f"{b:.06f}" for b in bbox)}

    @staticmethod
    def _parse_rdb(text: str) -> pd.DataFrame:
        """Convert a tab-delimited RDB response body to a dataframe of strings.

        Lines that start with ``#`` are comments and are skipped. The first
        remaining line is the header and the second one (column formats) is
        discarded. Rows that end up with missing fields, such as the trailing
        empty line of the response, are dropped.
        """
        # Only a leading "#" marks a comment; a "#" inside a field (for
        # example in a station name) must not discard the record.
        rows = [line.split("\t") for line in text.split("\n") if not line.startswith("#")]
        header, records = rows[0], rows[2:]
        return pd.DataFrame([dict(zip(header, rec)) for rec in records]).dropna()

    def get_info(self, query: Dict[str, str], expanded: bool = False) -> pd.DataFrame:
        """Get NWIS stations by a list of IDs or within a bounding box.

        Only stations that record(ed) daily streamflow data are returned.
        The following columns are included in the dataframe with expanded
        set to False:

        ==================  ==================================
        Name                Description
        ==================  ==================================
        site_no             Site identification number
        station_nm          Site name
        site_tp_cd          Site type
        dec_lat_va          Decimal latitude
        dec_long_va         Decimal longitude
        coord_acy_cd        Latitude-longitude accuracy
        dec_coord_datum_cd  Decimal Latitude-longitude datum
        alt_va              Altitude of Gage/land surface
        alt_acy_va          Altitude accuracy
        alt_datum_cd        Altitude datum
        huc_cd              Hydrologic unit code
        parm_cd             Parameter code
        stat_cd             Statistical code
        ts_id               Internal timeseries ID
        loc_web_ds          Additional measurement description
        medium_grp_cd       Medium group code
        parm_grp_cd         Parameter group code
        srs_id              SRS ID
        access_cd           Access code
        begin_date          Begin date
        end_date            End date
        count_nu            Record count
        hcdn_2009           Whether is in HCDN-2009 stations
        ==================  ==================================

        Parameters
        ----------
        query : dict
            A dictionary containing query by IDs or BBOX. Use ``query_byid`` or
            ``query_bybox`` static methods to generate the queries.
        expanded : bool, optional
            Whether to get expanded site information, for example drainage area.

        Returns
        -------
        pandas.DataFrame
            NWIS stations

        Raises
        ------
        InvalidInputType
            If ``query`` is not a ``dict``.
        """
        if not isinstance(query, dict):
            raise InvalidInputType("query", "dict")

        output_types = [{"outputDataTypeCd": "dv"}]
        if expanded:
            output_types.append({"siteOutput": "expanded"})

        # One request per output type; the expanded request is merged below.
        site_list = []
        for out_type in output_types:
            payload = {
                **query,
                **out_type,
                "format": "rdb",
                "parameterCd": "00060",
                "siteStatus": "all",
                "hasDataTypeCd": "dv",
            }
            resp = self.session.post(f"{self.url}/site", payload)
            site_list.append(self._parse_rdb(resp.text))

        if expanded:
            # Columns present in both responses are taken from the first one.
            sites = pd.merge(
                *site_list, on="site_no", how="outer", suffixes=("", "_overlap")
            ).filter(regex="^(?!.*_overlap)")
        else:
            sites = site_list[0]

        # Copy after filtering so the column assignments below do not operate
        # on a view of another dataframe.
        sites = sites[sites.alt_va != ""].copy()
        if "parm_cd" in sites.columns:
            sites = sites[sites.parm_cd == "00060"].copy()
            sites["begin_date"] = pd.to_datetime(sites["begin_date"])
            sites["end_date"] = pd.to_datetime(sites["end_date"])

        float_cols = ["dec_lat_va", "dec_long_va", "alt_va", "alt_acy_va"]
        if expanded:
            float_cols += ["drain_area_va", "contrib_drain_area_va"]

        sites[float_cols] = sites[float_cols].apply(pd.to_numeric, errors="coerce")

        # Keep only stations with an 8-character site number.
        sites = sites[sites.site_no.str.len() == 8].copy()

        gii = WaterData("gagesii", DEF_CRS)
        hcdn = gii.byid("staid", sites.site_no.tolist())
        hcdn_dict = hcdn[["staid", "hcdn_2009"]].set_index("staid").hcdn_2009.to_dict()
        # A station is flagged when it has a non-empty ``hcdn_2009`` value.
        sites["hcdn_2009"] = sites.site_no.apply(lambda sid: len(hcdn_dict.get(sid, "")) > 0)

        return sites

    def get_streamflow(
        self, station_ids: Union[List[str], str], dates: Tuple[str, str], mmd: bool = False
    ) -> pd.DataFrame:
        """Get daily streamflow observations from USGS.

        Parameters
        ----------
        station_ids : str, list
            The gage ID(s) of the USGS station.
        dates : tuple
            Start and end dates as a tuple (start, end).
        mmd : bool
            Convert cms to mm/day based on the contributing drainage area of the stations.

        Returns
        -------
        pandas.DataFrame
            Streamflow data observations in cubic meter per second (cms), or in
            mm/day when ``mmd`` is True. Columns are named ``USGS-<station id>``.

        Raises
        ------
        InvalidInputType
            If ``station_ids`` is neither a ``str`` nor a ``list``, or ``dates``
            is not a tuple of length two.
        InvalidInputRange
            If the availability check on the requested period fails for any station.
        """
        if not isinstance(station_ids, (str, list)):
            raise InvalidInputType("station_ids", "str or list")

        # Same normalisation as ``query_byid``: always a list of strings.
        if isinstance(station_ids, list):
            station_ids = [str(s) for s in station_ids]
        else:
            station_ids = [station_ids]

        if not isinstance(dates, tuple) or len(dates) != 2:
            raise InvalidInputType("dates", "tuple", "(start, end)")

        start = pd.to_datetime(dates[0])
        end = pd.to_datetime(dates[1])

        siteinfo = self.get_info(self.query_byid(station_ids))
        # NOTE(review): this flags a station only when the requested period
        # extends beyond BOTH ends of its record; confirm that this is the
        # intended definition of "unavailable".
        unavailable = set(
            siteinfo.loc[
                (
                    (siteinfo.stat_cd == "00003")
                    & (start < siteinfo.begin_date)
                    & (end > siteinfo.end_date)
                ),
                "site_no",
            ]
        )
        nas = [s for s in station_ids if s in unavailable]
        if nas:
            raise InvalidInputRange(
                "Daily Mean data unavailable for the specified time "
                + "period for the following stations:\n"
                + ", ".join(nas)
            )

        payload = {
            "format": "json",
            "sites": ",".join(station_ids),
            "startDT": start.strftime("%Y-%m-%d"),
            "endDT": end.strftime("%Y-%m-%d"),
            "parameterCd": "00060",
            "statCd": "00003",
            "siteStatus": "all",
        }

        resp = self.session.post(f"{self.url}/dv", payload)

        time_series = resp.json()["value"]["timeSeries"]
        # Map each site code to the records of its first value set.
        r_ts = {
            t["sourceInfo"]["siteCode"][0]["value"]: t["values"][0]["value"] for t in time_series
        }

        def to_df(col, dic):
            # One single-column frame per station, indexed by observation time.
            discharge = pd.DataFrame.from_records(dic, exclude=["qualifiers"], index=["dateTime"])
            discharge.index = pd.to_datetime(discharge.index)
            discharge.columns = [col]
            return discharge

        qobs = pd.concat([to_df(f"USGS-{s}", t) for s, t in r_ts.items()], axis=1)

        # Convert cfs to cms
        qobs = qobs.astype("float64") * 0.028316846592

        if mmd:
            nldi = NLDI()
            basins_dict = {
                f"USGS-{s}": nldi.getfeature_byid("nwissite", f"USGS-{s}", basin=True).geometry
                for s in station_ids
            }
            basins = gpd.GeoDataFrame.from_dict(basins_dict, orient="index")
            basins.columns = ["geometry"]
            basins = basins.set_crs(DEF_CRS)
            # Basin areas are computed in an Eckert IV projection with metre units.
            eck4 = "+proj=eck4 +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"
            area = basins.to_crs(eck4).area
            # m/s -> mm/day
            ms2mmd = 1000.0 * 24.0 * 3600.0
            qobs = qobs.apply(lambda x: x / area.loc[x.name] * ms2mmd)
        return qobs
Example #2
0
def post_connection_error():
    """Send a POST through a ``RetrySession`` capped at two retries.

    The target is a placeholder URL that, judging by its name, is expected
    to fail. Nothing is returned.
    """
    session = RetrySession(retries=2)
    session.post("https://somefailedurl.com")