Example #1
from pyoos.collectors.usgs.usgs_rest import UsgsRest

def setUp(self):
    self.c = UsgsRest()
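Example #1 only instantiates the collector inside a test fixture. For context, the sketch below shows how such a UsgsRest collector is typically driven; it reuses the filter/raw/clear calls from Example #2, and the station id, dates and parameter code are the illustrative values from that example's docstring.

from datetime import datetime

from pyoos.collectors.usgs.usgs_rest import UsgsRest

# Restrict the collector to one station, one variable and a date range,
# then fetch the raw WaterML response and reset the filters.
collector = UsgsRest()
collector.filter(
    start=datetime(2000, 1, 1),
    end=datetime(2000, 12, 31),
    variables=["00060"],    # discharge, cubic feet per second
    features=["03109500"],  # Little Beaver Creek near East Liverpool OH
)
raw_waterml = collector.raw()
collector.clear()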
Example #2
import os
from datetime import datetime

import numpy as np
import xarray as xr
from pyoos.collectors.usgs.usgs_rest import UsgsRest
from pyoos.parsers.waterml import WaterML11ToPaegan


def get_usgs_data(station_id,
                  start_date,
                  end_date,
                  parameter="00060",
                  cache_dir=None):
    """Get river discharge data from the USGS REST web service.

    See `U.S. Geological Survey Water Services
    <https://waterservices.usgs.gov/>`_ (USGS)

    Parameters
    ----------
    station_id : str
        The station id to get
    start_date : str
        String for start date in the format: 'YYYY-MM-dd', e.g. '1980-01-01'
    end_date : str
        String for end date in the format: 'YYYY-MM-dd', e.g. '2018-12-31'
    parameter : str
        The parameter code to get, e.g. '00060' (discharge, cubic feet per second)
    cache_dir : str
        Directory where files retrieved from the web service are cached.
        If set to None, the USGS_DATA_HOME environment variable is used as the cache directory.

    Examples
    --------
    >>> from ewatercycle.observation.usgs import get_usgs_data
    >>> data = get_usgs_data('03109500', '2000-01-01', '2000-12-31', cache_dir='.')
    >>> data
        <xarray.Dataset>
        Dimensions:     (time: 8032)
        Coordinates:
          * time        (time) datetime64[ns] 2000-01-04T05:00:00 ... 2000-12-23T04:00:00
        Data variables:
            streamflow  (time) float32 8.296758 10.420501 ... 10.647034 11.694747
        Attributes:
            title:      USGS Data from streamflow data
            station:    Little Beaver Creek near East Liverpool OH
            stationid:  03109500
            location:   (40.6758974, -80.5406244)
    """  # noqa: E501
    if cache_dir is None:
        cache_dir = os.environ["USGS_DATA_HOME"]

    # Check if we have the netcdf data
    netcdf = os.path.join(
        cache_dir,
        "USGS_" + station_id + "_" + parameter + "_" + start_date + "_" +
        end_date + ".nc",
    )
    if os.path.exists(netcdf):
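        # Reuse the cached netCDF instead of querying the web service again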
        return xr.open_dataset(netcdf)

    # Download the data if needed
    out = os.path.join(
        cache_dir,
        "USGS_" + station_id + "_" + parameter + "_" + start_date + "_" +
        end_date + ".wml",
    )
    if not os.path.exists(out):
        collector = UsgsRest()
        collector.filter(
            start=datetime.strptime(start_date, "%Y-%m-%d"),
            end=datetime.strptime(end_date, "%Y-%m-%d"),
            variables=[parameter],
            features=[station_id],
        )
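        # raw() returns the service response (WaterML 1.1 XML) as a string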
        data = collector.raw()
        with open(out, "w") as file:
            file.write(data)
        collector.clear()
    else:
        with open(out, "r") as file:
            data = file.read()

    # Convert the raw data to an xarray
    data = WaterML11ToPaegan(data).feature

    # We expect only 1 station
    if len(data.elements) == 0:
        raise ValueError("Data does not contain any station data")
    else:
        station = data.elements[0]

        # Unit conversion from cubic feet to cubic meter per second
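        # (1 cubic meter is approximately 35.315 cubic feet)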
        values = np.array(
            [
                float(point.members[0]["value"]) / 35.315
                for point in station.elements
            ],
            dtype=np.float32,
        )
        times = [point.time for point in station.elements]

        attrs = {
            "units": "cubic meters per second",
        }

        # Create the xarray dataset
        ds = xr.Dataset({"streamflow": (["time"], values, attrs)},
                        coords={"time": times})

        # Set some nice attributes
        ds.attrs["title"] = "USGS Data from streamflow data"
        ds.attrs["station"] = station.name
        ds.attrs["stationid"] = station.get_uid()
        ds.attrs["location"] = (station.location.y, station.location.x)

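        # Cache the converted dataset as netCDF for future calls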
        ds.to_netcdf(netcdf)

        return ds
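For reference, a minimal usage sketch of the function above that relies on the USGS_DATA_HOME fallback instead of passing cache_dir; the cache path is a placeholder, and the station id and dates are the ones from the docstring example.

import os

os.environ["USGS_DATA_HOME"] = "/tmp/usgs_cache"  # placeholder cache location
data = get_usgs_data("03109500", "2000-01-01", "2000-12-31")
print(data.attrs["station"], data.streamflow.mean().values)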