Example #1
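The excerpts below omit their shared module-level imports. A plausible header, inferred from the names the functions use, is sketched here; the `utils` module (providing `infer_datetime_coord`, `infer_frequency`, and `get_frequency_factor`) and the private helpers `_baseflow_index_jit`, `_validate_inputs`, and `_mask_valid` are assumed to be defined elsewhere in the same package and are not shown.

import warnings
from datetime import datetime
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta
from scipy import signal
from xarray import DataArray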
def hfd_mean(da: DataArray, datetime_coord: str = None) -> float:
    """Calculate mean half-flow duration.

    Mean half-flow date (step on which the cumulative discharge since October 1st
    reaches half of the annual discharge) [#]_.

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Inferred automatically if not specified.

    Returns
    -------
    float
        Mean half-flow date (mean step index since October 1st).

    References
    ----------
    .. [#] Court, A.: Measures of streamflow timing. Journal of Geophysical Research (1896-1977), 1962, 67, 4335--4339,
        doi:10.1029/JZ067i011p04335
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # determine the date of the first October 1st in the data period
    first_date = da.coords[datetime_coord][0].values.astype(
        'datetime64[s]').astype(datetime)
    last_date = da.coords[datetime_coord][-1].values.astype(
        'datetime64[s]').astype(datetime)

    if first_date > datetime.strptime(f'{first_date.year}-10-01', '%Y-%m-%d'):
        start_date = datetime.strptime(f'{first_date.year + 1}-10-01',
                                       '%Y-%m-%d')
    else:
        start_date = datetime.strptime(f'{first_date.year}-10-01', '%Y-%m-%d')

    end_date = start_date + relativedelta(years=1) - relativedelta(seconds=1)

    doys = []
    while end_date < last_date:

        # compute cumulative sum for the selected period
        data = da.sel({datetime_coord: slice(start_date, end_date)})
        cs = data.cumsum(skipna=True)

        # find steps with more cumulative discharge than the half annual sum
        hf_steps = np.where(
            ~np.isnan(cs.where(cs > data.sum(skipna=True) / 2).values))[0]

        # ignore days without discharge
        if len(hf_steps) > 0:
            # store the first step in the result array
            doys.append(hf_steps[0])

        start_date += relativedelta(years=1)
        end_date += relativedelta(years=1)

    return np.mean(doys)
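A minimal usage sketch with synthetic data; the daily record and the coordinate name 'date' are assumptions made for illustration:

import numpy as np
import pandas as pd
import xarray as xr

# four calendar years of synthetic daily discharge, covering three full Oct-Sep water years
time = pd.date_range('2000-01-01', '2003-12-31', freq='D')
discharge = xr.DataArray(np.random.default_rng(0).gamma(2.0, 2.0, len(time)),
                         coords={'date': time}, dims='date')
print(hfd_mean(discharge, datetime_coord='date'))  # mean step index (since October 1st) of half-flow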
Example #2
def runoff_ratio(da: DataArray,
                 prcp: DataArray,
                 datetime_coord: str = None) -> float:
    """Calculate runoff ratio.

    Runoff ratio (ratio of mean discharge to mean precipitation) [#]_ (Eq. 2).

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    prcp : DataArray
        Array of precipitation values.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Inferred automatically if not specified.

    Returns
    -------
    float
        Runoff ratio.

    References
    ----------
    .. [#] Sawicz, K., Wagener, T., Sivapalan, M., Troch, P. A., and Carrillo, G.: Catchment classification: empirical
        analysis of hydrologic similarity based on catchment function in the eastern USA.
        Hydrology and Earth System Sciences, 2011, 15, 2895--2911, doi:10.5194/hess-15-2895-2011
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # rename the precipitation datetime coordinate (often 'index' or 'date') to match the discharge coordinate
    prcp = prcp.rename({list(prcp.coords.keys())[0]: datetime_coord})

    # slice prcp to the same time window as the discharge
    prcp = prcp.sel({
        datetime_coord:
        slice(da.coords[datetime_coord][0], da.coords[datetime_coord][-1])
    })

    # calculate runoff ratio
    value = da.mean() / prcp.mean()

    return float(value)
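A usage sketch with synthetic, aligned discharge and precipitation series (the data and the coordinate name 'date' are assumptions):

import numpy as np
import pandas as pd
import xarray as xr

rng = np.random.default_rng(0)
time = pd.date_range('2000-01-01', '2003-12-31', freq='D')
discharge = xr.DataArray(rng.gamma(2.0, 2.0, len(time)), coords={'date': time}, dims='date')
precipitation = xr.DataArray(rng.gamma(2.0, 3.0, len(time)), coords={'date': time}, dims='date')
print(runoff_ratio(discharge, precipitation, datetime_coord='date'))  # mean(Q) / mean(P)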
Example #3
def calculate_all_signatures(da: DataArray,
                             prcp: DataArray,
                             datetime_coord: str = None) -> Dict[str, float]:
    """Calculate all signatures with default values.

    Parameters
    ----------
    da : DataArray
        Array of discharge values for which the signatures will be calculated.
    prcp : DataArray
        Array of precipitation values.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Inferred automatically if not specified.

    Returns
    -------
    Dict[str, float]
        Dictionary with signature names as keys and signature values as values.
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    results = {
        "high_q_freq": high_q_freq(da, datetime_coord=datetime_coord),
        "high_q_dur": high_q_dur(da),
        "low_q_freq": low_q_freq(da, datetime_coord=datetime_coord),
        "low_q_dur": low_q_dur(da),
        "zero_q_freq": zero_q_freq(da),
        "q95": q95(da),
        "q5": q5(da),
        "q_mean": q_mean(da),
        "hfd_mean": hfd_mean(da, datetime_coord=datetime_coord),
        "baseflow_index": baseflow_index(da)[0],
        "slope_fdc": slope_fdc(da),
        "stream_elas": stream_elas(da, prcp, datetime_coord=datetime_coord),
        "runoff_ratio": runoff_ratio(da, prcp, datetime_coord=datetime_coord)
    }
    return results
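A brief usage sketch. Computing all signatures touches every helper in the module, so this assumes the full module (including the private `_baseflow_index_jit`) is importable, not just the excerpts shown here; `discharge` and `precipitation` are the synthetic arrays from the runoff_ratio sketch above.

signatures = calculate_all_signatures(discharge, precipitation, datetime_coord='date')
for name, value in sorted(signatures.items()):
    print(f'{name:>15s}: {value:.3f}')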
Example #4
def calculate_signatures(da: DataArray,
                         signatures: List[str],
                         datetime_coord: str = None,
                         prcp: DataArray = None) -> Dict[str, float]:
    """Calculate the specified signatures with default values.

    Parameters
    ----------
    da : DataArray
        Array of discharge values for which the signatures will be calculated.
    signatures : List[str]
        List of names of the signatures to calculate.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Inferred automatically if not specified.
    prcp : DataArray, optional
        Array of precipitation values. Required for the signatures 'runoff_ratio' and 'stream_elas'.

    Returns
    -------
    Dict[str, float]
        Dictionary with signature names as keys and signature values as values.

    Raises
    ------
    ValueError
        If a passed signature name does not exist.
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    values = {}
    for signature in signatures:
        if signature == "high_q_freq":
            values["high_q_freq"] = high_q_freq(da,
                                                datetime_coord=datetime_coord)
        elif signature == "high_q_dur":
            values["high_q_dur"] = high_q_dur(da)
        elif signature == "low_q_freq":
            values["low_q_freq"] = low_q_freq(da,
                                              datetime_coord=datetime_coord)
        elif signature == "low_q_dur":
            values["low_q_dur"] = low_q_dur(da)
        elif signature == "zero_q_freq":
            values["zero_q_freq"] = zero_q_freq(da)
        elif signature == "q95":
            values["q95"] = q95(da)
        elif signature == "q5":
            values["q5"] = q5(da)
        elif signature == "q_mean":
            values["q_mean"] = q_mean(da)
        elif signature == "hfd_mean":
            values["hfd_mean"] = hfd_mean(da, datetime_coord=datetime_coord)
        elif signature == "baseflow_index":
            values["baseflow_index"] = baseflow_index(
                da, datetime_coord=datetime_coord)[0]
        elif signature == "slope_fdc":
            values["slope_fdc"] = slope_fdc(da)
        elif signature == "runoff_ratio":
            values["runoff_ratio"] = runoff_ratio(
                da, prcp, datetime_coord=datetime_coord)
        elif signature == "stream_elas":
            values["stream_elas"] = stream_elas(da,
                                                prcp,
                                                datetime_coord=datetime_coord)
        else:
            raise ValueError(f"Unknown signature {signature}")
    return values
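Unlike calculate_all_signatures, a subset of signatures can be requested, which also limits the dependencies that must be available. The sketch below (reusing the synthetic arrays from the runoff_ratio example) requests only two signatures, neither of which needs the private baseflow helpers:

values = calculate_signatures(discharge, ['q_mean', 'runoff_ratio'],
                              datetime_coord='date', prcp=precipitation)
print(values)  # {'q_mean': ..., 'runoff_ratio': ...}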
Example #5
def stream_elas(da: DataArray,
                prcp: DataArray,
                datetime_coord: str = None) -> float:
    """Calculate stream elasticity.

    Streamflow precipitation elasticity (sensitivity of streamflow to changes in precipitation at
    the annual time scale) [#]_.

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    prcp : DataArray
        Array of precipitation values.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Inferred automatically if not specified.

    Returns
    -------
    float
        Stream elasticity.

    References
    ----------
    .. [#] Sankarasubramanian, A., Vogel, R. M., and Limbrunner, J. F.: Climate elasticity of streamflow in the
        United States. Water Resources Research, 2001, 37, 1771--1781, doi:10.1029/2000WR900330
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # rename the precipitation datetime coordinate (often 'index' or 'date') to match the discharge coordinate
    prcp = prcp.rename({list(prcp.coords.keys())[0]: datetime_coord})

    # slice prcp to the same time window as the discharge
    prcp = prcp.sel({
        datetime_coord:
        slice(da.coords[datetime_coord][0], da.coords[datetime_coord][-1])
    })

    # determine the date of the first October 1st in the data period
    first_date = da.coords[datetime_coord][0].values.astype(
        'datetime64[s]').astype(datetime)
    last_date = da.coords[datetime_coord][-1].values.astype(
        'datetime64[s]').astype(datetime)

    if first_date > datetime.strptime(f'{first_date.year}-10-01', '%Y-%m-%d'):
        start_date = datetime.strptime(f'{first_date.year + 1}-10-01',
                                       '%Y-%m-%d')
    else:
        start_date = datetime.strptime(f'{first_date.year}-10-01', '%Y-%m-%d')

    end_date = start_date + relativedelta(years=1) - relativedelta(seconds=1)

    # mask only valid time steps (only discharge has missing values)
    idx = (da >= 0) & (~da.isnull())
    da = da[idx]
    prcp = prcp[idx]

    # calculate long-term means
    q_mean_total = da.mean()
    p_mean_total = prcp.mean()

    values = []
    while end_date < last_date:
        q = da.sel({datetime_coord: slice(start_date, end_date)})
        p = prcp.sel({datetime_coord: slice(start_date, end_date)})

        val = (q.mean() - q_mean_total) / (p.mean() - p_mean_total) * (
            p_mean_total / q_mean_total)
        values.append(val)

        start_date += relativedelta(years=1)
        end_date += relativedelta(years=1)

    return np.median([float(v) for v in values])
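For each water year y, the loop computes ((mean(Q_y) - mean(Q)) / (mean(P_y) - mean(P))) * (mean(P) / mean(Q)), and the signature is the median of these yearly values. A usage sketch, reusing the synthetic arrays from the runoff_ratio example above:

print(stream_elas(discharge, precipitation, datetime_coord='date'))  # median yearly elasticity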
Example #6
def baseflow_index(da: DataArray,
                   alpha: float = 0.98,
                   warmup: int = 30,
                   n_passes: int = None,
                   datetime_coord: str = None) -> Tuple[float, DataArray]:
    """Calculate baseflow index.

    Ratio of mean baseflow to mean discharge [#]_. If `da` contains NaN values, the baseflow is calculated for each
    consecutive segment of more than `warmup` non-NaN values.

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    alpha : float, optional
        Parameter alpha of the Lyne-Hollick filter.
    warmup : int, optional
        Number of warmup steps.
    n_passes : int, optional
        Number of passes (alternating forward and backward) to perform. Should be an odd number. If None, will use
        3 for daily and 9 for hourly data and fail for all other input frequencies.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Inferred automatically if not specified. Used to infer the
        frequency if `n_passes` is None.

    Returns
    -------
    Tuple[float, DataArray]
        Baseflow index and baseflow array. The baseflow array contains NaNs wherever no baseflow was
        calculated due to NaNs in `da`.

    Raises
    ------
    ValueError
        If `da` has a frequency other than daily or hourly and `n_passes` is None.

    References
    ----------
    .. [#] Ladson, T. R., Brown, R., Neal, B., and Nathan, R.: A Standard Approach to Baseflow Separation Using The
        Lyne and Hollick Filter. Australasian Journal of Water Resources, Taylor & Francis, 2013, 17, 25--34,
        doi:10.7158/13241583.2013.11465417
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    if n_passes is None:
        freq = utils.infer_frequency(da[datetime_coord].values)
        if freq == '1D':
            n_passes = 3
        elif freq == '1H':
            n_passes = 9
        else:
            raise ValueError(
                'For frequencies other than daily or hourly, n_passes must be specified.')
    if n_passes % 2 != 1:
        warnings.warn(
            'n_passes should be an odd number. The returned baseflow will be reversed.'
        )

    # call jit compiled function to calculate baseflow
    bf_index, baseflow = _baseflow_index_jit(da.values, alpha, warmup,
                                             n_passes)

    # parse baseflow as a DataArray using the coordinates of the streamflow array
    da_baseflow = da.copy()
    da_baseflow.data = baseflow

    return bf_index, da_baseflow
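A usage sketch; passing n_passes explicitly sidesteps utils.infer_frequency, but the private _baseflow_index_jit from the surrounding module is still required, so this will not run from the excerpt alone. `discharge` is the synthetic array from the runoff_ratio example.

bfi, baseflow = baseflow_index(discharge, alpha=0.98, n_passes=3, datetime_coord='date')
print(f'BFI: {bfi:.3f}')                 # ratio of mean baseflow to mean discharge
print(baseflow.isel(date=slice(0, 5)))   # baseflow aligned with the input coordinates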
Example #7
def low_q_freq(da: DataArray,
               datetime_coord: str = None,
               threshold: float = 0.2) -> float:
    """Calculate Low-flow frequency.

    Frequency of low-flow events (<`threshold` times the mean flow) [#]_, [#]_ (Table 2).

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Inferred automatically if not specified.
    threshold : float, optional
        Low-flow threshold. Values below ``threshold * mean`` are considered low flows.

    Returns
    -------
    float
        Low-flow frequency

    References
    ----------
    .. [#] Olden, J. D. and Poff, N. L.: Redundancy and the choice of hydrologic indices for characterizing streamflow
        regimes. River Research and Applications, 2003, 19, 101--121, doi:10.1002/rra.700
    .. [#] Westerberg, I. K. and McMillan, H. K.: Uncertainty in hydrological signatures.
        Hydrology and Earth System Sciences, 2015, 19, 3951--3968, doi:10.5194/hess-19-3951-2015
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # determine the date of the first January 1st in the data period
    first_date = da.coords[datetime_coord][0].values.astype(
        'datetime64[s]').astype(datetime)
    last_date = da.coords[datetime_coord][-1].values.astype(
        'datetime64[s]').astype(datetime)

    if first_date == datetime.strptime(f'{first_date.year}-01-01', '%Y-%m-%d'):
        start_date = first_date
    else:
        start_date = datetime.strptime(f'{first_date.year + 1}-01-01',
                                       '%Y-%m-%d')

    # end date of the first full year period
    end_date = start_date + relativedelta(years=1) - relativedelta(seconds=1)

    # determine the mean flow over the entire period
    mean_flow = da.mean(skipna=True)

    lqfs = []
    while end_date < last_date:

        data = da.sel({datetime_coord: slice(start_date, end_date)})

        # number of steps with discharge lower than threshold * mean in a one-year period
        n_steps = (data < (threshold * mean_flow)).sum()

        lqfs.append(float(n_steps))

        start_date += relativedelta(years=1)
        end_date += relativedelta(years=1)

    return np.mean(lqfs)
Example #8
def high_q_freq(da: DataArray,
                datetime_coord: str = None,
                threshold: float = 9.) -> float:
    """Calculate high-flow frequency.

    Frequency of high-flow events (>`threshold` times the median flow) [#]_, [#]_ (Table 2).

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Inferred automatically if not specified.
    threshold : float, optional
        High-flow threshold. Values larger than ``threshold * median`` are considered high flows.

    Returns
    -------
    float
        High-flow frequency

    References
    ----------
    .. [#] Clausen, B. and Biggs, B. J. F.: Flow variables for ecological studies in temperate streams: groupings based
        on covariance. Journal of Hydrology, 2000, 237, 184--197, doi:10.1016/S0022-1694(00)00306-1
    .. [#] Westerberg, I. K. and McMillan, H. K.: Uncertainty in hydrological signatures.
        Hydrology and Earth System Sciences, 2015, 19, 3951--3968, doi:10.5194/hess-19-3951-2015
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # determine the date of the first January 1st in the data period
    first_date = da.coords[datetime_coord][0].values.astype(
        'datetime64[s]').astype(datetime)
    last_date = da.coords[datetime_coord][-1].values.astype(
        'datetime64[s]').astype(datetime)

    if first_date == datetime.strptime(f'{first_date.year}-01-01', '%Y-%m-%d'):
        start_date = first_date
    else:
        start_date = datetime.strptime(f'{first_date.year + 1}-01-01',
                                       '%Y-%m-%d')

    # end date of the first full year period
    end_date = start_date + relativedelta(years=1) - relativedelta(seconds=1)

    # determine the median flow over the entire period
    median_flow = da.median(skipna=True)

    hqfs = []
    while end_date < last_date:

        data = da.sel({datetime_coord: slice(start_date, end_date)})

        # number of steps with discharge higher than threshold * median in a one-year period
        n_steps = (data > (threshold * median_flow)).sum()

        hqfs.append(float(n_steps))

        start_date += relativedelta(years=1)
        end_date += relativedelta(years=1)

    return np.mean(hqfs)
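The two frequency signatures differ only in their reference statistic (mean flow for low flows, median flow for high flows) and the direction of the threshold. A usage sketch on the synthetic `discharge` array from the runoff_ratio example:

print(high_q_freq(discharge, datetime_coord='date'))                # steps/year with Q > 9 * median(Q)
print(low_q_freq(discharge, datetime_coord='date', threshold=0.2))  # steps/year with Q < 0.2 * mean(Q)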
Example #9
def mean_peak_timing(obs: DataArray,
                     sim: DataArray,
                     window: int = None,
                     resolution: str = '1D',
                     datetime_coord: str = None) -> float:
    """Mean difference in peak flow timing.
    
    Uses scipy.find_peaks to find peaks in the observed time series. Starting with all observed peaks, those with a
    prominence of less than the standard deviation of the observed time series are discarded. Next, the lowest peaks
    are successively discarded until all remaining peaks are at least 100 steps apart. Finally, the
    corresponding peaks in the simulated time series are searched for within a window of size `window` on either side
    of each observed peak, and the absolute time differences between observed and simulated peaks are calculated.
    The final metric is the mean absolute time difference across all peaks. For more details, see the Appendix of [#]_.
    
    Parameters
    ----------
    obs : DataArray
        Observed time series.
    sim : DataArray
        Simulated time series.
    window : int, optional
        Size of window to consider on each side of the observed peak for finding the simulated peak. That is, the total
        window length to find the peak in the simulations is :math:`2 * \\text{window} + 1` centered at the observed
        peak. The default depends on the temporal resolution: for a resolution of '1D' a window of 3 is used, and
        for a resolution of '1H' the window size is 12.
    resolution : str, optional
        Temporal resolution of the time series in pandas format, e.g. '1D' for daily and '1H' for hourly.
    datetime_coord : str, optional
        Name of the datetime coordinate. Inferred automatically if not specified.

    Returns
    -------
    float
        Mean peak time difference.

    References
    ----------
    .. [#] Kratzert, F., Klotz, D., Hochreiter, S., and Nearing, G. S.: A note on leveraging synergy in multiple
        meteorological datasets with deep learning for rainfall-runoff modeling, Hydrol. Earth Syst. Sci. Discuss.,
        doi:10.5194/hess-2020-221, in review, 2020.
    """
    # verify inputs
    _validate_inputs(obs, sim)

    # get time series with only valid observations (scipy's find_peaks doesn't guarantee correctness with NaNs)
    obs, sim = _mask_valid(obs, sim)

    # heuristic to get indices of peaks and their corresponding height.
    peaks, _ = signal.find_peaks(obs.values,
                                 distance=100,
                                 prominence=np.std(obs.values))

    # infer name of datetime index
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(obs)

    if window is None:
        # infer a reasonable window size
        window = max(int(utils.get_frequency_factor('12H', resolution)), 3)

    # evaluate timing
    timing_errors = []
    for idx in peaks:
        # skip peaks at the start and end of the sequence and peaks around missing observations
        # (NaNs that were removed in obs & sim would result in windows that span too much time).
        if (idx - window < 0) or (idx + window >= len(obs)) or (pd.date_range(
                obs[idx - window][datetime_coord].values,
                obs[idx + window][datetime_coord].values,
                freq=resolution).size != 2 * window + 1):
            continue

        # check if the simulated value at idx is itself a peak (both neighbors must be smaller)
        if (sim[idx] > sim[idx - 1]) and (sim[idx] > sim[idx + 1]):
            peak_sim = sim[idx]
        else:
            # define peak around idx as the max value inside of the window
            values = sim[idx - window:idx + window + 1]
            peak_sim = values[values.argmax()]

        # get xarray object of qobs peak, for getting the date and calculating the datetime offset
        peak_obs = obs[idx]

        # calculate the time difference between the peaks
        delta = peak_obs.coords[datetime_coord] - peak_sim.coords[
            datetime_coord]

        timing_error = np.abs(delta.values / pd.to_timedelta(resolution))

        timing_errors.append(timing_error)

    return np.mean(timing_errors) if len(timing_errors) > 0 else np.nan
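A usage sketch on synthetic data in which every simulated peak lags the observed one by exactly one day, so the expected result is 1.0. The private helpers _validate_inputs and _mask_valid are assumed to come from the surrounding metrics module.

import numpy as np
import pandas as pd
import xarray as xr

# one sharp observed peak every 200 days; the simulation is the observation shifted by one step
time = pd.date_range('2000-01-01', periods=1000, freq='D')
obs = xr.DataArray(np.zeros(len(time)), coords={'date': time}, dims='date')
obs[::200] = 10.0
sim = obs.roll(date=1, roll_coords=False)
print(mean_peak_timing(obs, sim, window=3, resolution='1D', datetime_coord='date'))  # -> 1.0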