def hfd_mean(da: DataArray, datetime_coord: str = None) -> float:
    """Calculate mean half-flow duration.

    Mean half-flow date: the step at which the cumulative discharge since October 1st
    first exceeds half of the annual discharge [#]_.

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified.

    Returns
    -------
    float
        Mean half-flow duration

    References
    ----------
    .. [#] Court, A.: Measures of streamflow timing. Journal of Geophysical Research (1896-1977), 1962, 67, 4335--4339,
        doi:10.1029/JZ067i011p04335
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # first and last timestamps of the record as plain datetime objects
    first_date = da.coords[datetime_coord][0].values.astype('datetime64[s]').astype(datetime)
    last_date = da.coords[datetime_coord][-1].values.astype('datetime64[s]').astype(datetime)

    # hydrological years start at the first October 1st covered by the data period
    if first_date > datetime(first_date.year, 10, 1):
        season_start = datetime(first_date.year + 1, 10, 1)
    else:
        season_start = datetime(first_date.year, 10, 1)
    season_end = season_start + relativedelta(years=1) - relativedelta(seconds=1)

    half_flow_steps = []
    while season_end < last_date:
        year_da = da.sel({datetime_coord: slice(season_start, season_end)})

        # cumulative discharge within this hydrological year
        cum_flow = year_da.cumsum(skipna=True)

        # indices of all steps whose cumulative discharge exceeds half the annual sum
        half_total = year_da.sum(skipna=True) / 2
        above_half = np.where(~np.isnan(cum_flow.where(cum_flow > half_total).values))[0]

        # years entirely without discharge produce no crossing and are skipped
        if len(above_half) > 0:
            half_flow_steps.append(above_half[0])

        # advance the window by one hydrological year
        season_start += relativedelta(years=1)
        season_end += relativedelta(years=1)

    return np.mean(half_flow_steps)
def runoff_ratio(da: DataArray, prcp: DataArray, datetime_coord: str = None) -> float:
    """Calculate runoff ratio.

    Runoff ratio (ratio of mean discharge to mean precipitation) [#]_ (Eq. 2).

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    prcp : DataArray
        Array of precipitation values.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified.

    Returns
    -------
    float
        Runoff ratio.

    References
    ----------
    .. [#] Sawicz, K., Wagener, T., Sivapalan, M., Troch, P. A., and Carrillo, G.: Catchment classification: empirical
        analysis of hydrologic similarity based on catchment function in the eastern USA. Hydrology and Earth System
        Sciences, 2011, 15, 2895--2911, doi:10.5194/hess-15-2895-2011
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # rename precip coordinate name (to avoid problems with 'index' or 'date')
    old_coord_name = list(prcp.coords.keys())[0]
    prcp = prcp.rename({old_coord_name: datetime_coord})

    # slice prcp to the same time window as the discharge
    discharge_window = slice(da.coords[datetime_coord][0], da.coords[datetime_coord][-1])
    prcp = prcp.sel({datetime_coord: discharge_window})

    # runoff ratio: mean discharge over mean precipitation
    return float(da.mean() / prcp.mean())
def calculate_all_signatures(da: DataArray, prcp: DataArray, datetime_coord: str = None) -> Dict[str, float]:
    """Calculate all signatures with default values.

    Parameters
    ----------
    da : DataArray
        Array of discharge values for which the signatures will be calculated.
    prcp : DataArray
        Array of precipitation values.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified.

    Returns
    -------
    Dict[str, float]
        Dictionary with signature names as keys and signature values as values.
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    results = {
        "high_q_freq": high_q_freq(da, datetime_coord=datetime_coord),
        "high_q_dur": high_q_dur(da),
        "low_q_freq": low_q_freq(da, datetime_coord=datetime_coord),
        "low_q_dur": low_q_dur(da),
        "zero_q_freq": zero_q_freq(da),
        "q95": q95(da),
        "q5": q5(da),
        "q_mean": q_mean(da),
        "hfd_mean": hfd_mean(da, datetime_coord=datetime_coord),
        # pass datetime_coord explicitly so the (possibly inferred) coordinate is reused
        # instead of being re-inferred inside baseflow_index; baseflow_index returns a
        # (index, baseflow-array) tuple, of which only the scalar index is kept here
        "baseflow_index": baseflow_index(da, datetime_coord=datetime_coord)[0],
        "slope_fdc": slope_fdc(da),
        "stream_elas": stream_elas(da, prcp, datetime_coord=datetime_coord),
        "runoff_ratio": runoff_ratio(da, prcp, datetime_coord=datetime_coord)
    }
    return results
def calculate_signatures(da: DataArray,
                         signatures: List[str],
                         datetime_coord: str = None,
                         prcp: DataArray = None) -> Dict[str, float]:
    """Calculate the specified signatures with default values.

    Parameters
    ----------
    da : DataArray
        Array of discharge values for which the signatures will be calculated.
    signatures : List[str]
        List of names of the signatures to calculate.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified.
    prcp : DataArray, optional
        Array of precipitation values. Required for signatures 'runoff_ratio' and 'streamflow_elas'.

    Returns
    -------
    Dict[str, float]
        Dictionary with signature names as keys and signature values as values.

    Raises
    ------
    ValueError
        If a passed signature name does not exist.
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # map signature names to zero-argument callables; the lambdas keep evaluation lazy,
    # so only the signatures that were actually requested are computed.
    calculators = {
        "high_q_freq": lambda: high_q_freq(da, datetime_coord=datetime_coord),
        "high_q_dur": lambda: high_q_dur(da),
        "low_q_freq": lambda: low_q_freq(da, datetime_coord=datetime_coord),
        "low_q_dur": lambda: low_q_dur(da),
        "zero_q_freq": lambda: zero_q_freq(da),
        "q95": lambda: q95(da),
        "q5": lambda: q5(da),
        "q_mean": lambda: q_mean(da),
        "hfd_mean": lambda: hfd_mean(da, datetime_coord=datetime_coord),
        "baseflow_index": lambda: baseflow_index(da, datetime_coord=datetime_coord)[0],
        "slope_fdc": lambda: slope_fdc(da),
        "runoff_ratio": lambda: runoff_ratio(da, prcp, datetime_coord=datetime_coord),
        "stream_elas": lambda: stream_elas(da, prcp, datetime_coord=datetime_coord),
    }

    values = {}
    for signature in signatures:
        try:
            calculate = calculators[signature]
        except KeyError:
            # BUG FIX: the original code constructed the ValueError but never raised it,
            # so unknown signature names were silently ignored.
            raise ValueError(f"Unknown signatures {signature}") from None
        values[signature] = calculate()
    return values
def stream_elas(da: DataArray, prcp: DataArray, datetime_coord: str = None) -> float:
    """Calculate stream elasticity.

    Streamflow precipitation elasticity (sensitivity of streamflow to changes in precipitation at
    the annual time scale) [#]_.

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    prcp : DataArray
        Array of precipitation values.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified.

    Returns
    -------
    float
        Stream elasticity.

    References
    ----------
    .. [#] Sankarasubramanian, A., Vogel, R. M., and Limbrunner, J. F.: Climate elasticity of streamflow in the
        United States. Water Resources Research, 2001, 37, 1771--1781, doi:10.1029/2000WR900330
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # rename precip coordinate name (to avoid problems with 'index' or 'date')
    prcp = prcp.rename({list(prcp.coords.keys())[0]: datetime_coord})

    # slice prcp to the same time window as the discharge
    prcp = prcp.sel({
        datetime_coord: slice(da.coords[datetime_coord][0], da.coords[datetime_coord][-1])
    })

    # determine the date of the first October 1st in the data period
    first_date = da.coords[datetime_coord][0].values.astype('datetime64[s]').astype(datetime)
    last_date = da.coords[datetime_coord][-1].values.astype('datetime64[s]').astype(datetime)
    if first_date > datetime.strptime(f'{first_date.year}-10-01', '%Y-%m-%d'):
        start_date = datetime.strptime(f'{first_date.year + 1}-10-01', '%Y-%m-%d')
    else:
        start_date = datetime.strptime(f'{first_date.year}-10-01', '%Y-%m-%d')
    end_date = start_date + relativedelta(years=1) - relativedelta(seconds=1)

    # mask only valid time steps (only discharge has missing values).
    # NOTE(review): this boolean-masks BOTH arrays by the discharge validity mask, so the
    # per-year slices below operate on the filtered series; assumes da and prcp are
    # aligned on the same time axis after the slice above — confirm against callers.
    idx = (da >= 0) & (~da.isnull())
    da = da[idx]
    prcp = prcp[idx]

    # calculate long-term means (over the valid steps only)
    q_mean_total = da.mean()
    p_mean_total = prcp.mean()

    # one elasticity value per full hydrological year (Oct 1 - Sep 30); the median of
    # these annual values is returned
    values = []
    while end_date < last_date:
        q = da.sel({datetime_coord: slice(start_date, end_date)})
        p = prcp.sel({datetime_coord: slice(start_date, end_date)})
        # relative change in annual discharge divided by relative change in annual precipitation
        val = (q.mean() - q_mean_total) / (p.mean() - p_mean_total) * (p_mean_total / q_mean_total)
        values.append(val)
        start_date += relativedelta(years=1)
        end_date += relativedelta(years=1)

    return np.median([float(v) for v in values])
def baseflow_index(da: DataArray,
                   alpha: float = 0.98,
                   warmup: int = 30,
                   n_passes: int = None,
                   datetime_coord: str = None) -> Tuple[float, DataArray]:
    """Calculate baseflow index.

    Ratio of mean baseflow to mean discharge [#]_. If `da` contains NaN values, the baseflow is
    calculated for each consecutive segment of more than `warmup` non-NaN values.

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    alpha : float, optional
        alpha filter parameter.
    warmup : int, optional
        Number of warmup steps.
    n_passes : int, optional
        Number of passes (alternating forward and backward) to perform. Should be an odd number. If None, will use
        3 for daily and 9 for hourly data and fail for all other input frequencies.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified. Used to infer
        the frequency if `n_passes` is None.

    Returns
    -------
    Tuple[float, DataArray]
        Baseflow index and baseflow array. The baseflow array contains NaNs wherever no baseflow was calculated
        due to NaNs in `da`.

    Raises
    ------
    ValueError
        If `da` has a frequency other than daily or hourly and `n_passes` is None.

    References
    ----------
    .. [#] Ladson, T. R., Brown, R., Neal, B., and Nathan, R.: A Standard Approach to Baseflow Separation Using The
        Lyne and Hollick Filter. Australasian Journal of Water Resources, Taylor & Francis, 2013, 17, 25--34,
        doi:10.7158/13241583.2013.11465417
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # derive the number of filter passes from the input frequency if not specified
    if n_passes is None:
        freq = utils.infer_frequency(da[datetime_coord].values)
        if freq == '1D':
            n_passes = 3
        elif freq == '1H':
            n_passes = 9
        else:
            raise ValueError('For frequencies other than daily or hourly, n_passes must be specified.')
    if n_passes % 2 != 1:
        # BUG FIX: this guard fires when n_passes is EVEN, but the original message said
        # "should be an even number", contradicting both the check and the docstring.
        # An even pass count ends on a backward pass, leaving the baseflow reversed.
        warnings.warn('n_passes should be an odd number. The returned baseflow will be reversed.')

    # call jit compiled function to calculate baseflow
    bf_index, baseflow = _baseflow_index_jit(da.values, alpha, warmup, n_passes)

    # parse baseflow as a DataArray using the coordinates of the streamflow array
    da_baseflow = da.copy()
    da_baseflow.data = baseflow

    return bf_index, da_baseflow
def low_q_freq(da: DataArray, datetime_coord: str = None, threshold: float = 0.2) -> float:
    """Calculate Low-flow frequency.

    Frequency of low-flow events (<`threshold` times the mean flow) [#]_, [#]_ (Table 2).
    (Note: the threshold is relative to the *mean* flow — see ``da.mean`` below — not the median.)

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified.
    threshold : float, optional
        Low-flow threshold. Values below ``threshold * mean`` are considered low flows.

    Returns
    -------
    float
        Low-flow frequency

    References
    ----------
    .. [#] Olden, J. D. and Poff, N. L.: Redundancy and the choice of hydrologic indices for characterizing streamflow
        regimes. River Research and Applications, 2003, 19, 101--121, doi:10.1002/rra.700
    .. [#] Westerberg, I. K. and McMillan, H. K.: Uncertainty in hydrological signatures.
        Hydrology and Earth System Sciences, 2015, 19, 3951--3968, doi:10.5194/hess-19-3951-2015
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # determine the date of the first January 1st in the data period
    first_date = da.coords[datetime_coord][0].values.astype('datetime64[s]').astype(datetime)
    last_date = da.coords[datetime_coord][-1].values.astype('datetime64[s]').astype(datetime)
    if first_date == datetime.strptime(f'{first_date.year}-01-01', '%Y-%m-%d'):
        start_date = first_date
    else:
        start_date = datetime.strptime(f'{first_date.year + 1}-01-01', '%Y-%m-%d')

    # end date of the first full year period
    end_date = start_date + relativedelta(years=1) - relativedelta(seconds=1)

    # determine the mean flow over the entire period
    mean_flow = da.mean(skipna=True)

    # count low-flow steps per full calendar year; the mean count over all years is returned
    lqfs = []
    while end_date < last_date:
        data = da.sel({datetime_coord: slice(start_date, end_date)})

        # number of steps with discharge lower than threshold * mean in a one year period
        n_steps = (data < (threshold * mean_flow)).sum()

        lqfs.append(float(n_steps))
        start_date += relativedelta(years=1)
        end_date += relativedelta(years=1)

    return np.mean(lqfs)
def high_q_freq(da: DataArray, datetime_coord: str = None, threshold: float = 9.) -> float:
    """Calculate high-flow frequency.

    Frequency of high-flow events (>`threshold` times the median flow) [#]_, [#]_ (Table 2).

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified.
    threshold : float, optional
        High-flow threshold. Values larger than ``threshold * median`` are considered high flows.

    Returns
    -------
    float
        High-flow frequency

    References
    ----------
    .. [#] Clausen, B. and Biggs, B. J. F.: Flow variables for ecological studies in temperate streams: groupings based
        on covariance. Journal of Hydrology, 2000, 237, 184--197, doi:10.1016/S0022-1694(00)00306-1
    .. [#] Westerberg, I. K. and McMillan, H. K.: Uncertainty in hydrological signatures.
        Hydrology and Earth System Sciences, 2015, 19, 3951--3968, doi:10.5194/hess-19-3951-2015
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # first and last timestamps of the record as plain datetime objects
    first_date = da.coords[datetime_coord][0].values.astype('datetime64[s]').astype(datetime)
    last_date = da.coords[datetime_coord][-1].values.astype('datetime64[s]').astype(datetime)

    # start counting at the first January 1st covered by the data period
    if first_date == datetime(first_date.year, 1, 1):
        start_date = first_date
    else:
        start_date = datetime(first_date.year + 1, 1, 1)

    # end date of the first full calendar year
    end_date = start_date + relativedelta(years=1) - relativedelta(seconds=1)

    # the high-flow threshold is relative to the median flow of the entire period
    median_flow = da.median(skipna=True)

    counts_per_year = []
    while end_date < last_date:
        year_da = da.sel({datetime_coord: slice(start_date, end_date)})

        # number of steps exceeding threshold * median within this year
        counts_per_year.append(float((year_da > (threshold * median_flow)).sum()))

        start_date += relativedelta(years=1)
        end_date += relativedelta(years=1)

    return np.mean(counts_per_year)
def mean_peak_timing(obs: DataArray, sim: DataArray, window: int = None, resolution: str = '1D', datetime_coord: str = None) -> float: """Mean difference in peak flow timing. Uses scipy.find_peaks to find peaks in the observed time series. Starting with all observed peaks, those with a prominence of less than the standard deviation of the observed time series are discarded. Next, the lowest peaks are subsequently discarded until all remaining peaks have a distance of at least 100 steps. Finally, the corresponding peaks in the simulated time series are searched in a window of size `window` on either side of the observed peaks and the absolute time differences between observed and simulated peaks is calculated. The final metric is the mean absolute time difference across all peaks. For more details, see Appendix of [#]_ Parameters ---------- obs : DataArray Observed time series. sim : DataArray Simulated time series. window : int, optional Size of window to consider on each side of the observed peak for finding the simulated peak. That is, the total window length to find the peak in the simulations is :math:`2 * \\text{window} + 1` centered at the observed peak. The default depends on the temporal resolution, e.g. for a resolution of '1D', a window of 3 is used and for a resolution of '1H' the the window size is 12. resolution : str, optional Temporal resolution of the time series in pandas format, e.g. '1D' for daily and '1H' for hourly. datetime_coord : str, optional Name of datetime coordinate. Tried to infer automatically if not specified. Returns ------- float Mean peak time difference. References ---------- .. [#] Kratzert, F., Klotz, D., Hochreiter, S., and Nearing, G. S.: A note on leveraging synergy in multiple meteorological datasets with deep learning for rainfall-runoff modeling, Hydrol. Earth Syst. Sci. Discuss., https://doi.org/10.5194/hess-2020-221, in review, 2020. 
""" # verify inputs _validate_inputs(obs, sim) # get time series with only valid observations (scipy's find_peaks doesn't guarantee correctness with NaNs) obs, sim = _mask_valid(obs, sim) # heuristic to get indices of peaks and their corresponding height. peaks, _ = signal.find_peaks(obs.values, distance=100, prominence=np.std(obs.values)) # infer name of datetime index if datetime_coord is None: datetime_coord = utils.infer_datetime_coord(obs) if window is None: # infer a reasonable window size window = max(int(utils.get_frequency_factor('12H', resolution)), 3) # evaluate timing timing_errors = [] for idx in peaks: # skip peaks at the start and end of the sequence and peaks around missing observations # (NaNs that were removed in obs & sim would result in windows that span too much time). if (idx - window < 0) or (idx + window >= len(obs)) or (pd.date_range( obs[idx - window][datetime_coord].values, obs[idx + window][datetime_coord].values, freq=resolution).size != 2 * window + 1): continue # check if the value at idx is a peak (both neighbors must be smaller) if (sim[idx] > sim[idx - 1]) and (sim[idx] > sim[idx + 1]): peak_sim = sim[idx] else: # define peak around idx as the max value inside of the window values = sim[idx - window:idx + window + 1] peak_sim = values[values.argmax()] # get xarray object of qobs peak, for getting the date and calculating the datetime offset peak_obs = obs[idx] # calculate the time difference between the peaks delta = peak_obs.coords[datetime_coord] - peak_sim.coords[ datetime_coord] timing_error = np.abs(delta.values / pd.to_timedelta(resolution)) timing_errors.append(timing_error) return np.mean(timing_errors) if len(timing_errors) > 0 else np.nan