def dropout_rate_per_period(data, rule, window_start=None, window_end=None):
    """Compute, for each period, the fraction of expected samples missing.

    Parameters
    ----------
    data : pandas.DataFrame or Series or DatetimeIndex
    rule : pandas Offset string (or whatever the `rule` parameter in
        pd.Series.resample accepts)
    window_start, window_end : pd.Timestamp
        The start and end of the window of interest.  If this window is
        larger than the duration of `data` then gaps will be appended to
        the front / back as necessary.  If this window is shorter than
        the duration of `data`, data will be cropped.

    Returns
    -------
    pd.Series
        One value per period produced by resampling with `rule`.  Each
        value is ``1 - (samples seen in the period / samples expected
        per period)``.  Periods for which the resample yields NaN are
        counted as containing zero samples.

    Raises
    ------
    ValueError
        If fewer than one sample is expected per period, i.e. the period
        given by `rule` is shorter than the sample period of `data`.
    """
    # TODO: importing here (rather than at module level) might be a rather
    # nasty hack to work around a circular dependency.
    from nilmtk.preprocessing.electricity.single import reframe_index

    try:
        data = data.dropna()
    except AttributeError:
        # A DatetimeIndex has no `dropna()` method; use it unchanged.
        pass

    period_secs = get_sample_period(data)
    expected_count = secs_per_period_alias(rule) / period_secs
    if expected_count < 1.0:
        raise ValueError(
            'Date period specified by rule is shorter than sample period!')

    timestamps = reframe_index(_get_index(data), window_start, window_end)

    # A series of ones summed per period gives the number of samples
    # actually present in each period.
    # NOTE(review): `how=` is the legacy resample API -- confirm which
    # pandas versions must be supported before modernizing this call.
    ones = pd.Series(1, index=timestamps)
    observed_count = ones.resample(rule=rule, how='sum')
    observed_count = observed_count.fillna(0)

    return 1 - (observed_count / expected_count)
def get_gap_starts_and_gap_ends(data, max_sample_period,
                                window_start=None, window_end=None):
    """Locate the gaps in `data`: intervals longer than `max_sample_period`.

    Parameters
    ----------
    data : pandas.DataFrame or Series or DatetimeIndex
    max_sample_period : int or float
        Maximum allowed sample period in seconds.  This defines what
        counts as a 'gap'.
    window_start, window_end : pd.Timestamp
        The start and end of the window of interest.  If this window is
        larger than the duration of `data` then gaps will be appended to
        the front / back as necessary.  If this window is shorter than
        the duration of `data`, data will be cropped.

    Returns
    -------
    gap_starts, gap_ends : DatetimeIndex
        For every pair of consecutive index entries whose separation
        exceeds `max_sample_period`, `gap_starts` holds the earlier
        entry and `gap_ends` holds the later one.
    """
    # TODO: importing here (rather than at module level) might be a rather
    # nasty hack to work around a circular dependency.
    from nilmtk.preprocessing.electricity.single import reframe_index

    try:
        data = data.dropna()
    except AttributeError:
        # A DatetimeIndex has no `dropna()` method; use it unchanged.
        pass

    timestamps = reframe_index(_get_index(data), window_start, window_end)

    # Spacing between each pair of consecutive timestamps.
    raw_deltas = np.diff(timestamps.values)
    spacing_secs = timedelta64_to_secs(raw_deltas)
    is_gap = spacing_secs > max_sample_period

    # Element i of the mask describes the interval between timestamps
    # i and i + 1, so it selects from the index without its last entry
    # (interval starts) and without its first entry (interval ends).
    return timestamps[:-1][is_gap], timestamps[1:][is_gap]