Example #1
0
def dropout_rate_per_period(data, rule, window_start=None, window_end=None):
    """Calculate the fraction of expected samples missing in each period.

    Parameters
    ----------
    data : pandas.DataFrame or Series or DatetimeIndex

    rule : pandas Offset string (or whatever the `rule` parameter in
        pd.Series.resample accepts)

    window_start, window_end : pd.Timestamp
        The start and end of the window of interest.  If this window
        is larger than the duration of `data` then gaps will be
        appended to the front / back as necessary.  If this window
        is shorter than the duration of `data` data will be cropped.

    Returns
    -------
    pd.Series
        Index is a regular DatetimeIndex with freq=rule and
        timezone=data.index.tzinfo
        Values are the dropout rate for that time period, i.e.
        ``1 - (n_samples / n_expected_samples)``.  0 means the period
        holds exactly the expected number of samples and 1 means the
        period holds no samples at all.  Negative values are possible
        if a period holds more samples than expected.

    Raises
    ------
    ValueError
        If the period described by `rule` is shorter than the sample
        period of `data`.
    """
    # TODO: this might be a rather nasty hack to fix the circular dependency
    from nilmtk.preprocessing.electricity.single import reframe_index

    try:
        data = data.dropna()
    except AttributeError:
        # if data is DatetimeIndex then it has no `dropna()` method
        pass

    sample_period_secs = get_sample_period(data)
    n_expected_samples_per_period = (secs_per_period_alias(rule) /
                                     sample_period_secs)
    if n_expected_samples_per_period < 1.0:
        raise ValueError('Date period specified by rule is shorter than'
                         ' sample period!')

    index = _get_index(data)
    index = reframe_index(index, window_start, window_end)

    # One "tick" per surviving sample; summing the ticks per period
    # counts how many samples actually arrived in that period.
    ticks = pd.Series(1, index=index)
    try:
        # Legacy pandas (< 0.18) only supports aggregation via `how`.
        n_samples_per_period = ticks.resample(rule=rule, how='sum')
    except TypeError:
        # The `how` keyword was removed in pandas 1.0; newer versions
        # return a Resampler object that must be aggregated explicitly.
        n_samples_per_period = ticks.resample(rule=rule).sum()
    # Periods containing no samples may come back as NaN; treat as zero.
    n_samples_per_period = n_samples_per_period.fillna(0)

    dropout_rate_per_period_ = 1 - (n_samples_per_period /
                                    n_expected_samples_per_period)

    return dropout_rate_per_period_
Example #2
0
def get_gap_starts_and_gap_ends(data, max_sample_period, 
                                window_start=None, window_end=None):
    """Find where consecutive samples are further apart than allowed.

    Parameters
    ----------
    data : pandas.DataFrame or Series or DatetimeIndex

    max_sample_period : int or float
        Maximum allowed sample period in seconds.  Any interval between
        two consecutive timestamps longer than this counts as a 'gap'.

    window_start, window_end : pd.Timestamp
        The start and end of the window of interest.  If this window
        is larger than the duration of `data` then gaps will be
        appended to the front / back as necessary.  If this window
        is shorter than the duration of `data` data will be cropped.

    Returns
    -------
    gap_starts, gap_ends: DatetimeIndex
        `gap_starts` holds the timestamp immediately before each
        over-long interval and `gap_ends` the timestamp immediately
        after it.
    """
    # TODO: this might be a rather nasty hack to fix the circular dependency
    from nilmtk.preprocessing.electricity.single import reframe_index

    try:
        data = data.dropna()
    except AttributeError:
        # Objects without a `dropna()` method (the original author cites
        # DatetimeIndex) are used as they are.
        pass

    timestamps = reframe_index(_get_index(data), window_start, window_end)

    # Interval between each timestamp and its successor, in seconds.
    spacing_secs = timedelta64_to_secs(np.diff(timestamps.values))
    is_gap = spacing_secs > max_sample_period

    # The mask has one entry per interval, so it lines up with both the
    # "all but last" and "all but first" views of the index.
    return timestamps[:-1][is_gap], timestamps[1:][is_gap]