예제 #1
0
def mask_appliances_with_mains(electricity, sample_period_multiplier=4):
    """Find gaps in the first mains channel and blank them out of all
    appliance data.

    The assumption is that if the mains channel is dead for any
    timeslice then we should ignore this timeslice for all appliance
    channels too.

    Parameters
    ----------
    electricity : Electricity object
        Must expose a dict-like `mains` attribute; the appliance data is
        reached through `apply_func_to_values_of_dicts(..., ['appliances'])`.

    sample_period_multiplier : int, optional
        Default = 4
        max_sample_period = sample_period x sample_period_multiplier
        max_sample_period defines a 'gap'.

    Returns
    -------
    Whatever `apply_func_to_values_of_dicts` returns for the masked
    appliances (described by the original author as a copy of
    `electricity`).

    .. warning:: currently only uses gaps from first mains dataframe and ignores
                 all other mains dataframes.

    """

    # TODO: handle multiple mains channels and take intersection of gaps

    print("Masking appliances with mains... may take a little while...", end='')
    sys.stdout.flush()
    # list(...) keeps this working on Python 3, where dict views cannot be
    # indexed, and still raises IndexError when there is no mains channel.
    mains = list(electricity.mains.values())[0]
    sample_period = get_sample_period(mains)
    max_sample_period = sample_period * sample_period_multiplier
    print("Mains sample period = {:.1f}, max_sample_period = {:.1f}"
          .format(sample_period, max_sample_period))
    print("Getting gap starts and ends...")
    gap_starts, gap_ends = get_gap_starts_and_gap_ends(mains, max_sample_period)
    print("Found {:d} gap starts and {:d} gap ends.".format(len(gap_starts), len(gap_ends)))

    # Pair the boundaries once; every appliance is masked with the same gaps.
    gaps = list(zip(gap_starts, gap_ends))

    def mask_appliances(appliance_df):
        """Insert NaNs for every reading of `appliance_df` that lies inside
        a mains gap (both gap boundaries inclusive).

        NOTE(review): the assignment below writes into `appliance_df`
        itself unless the dtype conversion path is taken -- confirm that
        the caller hands in data that is safe to modify.
        """
        print(".", end='')
        sys.stdout.flush()
        if not gaps:
            return appliance_df

        # Build a single mask covering all gaps so the (comparatively
        # expensive) DataFrame assignment happens once, not once per gap.
        index = appliance_df.index
        in_gap = np.zeros(len(index), dtype=bool)
        for gap_start, gap_end in gaps:
            in_gap |= (index >= gap_start) & (index <= gap_end)

        try:
            appliance_df[in_gap] = np.nan
        except ValueError:
            # some DFs are int32, which can't accept NaNs, so convert to float32:
            # TODO: remove this once #105 is fixed
            appliance_df = appliance_df.astype(np.float32)
            appliance_df[in_gap] = np.nan
        return appliance_df

    masked = apply_func_to_values_of_dicts(electricity, mask_appliances,
                                           ['appliances'])
    print("done")
    return masked
예제 #2
0
    def train(self, building, aggregate='mains', submetered='appliances',
              disagg_features=None,
              environmental=None):
        """Train using 1d FHMM.

        A two-state Gaussian HMM is fitted per appliance, the learnt
        parameters are passed through `sort_learnt_parameters`, and the
        per-appliance models are merged with `create_combined_hmm`.

        Parameters
        ----------
        building : object
            Must provide `utility.electric.get_dataframe_of_appliances`
            and `utility.electric.get_dataframe_of_mains`.
        aggregate, submetered, environmental :
            Accepted for interface compatibility; not used by this
            implementation.
        disagg_features : list of Measurement, optional
            Only the first entry is used because the algorithm is 1D.
            Defaults to `[Measurement('power', 'active')]`.

        Side effects
        ------------
        Sets `self.freq` (sample period of the mains index, as a string
        such as '60s'), `self.individual` (per-appliance models) and
        `self.model` (the combined model).
        """
        # Build the default per call rather than sharing one list object
        # across every invocation (mutable default argument).
        if disagg_features is None:
            disagg_features = [Measurement('power', 'active')]

        # Get a dataframe of appliances; since the algorithm is 1D, we need
        # only the first Measurement
        measurement = disagg_features[0]
        train_appliances = building.utility.electric.get_dataframe_of_appliances(
            measurement=measurement)

        train_mains = building.utility.electric.get_dataframe_of_mains(
            measurement=measurement)

        # Setting frequency
        self.freq = str(int(get_sample_period(train_mains.index))) + 's'

        # The contiguous blocks depend only on the mains index, so compute
        # them once instead of once per appliance.
        blocks = list(contiguous_blocks(train_mains.index))

        learnt_model = OrderedDict()
        for appliance in train_appliances:
            print(appliance)
            appliance_hmm = hmm.GaussianHMM(
                2, "full")
            appliance_series = train_appliances[appliance]

            # Data to fit: one (length, 1) array per contiguous block
            X = []
            for start, end in blocks:
                block_values = appliance_series[start:end].values
                length = block_values.size
                # Ignore small sequences
                if length > 50:
                    X.append(block_values.reshape(length, 1))

            appliance_hmm.fit(X)
            learnt_model[appliance] = appliance_hmm

        # Rebuild each model from its sorted parameters before combining
        new_learnt_models = OrderedDict()
        for appliance, fitted in learnt_model.items():
            startprob, means, covars, transmat = sort_learnt_parameters(
                fitted.startprob_, fitted.means_,
                fitted.covars_, fitted.transmat_)
            new_learnt_models[appliance] = hmm.GaussianHMM(
                startprob.size, "full", startprob, transmat)
            new_learnt_models[appliance].means_ = means
            new_learnt_models[appliance].covars_ = covars

        learnt_model_combined = create_combined_hmm(new_learnt_models)
        self.individual = new_learnt_models
        self.model = learnt_model_combined
예제 #3
0
파일: single.py 프로젝트: kanudutta/nilmtk
def contiguous_blocks(datetimeindex):
    """Split `datetimeindex` into runs of samples with no break between them.

    A break is any pair of consecutive timestamps whose spacing (in
    seconds) exceeds the value returned by `get_sample_period`.

    Parameters
    ----------
    datetimeindex : index of timestamps
        Must support positional indexing and expose `.values`.

    Returns
    -------
    list of (start, end) tuples
        The first and last timestamp of each contiguous run, in order.
    """
    period = get_sample_period(datetimeindex)
    spacing = timedelta64_to_secs(np.diff(datetimeindex.values))
    # Position i is flagged when the step from sample i to i + 1 is too long,
    # i.e. sample i is the last one of its run.
    last_positions = np.nonzero(spacing > period)[0]

    # Every run starts at position 0 or right after a break, and ends at a
    # break or at the final sample. With no breaks this is one run.
    first_positions = [0] + [pos + 1 for pos in last_positions]
    final_positions = list(last_positions) + [-1]

    return [(datetimeindex[first], datetimeindex[last])
            for first, last in zip(first_positions, final_positions)]
예제 #4
0
파일: single.py 프로젝트: kanudutta/nilmtk
def insert_zeros(single_appliance_dataframe, sample_period_multiplier=4, round_sample_period=True):
    """Find all gaps in `single_appliance_dataframe` longer than
    `max_sample_period` and insert a zero 1 sample period after
    the start of the gap and insert a second zero 1 sample period
    before the end of the gap.

    In other words: "book-end" the gap with a zero at each end.

    Zeros are only inserted at the start of the gap if the gap
    starts with a reading above zero; and likewise for insertion
    of zeros at the end of the gap.

    Note that this function does not fill the entire gap with zeros,
    if you want that then try pandas.DataFrame.fillna

    What is `insert_zeros` useful for?

    There are two possible reasons for lost samples in individual
    appliance data:

    1) a broken IAM (hence we do not have any information about the appliance)
    2) the IAM and appliance have been unplugged (hence we can infer that the
       appliance is off)

    Only the user can decide which of these two assumptions best
    fits their data.  insert_zeros is applicable only in case 2.

    For example, say a hoover's IAM is permanently attached to the
    hoover's power cord, even when the hoover is unplugged and put
    away in the cupboard.

    Say the hoover was switched on when both the hoover and the
    hoover's IAM were unplugged.  This would result in the dataset
    having a gap immediately after an on-segment.  This combination of
    an on-segment followed (without any zeros) by a gap might confuse
    downstream statistics and disaggregation functions which assume
    that the power drawn by an appliance between reading[i] and
    reading[i+1] is held constant at reading[i] watts.

    TODO: a smarter version of this function might use information from
    the aggregate data to do a better job of estimating exactly when
    the appliance was turned off.

    Parameters
    ----------
    single_appliance_dataframe : pandas.DataFrame
        Data from a single appliance.

    sample_period_multiplier : float or int, optional
        default = 4.  Must be 4 or larger (to ensure we do not add zeros
        less than sample_period seconds apart).
        max_sample_period = sample_period x sample_period_multiplier.
        max_sample_period is the maximum permissible sample period (in
        seconds). Any gap longer than `max_sample_period` is assumed
        to imply that the IAM and appliance are off.

    round_sample_period : bool, optional
        default = True. Whether or not to round sample_period to the
        nearest int.

    Returns
    -------
    df_with_zeros : pandas.DataFrame
        `single_appliance_dataframe` plus the inserted rows, sorted by
        index.  Each inserted row holds zero in every power column and
        the column median in every non-power column, and sits one
        `sample_period` after the last reading before a gap and/or one
        `sample_period` before the first reading after it.

    """
    sample_period = get_sample_period(single_appliance_dataframe)
    if round_sample_period:
        sample_period = int(round(sample_period))

    max_sample_period = sample_period * sample_period_multiplier

    # Drop NaNs (because we want those to be gaps in the index)
    df = single_appliance_dataframe.dropna()

    # Get the length of time between each pair of consecutive samples. Seconds.
    timedeltas = np.diff(df.index.values) / np.timedelta64(1, "s")
    gaps_mask = timedeltas > max_sample_period
    # gaps_mask[i] describes the step from row i to row i + 1, so it selects
    # the row before each gap from df[:-1] and the row after it from df[1:].
    readings_before_gaps = df[:-1][gaps_mask]
    readings_after_gaps = df[1:][gaps_mask]

    # we only add a 0 if the recorded value next to the gap is > 0
    readings_before_gaps = readings_before_gaps[readings_before_gaps.sum(axis=1) > 0]
    readings_after_gaps = readings_after_gaps[readings_after_gaps.sum(axis=1) > 0]

    # Find dates to insert zeros
    dates_to_insert_zeros_before_gaps = readings_before_gaps.index + pd.DateOffset(seconds=sample_period)
    dates_to_insert_zeros_after_gaps = readings_after_gaps.index - pd.DateOffset(seconds=sample_period)
    dates_to_insert_zeros = dates_to_insert_zeros_before_gaps.append(dates_to_insert_zeros_after_gaps)

    # Columns containing power
    power_columns = []
    non_power_columns = []
    for col in df.columns:
        try:
            physical_quantity = col.physical_quantity
        except AttributeError:  # DualSupply
            physical_quantity = col.measurement.physical_quantity
        if physical_quantity == "power":
            power_columns.append(col)
        else:
            non_power_columns.append(col)

    # Don't insert duplicate indices.
    # `Index.intersection` is used because `index & other` stopped being a
    # set operation in pandas 2.0.
    # NOTE(review): this only checks against the NaN-dropped index, while the
    # zeros are added to the original frame below; a NaN row sitting at one
    # of these timestamps would still yield a duplicate index entry.
    # TODO: remove this assert when we're confident the code is correct
    assert dates_to_insert_zeros.intersection(df.index).size == 0

    # Create new dataframe of zeros at new indices ready for insertion
    zeros = pd.DataFrame(data=0, index=dates_to_insert_zeros, columns=power_columns, dtype=np.float32)

    # Check no zeros are closer than sample_period
    # TODO: remove this assert when we're confident the code is correct
    # also remove the sort_index().
    if len(zeros) > 1:
        zeros = zeros.sort_index()
        assert timedelta64_to_secs(np.diff(zeros.index.values).min()) > sample_period

    # Now, take median of non-power columns (like voltage)
    for measurement in non_power_columns:
        zeros[measurement] = single_appliance_dataframe[measurement].median()

    # Insert the dataframe of zeros into the data.  pd.concat builds a new
    # frame (the input is left untouched) and replaces DataFrame.append,
    # which was removed in pandas 2.0.
    df_with_zeros = pd.concat([single_appliance_dataframe, zeros])
    df_with_zeros = df_with_zeros.sort_index()

    # If input data had a regular frequency then resample
    # because appending turns off the regular frequency.
    original_freq = single_appliance_dataframe.index.freq
    if original_freq is not None:
        resampled = df_with_zeros.resample(rule=original_freq)
        # pandas >= 0.18 returns a lazy Resampler here; older releases
        # returned the mean-aggregated frame directly.  Apply the mean
        # explicitly so a DataFrame is returned either way.
        if not isinstance(resampled, pd.DataFrame):
            resampled = resampled.mean()
        df_with_zeros = resampled

    return df_with_zeros