Ejemplo n.º 1
0
def energy_per_dataframe(electricity, sample_period_multiplier=20, unit="kwh"):
    """Pre-process `electricity`, then total the energy of every channel
    after masking out all gaps in mains.

    The pre-processing steps are, in order: strip channels via
    `electricity.remove_channels_from_appliances()`, sum split supplies,
    mask appliances with the mains gaps, drop NaNs, and insert zeros.

    Parameters
    ----------
    electricity : Electricity object
        NOTE: `electricity.appliances` is reassigned on the object passed
        in, so the caller's object is modified by this function.
    sample_period_multiplier : int, optional
        Default = 20.  Forwarded to `mask_appliances_with_mains` and to
        `insert_zeros`.
    unit : str, optional
        Default = "kwh".  Forwarded to `get_total_energy_per_dict` and to
        `single.energy`.

    Returns
    -------
    total_mains_energy, totals_per_appliance

    total_mains_energy : float
    totals_per_appliance : pd.Series
        each key is an ApplianceName
        each value is total energy
    """

    # TODO: this might be an ugly hack to resolve circular dependencies.
    from nilmtk.preprocessing.electricity.building import mask_appliances_with_mains
    from nilmtk.preprocessing.electricity.single import insert_zeros

    # Every pre-processing step below touches both groups of channels.
    channel_groups = ["appliances", "mains"]

    # remove 'unmetered' and 'subpanels' from appliances
    electricity.appliances = electricity.remove_channels_from_appliances()

    # Sum split mains and DualSupply appliances
    electricity = electricity.sum_split_supplies()

    # TODO: Select common measurements. Maybe use electricity.select_common_measurements?
    # MEASUREMENT_PREFERENCES = [Measurement('power', 'active'),
    #                            Measurement('power', 'apparent')]

    # # Check if all channels share at least one Measurement (e.g. ('power', 'active'))
    # common_measurements = find_common_measurements(electricity)
    # common_measurement = None
    # for measurement_preference in MEASUREMENT_PREFERENCES:
    #     if measurement_preference in common_measurements:
    #         common_measurement = measurement_preference
    #         print("Using common_measurement:", common_measurement)
    #         break
    # if common_measurements is None and require_common_measurements:
    #     raise NoCommonMeasurementError

    # Find large gaps in mains data and ignore those gaps for all appliance channels
    electricity = mask_appliances_with_mains(electricity, sample_period_multiplier)

    # Drop NaNs on all channels
    def drop_nans(df):
        return df.dropna()

    electricity = apply_func_to_values_of_dicts(electricity, drop_nans, channel_groups)

    # Insert zeros on every channel (appliances and mains).
    def single_insert_zeros(df):
        return insert_zeros(df, sample_period_multiplier=sample_period_multiplier)

    print("Inserting zeros... may take a little while...", end="")
    sys.stdout.flush()
    electricity = apply_func_to_values_of_dicts(electricity, single_insert_zeros, channel_groups)
    print("done inserting zeros")

    # Total energy used for mains
    total_mains_energy = get_total_energy_per_dict(electricity, "mains", unit)

    # `.items()` (rather than the Python-2-only `.iteritems()`) keeps this
    # loop working on both Python 2 and Python 3 dicts.
    totals_per_appliance = {
        name: single.energy(df, unit=unit)
        for name, df in electricity.appliances.items()
    }

    return total_mains_energy, pd.Series(totals_per_appliance)
Ejemplo n.º 2
0
def fill_appliance_gaps(building, sample_period_multiplier=4):
    """Book-ends all large gaps with zeros using
    `nilmtk.preprocessing.electricity.single.insert_zeros`
    on all appliances in `building` and then forward fills any remaining NaNs.
    This will result in forward-filling small gaps with
    the recorded value which precedes the gap, and forward-filling zeros
    in large gaps.

    NOTE: This function assumes that any gap in the appliance data is the
    result of the appliance monitor and the appliance being off.  Do not
    use this function if gaps in appliance data are the result of the
    IAM being broken (and hence the state of the appliance is unknown).

    Parameters
    ----------
    building : nilmtk.Building
    sample_period_multiplier : float or int, optional
        The permissible maximum sample period expressed as a multiple
        of each dataframe's sample period.  Any gap longer
        than the max sample period is assumed to imply that the IAM
        and appliance are off.  The value is passed to `insert_zeros`
        unchanged; if None, the default is whatever `insert_zeros`
        chooses (previously documented here as 4 x the sample period of
        each dataframe -- TODO confirm against `insert_zeros`).

    Returns
    -------
    building_copy : nilmtk.Building

    See Also
    --------
    nilmtk.preprocessing.electricity.single.insert_zeros()
    """

    # TODO: should probably remove any periods where all appliances
    # are not recording (which indicates that things are broken)

    APPLIANCES = ['utility.electric.appliances']

    def bookend_gaps(df):
        # "book-end" each gap with a zero at each end
        return single.insert_zeros(
            df, sample_period_multiplier=sample_period_multiplier)

    def forward_fill(df):
        # `df.ffill()` is the direct equivalent of
        # `fillna(method='ffill')`, whose `method` keyword is deprecated
        # in recent pandas releases.
        return df.ffill()

    new_building = apply_func_to_values_of_dicts(building, bookend_gaps,
                                                 APPLIANCES)

    # Now fill forward
    new_building = apply_func_to_values_of_dicts(new_building, forward_fill,
                                                 APPLIANCES)

    return new_building
Ejemplo n.º 3
0
def downsample(building, rule='1T', how='mean', dropna=False):
    """Resample every electricity dataframe of `building` to a lower rate.

    Parameters
    ----------
    building : nilmtk.Building
    rule : string
        resampling rule as understood by pandas.resample;
        default '1T', i.e. 1 minute
    how : string
        aggregation as understood by pandas.resample; default 'mean'
    dropna : boolean, optional
        default = False.  When true, NaN rows are dropped from each
        dataframe after it has been resampled.

    Returns
    --------
    building_copy: nilmtk.Building

    NOTE(review): the `how=` keyword of `resample` is only accepted by
    older pandas releases -- confirm against the pinned pandas version.
    """
    def resample(df):
        # One function covers both cases; `dropna` only decides whether
        # the resampled frame is post-filtered.
        resampled = pd.DataFrame.resample(df, rule=rule, how=how)
        if dropna:
            return resampled.dropna()
        return resampled

    return apply_func_to_values_of_dicts(
        building, resample, BUILDING_ELECTRICITY_DICTS)
Ejemplo n.º 4
0
def prepend_append_zeros(building, start_datetime, end_datetime, freq, timezone):
    """Reindex every appliance dataframe onto a regular index running from
    `start_datetime` to `end_datetime` at `freq`, then fill the missing
    values.

    After reindexing, NaNs in 'power' columns are replaced with zero and
    NaNs in every other column are replaced with the median of that column
    in the original (pre-reindex) dataframe.  Because the dataframe is
    reindexed, rows whose timestamps are not part of the new index are
    not carried over.

    Parameters
    ----------
    building : nilmtk.Building
    start_datetime, end_datetime :
        bounds of the new index.  The index built from them is localized
        to 'GMT' before being converted to `timezone`.
    freq :
        frequency of the new index, as accepted by `pd.date_range`
    timezone :
        timezone the new index is converted to

    Returns
    -------
    new_building : result of `apply_func_to_values_of_dicts`
        (presumably a copy of `building` -- TODO confirm)
    """

    # TODO: can this function be merged with or make use of
    # preprocessing.building.single.reframe_index ?

    APPLIANCES = ['utility.electric.appliances']

    # `pd.date_range` replaces the `pd.DatetimeIndex(start=, end=, freq=)`
    # constructor form, which newer pandas releases no longer accept.
    idx = pd.date_range(start=start_datetime, end=end_datetime, freq=freq)
    idx = idx.tz_localize('GMT').tz_convert(timezone)

    def reindex_fill_na(df):
        # `reindex` already returns a new dataframe, so no explicit copy
        # of `df` is needed to leave the input untouched.
        reindexed = df.reindex(idx)

        for column in df.columns:
            if column.physical_quantity == 'power':
                fill_value = 0
            else:
                # Median of the original data, not of the reindexed frame.
                fill_value = df[column].median()
            # Assign the filled column back explicitly: an in-place
            # `fillna` on `reindexed[column]` may act on a temporary and
            # leave `reindexed` unchanged.
            reindexed[column] = reindexed[column].fillna(fill_value)

        return reindexed

    new_building = apply_func_to_values_of_dicts(building, reindex_fill_na,
                                                 APPLIANCES)
    return new_building
Ejemplo n.º 5
0
def mask_appliances_with_mains(electricity, sample_period_multiplier=4):
    """Finds gaps in first mains channel and then masks out
    these gaps in all appliance data by setting the readings to NaN.

    The assumption is that if the mains channel is dead for any
    timeslice then we should ignore this timeslice for all appliance
    channels too.

    Parameters
    ----------
    electricity : Electricity object

    sample_period_multiplier : int, optional
        Default = 4
        max_sample_period = sample_period x sample_period_multiplier
        max_sample_period defines a 'gap'.

    Returns
    -------
    copy of electricity

    .. warning:: currently only uses gaps from first mains dataframe and ignores
                 all other mains dataframes.

    NOTE(review): each appliance dataframe handed to the inner function is
    assigned to in place, so whether the caller's dataframes are left
    untouched depends on `apply_func_to_values_of_dicts` passing copies --
    TODO confirm.
    """

    # TODO: handle multiple mains channels and take intersection of gaps

    print("Masking appliances with mains... may take a little while...", end='')
    sys.stdout.flush()

    # `list(...)[0]` works for both Python 2 lists and Python 3 dict views
    # and still raises IndexError when there are no mains channels.
    mains = list(electricity.mains.values())[0]
    sample_period = get_sample_period(mains)
    max_sample_period = sample_period * sample_period_multiplier
    print("Mains sample period = {:.1f}, max_sample_period = {:.1f}"
          .format(sample_period, max_sample_period))
    print("Getting gap starts and ends...")
    gap_starts, gap_ends = get_gap_starts_and_gap_ends(mains, max_sample_period)
    print("Found {:d} gap starts and {:d} gap ends.".format(len(gap_starts), len(gap_ends)))

    def mask_appliances(appliance_df):
        """For each appliance dataframe, insert NaNs for any reading inside
        mains gaps.
        """
        print(".", end='')
        sys.stdout.flush()
        # The index is the same for every gap (the float conversion below
        # keeps it), so look it up once.
        index = appliance_df.index
        for gap_start, gap_end in zip(gap_starts, gap_ends):
            in_gap = (index >= gap_start) & (index <= gap_end)
            try:
                appliance_df[in_gap] = np.nan
            except ValueError:
                # some DFs are int32, which can't accept NaNs, so convert to float32:
                # TODO: remove this once #105 is fixed
                appliance_df = appliance_df.astype(np.float32)
                appliance_df[in_gap] = np.nan
        return appliance_df

    masked = apply_func_to_values_of_dicts(electricity, mask_appliances,
                                           ['appliances'])
    print("done")
    return masked
Ejemplo n.º 6
0
def make_common_index(building):
    """Select, from every electricity dataframe of `building`, the rows
    whose timestamps appear in both the first mains dataframe and the
    first appliance dataframe.

    The common index is the sorted intersection of those two indices and
    is given the `freq` of the first mains index.

    Parameters
    ----------
    building : nilmtk.Building

    Returns
    -------
    result of `apply_func_to_values_of_dicts` over
    BUILDING_ELECTRICITY_DICTS (presumably a copy of `building` --
    TODO confirm)
    """
    electric = building.utility.electric
    # `list(...)[0]` works for both Python 2 lists and Python 3 dict views
    # and still raises IndexError when a dict is empty.
    appliances_index = list(electric.appliances.values())[0].index
    mains_index = list(electric.mains.values())[0].index
    freq = mains_index.freq
    # TODO: can the line below be replace with
    # common_index = mains_index & appliances_index
    # This might be a lot faster and as far as I can tell gives the same
    # answer.
    common_index = pd.DatetimeIndex(
        np.sort(list(set(mains_index).intersection(set(appliances_index)))),
        freq=freq)
    # NOTE(review): `.ix` no longer exists in pandas >= 1.0.  Its
    # replacement (`.loc` vs `.reindex`) depends on how dataframes that
    # lack some of the common timestamps should be handled -- decide
    # before upgrading pandas.
    take_common_index = lambda df: df.ix[common_index]
    return apply_func_to_values_of_dicts(building, take_common_index,
                                         BUILDING_ELECTRICITY_DICTS)
Ejemplo n.º 7
0
def drop_missing_mains(building):
    """Apply `dropna()` to every mains dataframe of `building`.

    Parameters
    ----------
    building : nilmtk.Building

    Returns
    -------
    result of `apply_func_to_values_of_dicts` (presumably a copy of
    `building` -- TODO confirm)
    """
    def without_nans(frame):
        return frame.dropna()

    mains_keys = ['utility.electric.mains']
    return apply_func_to_values_of_dicts(building, without_nans, mains_keys)