def preprocess_redd(building, freq):
    """Run the REDD preprocessing pipeline on `building`.

    The steps are applied in a fixed order: split supplies are summed,
    the building is downsampled with `freq`, appliance gaps are filled,
    missing mains samples are dropped, a common index is made, the
    apparent-power column of mains ``(1, 1)`` is relabelled as active
    power, and finally the top 6 appliances are kept (as selected by
    ``prepb.filter_top_k_appliances``).

    Parameters
    ----------
    building : Building
        Must expose ``utility.electric``.  Its ``utility.electric``
        attribute is reassigned in place by the first step.
    freq : str
        Passed as ``rule`` to ``prepb.downsample``.

    Returns
    -------
    building : Building
        The object returned by the last ``prepb`` step.
    """
    electric = building.utility.electric
    building.utility.electric = electric.sum_split_supplies()

    downsampled = prepb.downsample(building, rule=freq)
    gap_filled = prepb.fill_appliance_gaps(downsampled)
    with_mains = prepb.drop_missing_mains(gap_filled)
    aligned = prepb.make_common_index(with_mains)

    # Relabel the column in place so the mains frame carries an
    # ('power', 'active') column instead of ('power', 'apparent').
    mains_1_1 = aligned.utility.electric.mains[(1, 1)]
    mains_1_1.rename(
        columns={
            Measurement('power', 'apparent'): Measurement('power', 'active'),
        },
        inplace=True)

    return prepb.filter_top_k_appliances(aligned, k=6)
def preprocess_iawe(building, freq):
    """Run the iAWE preprocessing pipeline on `building`.

    The steps are applied in a fixed order: split supplies are summed,
    samples whose voltage lies outside 160-260 are filtered out, the
    data is restricted to the window '7-13-2013' .. '8-4-2013', the
    building is downsampled with `freq`, appliance gaps are filled,
    zeros are prepended/appended for the same window (timezone
    'Asia/Kolkata'), missing mains samples are dropped, a common index
    is made, and the top 6 appliances are kept (as selected by
    ``prepb.filter_top_k_appliances``).

    Parameters
    ----------
    building : Building
        Must expose ``utility.electric``.  Its ``utility.electric``
        attribute is reassigned in place by the first step.
    freq : str
        Passed as ``rule`` to ``prepb.downsample`` and as the frequency
        argument of ``prepb.prepend_append_zeros``.

    Returns
    -------
    building : Building
        The object returned by the last ``prepb`` step.
    """
    # The same date window is used for filtering and for zero padding.
    window_start = '7-13-2013'
    window_end = '8-4-2013'

    electric = building.utility.electric
    building.utility.electric = electric.sum_split_supplies()

    plausible = prepb.filter_out_implausible_values(
        building, Measurement('voltage', ''), 160, 260)
    windowed = prepb.filter_datetime(plausible, window_start, window_end)
    downsampled = prepb.downsample(windowed, rule=freq)
    gap_filled = prepb.fill_appliance_gaps(downsampled)
    padded = prepb.prepend_append_zeros(
        gap_filled, window_start, window_end, freq, 'Asia/Kolkata')
    with_mains = prepb.drop_missing_mains(padded)
    aligned = prepb.make_common_index(with_mains)

    return prepb.filter_top_k_appliances(aligned, k=6)
# 1. sum together split mains and DualSupply appliances building.utility.electric = building.utility.electric.sum_split_supplies() # optional. (required for iAWE) remove samples where voltage outside range # Fixing implausible voltage values building = prepb.filter_out_implausible_values( building, Measurement('voltage', ''), 160, 260) # optional. (required for iAWE) Note that this will remove motor as it does not have # any data in this period building = prepb.filter_datetime( building, '7-13-2013', '8-4-2013') # 2. downsample mains, circuits and appliances building = prepb.downsample(building, rule='1T') # 3. Fill large gaps in appliances with zeros and forward-fill small gaps building = prepb.fill_appliance_gaps(building) # optional. (required for iAWE) building = prepb.prepend_append_zeros( building, '7-13-2013', '8-4-2013', '1T', 'Asia/Kolkata') # 4. Drop missing samples from mains building = prepb.drop_missing_mains(building) # TODO: for some datasets (e.g. UKPD), we'll have to find a common index # for the subset of the appliances we want to use. # 5. Find intersection of mains and appliance datetime indicies
def preprocess_pecan(building, freq):
    """Run the Pecan preprocessing pipeline on `building`.

    Downsamples the building with `freq` and then keeps the top 6
    appliances (as selected by ``prepb.filter_top_k_appliances``).

    Parameters
    ----------
    building : Building
    freq : str
        Passed as ``rule`` to ``prepb.downsample``.

    Returns
    -------
    building : Building
        The object returned by ``prepb.filter_top_k_appliances``.
    """
    downsampled = prepb.downsample(building, rule=freq)
    return prepb.filter_top_k_appliances(downsampled, k=6)
def proportion_of_time_where_more_energy_submetered(
    building,
    min_proportion_submetered=0.7,
    require_common_indicies=True
):
    """Report the proportion of time slices where the sum of all the
    appliances submetered is greater than the
    mains * `min_proportion_submetered`.

    The building is downsampled to one-minute slices ("1T") before the
    comparison, so every time slice counts as 60 seconds.

    Note that ``building.utility.electric`` is replaced in place by the
    result of ``sum_split_supplies()``; the caller's object is modified.

    Parameters
    ----------
    building : Building

    min_proportion_submetered : float [0,1], optional
        default = 0.7

    require_common_indicies : boolean, optional
        default = True.  Decides what to use for the 'total duration'
        when calculating the proportion of time.
        If False then use the total duration between the first and last
        samples.
        If True then only use the non-NaN timeslices after finding the
        intersection of the appliance and mains indicies.

    Returns
    -------
    float [0,1]
        proportion of time.  Returns 0.0 when the total duration is zero
        (e.g. mains and appliances share no non-NaN time slices) instead
        of dividing by zero.
    """
    # Design notes carried over from the original implementation:
    #   mask appliance with mains & remove large gaps in mains
    #   (take a look at proportion_of_time_where_more_energy_submetered)
    #   put mains and appliances into one big DF (make sure we take the
    #   correct appliances!)
    #   downsample to 10 minute chunks, using mean
    #
    #   OR....
    #   ignore large gaps in mains... then...
    #   chop into 10 min chunks and pass these to
    #   proportion_of_energy_submetered.
    #   maybe can do this efficiently by putting everything into a big
    #   dataframe and using indicies_of_periods, and then extracting these
    #   chunks back into an Electricity object.

    # Kept as a function-scope import, as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    import nilmtk.preprocessing.electricity.building as prepb

    # The resampling rule and the slice length must describe the same
    # period: one minute.
    downsample_rule = "1T"
    secs_per_timeslice = 60

    building.utility.electric = building.utility.electric.sum_split_supplies()

    # downsample mains, circuits and appliances
    b_downsampled = prepb.downsample(building, downsample_rule)
    electric = b_downsampled.utility.electric

    appliance_df = electric.get_dataframe_of_appliances().dropna()
    mains_df = electric.get_dataframe_of_mains().dropna()

    # `Index.intersection` and `.loc` replace the `&` operator and the
    # `.ix` indexer, which newer pandas releases no longer support for
    # this purpose.
    common_index = mains_df.index.intersection(appliance_df.index)
    appliance_df = appliance_df.loc[common_index]
    mains_df = mains_df.loc[common_index]

    appliances_summed = appliance_df.sum(axis=1)
    timeslices_above_thresh = (
        appliances_summed > (mains_df * min_proportion_submetered))
    mins_above_thresh = timeslices_above_thresh.sum().values[0]
    secs_above_thresh = mins_above_thresh * secs_per_timeslice

    # Calc total duration
    if require_common_indicies:
        total_duration_secs = len(common_index) * secs_per_timeslice
    else:
        start, end = building.utility.electric.get_start_and_end_dates()
        total_duration_secs = (end - start).total_seconds()

    if total_duration_secs == 0:
        # Nothing to measure against; avoid a division by zero.
        return 0.0

    # Force true division so the result is a float even where `/` on two
    # integers would truncate.
    return float(secs_above_thresh) / total_duration_secs