def get_selected_metergroup(self, appliances, building, end, start, include_mains) -> MeterGroup:
    """ Gets a MeterGroup with the specified appliances for the given building during the given dates.

    Args:
        appliances (List): A list of appliances to read their records.
        building (int): The building to read the records from.
        start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012".
        end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012".
        include_mains (bool): True if should include main meters.

    Returns:
        A MeterGroup containing the specified appliances (plus mains when requested).
    """
    start_time = time.time() if TIMING else None
    self.dataset.set_window(start=start, end=end)
    elec = self.dataset.buildings[building].elec

    # Split appliances by how many meters NILMTK resolves for each one.
    appliances_with_one_meter = []
    appliances_with_more_meters = []
    for appliance in appliances:
        # BUGFIX: select per single appliance. The original passed the whole
        # `appliances` list here, so every iteration selected the same union
        # of meters and the per-appliance split was wrong.
        metergroup = elec.select_using_appliances(type=appliance)
        if len(metergroup.meters) > 1:
            appliances_with_more_meters.append(appliance)
        else:
            appliances_with_one_meter.append(appliance)

    # For appliances backed by several meters, pin one specific instance.
    special_metergroup = None
    for appliance in appliances_with_more_meters:
        inst = 1
        if appliance == 'sockets' and building == 3:
            inst = 4  # known special case for building 3 sockets
        if special_metergroup is None:
            special_metergroup = elec.select_using_appliances(type=appliance, instance=inst)
        else:
            # BUGFIX: honour the chosen instance (was hard-coded `instance=1`,
            # ignoring the `inst = 4` special case above).
            special_metergroup = special_metergroup.union(
                elec.select_using_appliances(type=appliance, instance=inst))

    selected_metergroup = elec.select_using_appliances(type=appliances_with_one_meter)
    selected_metergroup = selected_metergroup.union(special_metergroup)

    if include_mains:
        mains_meter = self.dataset.buildings[building].elec.mains()
        if isinstance(mains_meter, MeterGroup):
            if len(mains_meter.meters) > 1:
                # Keep only the first mains meter to avoid counting mains twice.
                mains_meter = mains_meter.meters[0]
                mains_metergroup = MeterGroup(meters=[mains_meter])
            else:
                mains_metergroup = mains_meter
        else:
            mains_metergroup = MeterGroup(meters=[mains_meter])
        selected_metergroup = selected_metergroup.union(mains_metergroup)

    # BUGFIX: guard the timing call. With TIMING falsy, `start_time` is None
    # and `time.time() - None` raised TypeError before `timing` could no-op.
    if TIMING:
        timing('NILMTK select using appliances: {}'.format(round(time.time() - start_time, 2)))
    return selected_metergroup
def test_select(self):
    """Selecting by appliance category should return the matching group."""
    meter = ElecMeter()
    meter.appliances = [Appliance({'type': 'fridge', 'instance': 1})]
    group = MeterGroup([meter])
    self.assertEqual(group.select_using_appliances(category='cold'), group)
def test_getitem(self):
    """__getitem__ accepts str / tuple / dict keys and rejects other types."""
    meter = ElecMeter()
    meter.appliances = [Appliance({'type': 'fridge', 'instance': 1})]
    group = MeterGroup([meter])

    # Keys that should resolve to the fridge meter.
    good_keys = [
        'fridge',
        ('fridge', 1),
        {'type': 'fridge'},
        {'type': 'fridge', 'instance': 1},
    ]
    for good in good_keys:
        self.assertEqual(group[good], meter)

    # Well-typed keys that match nothing -> KeyError.
    missing_keys = [
        'foo',
        ('foo', 2),
        ('fridge', 2),
        {'type': 'fridge', 'instance': -12},
    ]
    for missing in missing_keys:
        with self.assertRaises(KeyError):
            group[missing]

    # Unsupported key types -> TypeError.
    for bad in (True, False, ['fridge']):
        with self.assertRaises(TypeError):
            group[bad]
def fraction_energy_assigned_correctly_nan(predictions, ground_truth):
    """Sum over the predicted submeters of min(ground-truth fraction,
    predicted fraction), with both fraction series re-keyed by meter instance.
    No NaN filtering is applied before summing."""
    pred_group = MeterGroup(meters=predictions.submeters().meters)
    truth_group = MeterGroup(meters=ground_truth.submeters().meters)

    pred_fractions = pred_group.fraction_per_meter()
    truth_fractions = truth_group.fraction_per_meter()

    # Re-index both series by meter instance so they can be compared directly.
    truth_fractions.index = truth_fractions.index.map(lambda m: m.instance)
    pred_fractions.index = pred_fractions.index.map(lambda m: m.instance)

    return sum(
        min(truth_fractions[instance], pred_fractions[instance])
        for instance in pred_group.instance()
    )
def test_dual_supply(self):
    """A dual-supply appliance spanning two meters reports twice the energy
    of a single-meter appliance backed by the same data."""
    meter_template = {
        'data_location': '/building1/elec/meter1',
        'device_model': 'Energy Meter',
    }
    elec_meters = {instance: dict(meter_template) for instance in (1, 2, 3)}
    appliances = [
        {'type': 'washer dryer', 'instance': 1, 'meters': [1, 2]},
        {'type': 'fridge', 'instance': 1, 'meters': [3]},
    ]

    group = MeterGroup()
    group.load(self.datastore, elec_meters, appliances, BuildingID(1, 'REDD'))

    self.assertEqual(group['washer dryer'].total_energy()['active'],
                     group['fridge'].total_energy()['active'] * 2)
    # Dual-supply appliance is wrapped in a MeterGroup; single meter is bare.
    self.assertIsInstance(group['washer dryer'], MeterGroup)
    self.assertIsInstance(group['fridge'], ElecMeter)
def test_proportion_of_energy_submetered(self):
    """When every meter is in the group, the submetered proportion is 1."""
    building_meta = self.datastore.load_metadata('building1')['elec_meters']
    meters = [
        ElecMeter(self.datastore, building_meta[instance],
                  ElecMeterID(instance, 1, 'REDD'))
        for instance in (1, 2, 3)
    ]
    mains = meters[0]
    group = MeterGroup(meters)
    self.assertEqual(group.proportion_of_energy_submetered(), 1.0)
def test_wiring_graph(self):
    """wiring_graph(), mains() and downstream lookup follow submeter_of."""
    site = ElecMeter(metadata={'site_meter': True},
                     meter_id=ElecMeterID(1, 1, 'REDD'))
    sub = ElecMeter(metadata={'submeter_of': 1},
                    meter_id=ElecMeterID(2, 1, 'REDD'))
    nested = ElecMeter(metadata={'submeter_of': 2},
                       meter_id=ElecMeterID(3, 1, 'REDD'))

    group = MeterGroup([site, sub, nested])
    graph = group.wiring_graph()

    self.assertIs(group.mains(), site)
    self.assertEqual(group.meters_directly_downstream_of_mains().meters, [sub])
    self.assertEqual(graph.nodes(), [sub, nested, site])
def create_group():
    """Create a MeterGroup of six hand-picked appliance types.

    Creates an FHMM NILM model via the module-level `nilm` helper and selects
    the first available meter of each appliance type.

    Returns:
        tuple: (MeterGroup wrapping the selected meters, list of those meters).
    """
    nilm.create_nilm_model("FHMM")  # alternative model: "CombOpt"
    appliance_types = [
        "fridge",
        "washing machine",
        "dish washer",
        "light",
        "washer dryer",
        "electric space heater",
    ]
    # Take the first available meter of each type.
    device_family = [nilm.select_appliances_by_type(t)[0] for t in appliance_types]
    # BUGFIX: Python 3 compatible print call (original used the Python 2
    # `print device_family` statement, a SyntaxError on Python 3).
    print(device_family)
    return MeterGroup(device_family), device_family
def read_mains(self, start, end, sample_period=6, building=1) -> Tuple[DataFrame, MeterGroup]:
    """ Loads the data of the mains meters of the given building.

    Args:
        start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012".
        end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012".
        sample_period (int): The sample period of the records.
        building (int): The building to read the records from.

    Returns:
        Returns a tuple containing the respective DataFrame and MeterGroup of the data that are read.
        Missing values in the DataFrame are filled with 0.
    """
    self.dataset.set_window(start=start, end=end)
    mains_meter = self.dataset.buildings[building].elec.mains()
    # Normalise to a MeterGroup: mains() may return either a single meter or
    # already a MeterGroup.
    if isinstance(mains_meter, MeterGroup):
        mains_metergroup = mains_meter
    else:
        mains_metergroup = MeterGroup(meters=[mains_meter])

    start_time = time.time() if TIMING else None
    df = mains_metergroup.dataframe_of_meters(sample_period=sample_period)
    # BUGFIX: guard the timing call. With TIMING falsy, `start_time` is None
    # and `time.time() - None` raised TypeError before `timing` could no-op.
    if TIMING:
        timing('NILMTK converting mains to dataframe: {}'.format(round(time.time() - start_time, 2)))

    df.fillna(0, inplace=True)
    return df, mains_metergroup
submeters = elec.meters_directly_downstream_of_mains() # Select appliances used in top K plot APPLIANCES = [ 'fridge freezer', 'HTPC', 'dish washer', 'washer dryer', 'kettle' ] selected_meters = [submeters[appliance] for appliance in APPLIANCES] remainder = [] for meter in submeters.meters: for appliance in APPLIANCES: if meter.matches_appliances({'type': appliance}): break else: remainder.append(meter) remainder = MeterGroup(remainder) remainder.name = 'Other submeters' selected_meters = MeterGroup(selected_meters[:2] + [remainder] + selected_meters[2:]) selected_meters['HTPC'].name = 'Home theatre PC' # Reverse the colour palette so it matches top_5_energy colors = sns.color_palette('deep') colors.reverse() colors = [colors[i] for i in [4, 2, 5, 1, 3, 0]] sns.set_palette(colors) # Set window DATE = "2014-12-07" next_day = pd.Timestamp(DATE) + timedelta(days=1) dataset.set_window(DATE, next_day)
def repair_overall_powerflow(self, timeframe, verbose=True): ''' Returns a repaired overallpowerflow where missing smart meters are calculated out. This function uses the GoodSections and calculates how many of all smart meters are active. It then uses this data to calculate the average powerflow per section. Like this deactivated smart meters are averaged out. Of course a basic dataquality is required and all smart meters are expected to have the same average power. ! Benutzt invalid meters as basically valid meters Paramters --------- timeframe = nilmtk.TimeFrame The section in which the powerflow is measured. verbose: bool Whether to print additonal output Returns ------- The repaired power ''' ## Get together all meters site_meters = MeterGroup() for i, dataset in enumerate(self.datasets): for building in dataset.buildings: if not dataset.buildings[building].metadata[ 'original_name'] in self.bad_meters: site_meters = site_meters.union( dataset.buildings[building].elec.sitemeters()) if verbose: print("Dataset {0} all_elecmeters".format(i)) ## Load all data s = pd.Timestamp("1.1.2016", tz="UTC") e = pd.Timestamp("13.3.2017", tz="UTC") section = TimeFrameGroup([TimeFrame(start=s, end=e)]) total_powerflow = pckl.load( open("/media/felix/HDD/6_PowerflowPckls/total_powerflow_new.pckl", "rb")) #total_powerflow = site_meters.power_series_all_data(sample_period=900, sections=section, verbose=True) #pckl.dump(total_powerflow, open("/media/felix/HDD/6_PowerflowPckls/total_powerflow_new.pckl", "wb")) # Count the missing meters for each point in time all_non_null_sections = pd.Series(0, index=pd.DatetimeIndex(start=s, end=e, freq='15min')) for i, meter in enumerate(site_meters.meters): if verbose: print("Meter {0}".format(i)) nonzeros = meter.nonzero_sections() if len(nonzeros._df) > 0: cur = TimeFrameGroup.calculate_upcounts([ meter.nonzero_sections(sections=section) ]).resample('1min').ffill().fillna(0).resample('15min', how='mean') all_non_null_sections = 
all_non_null_sections.add(cur, fill_value=0) # Correct the total powerflow avg_power = (total_powerflow / all_non_null_sections).dropna() power = avg_power * len(site_meters.meters) power = pd.DataFrame(power) power.columns = pd.MultiIndex.from_tuples( [('power', 'active')], names=['physical_quantity', 'type']) return power
def test_full_results_with_no_sections_raises_runtime_error(self):
    """dropout_rate(full_results=True) with no sections must raise."""
    group = MeterGroup([ElecMeter(), ElecMeter()])
    with self.assertRaises(RuntimeError):
        group.dropout_rate(full_results=True)
# Building 4: train on the fridge and kettle submeters.
elec4 = dataset4.buildings[4].elec
train_elec.append(
    elec4.submeters().select_using_appliances(type=['fridge', 'kettle']))

# Building 5: restrict to the summer window and train on the washing machine.
dataset5.set_window(start="2014-06-29", end="2014-09-01")
elec5 = dataset5.buildings[5].elec
train_elec.append(
    elec5.submeters().select_using_appliances(type=['washing machine']))
#fridges = nilmtk.global_meter_group.select_using_appliances(type='fridge')

# Flatten the collected MeterGroups into a single MeterGroup of meters.
train = []
for item in train_elec:
    for meter in item.meters:
        train.append(meter)
input_meter = MeterGroup(train)
#########################################################
test = DataSet('ukdale.h5')
# Set the window of interest (NOTE(review): a timestamp may be throwing an
# error here -- verify).
test.set_window(start="2014-10-16", end="2014-10-30")
# Possibly change to house 5.
# Defines the building of interest; adjust to evaluate more than one building.
building = 1
test_elec = test.buildings[building].elec
# Selecting the top appliance. Change to kettle, fridge, washing machine,
# microwave or dish washer as needed.
#top_5_train_elec = train_elec.submeters().select_top_k(k=5)
def calculate_correlations(self, meters, extDataSet, n_jobs=-1, tmp_folder=None, verbose=False):
    '''
    Function only setting up the correlations without clustering.
    Sometimes it is also required to just get the correlations.

    Parameters
    ----------
    meters: nilmtk.MeterGroup
        The meters to cluster, from which the demand is loaded.
    extDataSet: nilmtk.DataSet
        The External Dataset containing the fitting data.
    n_jobs: int
        ! Not used at the moment !
        Defines the amount of processes. (-1 = amount of cpu cores)
    verbose: bool
        Whether to print additional output

    Returns
    -------
    correlations: pd.DataFrame
        DataFrame with a column per external feature and a row for each meter.
    '''
    #try:
    #    return pckl.load(open(tmp_folder + "_" + self.model.params['grouping'],'rb'))
    #except:
    # We need global variables if we want to use multiprocessing
    global current_loadseries
    global group_data
    global corrdataframe
    clusterer_timeseries = []

    # Declare amount of processes
    if n_jobs == -1:
        n_jobs = multiprocessing.cpu_count()

    # Prepare result frame: one column per external feature, shift, weekday
    # feature and hour feature.
    dims = self.model.params['externalFeatures'] + self.model.params['shifts']
    weekdayFeatures = [('weekday', cur)
                       for cur in self.model.params['weekdayFeatures']]
    hourFeatures = [('hour', cur) for cur in self.model.params['hourFeatures']]
    corrs = self.model.correlations = pd.DataFrame(
        columns=dims + weekdayFeatures + hourFeatures)

    # Load the external data specified in the params
    periodsExtData = [
        int(dev['sample_period'])
        for dev in extDataSet.metadata['meter_devices'].values()
    ]
    min_sampling_period = min(periodsExtData +
                              [self.model.params['self_corr_freq']]) * 2

    # Group the meters by the designated strategy ('single' groups by zip,
    # 'all' treats everything as one group, 'cluster' uses the given groups).
    try:
        if self.model.params['grouping'] == 'single':
            metergroups = meters.groupby('zip', use_appliance_metadata=False)
            zips = list(metergroups.keys())
            metergroups = metergroups.values()
        elif self.model.params['grouping'] == 'all':
            metergroups = [MeterGroup([meters])]
            zips = [meters.meters[0].building_metadata['zip']]
        elif self.model.params['grouping'] == 'cluster':
            metergroups = [meters]
            zips = [meters.meters[0].meters[0].building_metadata['zip']]
    except:
        # NOTE(review): bare except fallback; assumes a nested meter layout --
        # confirm the intended failure mode.
        zips = [meters.meters[0][1].building_metadata['zip']] * len(meters)

    processed_meters = 0
    for i, group in enumerate(metergroups):
        zip = zips[i]

        # Load the group-specific external data (weather) for this zip.
        group_data = extDataSet.get_data_for_group(
            zip, self.model.params['section'], 300,
            self.model.params['externalFeatures'])  # min_sampling_period

        # Then go through all meters
        for processed_meters, meter in enumerate(group.meters):
            # Load the meter's demand (and bring it back to a continuous
            # time series).
            current_loadseries = meter.power_series_all_data(
                dtype='float16',
                sample_period=self.model.params['sample_period'],
                sections=self.model.params['section'],
                tmp_folder=tmp_folder,
                verbose=verbose
            )  #, load_kwargs={'sample_period':min_sampling_period})
            if processed_meters == 0:
                self.set_up_corrdataframe(
                    dims, self.model.params['weekdayFeatures'],
                    self.model.params['hourFeatures'])

            # Multiprocessing currently deactivated
            #if processed_meters != 0:
            #    newSeries = pd.Series(index = current_loadseries.asfreq('4s').index)
            #    resampledload = current_loadseries.combine_first(newSeries)
            #    resampledload = resampledload.interpolate()
            #    current_loadseries = resampledload.resample('2min', how='mean')
            #pool = multiprocessing.Pool(processes=n_jobs)
            #corr_vector = pool.map(correlate, dims)
            corr_vector = []
            corr_vector = self.correlate()
            corrs.loc[meter.identifier, :] = corr_vector
            if verbose:
                print('Correlation set up for {0} - {1}/{2}'.format(
                    meter, processed_meters, len(group.meters)))
    #pckl.dump(corrs, open(tmp_folder + "_" + self.model.params['grouping'],'wb'))
    return corrs
# Split train/test at split_point and grab each building's elec group.
test.set_window(start=split_point)
train_elec = train.buildings[b_id].elec
test_elec = test.buildings[b_id].elec
test_mains = test_elec.mains()

# Fridge elec
fridge_elec_train = train_elec[('fridge', fridge_instance)]
fridge_elec_test = test_elec[('fridge', fridge_instance)]
num_states_dict = {fridge_elec_train: num_states}

# Finding top N appliances
top_k_train_list = top_k_dict[str(f_id)][:K]
print("Top %d list is " %(K), top_k_train_list)
top_k_train_elec = MeterGroup([m for m in ds.buildings[b_id].elec.meters
                               if m.instance() in top_k_train_list])

# Ensure the output directory for this experiment exists.
if not os.path.exists(os.path.join(script_path, "..","..", "bash_runs_hart/%s" % (out_file_name))):
    os.makedirs(os.path.join(script_path, "..","..", "bash_runs_hart/%s" % (out_file_name)))

# Add this fridge to training if this fridge is not in top-k
if fridge_elec_train not in top_k_train_elec.meters:
    top_k_train_elec.meters.append(fridge_elec_train)

try:
    # NOTE: iteritems() implies this script targets Python 2.
    for clf_name, clf in cls_dict.iteritems():
        print("-"*80)
        print("Training on %s" %clf_name)
        disag_filename = '%s/%d.h5' % (clf_name, f_id)
        ds_filename_total = "../../bash_runs_hart/%s/%s" % (out_file_name, disag_filename)
        # Skip classifiers whose output file already exists.
        if not os.path.exists(ds_filename_total):
def fhmm(dataset_path, train_building, train_start, train_end, val_building,
         val_start, val_end, test_building, test_start, test_end, meter_key,
         sample_period):
    """Train an FHMM disaggregator for one appliance and evaluate it.

    Trains on `meter_key` plus mains of `train_building`, disaggregates the
    mains of the validation and test buildings into HDF5 stores, and computes
    the metric suite for both splits.

    Args:
        dataset_path (str): Path to the NILMTK HDF5 dataset.
        train_building / val_building / test_building (int): Building ids.
        train_start, train_end, val_start, val_end, test_start, test_end:
            Window boundaries forwarded to DataSet.set_window().
        meter_key (str): Appliance type to disaggregate, e.g. 'fridge'.
        sample_period (int): Resampling period in seconds.

    Returns:
        dict: {'val_metrics': ..., 'test_metrics': ...,
               'time_taken': str (seconds, 2 decimals), 'epochs': None}.
    """
    # Start tracking time
    start = time.time()

    # ---- open datasets and apply the requested windows ----
    # (CLEANUP: removed the no-op self-assignments such as
    # `dataset_path = dataset_path` from the original.)
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    # Train on the target appliance plus the mains meter.
    appliances = [meter_key]
    selected_meters = [train_elec[app] for app in appliances]
    selected_meters.append(train_elec.mains())
    selected = MeterGroup(selected_meters)

    # ---- train ----
    fhmm = FHMM()
    fhmm.train(selected, sample_period=sample_period)

    # ---- disaggregate validation and test mains ----
    val_disag_filename = 'disag-out-val.h5'
    output = HDFDataStore(val_disag_filename, 'w')
    fhmm.disaggregate(val_elec.mains(), output_datastore=output)
    output.close()

    test_disag_filename = 'disag-out-test.h5'
    output = HDFDataStore(test_disag_filename, 'w')
    fhmm.disaggregate(test_elec.mains(), output_datastore=output)
    output.close()

    def _metric_suite(predicted, actual):
        """Compute the full metric dict for one (predicted, actual) pair."""
        rpaf = metrics.recall_precision_accuracy_f1(predicted, actual)
        return {
            'recall_score': rpaf[0],
            'precision_score': rpaf[1],
            'accuracy_score': rpaf[2],
            'f1_score': rpaf[3],
            'mean_absolute_error': metrics.mean_absolute_error(predicted, actual),
            'mean_squared_error': metrics.mean_square_error(predicted, actual),
            'relative_error_in_total_energy': metrics.relative_error_total_energy(predicted, actual),
            'nad': metrics.nad(predicted, actual),
            'disaggregation_accuracy': metrics.disaggregation_accuracy(predicted, actual),
        }

    # ---- results ----
    # (CLEANUP: the duplicated metric-dict construction for val/test was
    # factored into _metric_suite; values are unchanged.)
    result_val = DataSet(val_disag_filename)
    res_elec_val = result_val.buildings[val_building].elec
    val_metrics_results_dict = _metric_suite(res_elec_val[meter_key],
                                             val_elec[meter_key])

    result = DataSet(test_disag_filename)
    res_elec = result.buildings[test_building].elec
    test_metrics_results_dict = _metric_suite(res_elec[meter_key],
                                              test_elec[meter_key])

    # End tracking time
    end = time.time()
    time_taken = end - start  # in seconds

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': None,
    }

    # Close disag output files
    result.store.close()
    result_val.store.close()

    # Close Dataset files
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
def all_elecmeters(self):
    """Flatten the ElecMeters of all contained elec groups into one MeterGroup.

    Returns:
        MeterGroup: group holding every meter produced by
        `elec.all_elecmeters()` for each elec in `self.elecs()`.
    """
    elec_meters = []
    for elec in self.elecs():
        elec_meters.extend(elec.all_elecmeters())
    # FIX: the original bound the result to a local named `all`, shadowing the
    # builtin; return the group directly instead.
    return MeterGroup(elec_meters)