def disaggregate_original_co(h5_input, h5_output, dataset_start_date_disag,
                             dataset_end_date_disag, centroids=None):
    import nilmtk.disaggregate as original_nilmtk

    ds = DataSet(h5_input)
    elec = ds.buildings[1].elec
    vampire_power_used_in_original = elec.mains().vampire_power()

    # Train
    plain_co = original_nilmtk.CombinatorialOptimisation()
    plain_co.train(elec)

    # Optionally override the learned centroids
    if centroids is not None:
        for model in plain_co.model:
            instance = model['training_metadata'].instance()
            model['states'] = centroids[instance]

    # Disaggregate
    ds.set_window(start=dataset_start_date_disag, end=dataset_end_date_disag)
    elec = ds.buildings[1].elec
    output_plain_co = HDFDataStore(h5_output, 'w')
    plain_co.disaggregate(elec.mains(), output_plain_co)
    output_plain_co.close()

    return plain_co, vampire_power_used_in_original
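# A minimal usage sketch for disaggregate_original_co, assuming DataSet and
# HDFDataStore are imported from nilmtk in this module. The file paths, the
# date window and the centroid values are placeholders: when given, the
# centroids dict must map each training meter instance to its power states.
if __name__ == '__main__':
    custom_centroids = {5: [0, 80, 2400]}  # hypothetical states for meter instance 5
    model, vampire_power = disaggregate_original_co(
        'redd.h5', 'disag-co-out.h5',
        '2011-04-25', '2011-05-01',
        centroids=custom_centroids)
    print("Vampire power used during training:", vampire_power)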
def hart85(start_train, end_train, start_test, end_test, train_elec):
    # Start training
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    hart = hart_85.Hart85()
    hart.train(train_elec, sample_period=1)

    # Start disaggregating
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_hart85_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    hart.disaggregate(elec.mains(), output)
    output.close()

    disag = DataSet(disag_filename)
    disag_elec = disag.buildings[1].elec
    disag_elec.plot()
    plt.title("Hart85")
    plt.show()

    # Calculate F1-score
    f1 = f1_score(disag_elec, train_elec)
    f1.index = disag_elec.get_labels(f1.index)
    f1.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    plt.title("Hart85")
    plt.show()
def disaggregate_building_to_file(self, building_idx, filename, model=None):
    if model is None:
        model = self.fit_a_model(building_idx)
    elec = self.get_elec_meter_data_of_a_building(building_idx)
    output = HDFDataStore(filename, 'w')
    model.disaggregate(elec.mains(), output)
    output.close()
def co(start_train, end_train, start_test, end_test, train_elec):
    # Start training
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    co = CombinatorialOptimisation()
    co.train(train_elec, ac_type='active', physical_quantity='power',
             sample_period=1)

    # Start disaggregating
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_co_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    co.disaggregate(elec.mains(), output, ac_type='active',
                    physical_quantity='power', sample_period=1)
    output.close()

    dates_dict = {
        "start_train": start_train,
        "end_train": end_train,
        "start_test": start_test,
        "end_test": end_test
    }
    # Write the train and test timeframes into a JSON file
    with open(disag_filename + ".json", 'w') as dates_file:
        json.dump(dates_dict, dates_file)

    # Calculate F1-score
    disag = DataSet(disag_filename)
    disag_elec = disag.buildings[1].elec
    disag_elec.plot()
    plt.title("CO")
    plt.show()

    f1 = f1_score(disag_elec, train_elec)
    f1.index = disag_elec.get_labels(f1.index)
    f1.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    plt.title("CO")
    plt.show()
def mle(start_train, end_train, start_test, end_test, train_elec):
    # Start training
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    mle = maximum_likelihood_estimation.MLE()
    mle.sample_period = "1s"
    mle.train(train_elec)

    # Start disaggregating
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_mle_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    mle.disaggregate(elec.mains(), output)
    output.close()

    dates_dict = {
        "start_train": start_train,
        "end_train": end_train,
        "start_test": start_test,
        "end_test": end_test
    }
    # Write the train and test timeframes into a JSON file
    with open(disag_filename + ".json", 'w') as dates_file:
        json.dump(dates_dict, dates_file)

    disag = DataSet(disag_filename)
    disag_elec = disag.buildings[1].elec
    disag_elec.plot()
    plt.title("MLE")
    plt.show()

    # Calculate F1-score
    f1 = f1_score(disag_elec, train_elec)
    f1.index = disag_elec.get_labels(f1.index)
    f1.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    plt.title("MLE")
    plt.show()
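# The co(), mle() and hart85() helpers above all rely on a module-level
# DataSet named `data` plus a caller-supplied training MeterGroup. A minimal
# driver might look like the sketch below; the dataset path, date windows and
# the top-5 meter selection are assumptions for illustration only.
data = DataSet('./build/redd.h5')
train_elec = data.buildings[1].elec.submeters().select_top_k(k=5)
co('2011-04-18', '2011-04-25', '2011-04-25', '2011-05-01', train_elec)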
def nilmtkECOfunc(dataset_loc, train_start, train_end, test_start, test_end,
                  output_period):
    #### configuration ####
    period_s = output_period
    building = 2

    #### load ####
    total = DataSet(dataset_loc)
    train = DataSet(dataset_loc)
    test = DataSet(dataset_loc)
    train.set_window(start=train_start, end=train_end)
    test.set_window(start=test_start, end=test_end)
    print(train_start)
    print(train_end)
    print(test_start)
    print(test_end)

    #### get timeframes ####
    tf_total = total.buildings[building].elec.mains().get_timeframe()
    tf_train = train.buildings[building].elec.mains().get_timeframe()
    tf_test = test.buildings[building].elec.mains().get_timeframe()

    #### electrical metergroups ####
    total_elec = total.buildings[building].elec
    train_elec = train.buildings[building].elec
    test_elec = test.buildings[building].elec

    #### training process ####
    start = time.time()
    from nilmtk.disaggregate import CombinatorialOptimisation
    co = CombinatorialOptimisation()
    co.train(train_elec, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")

    #### disaggregation process ####
    start = time.time()
    disag_filename = '../dataset/ecob-b2-kall-co-1w:11-1m.h5'
    output = HDFDataStore(disag_filename, 'w')
    co.disaggregate(test_elec.mains(), output, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")
    output.close()

    disag_co = DataSet(disag_filename)
    disag_co_elec = disag_co.buildings[building].elec

    #### fraction of energy assigned correctly ####
    # FTE_co_all = FTE_func(disag_co_elec, test_elec)
    #### total disaggregation error ####
    # Te_co_all = total_disag_err(disag_co_elec, test_elec)

    #### create dataframes from the disaggregated and ground-truth metergroups
    disag_co_elec_df = disag_co_elec.dataframe_of_meters()
    disag_co_elec_df_nona = disag_co_elec_df.dropna()
    gt_full_df = test_elec.dataframe_of_meters()
    gt_full_df_nona = gt_full_df.dropna()
    gt_df_nona = gt_full_df_nona.loc[disag_co_elec_df_nona.index]

    #### jaccard ####
    # Ja_co_all = jaccard_similarity(disag_co_elec_df_nona, gt_df_nona,
    #                                disag_co_elec.submeters().instance(),
    #                                test_elec.instance())
    # print("FTE all", FTE_co_all)
    # print("TE all", Te_co_all)
    # print("Ja all", Ja_co_all)

    #### output ####
    # Drop the aggregated power column from the output
    disag_co_elec_submeter_df = disag_co_elec_df.drop(
        disag_co_elec_df.columns[[0]], axis=1)
    # Keep only the timestamps present in the disaggregated output
    gt_df_aligned = gt_full_df.loc[disag_co_elec_submeter_df.index]
    # Drop the aggregated power columns from the ground truth
    gt_df_sub = gt_df_aligned.drop(gt_df_aligned.columns[[0, 1, 2]], axis=1)
    # Training dataframe, resampled to the disaggregation period
    train_elec_df = train_elec.dataframe_of_meters()
    train_elec_df_aligned = train_elec_df.resample(
        str(period_s) + 'S').asfreq()[0:]
    train_elec_df_aligned_drop = train_elec_df_aligned.drop(
        train_elec_df_aligned.columns[[0, 1, 2]], axis=1)

    return (disag_co_elec_submeter_df, gt_df_sub, co,
            train_elec_df_aligned_drop)
def dae(dataset_path, train_building, train_start, train_end,
        test_building, test_start, test_end,
        val_building, val_start, val_end,
        meter_key, sample_period, num_epochs, patience, sequence_length,
        optimizer, learning_rate, loss):
    # Start tracking time
    start = time.time()

    # Prepare dataset and options
    # print("========== OPEN DATASETS ============")
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec
    train_meter = train_elec.submeters()[meter_key]
    try:
        train_mains = train_elec.mains().all_meters()[0]
        val_mains = val_elec.mains().all_meters()[0]
        test_mains = test_elec.mains().all_meters()[0]
    except AttributeError:
        train_mains = train_elec.mains()
        val_mains = val_elec.mains()
        test_mains = test_elec.mains()

    dae = DAEDisaggregator(sequence_length, patience, optimizer,
                           learning_rate, loss)

    # print("========== TRAIN ============")
    dae.train(train_mains, train_meter, epochs=num_epochs,
              sample_period=sample_period)
    # Get the number of epochs actually run before early stopping
    num_epochs = dae.stopped_epoch if dae.stopped_epoch != 0 else num_epochs
    # dae.export_model("results/dae-model-{}-{}epochs.h5".format(meter_key, num_epochs))

    # print("========== DISAGGREGATE ============")
    # Validation
    val_disag_filename = 'disag-out-val.h5'
    output = HDFDataStore(val_disag_filename, 'w')
    dae.disaggregate(val_mains, output, train_meter,
                     sample_period=sample_period)
    output.close()
    # Test
    test_disag_filename = 'disag-out-test.h5'
    output = HDFDataStore(test_disag_filename, 'w')
    dae.disaggregate(test_mains, output, train_meter,
                     sample_period=sample_period)
    output.close()

    # print("========== RESULTS ============")
    # Validation
    result_val = DataSet(val_disag_filename)
    res_elec_val = result_val.buildings[val_building].elec
    rpaf_val = metrics.recall_precision_accuracy_f1(res_elec_val[meter_key],
                                                    val_elec[meter_key])
    val_metrics_results_dict = {
        'recall_score': rpaf_val[0],
        'precision_score': rpaf_val[1],
        'accuracy_score': rpaf_val[2],
        'f1_score': rpaf_val[3],
        'mean_absolute_error':
            metrics.mean_absolute_error(res_elec_val[meter_key],
                                        val_elec[meter_key]),
        'mean_squared_error':
            metrics.mean_square_error(res_elec_val[meter_key],
                                      val_elec[meter_key]),
        'relative_error_in_total_energy':
            metrics.relative_error_total_energy(res_elec_val[meter_key],
                                                val_elec[meter_key]),
        'nad': metrics.nad(res_elec_val[meter_key], val_elec[meter_key]),
        'disaggregation_accuracy':
            metrics.disaggregation_accuracy(res_elec_val[meter_key],
                                            val_elec[meter_key])
    }
    # Test
    result = DataSet(test_disag_filename)
    res_elec = result.buildings[test_building].elec
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                test_elec[meter_key])
    test_metrics_results_dict = {
        'recall_score': rpaf[0],
        'precision_score': rpaf[1],
        'accuracy_score': rpaf[2],
        'f1_score': rpaf[3],
        'mean_absolute_error':
            metrics.mean_absolute_error(res_elec[meter_key],
                                        test_elec[meter_key]),
        'mean_squared_error':
            metrics.mean_square_error(res_elec[meter_key],
                                      test_elec[meter_key]),
        'relative_error_in_total_energy':
            metrics.relative_error_total_energy(res_elec[meter_key],
                                                test_elec[meter_key]),
        'nad': metrics.nad(res_elec[meter_key], test_elec[meter_key]),
        'disaggregation_accuracy':
            metrics.disaggregation_accuracy(res_elec[meter_key],
                                            test_elec[meter_key])
    }

    # End tracking time
    end = time.time()
    time_taken = end - start  # in seconds

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': num_epochs,
    }

    # Close disaggregation output stores
    result.store.close()
    result_val.store.close()
    # Close DataSet files
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
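# A hedged example call for dae(). The dataset path, buildings, date windows
# and hyperparameter values below are placeholders, not values from the
# original experiments.
results = dae(
    'ukdale.h5',
    train_building=1, train_start='2013-04-13', train_end='2013-07-31',
    test_building=1, test_start='2013-09-01', test_end='2013-09-30',
    val_building=1, val_start='2013-08-01', val_end='2013-08-31',
    meter_key='kettle', sample_period=6, num_epochs=5, patience=3,
    sequence_length=256, optimizer='adam', learning_rate=1e-3, loss='mse')
print(results['test_metrics'])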
data_dir = '/data/REDD'
building_number = 3
disag_filename = join(data_dir, 'disag-fhmm' + str(building_number) + '.h5')

data = DataSet(join(data_dir, 'redd.h5'))
print("Loading building " + str(building_number))
elec = data.buildings[building_number].elec
top_train_elec = elec.submeters().select_top_k(k=5)

fhmm = fhmm_exact.FHMM()
fhmm.train(top_train_elec)
output = HDFDataStore(disag_filename, 'w')
fhmm.disaggregate(elec.mains(), output)
output.close()

### F1-score for FHMM
disag = DataSet(disag_filename)
disag_elec = disag.buildings[building_number].elec
f1 = f1_score(disag_elec, elec)
f1.index = disag_elec.get_labels(f1.index)
f1.plot(kind='barh')
plt.ylabel('appliance')
plt.xlabel('f-score')
plt.title("FHMM")
plt.savefig(join(data_dir, 'f1-fhmm' + str(building_number) + '.png'))
disag.store.close()

print("Finished building " + str(building_number))
class REDD_Data(object):
    '''
    REDD_Data is an object designed to abstract the lower-level commands of
    the NILMTK software package, with a focus on the REDD dataset. It is
    designed to allow rapid experimentation and disaggregation compared to
    setting the package up from scratch.

    This class requires the following for proper usage:
    - NILMTK package: https://github.com/nilmtk
    - REDD dataset (converted to .h5): redd.csail.mit.edu
    - Various dependencies (that NILMTK also requires), most of which can be
      downloaded through Anaconda: continuum.io/downloads

    Parameters
    -----------
    in_filepath: filepath of the converted REDD dataset (in .h5 format)
    out_filepath: filepath for the output disaggregation dataset (in .h5 format)

    Attributes
    -----------
    km: Key_Map object
        maps a meter's appliance name to its specific .h5 key.
    dataStore: NILMTK HDFDataStore object
        the HDFDataStore containing the converted REDD dataset.
    dataSet: NILMTK DataSet object
        the DataSet generated from the REDD DataStore (self.dataStore).
    outDataStore: NILMTK HDFDataStore object
        the HDFDataStore that will contain the disaggregated dataset.
    co: NILMTK CombinatorialOptimisation object
        the disaggregation model that will be trained and will disaggregate
        the working dataset.
    train_group: NILMTK MeterGroup object
        the MeterGroup used to train the disaggregation model (self.co).
    '''

    def __init__(self, in_filepath, out_filepath):
        print("Loading DataStore and Generating Dataset...")
        self.km = {}
        self.dataStore = HDFDataStore(in_filepath)
        self.dataSet = DataSet()
        self.dataSet.load(self.dataStore)
        self.outDataStore = HDFDataStore(out_filepath, 'w')
        self.co = CombinatorialOptimisation()
        self.train_group = {}
        print("Data Properly Loaded!")

    def train_disag_model(self, building_inst, use_topk=False, k=5):
        '''
        Trains the disaggregation model using a selected MeterGroup.

        Parameters
        -----------
        building_inst: instance number of the building to take the training
            group from.
        use_topk: True to train on only the top-k most energy-intensive
            appliances, False to use all appliances.
        k: the number of appliances to use (if use_topk is True).
        '''
        print("Training CO Disaggregation Model using given metergroup...")
        if (building_inst <= 6) and (building_inst > 0):
            # Select the appropriate meter group to train with
            if use_topk:
                self.train_group = self.dataSet.buildings[
                    building_inst].elec.select_top_k(k)
            else:
                self.train_group = self.dataSet.buildings[building_inst].elec
            self.co.train(self.train_group)
            print("CO Disaggregation Model Successfully Trained!")
        else:
            print("Error: Please select a building_inst of 1-6.")
            print("Model unsuccessfully trained.")

    def load_disag_model(self, filepath):
        '''
        Loads the disaggregation model from a file.

        Parameters
        -----------
        filepath: exact filepath of the model file.
        '''
        print("Loading CO Disaggregation Model...")
        self.co.import_model(filepath)
        print("Model Successfully Loaded!")

    def save_disag_model(self, filepath):
        '''
        Saves the disaggregation model to a file.

        Parameters
        -----------
        filepath: exact filepath of the model file.
        '''
        print("Saving CO Disaggregation Model...")
        self.co.export_model(filepath)
        print("Model Successfully Saved!")

    def disaggregate(self, building_inst):
        '''
        Disaggregates the mains MeterGroup of the given building instance and
        saves the result to the self.outDataStore object.

        Parameters
        -----------
        building_inst: instance number of the building mains to disaggregate.
        '''
        print("Disaggregating Building Mains...")
        self.co.disaggregate(
            self.dataSet.buildings[building_inst].elec.mains(),
            self.outDataStore)
        print("Mains successfully disaggregated!")

    def close(self):
        '''
        Closes all open DataStores used by the program.
        '''
        print("Closing DataStores...")
        self.dataStore.close()
        self.outDataStore.close()
        print("Output DataStores Successfully Closed")

    '''
    All plot functions below are a WORK IN PROGRESS! -----------------------
    Documentation will be provided upon completion. ------------------------
    '''

    def plot_disag_apl(self, inst, appliance, t1="", t2=""):
        self.km = Key_Map(inst)
        plot_series(
            self.outDataStore.store.get(self.km.get_key(appliance))[t1:t2])
        plt.title("Disaggregated " + appliance.capitalize() + " Energy")
        plt.show()

    def show_plots(self):
        plt.show()

    def building_plot_all(self, building_inst, t1, t2):
        self.dataSet.buildings[building_inst].elec.plot(t1, t2)
        plt.title("Building " + str(building_inst) + " Energy per Appliance")
        plt.ylabel('Power [W]')
        plt.xlabel('Hour')

    def plot_redd_mains_data(self, inst=1, t1="", t2=""):
        self.km = Key_Map(inst)
        series1 = self.dataStore.store.get(self.km.get_key('mains1'))[t1:t2]
        series2 = self.dataStore.store.get(self.km.get_key('mains2'))[t1:t2]
        plot_series(series1 + series2)
        plt.title("Building " + str(inst) + " Mains Energy")
        plt.show()
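# A hedged usage sketch for REDD_Data; file paths are placeholders. The flow
# is load -> train -> disaggregate -> close, as documented in the class above.
redd = REDD_Data('redd.h5', 'redd-disag.h5')
redd.train_disag_model(building_inst=1, use_topk=True, k=5)
redd.disaggregate(building_inst=1)
redd.close()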
def get_disaggregation(device, total_aggregate):
    here = os.path.dirname(os.path.abspath(__file__))
    dataset_file = os.path.join(here, "dataset/iawe2.h5")
    devices = ["fridge", "air conditioner", "washing machine"]
    if device not in devices:
        return None

    total_seconds = 30 * 24 * 60
    val_per_second = float(total_aggregate) / total_seconds
    print(val_per_second)

    start = 0
    end = 0
    with h5py.File(dataset_file, "r+") as f1:
        table = f1["building1/elec/meter1/table"][()]
        start = int(str(table[0][0])[:10])
        end = start + total_seconds
        print(end - start, total_seconds)
        # for i in range(total_seconds):
        #     print("Progress {:2.1%}".format(i / total_seconds), end="\r")
        #     table[i][1][2] = val_per_second + np.random.uniform(-1e-17, 1e-17, 1)
        # f1["building1/elec/meter1/table"][...] = table
        # print(table)

    # start = datetime.fromtimestamp(start)
    end = datetime.fromtimestamp(end)
    # start = start.isoformat(' ', 'seconds')
    end = end.isoformat(' ', 'seconds')
    # print(start, end)

    test = DataSet(dataset_file)
    # test.set_window(start=start, end=end)
    test.set_window(end=end)
    test_elec = test.buildings[1].elec
    test_mains = test_elec.mains()[1]
    df = next(test_mains.load())
    print(df)
    test_meter = test_elec.submeters()[device]

    disag_filename = 'disag-out.h5'  # the filename of the resulting datastore
    output = HDFDataStore(disag_filename, 'w')

    disaggregator = ShortSeq2PointDisaggregator()
    model_file = os.path.join(
        here, "disag1/IAWE-RNN-h{}-{}-{}epochs.h5".format(1, device, 10))
    disaggregator.import_model(model_file)
    # anykey = input()

    # test_mains: the aggregated signal meter
    # output: the output datastore
    # third argument: used to copy the train meter's metadata into the
    # datastore (here the mains meter itself is passed)
    disaggregator.disaggregate(test_mains, output, test_mains,
                               sample_period=1)
    output.close()

    result = DataSet(disag_filename)
    res_elec = result.buildings[1].elec
    # prediction = res_elec[device]
    prediction = res_elec
    # df = next(prediction.load())
    # prediction = df["power"]["active"][0]
    return prediction
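# A hedged example call: get_disaggregation() expects one of the appliance
# names listed inside the function plus a total aggregate consumption figure
# (the value below is a placeholder). It returns the disaggregated MeterGroup
# or None for an unknown appliance.
prediction = get_disaggregation("fridge", total_aggregate=250000)
if prediction is not None:
    print(prediction)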
print("Model Sucessfully Trained!") #the data will now be disaggregated and placed into an output HDF file print("Disaggregating data...") #load the mains of building 1 r1_mains = r_elec.mains() #declare an output HDF datastore output_store = HDFDataStore('C:/NILM/Data_Sets/redd_b1_output.h5','w') #disaggregate the mains data using the combinatorial optimization algorithm disag.disaggregate(r1_mains, output_store) #close the output store output_store.close() #open new datastore to load data output_load = HDFDataStore('C:/NILM/Data_Sets/redd_b1_output.h5') print ("Disaggregation Complete!") print ("Printing Data....") #format axis names #plot mains data vs disaggregated data based on appliance key r_datastore.store.get(kmap.get_key('mains1'))[t_start : t_end].plot() plt.title("Aggregated Mains Energy") plt.legend().set_visible(False)
def test_all(path_to_directory):
    '''
    path_to_directory: contains the .h5 files on which the tests are to be run
    '''
    check_directory_exists(path_to_directory)
    files = [f for f in listdir(path_to_directory)
             if isfile(join(path_to_directory, f))
             and '.h5' in f and '.swp' not in f]
    files.sort()
    print("Datasets collected and sorted. Processing...")

    try:
        for i, file in enumerate(files):
            current_file = DataSet(join(path_to_directory, file))
            print("Printing metadata for current file...done.")
            print_dict(current_file.metadata)
            print("Loading file #", i, ":", file, ". Please wait.")
            for building_number in range(1, len(current_file.buildings) + 1):
                # Examine the metadata for a single house
                elec = current_file.buildings[building_number].elec
                print("The dataset being processed is:", elec.dataset())
                print("Metadata for current file:")
                print_dict(current_file.buildings[building_number].metadata)
                print("Appliance label information:", elec.appliance_label())
                print("Appliances:-")
                for appliance in elec.appliances:
                    print(appliance)

                print("Examining sub-metered appliances...")
                print("Collecting stats on meters...Done.")
                print(elec._collect_stats_on_all_meters)
                print("Timeframe:", elec.get_timeframe())
                print("Available power AC types:",
                      elec.available_power_ac_types())
                print("Clearing cache...done.")
                elec.clear_cache()
                print("Testing if there are meters from multiple buildings. "
                      "Result returned by method:",
                      elec.contains_meters_from_multiple_buildings())
                # TODO: find a better way to test the correlation function
                # print("Testing the correlation function.", elec.correlation(elec))
                print("List of disabled meters:", elec.disabled_meters)
                print("Trying to determine the dominant appliance:")
                try:
                    elec.dominant_appliance()
                except RuntimeError:
                    print('''More than one dominant appliance in MeterGroup!
                    (The dominant appliance per meter should be manually
                    specified in the metadata. If it isn't, and if there are
                    multiple appliances for a meter, then NILMTK assumes all
                    appliances on that meter are dominant. NILMTK cannot
                    automatically distinguish between multiple appliances on
                    the same meter, at least not without using NILM!)''')
                print("Dropout rate:", elec.dropout_rate())
                try:
                    print("Calculating energy per meter:")
                    print(elec.energy_per_meter())
                    print("Calculating total entropy:")
                    print(elec.entropy())
                    print("Calculating entropy per meter:")
                    print(elec.entropy_per_meter())
                except ValueError:
                    print("ValueError: total size of array must remain unchanged.")
                print("Calculating fraction per meter.")
                print(elec.fraction_per_meter())
                # print("Average energy per period:",
                #       elec.average_energy_per_period())

                print("Executing functions...")
                lis = []
                func = ""
                # for function in dir(elec):
                #     try:
                #         start = time.time()
                #         if "__" not in function or "dataframe_of_meters" not in function:
                #             func = getattr(elec, function)
                #             print("Currently executing", function, ". Please wait...")
                #             print(func())
                #         end = time.time()
                #         print("Time taken for the entire process:", end - start)
                #     except AttributeError:
                #         print("Attribute error occurred.")
                #     except TypeError:
                #         lis.append(function)
                #         print("Warning: TypeError")

                print("Plotting wiring hierarchy of meters....")
                elec.draw_wiring_graph()

                ## DISAGGREGATION STARTS HERE
                appliance_type = "unknown"
                # TODO: appliance_type should cycle through all appliances and
                # check each of them; use a list for this.
                selected_appliance = nilmtk.global_meter_group.select_using_appliances(
                    type=appliance_type)
                appliance_restricted = MeterGroup(selected_appliance.meters)
                if appliance_restricted.proportion_of_upstream_total_per_meter() is not None:
                    proportion_per_appliance = \
                        appliance_restricted.proportion_of_upstream_total_per_meter()
                    proportion_per_appliance.plot(kind='bar')
                    plt.title('Appliance energy as proportion of total building energy')
                    plt.ylabel('Proportion')
                    plt.xlabel('Appliance (<appliance instance>, <building instance>, <dataset name>)')

                selected_appliance.select(building=building_number).total_energy()
                selected_appliance.select(building=1).plot()
                appliance_restricted = MeterGroup(selected_appliance.meters)
                daily_energy = pd.DataFrame(
                    [meter.average_energy_per_period(offset_alias='D')
                     for meter in appliance_restricted.meters])
                daily_energy.plot(kind='hist')
                plt.title('Histogram of daily energy')
                plt.xlabel('energy (kWh)')
                plt.ylabel('Occurrences')
                plt.legend().set_visible(False)

                current_file.store.window = TimeFrame(
                    start='2012-04-01 00:00:00-05:00',
                    end='2012-04-02 00:00:00-05:00')
                # elec.plot()

                fraction = elec.submeters().fraction_per_meter().dropna()
                labels = elec.get_appliance_labels(fraction.index)
                plt.figure(figsize=(8, 8))
                fraction.plot(kind='pie', labels=labels)

                elec.select_using_appliances(category='heating')
                elec.select_using_appliances(category='single-phase induction motor')

                co = CombinatorialOptimisation()
                co.train(elec)
                for model in co.model:
                    print_dict(model)

                disag_filename = join(data_dir, 'ampds-disag.h5')
                output = HDFDataStore(disag_filename, 'w')
                co.disaggregate(elec.mains(), output)
                output.close()

                disag = DataSet(disag_filename)
                disag_elec = disag.buildings[building_number].elec
                f1 = f1_score(disag_elec, elec)
                f1.index = disag_elec.get_appliance_labels(f1.index)
                f1.plot(kind='bar')
                plt.xlabel('appliance')
                plt.ylabel('f-score')
                disag_elec.plot()
                disag.store.close()
    except AttributeError:
        print("AttributeError occurred while executing. This means that the "
              "value returned by "
              "appliance_restricted.proportion_of_upstream_total_per_meter() "
              "is None")
# Record the measured runtimes, separated by spaces
f.write(" ".join(str(t) for t in
                 (time1, time2, time3, time4, time5,
                  time6, time7, time8, time9)))
f.close()

print("Closing all open files.")

# Close the dataset DataStore and all output DataStores
redd_data.store.close()
for out_store in (outData1, outData2, outData3, outData4, outData5,
                  outData6, outData7, outData8, outData9):
    out_store.close()
def plot_zoomed_new_predicted_energy_consumption():
    """
    Predicts a new short window (of the given test set).
    """
    train = DataSet('../data/ukdale.h5')
    train.clear_cache()
    train.set_window(start="13-4-2013", end="31-7-2013")
    test = DataSet('../data/ukdale.h5')
    test.clear_cache()
    test.set_window(start='16-9-2013 17:00:00', end='16-9-2013 18:00:00')

    train_building = 1
    test_building = 1
    sample_period = 6
    meter_key = 'kettle'
    learning_rate = 1e-5
    best_epoch = 140

    train_elec = train.buildings[train_building].elec
    test_elec = test.buildings[test_building].elec
    train_meter = train_elec.submeters()[meter_key]
    test_mains = test_elec.mains()

    results_dir = '../results/UKDALE-RNN-lr=1e-05-2018-02-16-18-52-34'
    train_logfile = os.path.join(results_dir, 'training.log')
    val_logfile = os.path.join(results_dir, 'validation.log')
    rnn = RNNDisaggregator(train_logfile, val_logfile, learning_rate,
                           init=False)

    model = 'UKDALE-RNN-kettle-{}epochs.h5'.format(best_epoch)
    rnn.import_model(os.path.join(results_dir, model))
    disag_filename = 'disag-out-{}epochs.h5'.format(best_epoch)
    output = HDFDataStore(os.path.join(results_dir, disag_filename), 'w')
    results_file = os.path.join(results_dir,
                                'results-{}epochs.txt'.format(best_epoch))
    rnn.disaggregate(test_mains, output, results_file, train_meter,
                     sample_period=sample_period)
    os.remove(results_file)
    output.close()

    # Get the predicted curve for the best epoch
    result = DataSet(os.path.join(results_dir, disag_filename))
    res_elec = result.buildings[test_building].elec
    os.remove(os.path.join(results_dir, disag_filename))
    predicted = res_elec[meter_key]
    predicted = predicted.power_series(sample_period=sample_period)
    predicted = next(predicted)
    predicted.fillna(0, inplace=True)
    y1 = np.array(predicted)     # power
    x1 = np.arange(y1.shape[0])  # timestamps

    ground_truth = test_elec[meter_key]
    ground_truth = ground_truth.power_series(sample_period=sample_period)
    ground_truth = next(ground_truth)
    ground_truth.fillna(0, inplace=True)
    y2 = np.array(ground_truth)  # power
    x2 = np.arange(y2.shape[0])  # timestamps

    fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True, sharey=True)
    ax1.plot(x1, y1, color='r', label='predicted')
    ax1.plot(x2, y2, color='b', label='ground truth')
    ax2.plot(x1, y1, color='r')
    ax3.plot(x2, y2, color='b')
    ax1.set_title('Appliance: {}'.format(meter_key))
    fig.legend()
    fig.savefig(
        os.path.join(results_dir, 'zoomed_new_predicted_vs_ground_truth.png'))
def generate_vertices():
    """
    Predicts the power demand of the target appliance using the intermediate
    models which are exported during training, and generates a polygon from
    those predictions.
    """
    train = DataSet('../data/ukdale.h5')
    train.clear_cache()
    train.set_window(start="13-4-2013", end="31-7-2013")
    test = DataSet('../data/ukdale.h5')
    test.clear_cache()
    test.set_window(start='7-2-2014 08:00:00', end='7-3-2014')

    train_building = 1
    test_building = 5
    sample_period = 6
    meter_key = 'kettle'
    learning_rate = 1e-5

    train_elec = train.buildings[train_building].elec
    test_elec = test.buildings[test_building].elec
    train_meter = train_elec.submeters()[meter_key]
    test_mains = test_elec.mains()

    results_dir = '../results/UKDALE-ACROSS-BUILDINGS-RNN-lr=1e-05-2018-02-03-11-48-12'
    train_logfile = os.path.join(results_dir, 'training.log')
    val_logfile = os.path.join(results_dir, 'validation.log')
    rnn = RNNDisaggregator(train_logfile, val_logfile, learning_rate,
                           init=False)

    verts = []
    zs = []  # epochs
    for z in np.arange(10, 341, 10):
        # Disaggregate with the model exported at epoch z
        model = 'UKDALE-RNN-kettle-{}epochs.h5'.format(z)
        rnn.import_model(os.path.join(results_dir, model))
        disag_filename = 'disag-out-{}epochs.h5'.format(z)
        output = HDFDataStore(os.path.join(results_dir, disag_filename), 'w')
        results_file = os.path.join(results_dir,
                                    'results-{}epochs.txt'.format(z))
        rnn.disaggregate(test_mains, output, results_file, train_meter,
                         sample_period=sample_period)
        os.remove(results_file)
        output.close()

        # Get the predicted curve for epoch z
        result = DataSet(os.path.join(results_dir, disag_filename))
        res_elec = result.buildings[test_building].elec
        os.remove(os.path.join(results_dir, disag_filename))
        predicted = res_elec[meter_key]
        predicted = predicted.power_series(sample_period=sample_period)
        predicted = next(predicted)
        predicted.fillna(0, inplace=True)
        ys = np.array(predicted)     # power
        xs = np.arange(ys.shape[0])  # timestamps
        verts.append(list(zip(xs, ys)))  # add list of x-y coordinates
        zs.append(z)

    ground_truth = test_elec[meter_key]
    ground_truth = ground_truth.power_series(sample_period=sample_period)
    ground_truth = next(ground_truth)
    ground_truth.fillna(0, inplace=True)
    ys = np.array(ground_truth)  # power
    xs = np.arange(ys.shape[0])  # timestamps
    verts.append(list(zip(xs, ys)))  # add list of x-y coordinates
    zs.append(350)

    zs = np.asarray(zs)
    for i in range(len(verts)):
        verts[i].insert(0, [0, np.array([0])])
        verts[i].append([len(verts[i]), np.array([0])])

    pickle.dump(verts, open(os.path.join(results_dir, 'vertices.pkl'), 'wb'))
    pickle.dump(zs, open(os.path.join(results_dir, 'zs.pkl'), 'wb'))
    pickle.dump(ys, open(os.path.join(results_dir, 'ys.pkl'), 'wb'))
def plot_prediction_over_epochs_ploty():
    """
    Predicts the power demand of the target appliance using the intermediate
    models which are exported during training, and plots the prediction
    curves using plotly.
    """
    train = DataSet('../data/ukdale.h5')
    train.clear_cache()
    train.set_window(start="13-4-2013", end="31-7-2013")
    test = DataSet('../data/ukdale.h5')
    test.clear_cache()
    test.set_window(start="23-7-2014 10:00:00", end="23-7-2014 11:00:00")

    train_building = 1
    test_building = 5
    sample_period = 6
    meter_key = 'kettle'
    learning_rate = 1e-5

    train_elec = train.buildings[train_building].elec
    test_elec = test.buildings[test_building].elec
    train_meter = train_elec.submeters()[meter_key]
    test_mains = test_elec.mains()

    results_dir = '../results/UKDALE-ACROSS-BUILDINGS-RNN-lr=1e-05-2018-02-03-11-48-12'
    train_logfile = os.path.join(results_dir, 'training.log')
    val_logfile = os.path.join(results_dir, 'validation.log')
    rnn = RNNDisaggregator(train_logfile, val_logfile, learning_rate,
                           init=False)

    data = []
    for i in range(10, 401, 10):
        # Disaggregate with the model exported at epoch i
        model = 'UKDALE-RNN-kettle-{}epochs.h5'.format(i)
        rnn.import_model(os.path.join(results_dir, model))
        disag_filename = 'disag-out-{}epochs.h5'.format(i)
        output = HDFDataStore(os.path.join(results_dir, disag_filename), 'w')
        results_file = os.path.join(results_dir,
                                    'results-{}epochs.txt'.format(i))
        rnn.disaggregate(test_mains, output, results_file, train_meter,
                         sample_period=sample_period)
        os.remove(results_file)
        output.close()

        # Plot the predicted curve for epoch i
        result = DataSet(os.path.join(results_dir, disag_filename))
        res_elec = result.buildings[test_building].elec
        os.remove(os.path.join(results_dir, disag_filename))
        predicted = res_elec[meter_key]
        predicted = predicted.power_series(sample_period=sample_period)
        predicted = next(predicted)
        predicted.fillna(0, inplace=True)
        power = predicted.tolist()
        length = len(power)
        timestamps = list(range(length))

        x = []
        y = []
        z = []
        ci = int(255 / 420 * i)  # ci = "color index"
        for j in range(length):
            x.append([timestamps[j], timestamps[j]])  # timestamps
            y.append([i, i + 5])                      # epochs
            z.append([power[j], power[j]])            # power
        data.append(
            dict(
                z=z,
                x=x,
                y=y,
                colorscale=[[s, 'rgb(%d,%d,255)' % (ci, ci)]
                            for s in np.arange(0, 1.1, 0.1)],
                showscale=False,
                type='surface',
            ))

    # Plot the ground-truth curve as the last curve
    ground_truth = test_elec[meter_key]
    ground_truth = ground_truth.power_series(sample_period=sample_period)
    ground_truth = next(ground_truth)
    ground_truth.fillna(0, inplace=True)
    power = ground_truth.tolist()
    length = len(power)
    timestamps = list(range(length))

    i = 410
    x = []
    y = []
    z = []
    ci = int(255 / 410 * i)  # ci = "color index"
    for j in range(length):
        x.append([timestamps[j], timestamps[j]])  # timestamps
        y.append([i, i + 5])                      # epochs
        z.append([power[j], power[j]])            # power
    data.append(
        dict(
            z=z,
            x=x,
            y=y,
            colorscale=[[s, 'rgb(%d,%d,255)' % (ci, ci)]
                        for s in np.arange(0, 1.1, 0.1)],
            showscale=False,
            type='surface',
        ))

    layout = dict(title='prediction over epochs',
                  showlegend=False,
                  scene=dict(xaxis=dict(title='timestamps'),
                             yaxis=dict(title='epochs'),
                             zaxis=dict(title='power'),
                             camera=dict(eye=dict(x=-1.7, y=-1.7, z=0.5))))
    fig = dict(data=data, layout=layout)
    plotly.offline.plot(fig, filename='filled-3d-lines')
for line in file:
    toks = line.split(',')
    StackTrain = DataSet(toks[0])
    print(toks[2], '-', toks[3])
    StackTrain.set_window(start=toks[2], end=toks[3])
    test_elec = StackTrain.buildings[int(toks[1])].elec
    test_mains = test_elec.mains()

    print("========== DISAGGREGATE (StackTrain) ============")
    disag_filename = "StackTrain-h" + toks[1] + ".h5"
    output = HDFDataStore(disag_filename, 'w')
    disaggregator.disaggregate(test_mains, output, test_elec[meter_key],
                               sample_period=sample_period)
    output.close()

for i in test_building_list:
    test_elec = test.buildings[i].elec
    test_mains = test_elec.mains()

    print("========== DISAGGREGATE ============")
    disag_filename = "StackTest-" + str(i) + ".h5"
    output = HDFDataStore(disag_filename, 'w')
    disaggregator.disaggregate(test_mains, output, test_elec[meter_key],
                               sample_period=sample_period)
    output.close()

print("========== RESULTS ============")
def fhmm(dataset_path, train_building, train_start, train_end,
         val_building, val_start, val_end,
         test_building, test_start, test_end,
         meter_key, sample_period):
    # Start tracking time
    start = time.time()

    # Prepare dataset and options
    # print("========== OPEN DATASETS ============")
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    appliances = [meter_key]
    selected_meters = [train_elec[app] for app in appliances]
    selected_meters.append(train_elec.mains())
    selected = MeterGroup(selected_meters)

    fhmm = FHMM()
    # print("========== TRAIN ============")
    fhmm.train(selected, sample_period=sample_period)

    # print("========== DISAGGREGATE ============")
    # Validation
    val_disag_filename = 'disag-out-val.h5'
    output = HDFDataStore(val_disag_filename, 'w')
    fhmm.disaggregate(val_elec.mains(), output_datastore=output)
    output.close()
    # Test
    test_disag_filename = 'disag-out-test.h5'
    output = HDFDataStore(test_disag_filename, 'w')
    fhmm.disaggregate(test_elec.mains(), output_datastore=output)
    output.close()

    # print("========== RESULTS ============")
    # Validation
    result_val = DataSet(val_disag_filename)
    res_elec_val = result_val.buildings[val_building].elec
    rpaf_val = metrics.recall_precision_accuracy_f1(res_elec_val[meter_key],
                                                    val_elec[meter_key])
    val_metrics_results_dict = {
        'recall_score': rpaf_val[0],
        'precision_score': rpaf_val[1],
        'accuracy_score': rpaf_val[2],
        'f1_score': rpaf_val[3],
        'mean_absolute_error':
            metrics.mean_absolute_error(res_elec_val[meter_key],
                                        val_elec[meter_key]),
        'mean_squared_error':
            metrics.mean_square_error(res_elec_val[meter_key],
                                      val_elec[meter_key]),
        'relative_error_in_total_energy':
            metrics.relative_error_total_energy(res_elec_val[meter_key],
                                                val_elec[meter_key]),
        'nad': metrics.nad(res_elec_val[meter_key], val_elec[meter_key]),
        'disaggregation_accuracy':
            metrics.disaggregation_accuracy(res_elec_val[meter_key],
                                            val_elec[meter_key])
    }
    # Test
    result = DataSet(test_disag_filename)
    res_elec = result.buildings[test_building].elec
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                test_elec[meter_key])
    test_metrics_results_dict = {
        'recall_score': rpaf[0],
        'precision_score': rpaf[1],
        'accuracy_score': rpaf[2],
        'f1_score': rpaf[3],
        'mean_absolute_error':
            metrics.mean_absolute_error(res_elec[meter_key],
                                        test_elec[meter_key]),
        'mean_squared_error':
            metrics.mean_square_error(res_elec[meter_key],
                                      test_elec[meter_key]),
        'relative_error_in_total_energy':
            metrics.relative_error_total_energy(res_elec[meter_key],
                                                test_elec[meter_key]),
        'nad': metrics.nad(res_elec[meter_key], test_elec[meter_key]),
        'disaggregation_accuracy':
            metrics.disaggregation_accuracy(res_elec[meter_key],
                                            test_elec[meter_key])
    }

    # End tracking time
    end = time.time()
    time_taken = end - start  # in seconds

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': None,
    }

    # Close disaggregation output stores
    result.store.close()
    result_val.store.close()
    # Close DataSet files
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
def nilmtkDREDfunc(dataset_loc, train_start, train_end, test_start, test_end,
                   output_period):
    #### configuration ####
    period_s = output_period
    building = 1

    #### load ####
    total = DataSet(dataset_loc)
    train = DataSet(dataset_loc)
    test = DataSet(dataset_loc)
    train.set_window(start=train_start, end=train_end)
    test.set_window(start=test_start, end=test_end)
    print(train_start)
    print(train_end)
    print(test_start)
    print(test_end)

    #### get timeframes ####
    tf_total = total.buildings[building].elec.mains().get_timeframe()
    tf_train = train.buildings[building].elec.mains().get_timeframe()
    tf_test = test.buildings[building].elec.mains().get_timeframe()

    #### electrical metergroups ####
    total_elec = total.buildings[building].elec
    train_elec = train.buildings[building].elec
    test_elec = test.buildings[building].elec

    #### training process ####
    start = time.time()
    from nilmtk.disaggregate import CombinatorialOptimisation
    co = CombinatorialOptimisation()
    co.train(train_elec, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")

    #### disaggregation process ####
    start = time.time()
    disag_filename = dataset_loc + 'DREDapp.h5'
    output = HDFDataStore(disag_filename, 'w')
    co.disaggregate(test_elec.mains(), output, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")
    output.close()

    disag_co = DataSet(disag_filename)
    disag_co_elec = disag_co.buildings[building].elec

    #### create dataframes from the disaggregated and ground-truth metergroups
    disag_co_elec_df = disag_co_elec.dataframe_of_meters()
    gt_full_df = test_elec.dataframe_of_meters()
    # Drop the NaNs; this may be needed later (initially it was used for Ja)
    disag_co_elec_df_nona = disag_co_elec_df.dropna()
    gt_full_df_nona = gt_full_df.dropna()
    # Keep only the timestamps present in the disaggregated output
    gt_df_nona = gt_full_df_nona.loc[disag_co_elec_df_nona.index]

    #### output ####
    # Drop the aggregated power column from the output
    disag_co_elec_submeter_df = disag_co_elec_df.drop(
        disag_co_elec_df.columns[[0]], axis=1)
    # Keep only the sampled timestamps on the ground truth
    gt_df_aligned = gt_full_df.loc[disag_co_elec_submeter_df.index]
    # Drop the aggregated power column from the ground truth
    gt_df_sub = gt_df_aligned.drop(gt_df_aligned.columns[[0]], axis=1)
    # Training dataframe: resample to the disaggregation period and drop the
    # mains power column
    train_elec_df = train_elec.dataframe_of_meters()
    train_elec_df_aligned = train_elec_df.resample(
        str(period_s) + 'S').asfreq()[0:]
    train_elec_df_aligned_drop = train_elec_df_aligned.drop(
        train_elec_df_aligned.columns[[0]], axis=1)

    return (disag_co_elec_submeter_df, gt_df_sub, co,
            train_elec_df_aligned_drop)
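# A hedged example call for nilmtkDREDfunc (nilmtkECOfunc above takes the same
# arguments, with a different dataset and building). The dataset location,
# date windows and the 60-second output period are placeholders; note that
# dataset_loc is also used as the prefix for the disaggregation output file.
disag_df, gt_df, co_model, train_df = nilmtkDREDfunc(
    '../dataset/DRED.h5',
    train_start='2015-07-05', train_end='2015-07-12',
    test_start='2015-07-12', test_end='2015-07-19',
    output_period=60)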