def run_co(meters):
    """Train a Combinatorial Optimisation model and run disaggregation.

    A fresh ``CombinatorialOptimisation`` instance is trained on ``meters``
    and then passed to ``run_nilmtk_disag`` under the tag ``'co'``.
    """
    # Training phase.
    logger.info("Training CO...")
    model = CombinatorialOptimisation()
    model.train(meters)

    # Disaggregation phase.
    logger.info("Disag CO...")
    run_nilmtk_disag(model, 'co')
def __init__(self, in_filepath, out_filepath):
    """Open the input and output stores and create an untrained CO model.

    Parameters
    ----------
    in_filepath:
        path passed to ``HDFDataStore``; the store is loaded into a new
        ``DataSet``.
    out_filepath:
        path opened as an ``HDFDataStore`` in ``'w'`` mode.
    """
    print("Loading DataStore and Generating Dataset...")
    self.km = {}

    # Input side: open the store, then load it into a fresh DataSet.
    source_store = HDFDataStore(in_filepath)
    self.dataStore = source_store
    dataset = DataSet()
    self.dataSet = dataset
    dataset.load(source_store)

    # Output side and model state.
    self.outDataStore = HDFDataStore(out_filepath, 'w')
    self.co = CombinatorialOptimisation()
    self.train_group = {}
    print("Data Properly Loaded!")
def benchmarks(house_id):
    """Benchmark CO and FHMM on one building of ``REDD_FILE``.

    The dataset is opened twice and windowed at ``TRAIN_END`` (training data
    before, test data after).  Both classifiers are trained on the top
    ``N_DEV`` submeters of the training window and evaluated via ``predict``.

    Returns
    -------
    tuple
        ``(truth, predictions)``; both are dicts keyed by ``'CO'`` and
        ``'FHMM'``.  Each ``truth`` entry additionally gets a ``'Main'``
        item holding the first chunk of test mains data.
    """
    redd_train = DataSet(REDD_FILE)
    redd_test = DataSet(REDD_FILE)

    # Training data ends where the test data starts.
    redd_train.set_window(end=TRAIN_END)
    redd_test.set_window(start=TRAIN_END)

    # Top N_DEV devices of the training window.
    train_building = redd_train.buildings[house_id]
    test_elec = redd_test.buildings[house_id].elec
    top_apps = train_building.elec.submeters().select_top_k(k=N_DEV)

    # First chunk of the test mains, stored with the ground truth below.
    test_mains = next(test_elec.mains().load())

    truth = {}
    predictions = {}
    banner = "*" * 20
    contenders = (
        ('CO', 'Combinatorial Optimisation: ', CombinatorialOptimisation),
        ('FHMM', 'Factorial Hidden Markov Model: ', FHMM),
    )
    for key, title, factory in contenders:
        classifier = factory()
        started = time.time()
        print(banner)
        print(title)
        print(banner)
        classifier.train(top_apps, sample_period=SAMPLE_PERIOD)
        truth[key], predictions[key] = predict(
            classifier, test_elec, SAMPLE_PERIOD,
            redd_train.metadata['timezone'])
        finished = time.time()
        print("Runtime: ", finished - started)

    # Add mains to truth.
    for key in ('CO', 'FHMM'):
        truth[key]['Main'] = test_mains

    return truth, predictions
def test_co_correctness(self):
    """CO must reproduce the submeter ground truth of building 1 exactly.

    The model is trained on the building's full ``elec`` group, the first
    mains chunk (loaded with ``sample_period=1``) is disaggregated, and the
    prediction is compared with the first chunk of every submeter.
    """
    elec = self.dataset.buildings[1].elec
    co = CombinatorialOptimisation()
    co.train(elec)
    mains = elec.mains()

    # Use the builtin next(): the generator method ``.next()`` exists only
    # on Python 2 and raises AttributeError on Python 3.
    pred = co.disaggregate_chunk(next(mains.load(sample_period=1)))

    gt = {}
    for meter in elec.submeters().meters:
        gt[meter] = next(meter.load(sample_period=1)).squeeze()
    gt = pd.DataFrame(gt)

    # Align the prediction's column order with the ground truth.
    pred = pred[gt.columns]
    self.assertTrue(gt.equals(pred))
def test_co_correctness(self):
    """Check that CO's prediction equals the submeter ground truth.

    Trains on building 1, disaggregates the first mains chunk loaded with
    ``sample_period=1`` and compares it with the first chunk of each
    submeter.
    """
    elec = self.dataset.buildings[1].elec
    co = CombinatorialOptimisation()
    co.train(elec)
    mains = elec.mains()

    first_mains_chunk = next(mains.load(sample_period=1))
    pred = co.disaggregate_chunk(first_mains_chunk)

    # One squeezed ground-truth column per submeter.
    gt = pd.DataFrame({
        meter: next(meter.load(sample_period=1)).squeeze()
        for meter in elec.submeters().meters
    })

    # Same column order on both sides before the exact comparison.
    pred = pred[gt.columns]
    self.assertTrue(gt.equals(pred))
def test_co_correctness(self):
    """Disaggregate building 1 into ``output.h5`` and compare meters 2 and 3.

    The disaggregated tables must have the same length and the same values
    as the corresponding tables of the source dataset.  The temporary
    output store is closed and deleted even when an assertion fails.
    """
    elec = self.dataset.buildings[1].elec
    co = CombinatorialOptimisation()
    co.train(elec)
    mains = elec.mains()

    output = HDFDataStore("output.h5", "w")
    try:
        co.disaggregate(mains, output, resample_seconds=1)

        for meter in range(2, 4):
            key = "/building1/elec/meter{}".format(meter)
            df1 = output.store.get(key)
            df2 = self.dataset.store.store.get(key)

            # Check the lengths first: an element-wise comparison of
            # differently sized frames raises rather than failing cleanly.
            self.assertEqual(len(df1.index), len(df2.index))
            self.assertEqual((df1 == df2).sum().values[0], len(df1.index))
    finally:
        # Always release the file handle and remove the temporary file.
        output.close()
        rm("output.h5")
def _save_co_predictions(disag, mains_chunk, wanted_types):
    """Disaggregate ``mains_chunk`` and save predictions of selected types.

    For every column of the disaggregation result, the dominant appliance
    of the matching model entry is looked up; if its type is in
    ``wanted_types`` the predicted values are saved with ``np.save`` to
    ``BASE_DIRECTORY`` as ``'CO_' + appliance_type``.
    """
    appliance_powers = disag.disaggregate_chunk(mains_chunk)
    # ``items()`` replaces ``iteritems()``, which was removed in pandas 2.0.
    for i, series in appliance_powers.items():
        appliance = disag.model[i]['training_metadata'].dominant_appliance()
        appliance_type = appliance.identifier.type
        if appliance_type in wanted_types:
            np.save(join(BASE_DIRECTORY, 'CO_' + appliance_type),
                    series.values)


def co_disag():
    """Train CO on ``meters`` and save predictions for three mains signals.

    The module-level ``mains`` signal is used for all appliance types in
    ``MAINS_APPLIANCES``; ``fridge_mains`` and ``kettle_mains`` are used for
    the ``'fridge freezer'`` and ``'kettle'`` types respectively.
    """
    # TRAIN
    disag = CombinatorialOptimisation()
    disag.train(meters)

    # TEST
    _save_co_predictions(disag, mains, MAINS_APPLIANCES)
    _save_co_predictions(disag, fridge_mains, ['fridge freezer'])
    _save_co_predictions(disag, kettle_mains, ['kettle'])
def __init__(self, in_filepath, out_filepath):
    """Load the input dataset and prepare the output store and CO model.

    Parameters
    ----------
    in_filepath:
        path of the HDF5 file opened as the input ``HDFDataStore``.
    out_filepath:
        path of the HDF5 file opened (mode ``'w'``) as the output store.
    """
    print("Loading DataStore and Generating Dataset...")

    # Empty key map; filled elsewhere.
    self.km = {}

    # Open the input store and load it into a new DataSet.
    input_store = HDFDataStore(in_filepath)
    self.dataStore = input_store
    self.dataSet = DataSet()
    self.dataSet.load(input_store)

    # Output store, untrained model, and empty training group.
    self.outDataStore = HDFDataStore(out_filepath, 'w')
    self.co = CombinatorialOptimisation()
    self.train_group = {}

    print("Data Properly Loaded!")
def co(start_train, end_train, start_test, end_test, train_elec):
    """Train CO, disaggregate building 1 of ``data`` and plot the results.

    Parameters
    ----------
    start_train, end_train:
        window applied to the module-level ``data`` set before training.
    start_test, end_test:
        window applied to ``data`` before disaggregation.
    train_elec:
        meter group used for training and as ground truth for the F1 score.

    Side effects
    ------------
    Writes ``./build/disagg_sum_co_<k>_k.h5`` (``k`` = number of training
    meters) and a sibling ``.json`` file holding the four window bounds,
    then shows two matplotlib figures.
    """
    # Start training
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    model = CombinatorialOptimisation()
    model.train(train_elec, ac_type='active', physical_quantity='power',
                sample_period=1)

    # Start disaggregating
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_co_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    try:
        model.disaggregate(elec.mains(), output, ac_type='active',
                           physical_quantity='power', sample_period=1)
    finally:
        # Release the HDF5 handle even if disaggregation fails.
        output.close()

    dates_dict = {
        "start_train": start_train,
        "end_train": end_train,
        "start_test": start_test,
        "end_test": end_test
    }
    # Write test and train timeframe into a json file
    with open(disag_filename + ".json", 'w') as dates_file:
        json.dump(dates_dict, dates_file)

    # Calculate F1-Score
    disag = DataSet(disag_filename)
    try:
        disag_elec = disag.buildings[1].elec
        disag_elec.plot()
        plt.title("CO")
        plt.show()

        f1 = f1_score(disag_elec, train_elec)
        f1.index = disag_elec.get_labels(f1.index)
        f1.plot(kind='barh')
        plt.ylabel('appliance')
        plt.xlabel('f-score')
        plt.title("CO")
        plt.show()
    finally:
        # The result dataset was previously left open.
        disag.store.close()
def test_co_correctness(self):
    """Disaggregate building 1 into ``output.h5`` and compare meters 2 and 3.

    Each disaggregated table must match the source dataset's table in
    length and in values.  The temporary store is closed and removed even
    when an assertion fails.
    """
    elec = self.dataset.buildings[1].elec
    co = CombinatorialOptimisation()
    co.train(elec)
    mains = elec.mains()

    output = HDFDataStore('output.h5', 'w')
    try:
        co.disaggregate(mains, output, resample_seconds=1)

        for meter in range(2, 4):
            key = '/building1/elec/meter{}'.format(meter)
            df1 = output.store.get(key)
            df2 = self.dataset.store.store.get(key)

            # Check the lengths first: an element-wise comparison of
            # differently sized frames raises rather than failing cleanly.
            self.assertEqual(len(df1.index), len(df2.index))
            self.assertEqual((df1 == df2).sum().values[0], len(df1.index))
    finally:
        # Always release the file handle and remove the temporary file.
        output.close()
        remove("output.h5")
# NOTE(review): this is a fragment of a flat script; ``total``, ``train``,
# ``test``, ``building``, ``time`` and ``Metadata`` come from code outside
# this view, and line structure was reconstructed from collapsed text.
print(3)

# Meter groups of the selected building in each of the three datasets.
total_elec = total.buildings[building].elec
train_elec = train.buildings[building].elec
test_elec = test.buildings[building].elec

# Time frames covered by the mains of the train / test datasets.
tf_train = train.buildings[building].elec.mains().get_timeframe()
tf_test = test.buildings[building].elec.mains().get_timeframe()
# Bare expressions: no effect when run as a script (presumably leftovers
# from an interactive session where they displayed the values).
tf_train
tf_test

# CO disaggregation test
start = time.time()
from nilmtk.disaggregate import CombinatorialOptimisation
co = CombinatorialOptimisation()
# Earlier training variants, kept for reference (they passed a sample
# period; the centroid-based call below does not):
#co.train(top_8_train_elec, sample_period=period_s)
#co.train(train_elec.submeters(), sample_period=period_s)
meta_eco = Metadata("ECO").centroids
co.train_centroid(train_elec.submeters(), centroids=meta_eco)
end = time.time()
print("Runtime =", end - start, "seconds.")

# For every trained model entry: meter instance, its label, its states.
for i, model in enumerate(co.model):
    print(model['training_metadata'].instance(), train_elec[model['training_metadata'].instance()].label(), model['states'])

# Restart the timer and set the output path, both presumably consumed by
# the disaggregation step that follows outside this view.
start = time.time()
disag_filename = '/media/airawan/DATA/Data/eco-b2-kall-co-2d-prio-1m.h5'
# NOTE(review): fragment of a flat script; ``da_elec``, ``da_data``,
# ``dataset``, ``dataset_name``, ``dataset_directory``, ``train_building``
# and ``disag_building`` are defined outside this view.

# F-score bar chart for the previous ("Dummy") disaggregation run.
plt.clf()
f1 = f1_score(da_elec, dataset.buildings[disag_building].elec)
f1.index = da_elec.get_labels([int(i) for i in f1.index])
ax = f1.plot(kind='barh')
ax.set_ylabel('appliance')
ax.set_xlabel('f-score')
ax.set_title("B%d Dummy disaggregation accuracy" % (disag_building))
plt.savefig('results/%s__b%d__fscore__dummy.png' % (dataset_name, disag_building))
plt.clf()
# Close the previous result store before ``da_data`` is rebound below.
da_data.store.close()

## CO training and disaggregation
### Training
co = CombinatorialOptimisation()
print('\n== co.train(dataset.buildings[%d].elec)' % (train_building))
co.train(dataset.buildings[train_building].elec)

### Disaggregation
# ``dataset_directory`` is combined with ``/``, so it is presumably a
# pathlib.Path; the store constructors below receive ``str(...)``.
co_outfile = dataset_directory / ('%s-da-co.h5' % (dataset_name.lower()))
output = HDFDataStore(str(co_outfile), 'w')
print('\n== co.disaggregate(dataset.buildings[%d].mains(), output)' % (disag_building))
co.disaggregate(dataset.buildings[disag_building].elec.mains(), output)
output.close()

### Results
print('\n== Plotting CO disaggregation results...')
# Re-open the file just written as a DataSet to inspect the result.
da_data = DataSet(str(co_outfile))
da_elec = da_data.buildings[disag_building].elec
def nilmtkDREDfunc(dataset_loc, train_start, train_end, test_start, test_end,
                   output_period):
    """Train CO on building 1 and return aligned prediction/truth frames.

    Parameters
    ----------
    dataset_loc:
        path of the dataset file; also used as the prefix of the output
        file ``dataset_loc + 'DREDapp.h5'``.
    train_start, train_end:
        window applied to the training dataset.
    test_start, test_end:
        window applied to the test dataset.
    output_period:
        sample period passed to both ``train`` and ``disaggregate`` and
        used (in seconds) to resample the training frame.

    Returns
    -------
    tuple
        ``(disag_co_elec_submeter_df, gt_df_sub, co,
        train_elec_df_aligned_drop)``: the disaggregated frame without its
        first column, the ground truth re-indexed to the disaggregated
        timestamps without its first column, the trained model, and the
        resampled training frame without its first column.
    """
    from nilmtk.disaggregate import CombinatorialOptimisation

    #### configuration ####
    period_s = output_period
    building = 1

    #### load ####
    train = DataSet(dataset_loc)
    test = DataSet(dataset_loc)
    train.set_window(start=train_start, end=train_end)
    test.set_window(start=test_start, end=test_end)
    print(train_start)
    print(train_end)
    print(test_start)
    print(test_end)

    #### electrical metergroup ####
    train_elec = train.buildings[building].elec
    test_elec = test.buildings[building].elec

    #### training process ####
    start = time.time()
    co = CombinatorialOptimisation()
    co.train(train_elec, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")

    #### disaggregation process ####
    start = time.time()
    disag_filename = dataset_loc + 'DREDapp.h5'
    output = HDFDataStore(disag_filename, 'w')
    try:
        co.disaggregate(test_elec.mains(), output, sample_period=period_s)
        end = time.time()
        print("Runtime =", end - start, "seconds.")
    finally:
        # Release the HDF5 handle even if disaggregation fails.
        output.close()

    disag_co = DataSet(disag_filename)
    disag_co_elec = disag_co.buildings[building].elec

    #### dataframes of the disaggregated and ground-truth meter groups ####
    disag_co_elec_df = disag_co_elec.dataframe_of_meters()
    gt_full_df = test_elec.dataframe_of_meters()

    #### output ####
    # Drop the first column (presumably the aggregated power -- confirm).
    disag_co_elec_submeter_df = disag_co_elec_df.drop(
        disag_co_elec_df.columns[[0]], axis=1)

    # Keep only the sampled timestamps of the ground truth.  ``reindex``
    # replaces ``.ix`` (removed in pandas 1.0) and, like ``.ix``, yields
    # NaN rows for timestamps missing from the ground truth.
    gt_df_aligned = gt_full_df.reindex(disag_co_elec_submeter_df.index)
    gt_df_sub = gt_df_aligned.drop(gt_df_aligned.columns[[0]], axis=1)

    # Training frame, resampled to the disaggregation period, first column
    # dropped as above.
    train_elec_df = train_elec.dataframe_of_meters()
    train_elec_df_aligned = train_elec_df.resample(
        str(period_s) + 'S').asfreq()
    train_elec_df_aligned_drop = train_elec_df_aligned.drop(
        train_elec_df_aligned.columns[[0]], axis=1)

    return (disag_co_elec_submeter_df, gt_df_sub, co,
            train_elec_df_aligned_drop)
# NOTE(review): fragment of a flat script; ``train``, ``test``, ``np``,
# ``CombinatorialOptimisation`` and ``FHMM`` come from outside this view.

# Split at 2011-04-30: training data before, test data from then on.
train.set_window(end="2011-04-30")
test.set_window(start="2011-04-30")
train_elec = train.buildings[1].elec
test_elec = test.buildings[1].elec

# Train only on the top 5 submeters.
top_5_train_elec = train_elec.submeters().select_top_k(k=5)

# Fixed seed for NumPy's global random state.
np.random.seed(42)
params = {}
#classifiers = {'CO':CombinatorialOptimisation(), 'FHMM':FHMM()}
predictions = {}
sample_period = 120
co = CombinatorialOptimisation()
fhmm = FHMM()

## Train models
co.train(top_5_train_elec, sample_period=sample_period)
fhmm.train(top_5_train_elec, sample_period=sample_period)

## Export models
co.export_model(filename='co.h5')
fhmm.export_model(filename='fhmm.h5')

## Re-import the models from the files written just above
co.import_model(filename='co.h5')
fhmm.import_model(filename='fhmm.h5')
''' training_set8 = training_set7.union(electric_stove) ''' Training Set 9: Top 10 Energy Appliances ''' training_set9 = training_set8.union(electric_space_heater) print("Training sets created!") ''' Create 9 instances of disaggregation model and train each with unique training set. ''' print("Training disaggregation algorithms...") co1 = CombinatorialOptimisation() co2 = CombinatorialOptimisation() co3 = CombinatorialOptimisation() co4 = CombinatorialOptimisation() co5 = CombinatorialOptimisation() co6 = CombinatorialOptimisation() co7 = CombinatorialOptimisation() co8 = CombinatorialOptimisation() co9 = CombinatorialOptimisation() co1.train(training_set1) print("set 1 trained") co2.train(training_set2) print("set 2 trained") co3.train(training_set3) print("set 3 trained")
common_index_utc = gt_index_utc.intersection(pred_index_utc) common_index_local = common_index_utc.tz_convert(timezone) gt_overall = gt_overall.loc[common_index_local] pred_overall = pred_overall.loc[common_index_local] appliance_labels = [m for m in gt_overall.columns.values] gt_overall.columns = appliance_labels pred_overall.columns = appliance_labels return gt_overall, pred_overall np.random.seed(42) params = {} co = CombinatorialOptimisation() fhmm = FHMM() #predictions = {} sample_period = 120 print("*"*20) print('CO') print("*" *20) co.train(top_5_train_elec, sample_period=sample_period) gt_1, predictions_co = predict(co, test_elec, 120, train.metadata['timezone']) print("*"*20) print('FHMM') print("*" *20) fhmm.train(top_5_train_elec, sample_period=sample_period) gt_2, predictions_fhmm = predict(fhmm, test_elec, 120, train.metadata['timezone']) rmse = {}
def test_all(path_to_directory):
    '''
    Exercise a range of MeterGroup methods on every building of every
    dataset file found in a directory, then run a CO disaggregation.

    path_to_directory: Contains the h5 files on which the tests are supposed to be run

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-collapsed source -- in particular the extent of the
    ``if ... is not None`` body near the end should be confirmed against
    the original file.
    '''
    check_directory_exists(path_to_directory)
    #files=[f for f in listdir(path_to_directory) and '.h5' in f and '.swp' not in f]
    # Regular files whose name contains '.h5' but not '.swp'.
    files = [f for f in listdir(path_to_directory) if isfile(join(path_to_directory, f)) and '.h5' in f and '.swp' not in f]
    files.sort()
    print ("Datasets collected and sorted. Processing...")
    # One try block wraps the whole run: any AttributeError raised anywhere
    # below ends up in the single handler at the bottom.
    try:
        for i, file in enumerate(files):
            current_file=DataSet(join(path_to_directory, file))
            print ("Printing metadata for current file...done.")
            print_dict(current_file.metadata)
            print (" Loading file # ", i, " : ", file, ". Please wait.")
            for building_number in range(1, len(current_file.buildings)+1):
                #Examine metadata for a single house
                elec=current_file.buildings[building_number].elec
                print ("The dataset being processed is : ", elec.dataset())
                print ("Metadata for current file: ")
                print_dict(current_file.buildings[building_number].metadata)
                print ("Appliance label information: ", elec.appliance_label())
                #print (elec.appliances)
                print ("Appliances:- ")
                # NOTE: this loop variable shadows the file index ``i`` of
                # the outermost loop.
                for i in elec.appliances:
                    print (i)

                print ("Examining sub-metered appliances...")

                print ("Collecting stats on meters...Done.")
                # NOTE(review): the method is printed, not called (there are
                # no parentheses) -- confirm whether that is intended.
                print (elec._collect_stats_on_all_meters)

                print ("Timeframe: ", elec.get_timeframe())

                print ("Available power AC types: ", elec.available_power_ac_types())

                print ("Clearing cache...done.")
                elec.clear_cache()

                print ("Testing if there are meters from multiple buildings. Result returned by method: ", elec.contains_meters_from_multiple_buildings())

                # TODO: Find a better way to test the correlation function
                # print ("Testing the correlation function. ", elec.correlation(elec))

                print ("List of disabled meters: ", elec.disabled_meters)
                print ("Trying to determine the dominant appliance: ")
                # A RuntimeError here is reported and otherwise ignored.
                try:
                    elec.dominant_appliance()
                except RuntimeError:
                    print ('''More than one dominant appliance in MeterGroup! (The dominant appliance per meter should be manually specified in the metadata. If it isn't and if there are multiple appliances for a meter then NILMTK assumes all appliances on that meter are dominant. NILMTK can't automatically distinguish between multiple appliances on the same meter (at least, not without using NILM!))''')
                    pass
                print ("Dropout rate: ", elec.dropout_rate())
                # A ValueError from the energy / entropy statistics is
                # reported and otherwise ignored.
                try:
                    print ("Calculating energy per meter:")
                    print (elec.energy_per_meter())

                    print ("Calculating total entropy")
                    print (elec.entropy())

                    print ("Calculating entropy per meter: ")
                    print (elec.entropy_per_meter())
                except ValueError:
                    print ("ValueError: Total size of array must remain unchanged.")
                    pass

                print ("Calculating fraction per meter.")
                print (elec.fraction_per_meter())

                #print ("Average energy per period: ", elec.average_energy_per_period())

                print ("Executing functions...")
                lis=[]
                func=""
                # The string below is disabled code kept as a bare string
                # expression; it is evaluated and discarded at runtime.
                '''for function in dir(elec): try: start=time.time() if ("__" not in function or "dataframe_of_meters" not in function): func=getattr(elec, function) print ("Currently executing ", function, ". Please wait...") print (func()) # print ("cProfile stats - printed") # cProfile.run("func") end=time.time() print ("Time taken for the entire process : ", (end - start)) except AttributeError: print ("Attribute error occured. ") except TypeError: lis.append(function) print ("Warning: TypeError") pass'''

                print ("Plotting wiring hierarchy of meters....")
                elec.draw_wiring_graph()
                ## DISAGGREGATION STARTS HERE
                appliance_type="unknown"
                #TODO : appliance_type should cycle through all appliances and check for each of them. For this, use a list.
                selected_appliance=nilmtk.global_meter_group.select_using_appliances(type=appliance_type)
                appliance_restricted = MeterGroup(selected_appliance.meters)
                # NOTE(review): the extent of this ``if`` body is a best
                # guess from the collapsed source -- confirm.
                if ((appliance_restricted.proportion_of_upstream_total_per_meter()) is not None):
                    proportion_per_appliance = appliance_restricted.proportion_of_upstream_total_per_meter()

                    proportion_per_appliance.plot(kind='bar');
                    plt.title('Appliance energy as proportion of total building energy');
                    plt.ylabel('Proportion');
                    plt.xlabel('Appliance (<appliance instance>, <building instance>, <dataset name>)');
                    selected_appliance.select(building=building_number).total_energy()
                    # NOTE(review): building 1 is hard-coded here while the
                    # line above uses ``building_number``.
                    selected_appliance.select(building=1).plot();

                    appliance_restricted = MeterGroup(selected_appliance.meters)
                    daily_energy = pd.DataFrame([meter.average_energy_per_period(offset_alias='D') for meter in appliance_restricted.meters])

                    daily_energy.plot(kind='hist');
                    plt.title('Histogram of daily energy');
                    plt.xlabel('energy (kWh)');
                    plt.ylabel('Occurences');
                    plt.legend().set_visible(False)

                    # Restrict the store to one fixed day before the pie
                    # chart and the disaggregation below.
                    current_file.store.window=TimeFrame(start='2012-04-01 00:00:00-05:00', end='2012-04-02 00:00:00-05:00')
                    #elec.plot();

                    fraction = elec.submeters().fraction_per_meter().dropna()

                    labels = elec.get_appliance_labels(fraction.index)
                    plt.figure(figsize=(8,8))
                    fraction.plot(kind='pie', labels=labels);

                    # The results of these two selections are discarded.
                    elec.select_using_appliances(category='heating')
                    elec.select_using_appliances(category='single-phase induction motor')

                    # Train CO on the whole building and print every model.
                    co = CombinatorialOptimisation()
                    co.train(elec)

                    for model in co.model:
                        print_dict(model)

                    # NOTE(review): ``data_dir`` is not defined in this
                    # function; it must come from module scope.
                    disag_filename = join(data_dir, 'ampds-disag.h5')
                    output = HDFDataStore(disag_filename, 'w')
                    co.disaggregate(elec.mains(), output)
                    output.close()

                    # Re-open the result and plot the f-score per appliance.
                    disag = DataSet(disag_filename)

                    disag_elec = disag.buildings[building_number].elec

                    f1 = f1_score(disag_elec, elec)
                    f1.index = disag_elec.get_appliance_labels(f1.index)
                    f1.plot(kind='bar')
                    plt.xlabel('appliance');
                    plt.ylabel('f-score');
                    disag_elec.plot()

                    disag.store.close()
    except AttributeError:
        # NOTE(review): this handler catches an AttributeError from any
        # statement above, although the message names only one cause.
        print ("AttributeError occured while executing. This means that the value returned by proportion_per_appliance = appliance_restricted.proportion_of_upstream_total_per_meter() is None")
        pass
def fit_a_model(self, building_idx):
    """Return a CombinatorialOptimisation model trained on one building.

    The training data is whatever
    ``self.get_elec_meter_data_of_a_building(building_idx)`` returns.
    """
    model = CombinatorialOptimisation()
    building_elec = self.get_elec_meter_data_of_a_building(building_idx)
    model.train(building_elec)
    return model
def nilmtkECOfunc(dataset_loc, train_start, train_end, test_start, test_end,
                  output_period):
    """Train CO on building 2 and return aligned prediction/truth frames.

    Parameters
    ----------
    dataset_loc:
        path of the dataset file.
    train_start, train_end:
        window applied to the training dataset.
    test_start, test_end:
        window applied to the test dataset.
    output_period:
        sample period passed to both ``train`` and ``disaggregate`` and
        used (in seconds) to resample the training frame.

    Returns
    -------
    tuple
        ``(disag_co_elec_submeter_df, gt_df_sub, co,
        train_elec_df_aligned_drop)``: the disaggregated frame without its
        first column, the ground truth re-indexed to the disaggregated
        timestamps without its first three columns, the trained model, and
        the resampled training frame without its first three columns.
    """
    from nilmtk.disaggregate import CombinatorialOptimisation

    #### configuration ####
    period_s = output_period
    building = 2

    #### load ####
    train = DataSet(dataset_loc)
    test = DataSet(dataset_loc)
    train.set_window(start=train_start, end=train_end)
    test.set_window(start=test_start, end=test_end)
    print(train_start)
    print(train_end)
    print(test_start)
    print(test_end)

    #### electrical metergroup ####
    train_elec = train.buildings[building].elec
    test_elec = test.buildings[building].elec

    #### training process ####
    start = time.time()
    co = CombinatorialOptimisation()
    co.train(train_elec, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")

    #### disaggregation process ####
    start = time.time()
    # NOTE(review): the output path is hard-coded and contains ':', which
    # is not a valid file-name character on every platform.
    disag_filename = '../dataset/ecob-b2-kall-co-1w:11-1m.h5'
    output = HDFDataStore(disag_filename, 'w')
    try:
        co.disaggregate(test_elec.mains(), output, sample_period=period_s)
        end = time.time()
        print("Runtime =", end - start, "seconds.")
    finally:
        # Release the HDF5 handle even if disaggregation fails.
        output.close()

    disag_co = DataSet(disag_filename)
    disag_co_elec = disag_co.buildings[building].elec

    #### dataframes of the disaggregated and ground-truth meter groups ####
    disag_co_elec_df = disag_co_elec.dataframe_of_meters()
    gt_full_df = test_elec.dataframe_of_meters()

    #### output ####
    # Drop the first column (presumably the aggregated power -- confirm).
    disag_co_elec_submeter_df = disag_co_elec_df.drop(
        disag_co_elec_df.columns[[0]], axis=1)

    # Keep only the sampled timestamps of the ground truth.  ``reindex``
    # replaces ``.ix`` (removed in pandas 1.0) and, like ``.ix``, yields
    # NaN rows for timestamps missing from the ground truth.
    gt_df_aligned = gt_full_df.reindex(disag_co_elec_submeter_df.index)

    # Drop the first three columns (presumably the mains channels of this
    # building -- confirm).
    gt_df_sub = gt_df_aligned.drop(gt_df_aligned.columns[[0, 1, 2]], axis=1)

    # Training frame, resampled to the disaggregation period, with the same
    # three leading columns dropped.
    train_elec_df = train_elec.dataframe_of_meters()
    train_elec_df_aligned = train_elec_df.resample(
        str(period_s) + 'S').asfreq()
    train_elec_df_aligned_drop = train_elec_df_aligned.drop(
        train_elec_df_aligned.columns[[0, 1, 2]], axis=1)

    return (disag_co_elec_submeter_df, gt_df_sub, co,
            train_elec_df_aligned_drop)
#fridge_meter = train_elec['fridge', 1] #fridge_df = fridge_meter.load().next() #fridge_df.head() # select top k submeter top_5_train_elec = train_elec.submeters().select_top_k(k=5) ############################################################################################### # Jaccard (Number of Appliances Identified Correctly ########################################## ############################################################################################### # CO disaggregation test start = time.time() from nilmtk.disaggregate import CombinatorialOptimisation co = CombinatorialOptimisation() # Note that we have given the sample period to downsample the data to 15 minute #co.train(top_5_train_elec, sample_period=period_s) co.train(train_elec, sample_period=period_s) end = time.time() print("Runtime =", end - start, "seconds.") start = time.time() disag_filename = '/media/airawan/DATA/Data/eco-b2-k5-co-1:1-1m.h5' output = HDFDataStore(disag_filename, 'w') # Note that we have mentioned to disaggregate after converting to a sample period of 900 seconds co.disaggregate(test_elec.mains(), output, sample_period=period_s) end = time.time() print("Runtime =", end - start, "seconds.") output.close()
out = {} for b_id, building in building_chunk_items[home_group]: try: if b_id in existing_files_names: print("Skipping", b_id) continue print b_id out[b_id] = {} start = time.time() #cls_dict = {"Hart":Hart85()} cls_dict = { "CO": CombinatorialOptimisation(), "FHMM": FHMM(), "Hart": Hart85() } elec = building.elec mains = elec.mains() train = DataSet(ds_path) test = DataSet(ds_path) split_point = datetime.date(2013, 7, 16) train.set_window(end=split_point) #test.set_window(start=split_point) train_elec = train.buildings[b_id].elec test_elec = test.buildings[b_id].elec test_mains = test_elec.mains()
test.set_window(start=str(min(dates2)) + " 00:00:00", end=str(max(dates2)) + " 23:45:00") building = 2 elif house == 'r3': train.set_window(start="11-01-2012", end="11-30-2012") test.set_window(start=str(min(dates2)) + " 00:00:00", end=str(max(dates2)) + " 23:45:00") building = 3 train_elec = train.buildings[building].elec test_elec = test.buildings[building].elec tf_train = train.buildings[building].elec.mains().get_timeframe() tf_test = test.buildings[building].elec.mains().get_timeframe() #output with co training co = CombinatorialOptimisation() co.train(train_elec.submeters(), sample_period=feature_length) #retrieving state database state = get_states(co) #build disaggregation disag_filename_co = '/home/neo/NILMTK_experimental/disarg_folder/disarg_co.h5' output = HDFDataStore(disag_filename_co, 'w') co.disaggregate(test_elec.mains(), output, sample_period=feature_length) output.close() #capturing nilmtk result disag_co = DataSet(disag_filename_co) disag_elec = disag_co.buildings[2].elec
def _co_disaggregate_to_file(co, mains, filename):
    """Disaggregate ``mains`` with ``co`` into a new HDF5 store at ``filename``."""
    output = HDFDataStore(filename, 'w')
    try:
        # NOTE(review): no sample_period is passed here although training
        # used one -- confirm that the default is intended.
        co.disaggregate(mains, output_datastore=output)
    finally:
        # Release the file handle even if disaggregation fails.
        output.close()


def _co_metrics(predicted_meter, truth_meter):
    """Return the metric dictionary comparing a predicted meter with the truth."""
    rpaf = metrics.recall_precision_accuracy_f1(predicted_meter, truth_meter)
    return {
        'recall_score': rpaf[0],
        'precision_score': rpaf[1],
        'accuracy_score': rpaf[2],
        'f1_score': rpaf[3],
        'mean_absolute_error': metrics.mean_absolute_error(
            predicted_meter, truth_meter),
        'mean_squared_error': metrics.mean_square_error(
            predicted_meter, truth_meter),
        'relative_error_in_total_energy': metrics.relative_error_total_energy(
            predicted_meter, truth_meter),
        'nad': metrics.nad(predicted_meter, truth_meter),
        'disaggregation_accuracy': metrics.disaggregation_accuracy(
            predicted_meter, truth_meter),
    }


def combinatorial_optimisation(dataset_path, train_building, train_start,
                               train_end, val_building, val_start, val_end,
                               test_building, test_start, test_end,
                               meter_key, sample_period):
    """Train CO for one appliance and score it on validation and test data.

    Parameters
    ----------
    dataset_path:
        path of the dataset file; it is opened three times (train,
        validation, test), each with its own window.
    train_building, val_building, test_building:
        building keys used for training, validation and testing.
    train_start, train_end, val_start, val_end, test_start, test_end:
        window bounds of the three datasets.
    meter_key:
        key of the appliance meter to train on and to evaluate.
    sample_period:
        sample period passed to ``CombinatorialOptimisation.train``.

    Returns
    -------
    dict
        ``'val_metrics'`` and ``'test_metrics'`` (metric dictionaries),
        ``'time_taken'`` (seconds, formatted with two decimals) and
        ``'epochs'`` (always ``None``).

    Side effects
    ------------
    Writes ``disag-out-val.h5`` and ``disag-out-test.h5`` in the current
    working directory.  Every dataset opened here is closed again, also
    when an error occurs.
    """
    # Start tracking time
    start = time.time()

    opened = []  # datasets to close when done, successful or not
    try:
        # Open the three windowed views of the dataset.
        train = DataSet(dataset_path)
        opened.append(train)
        train.set_window(start=train_start, end=train_end)
        val = DataSet(dataset_path)
        opened.append(val)
        val.set_window(start=val_start, end=val_end)
        test = DataSet(dataset_path)
        opened.append(test)
        test.set_window(start=test_start, end=test_end)

        train_elec = train.buildings[train_building].elec
        val_elec = val.buildings[val_building].elec
        test_elec = test.buildings[test_building].elec

        # Train on the requested appliance meter plus the mains.
        selected = MeterGroup([train_elec[meter_key], train_elec.mains()])
        co = CombinatorialOptimisation()
        co.train(selected, sample_period=sample_period)

        # Disaggregate validation first, then test.
        val_disag_filename = 'disag-out-val.h5'
        _co_disaggregate_to_file(co, val_elec.mains(), val_disag_filename)
        test_disag_filename = 'disag-out-test.h5'
        _co_disaggregate_to_file(co, test_elec.mains(), test_disag_filename)

        # Validation results
        result_val = DataSet(val_disag_filename)
        opened.append(result_val)
        res_elec_val = result_val.buildings[val_building].elec
        val_metrics_results_dict = _co_metrics(
            res_elec_val[meter_key], val_elec[meter_key])

        # Test results
        result = DataSet(test_disag_filename)
        opened.append(result)
        res_elec = result.buildings[test_building].elec
        test_metrics_results_dict = _co_metrics(
            res_elec[meter_key], test_elec[meter_key])

        # End tracking time (in seconds)
        time_taken = time.time() - start

        return {
            'val_metrics': val_metrics_results_dict,
            'test_metrics': test_metrics_results_dict,
            'time_taken': format(time_taken, '.2f'),
            'epochs': None,
        }
    finally:
        # Close result files and dataset files, newest first.
        for dataset in reversed(opened):
            dataset.store.close()
class REDD_Data(object):
    '''
    Thin convenience wrapper around the NILMTK calls needed to train a
    CombinatorialOptimisation model on a REDD dataset and write the
    disaggregation result to an output HDF5 store.

    This class requires the following for proper usage:
    - NILMTK package: https://github.com/nilmtk
    - REDD Dataset (converted to .h5): redd.csail.mit.edu
    - Various dependencies (that NILMTK also requires), most can be
      downloaded through Anaconda: continuum.io/downloads

    Parameters
    -----------
    in_filepath: Filepath of converted REDD dataset (in .h5 format)
    out_filepath: filepath to place output disaggregation dataset
                  (in .h5 format); it is opened in 'w' mode.

    Attributes
    -----------
    km: Key_Map Object
        starts as an empty dict and is replaced by a Key_Map instance
        whenever one of the plot helpers needs to map an appliance name
        to its .H5 key.

    dataStore: NILMTK HDFDataStore Object
        the HDFDataStore opened on in_filepath.

    dataSet: NILMTK DataSet Object
        the DataSet object that is loaded from self.dataStore.

    outDataStore: NILMTK HDFDataStore Object
        the HDFDataStore that receives the disaggregated output.

    co: NILMTK CombinatorialOptimisation object
        the disaggregation model that is trained / imported / exported
        and used to disaggregate.

    train_group: NILMTK MeterGroup object
        the MeterGroup last used to train self.co (an empty dict until
        train_disag_model has succeeded).
    '''

    def __init__(self, in_filepath, out_filepath):
        print("Loading DataStore and Generating Dataset...")
        self.km = {}
        self.dataStore = HDFDataStore(in_filepath)
        self.dataSet = DataSet()
        self.dataSet.load(self.dataStore)
        self.outDataStore = HDFDataStore(out_filepath, 'w')
        self.co = CombinatorialOptimisation()
        self.train_group = {}
        print("Data Properly Loaded!")

    def train_disag_model(self, building_inst, use_topk=False, k=5):
        '''
        Function trains the disaggregation model using a selected MeterGroup.

        If building_inst is outside 1-6 an error message is printed and the
        method returns without touching the model or self.train_group.

        Parameters
        -----------
        building_inst: the instance # of the building that you wish to grab
                       the training group from (1-6).

        use_topk: true if you wish to train only on the result of
                  select_top_k(k) for that building, false if you wish to
                  use the building's whole elec MeterGroup.

        k: the # of appliances you wish to use (if use_topk = True)
        '''
        print("Training CO Disaggregation Model using given metergroup...")

        # Guard clause: reject building numbers outside the accepted range.
        if not 0 < building_inst <= 6:
            print("Error: Please select a building_inst of 1-6.")
            print("Model unsucessfully trained.")
            return

        # Select appropriate meter group to train with
        elec = self.dataSet.buildings[building_inst].elec
        self.train_group = elec.select_top_k(k) if use_topk else elec

        self.co.train(self.train_group)
        print("CO Disaggreation Model Sucessfully Trained!")

    def load_disag_model(self, filepath):
        '''
        Function loads the disaggregation model from a file.

        Parameters
        -----------
        filepath: exact filepath of the model file.
        '''
        print("Loading CO Disaggreation Model...")
        self.co.import_model(filepath)
        print("Model Sucessfully Loaded!")

    def save_disag_model(self, filepath):
        '''
        Function saves the disaggregation model to a file.

        Parameters
        -----------
        filepath: exact filepath of the model file.
        '''
        print("Saving CO Disaggregation Model...")
        self.co.export_model(filepath)
        print("Model Sucessfully Saved!")

    def disaggregate(self, building_inst):
        '''
        Function will disaggregate the mains MeterGroup of the passed building
        instance, and save this to the self.outDataStore object.

        Parameters
        -----------
        building_inst: instance # of the building mains you wish to
                       disaggregate.
        '''
        print("Disaggregating Building Mains...")
        mains = self.dataSet.buildings[building_inst].elec.mains()
        self.co.disaggregate(mains, self.outDataStore)
        print("Mains sucessfully disaggregated!")

    def close(self):
        '''
        Function closes all open DataStore's being used by the program.

        The output store is closed even when closing the input store
        raises; the exception from the input store still propagates.
        '''
        print("Closing DataStores...")
        try:
            self.dataStore.close()
        finally:
            self.outDataStore.close()
        print("Output DataStores Sucessfully Closed")

    # ------------------------------------------------------------------
    # All Plot Functions below are a WORK IN PROGRESS!
    # Documentation will be provided upon completion.
    # ------------------------------------------------------------------

    def plot_disag_apl(self, inst, appliance, t1="", t2=""):
        '''
        Plot the series stored in self.outDataStore under the key that
        Key_Map(inst) gives for `appliance`, sliced [t1:t2], and show it.
        '''
        # NOTE(review): the "" defaults are passed straight into the slice;
        # confirm the stored series accepts them as open bounds.
        self.km = Key_Map(inst)
        series = self.outDataStore.store.get(self.km.get_key(appliance))
        plot_series(series[t1:t2])
        plt.title("Disaggregated " + appliance.capitalize() + " Energy")
        plt.show()

    def show_plots(self):
        '''Show every pending matplotlib figure.'''
        plt.show()

    def building_plot_all(self, building_inst, t1, t2):
        '''
        Call elec.plot(t1, t2) for the given building and label the axes.
        Does not call plt.show(); use show_plots() afterwards.
        '''
        self.dataSet.buildings[building_inst].elec.plot(t1, t2)
        plt.title("Building " + str(building_inst) + " Energy per Appliance")
        plt.ylabel('Power [W]')
        plt.xlabel('Hour')

    def plot_redd_mains_data(self, inst=1, t1="", t2=""):
        '''
        Plot the sum of the 'mains1' and 'mains2' series of building `inst`
        read from self.dataStore, sliced [t1:t2], and show it.
        '''
        self.km = Key_Map(inst)
        series1 = self.dataStore.store.get(self.km.get_key('mains1'))[t1:t2]
        series2 = self.dataStore.store.get(self.km.get_key('mains2'))[t1:t2]
        plot_series(series1 + series2)
        plt.title("Building " + str(inst) + " Mains Energy")
        plt.show()
pred_index_utc = pred_overall.index.tz_convert("UTC") common_index_utc = gt_index_utc.intersection(pred_index_utc) common_index_local = common_index_utc.tz_convert(timezone) gt_overall = gt_overall.loc[common_index_local] pred_overall = pred_overall.loc[common_index_local] appliance_labels = [m for m in gt_overall.columns.values] gt_overall.columns = appliance_labels pred_overall.columns = appliance_labels return gt_overall, pred_overall np.random.seed(42) co = CombinatorialOptimisation() fhmm = FHMM() co.import_model(filename='co.h5') fhmm.import_model(filename='fhmm.h5') cot, pred_co = predict(co, test_elec, 120, train.metadata['timezone']) fhmmt, pred_fhmm = predict(fhmm, test_elec, 120, train.metadata['timezone']) rmse = {} rmse["CO"] = nilmtk.utils.compute_rmse(cot, pred_co, pretty=True) rmse["FHMM"] = nilmtk.utils.compute_rmse(fhmmt, pred_fhmm, pretty=True) for clf_name in classifiers.keys():
class REDD_Data(object):
    '''
    Thin convenience wrapper around the NILMTK calls needed to train a
    CombinatorialOptimisation model on a REDD dataset and write the
    disaggregation result to an output HDF5 store.

    This class requires the following for proper usage:
    - NILMTK package: https://github.com/nilmtk
    - REDD Dataset (converted to .h5): redd.csail.mit.edu
    - Various dependencies (that NILMTK also requires), most can be
      downloaded through Anaconda: continuum.io/downloads

    Parameters
    -----------
    in_filepath: Filepath of converted REDD dataset (in .h5 format)
    out_filepath: filepath to place output disaggregation dataset
                  (in .h5 format); it is opened in 'w' mode.

    Attributes
    -----------
    km: Key_Map Object
        starts as an empty dict and is replaced by a Key_Map instance
        whenever one of the plot helpers needs to map an appliance name
        to its .H5 key.

    dataStore: NILMTK HDFDataStore Object
        the HDFDataStore opened on in_filepath.

    dataSet: NILMTK DataSet Object
        the DataSet object that is loaded from self.dataStore.

    outDataStore: NILMTK HDFDataStore Object
        the HDFDataStore that receives the disaggregated output.

    co: NILMTK CombinatorialOptimisation object
        the disaggregation model that is trained / imported / exported
        and used to disaggregate.

    train_group: NILMTK MeterGroup object
        the MeterGroup last used to train self.co (an empty dict until
        train_disag_model has succeeded).
    '''

    def __init__(self, in_filepath, out_filepath):
        print("Loading DataStore and Generating Dataset...")
        self.km = {}
        self.dataStore = HDFDataStore(in_filepath)
        self.dataSet = DataSet()
        self.dataSet.load(self.dataStore)
        self.outDataStore = HDFDataStore(out_filepath, 'w')
        self.co = CombinatorialOptimisation()
        self.train_group = {}
        print("Data Properly Loaded!")

    def train_disag_model(self, building_inst, use_topk=False, k=5):
        '''
        Function trains the disaggregation model using a selected MeterGroup.

        If building_inst is outside 1-6 an error message is printed and the
        method returns without touching the model or self.train_group.

        Parameters
        -----------
        building_inst: the instance # of the building that you wish to grab
                       the training group from (1-6).

        use_topk: true if you wish to train only on the result of
                  select_top_k(k) for that building, false if you wish to
                  use the building's whole elec MeterGroup.

        k: the # of appliances you wish to use (if use_topk = True)
        '''
        print("Training CO Disaggregation Model using given metergroup...")

        # Guard clause: reject building numbers outside the accepted range.
        if not 0 < building_inst <= 6:
            print("Error: Please select a building_inst of 1-6.")
            print("Model unsucessfully trained.")
            return

        # Select appropriate meter group to train with
        elec = self.dataSet.buildings[building_inst].elec
        self.train_group = elec.select_top_k(k) if use_topk else elec

        self.co.train(self.train_group)
        print("CO Disaggreation Model Sucessfully Trained!")

    def load_disag_model(self, filepath):
        '''
        Function loads the disaggregation model from a file.

        Parameters
        -----------
        filepath: exact filepath of the model file.
        '''
        print("Loading CO Disaggreation Model...")
        self.co.import_model(filepath)
        print("Model Sucessfully Loaded!")

    def save_disag_model(self, filepath):
        '''
        Function saves the disaggregation model to a file.

        Parameters
        -----------
        filepath: exact filepath of the model file.
        '''
        print("Saving CO Disaggregation Model...")
        self.co.export_model(filepath)
        print("Model Sucessfully Saved!")

    def disaggregate(self, building_inst):
        '''
        Function will disaggregate the mains MeterGroup of the passed building
        instance, and save this to the self.outDataStore object.

        Parameters
        -----------
        building_inst: instance # of the building mains you wish to
                       disaggregate.
        '''
        print("Disaggregating Building Mains...")
        mains = self.dataSet.buildings[building_inst].elec.mains()
        self.co.disaggregate(mains, self.outDataStore)
        print("Mains sucessfully disaggregated!")

    def close(self):
        '''
        Function closes all open DataStore's being used by the program.

        The output store is closed even when closing the input store
        raises; the exception from the input store still propagates.
        '''
        print("Closing DataStores...")
        try:
            self.dataStore.close()
        finally:
            self.outDataStore.close()
        print("Output DataStores Sucessfully Closed")

    # ------------------------------------------------------------------
    # All Plot Functions below are a WORK IN PROGRESS!
    # Documentation will be provided upon completion.
    # ------------------------------------------------------------------

    def plot_disag_apl(self, inst, appliance, t1="", t2=""):
        '''
        Plot the series stored in self.outDataStore under the key that
        Key_Map(inst) gives for `appliance`, sliced [t1:t2], and show it.
        '''
        # NOTE(review): the "" defaults are passed straight into the slice;
        # confirm the stored series accepts them as open bounds.
        self.km = Key_Map(inst)
        series = self.outDataStore.store.get(self.km.get_key(appliance))
        plot_series(series[t1:t2])
        plt.title("Disaggregated " + appliance.capitalize() + " Energy")
        plt.show()

    def show_plots(self):
        '''Show every pending matplotlib figure.'''
        plt.show()

    def building_plot_all(self, building_inst, t1, t2):
        '''
        Call elec.plot(t1, t2) for the given building and label the axes.
        Does not call plt.show(); use show_plots() afterwards.
        '''
        self.dataSet.buildings[building_inst].elec.plot(t1, t2)
        plt.title("Building " + str(building_inst) + " Energy per Appliance")
        plt.ylabel('Power [W]')
        plt.xlabel('Hour')

    def plot_redd_mains_data(self, inst=1, t1="", t2=""):
        '''
        Plot the sum of the 'mains1' and 'mains2' series of building `inst`
        read from self.dataStore, sliced [t1:t2], and show it.
        '''
        self.km = Key_Map(inst)
        series1 = self.dataStore.store.get(self.km.get_key('mains1'))[t1:t2]
        series2 = self.dataStore.store.get(self.km.get_key('mains2'))[t1:t2]
        plot_series(series1 + series2)
        plt.title("Building " + str(inst) + " Mains Energy")
        plt.show()
# Keep growing the training group by one meter per step. Every intermediate
# group keeps its own name so each size stays individually addressable.
training_set6 = training_set5.union(microwave)
training_set7 = training_set6.union(sockets1)
training_set8 = training_set7.union(lights3)
training_set9 = training_set8.union(electric_oven)
training_set10 = training_set9.union(unknown1)
training_set11 = training_set10.union(sockets3)
training_set12 = training_set11.union(sockets4)
training_set13 = training_set12.union(electric_stove)
training_set14 = training_set13.union(electric_space_heater)
training_set15 = training_set14.union(unknown2)
print("Training groups sucessfully created!")

# Fifteen independent, untrained CombinatorialOptimisation models, created
# in order co1..co15.
# NOTE(review): presumably coN is later trained on training_setN - confirm
# against the code that follows.
(co1, co2, co3, co4, co5,
 co6, co7, co8, co9, co10,
 co11, co12, co13, co14, co15) = (
    CombinatorialOptimisation() for _ in range(15))
#Intersection of index gt_index_utc = gt_overall.index.tz_convert("UTC") pred_index_utc = pred_overall.index.tz_convert("UTC") common_index_utc = gt_index_utc.intersection(pred_index_utc) common_index_local = common_index_utc.tz_convert(timezone) gt_overall = gt_overall.ix[common_index_local] pred_overall = pred_overall.ix[common_index_local] appliance_labels = [m.label() for m in gt_overall.columns.values] gt_overall.columns = appliance_labels pred_overall.columns = appliance_labels return gt_overall, pred_overall #Run classifiers CO and FHMM classifiers = {'CO': CombinatorialOptimisation(), 'FHMM': FHMM()} predictions = {} sample_period = 6 for clf_name, clf in classifiers.iteritems(): print("*" * 20) print(clf_name) print("*" * 20) clf.train(top_5_train_elec, sample_period=sample_period) gt, predictions[clf_name] = predict(clf, test_elec, 6, train.metadata['timezone']) #Evaluate algorithms by rmse metric def compute_rmse(gt, pred): from sklearn.metrics import mean_squared_error rms_error = {}
# select the larger sampling period between the train and the test set
# (each is read from the first entry of metadata['meter_devices'])
samplePeriod = next(iter(
    train.metadata['meter_devices'].values()))['sample_period']
testSamples = next(iter(
    test.metadata['meter_devices'].values()))['sample_period']
samplePeriod = max(samplePeriod, testSamples)

# train the appropriate algorithm; fail immediately on an unknown name
# instead of falling through with a placeholder and crashing inside train()
if algorithm == 'fhmm':
    clf = fhmm_exact.FHMM()
elif algorithm == 'combOpt':
    clf = CombinatorialOptimisation()
else:
    raise ValueError(
        "Unknown algorithm {!r}; expected 'fhmm' or 'combOpt'.".format(
            algorithm))

start = time.time()
clf.train(train_elec, sample_period=samplePeriod)
end = time.time()
print('Training runtime =', end - start, 'seconds.')

# make predictions: one disaggregated frame per mains chunk, keyed by the
# chunk's position; missing rows are dropped before disaggregating
pred = {}
testChunks = test_elec.mains().load(sample_period=samplePeriod)
for i, chunk in enumerate(testChunks):
    chunk_drop_na = chunk.dropna()
    pred[i] = clf.disaggregate_chunk(chunk_drop_na)
print('---------------------------------')
print('Testing done')
print('---------------------------------')

# If everything can fit in memory
print("DataSet Sucessfully Loaded!")

# "Conditioning" step: within this fragment it only fetches the meter group
# of building one (house1 in REDD).
print("Conditioning Data... \n")
r_elec = r_dataset.buildings[1].elec
print("\nConditioning Finished.")

# Train a combinatorial optimisation disaggregator on the whole building 1
# meter group (every appliance).
print("Training disaggregation model...")
disag = CombinatorialOptimisation()
disag.train(r_elec)
print("Model Sucessfully Trained!")

# Prepare the inputs for disaggregation: the building 1 mains and an output
# HDF datastore opened in 'w' mode.
print("Disaggregating data...")
r1_mains = r_elec.mains()
output_store = HDFDataStore('C:/NILM/Data_Sets/redd_b1_output.h5', 'w')
# *Cannot be implemented until database is setup in environment

# Verify input appliance exists in building
km = Key_Map(redd_building)

# verify a real appliance has been entered; exit with a message otherwise
if not km.is_in_map(disag_appliance):
    sys.exit("An incorrect appliance name has been entered. Please ensure the entered name is exactly correct.")

redd_data = DataSet("C:/NILM/Data/REDD/redd.h5")

# the building's full meter group is the training set; its mains are what
# gets disaggregated
training_set = redd_data.buildings[redd_building].elec
building_mains = training_set.mains()

# train disaggregation set
co = CombinatorialOptimisation()
co.train(training_set)

# set output datastore (opened in 'w' mode)
outputData = HDFDataStore("C:/NILM/Data/Output/output.h5", 'w')

# disaggregate
co.disaggregate(building_mains, outputData)

# to add:
# 1) get the meter instance # of the appliance selected
# 2) export the meter instance series of the output datastore to database using SQL, within t1-t2 parameters*
#
# *Cannot be implemented until database is setup in environment
# Verify input appliance exists in building
km = Key_Map(1)

# verify a real appliance has been entered; exit with a message otherwise
if not km.is_in_map(disag_appliance):
    sys.exit(
        "An incorrect appliance name has been entered. Please ensure the entered name is exactly correct."
    )

redd_data = DataSet("/home/mike/workspace/data/redd_data.h5")

# the building's full meter group is the training set; its mains are what
# gets disaggregated
training_set = redd_data.buildings[1].elec
building_mains = training_set.mains()

# train disaggregation set
co = CombinatorialOptimisation()
co.train(training_set)

# set output datastore (opened in 'w' mode)
outputData = HDFDataStore("/home/mike/workspace/data/redd_output.h5", 'w')

# disaggregate
co.disaggregate(building_mains, outputData)

# set sub-datastore for CSV output: the entry stored under the selected
# appliance's key
output_csv_store = outputData.store[km.get_key(disag_appliance)]

# set date parameters: keep only the t1..t2 slice
output_csv_store = output_csv_store[t1:t2]
print("Runtime =", end-start, "seconds.") output.close() disag_co = DataSet(disag_filename) disag_co_elec = disag_co.buildings[building].elec print("finish CO"); """ ############################################################################################### # CO Priority combination ##################################################################### ############################################################################################### print("starting CO_priority") start = time.time() from nilmtk.disaggregate import CombinatorialOptimisation co = CombinatorialOptimisation() # Note that we have given the sample period to downsample the data to 1 minute co.train(top_8_train_elec, sample_period=60 * 15) #co.train(train_elec.submeters(), sample_period=60*15) end = time.time() print("Runtime =", end - start, "seconds.") start = time.time() disag_filename = 'data/co_out/co_building3_1vs1_all.h5' output = HDFDataStore(disag_filename, 'w') # Note that we have mentioned to disaggregate after converting to a sample period of 60 seconds co.disaggregate(test_elec.mains(), output, sample_period=60 * 15) end = time.time() print("Runtime =", end - start, "seconds.") output.close()