def disaggregate_original_co(h5_input, h5_output, dataset_start_date_disag, dataset_end_date_disag, centroids=None):
    """Train a plain NILMTK CombinatorialOptimisation model and disaggregate mains.

    Returns the trained model and the vampire power measured on the mains.
    """
    import nilmtk.disaggregate as original_nilmtk

    dataset = DataSet(h5_input)
    electricity = dataset.buildings[1].elec
    vampire_power_used_in_original = electricity.mains().vampire_power()

    # Train a vanilla CO model over the currently loaded window.
    plain_co = original_nilmtk.CombinatorialOptimisation()
    plain_co.train(electricity)

    # Optionally overwrite the learned appliance states with supplied centroids,
    # keyed by the meter instance number.
    if centroids is not None:
        for model in plain_co.model:
            meter_instance = model['training_metadata'].instance()
            model['states'] = centroids[meter_instance]

    # Restrict the dataset to the disaggregation window and run CO on mains.
    dataset.set_window(start=dataset_start_date_disag, end=dataset_end_date_disag)
    electricity = dataset.buildings[1].elec
    output_plain_co = HDFDataStore(h5_output, 'w')
    plain_co.disaggregate(electricity.mains(), output_plain_co)
    output_plain_co.close()
    return plain_co, vampire_power_used_in_original
def import_dataset(self, source_file, start_end):
    """Open the source file three times: full, train (up to the split
    timestamp) and test (from the split timestamp onwards)."""
    self.ds, self.ds_train, self.ds_test = (DataSet(source_file) for _ in range(3))
    self.ds_train.set_window(end=start_end)    # everything before the split
    self.ds_test.set_window(start=start_end)   # everything after the split
def code():
    """Convert raw REDD data to NILMTK's HDF5 format and open it as a DataSet."""
    # Convert the REDD dataset into NILMTK's HDF5 format.
    from nilmtk.dataset_converters import convert_redd
    convert_redd('/data/REDD/low_freq', '/data/REDD/redd.h5')

    # Import the converted HDF5 file into NILMTK.
    from nilmtk import DataSet
    redd = DataSet('/data/REDD/redd.h5')
def __init__(self, in_filepath, out_filepath):
    """Open the input datastore, build the DataSet on top of it, open the
    output datastore and prepare an untrained CO model."""
    print("Loading DataStore and Generating Dataset...")
    # Placeholders filled in later by other methods.
    self.km = {}
    self.train_group = {}
    # Input store and the DataSet built from it.
    self.dataStore = HDFDataStore(in_filepath)
    self.dataSet = DataSet()
    self.dataSet.load(self.dataStore)
    # Output store that will receive disaggregation results.
    self.outDataStore = HDFDataStore(out_filepath, 'w')
    self.co = CombinatorialOptimisation()
    print("Data Properly Loaded!")
def getMeterTargetGenerator(b, train_meterRef):
    """Return a generator of the target (Y) meter aligned to the reference meter.

    NOTE(review): relies on module-level `dsPathY`, `meter_key` and
    `align_two_meters`.
    """
    target_ds = DataSet(dsPathY)
    ref_timeframe = train_meterRef.get_timeframe()
    print('Stack train: ', ref_timeframe.start.date(), " - ", ref_timeframe.end.date())
    target_meter = target_ds.buildings[b].elec.submeters()[meter_key]
    # Align the Y meter with the reference so both can be read chunk-by-chunk.
    return align_two_meters(train_meterRef, target_meter)
def groupmix_rlo_generator(dataset_loc, start_time, end_time, freq, occupancy, co):
    """Resample building-2 submeter data and compute group mix / room occupancy."""
    building = 2
    data = DataSet(dataset_loc)
    data.set_window(start=start_time, end=end_time)
    data_elec = data.buildings[building].elec

    # Collect lower- and upper-case appliance labels, in submeter order.
    label, label_upper = [], []
    for inst in data_elec.submeters().instance():
        appliance_name = str(data_elec[inst].label())
        label.append(appliance_name.lower())
        label_upper.append(appliance_name.upper())

    # Resample to `freq` seconds (max per bin), drop the first three
    # (mains) columns and relabel the rest with appliance names.
    train_elec_df = data_elec.dataframe_of_meters().resample(
        str(freq) + 'S').max().round(0)
    train_elec_df = train_elec_df.drop(train_elec_df.columns[[0, 1, 2]], axis=1)
    train_elec_df.columns = label

    states = get_states(co)
    group_mix, room_occ_num_people = groupmix_rlo(states, label_upper,
                                                  occupancy, train_elec_df)
    return group_mix, room_occ_num_people
def getStackTrainGenerators(b, train_meterRef):
    """Return one aligned-meter generator per X dataset path for building `b`.

    NOTE(review): relies on module-level `dsPathsList`, `meter_key` and
    `align_two_meters`.
    """
    generators = []
    for ds_path in dsPathsList[b]:
        x_elec = DataSet(ds_path).buildings[b].elec
        x_meter = x_elec.submeters()[meter_key]
        # Align the reference meter with this X meter so both can be read
        # chunk-by-chunk on a common index.
        generators.append(align_two_meters(train_meterRef, x_meter))
    return generators
def plot_f_score(self, disag_filename):
    """Plot per-appliance F1 scores for a disaggregation output file.

    NOTE(review): relies on module-level `building` and `test_elec`.
    """
    from nilmtk.metrics import f1_score
    plt.figure()
    disag_elec = DataSet(disag_filename).buildings[building].elec
    scores = f1_score(disag_elec, test_elec)
    scores.index = disag_elec.get_labels(scores.index)
    scores.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    plt.title(type(self.model).__name__)
def plot_zoomed_original_predicted_energy_consumption():
    """ Plots a zoomed time frame of the original prediction. """
    # Test window over UK-DALE building 1; cache cleared so the window is re-read.
    test = DataSet('../data/ukdale.h5')
    test.clear_cache()
    test.set_window(start="30-6-2013", end="15-7-2013")
    test_building = 1
    sample_period = 6
    meter_keys = ['kettle']
    test_elec = test.buildings[test_building].elec
    results_dir = '../results/UKDALE-ACROSS-BUILDINGS-RNN-lr=1e-05-2018-02-20-14-24-46'
    disag_filename = 'disag-out.h5'
    for key in meter_keys:
        # get predicted curve for the best epoch
        result = DataSet(os.path.join(results_dir, disag_filename))
        res_elec = result.buildings[test_building].elec
        predicted = res_elec[key]
        predicted = predicted.power_series(sample_period=sample_period)
        predicted = next(predicted)  # first chunk only
        predicted.fillna(0, inplace=True)
        y1 = np.array(predicted)  # power
        x1 = np.arange(y1.shape[0])  # timestamps
        # The chosen time frame to zoom in
        x1 = x1[94000:102500]
        y1 = y1[94000:102500]
        ground_truth = test_elec[key]
        ground_truth = ground_truth.power_series(sample_period=sample_period)
        ground_truth = next(ground_truth)
        ground_truth.fillna(0, inplace=True)
        y2 = np.array(ground_truth)  # power
        x2 = np.arange(y2.shape[0])  # timestamps
        # The chosen time frame to zoom in
        x2 = x2[94000:102500]
        y2 = y2[94000:102500]
        # Three stacked panels: overlay, predicted only, ground truth only.
        fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True, sharey=True)
        ax1.plot(x1, y1, color='r', label='predicted')
        ax1.plot(x2, y2, color='b', label='ground truth')
        ax2.plot(x1, y1, color='r')
        ax3.plot(x2, y2, color='b')
        ax1.set_title('Appliance: {}'.format(key))
        # Hand-picked tick labels for the zoomed sample range — assumes the
        # 6 s sample period maps these indices to the shown times; TODO confirm.
        plt.xticks(
            np.arange(94000, 102500, 2000),
            ('5-10-2013 12:00', '16:00', '20:00', '6-10-2013 00:00', '04:00'))
        fig.legend()
        fig.savefig(
            os.path.join(
                results_dir,
                'zoomed_original_predicted_vs_ground_truth_{}.png'.format(
                    key)))
def test_load(self):
    """Check ElecMeter.load honours chunksize, physical_quantity and ac_type."""
    elec = DataSet(join(data_dir(), 'energy.h5')).buildings[1].elec

    # Default load returns the whole 13-row chunk.
    self.assertEqual(len(next(elec.load())), 13)
    # A chunksize caps the first chunk.
    self.assertEqual(len(next(elec.load(chunksize=5))), 5)

    # Filtering by physical quantity keeps all rows and the expected columns.
    energy_df = next(elec.load(physical_quantity='energy'))
    self.assertEqual(len(energy_df), 13)
    self.assertEqual(energy_df.columns.levels, [['energy'], ['reactive']])

    # Filtering by AC type selects the matching column hierarchy.
    active_df = next(elec.load(ac_type='active'))
    self.assertEqual(active_df.columns.levels, [['power'], ['active']])
def get_disaggregation(device, total_aggregate):
    """Return the first 30-day-resampled active-power value for `device`.

    Parameters
    ----------
    device : str
        One of "fridge", "air conditioner", "washing machine".
    total_aggregate :
        Unused; kept for interface compatibility with callers.

    Returns
    -------
    First value of the resampled active-power series from iAWE building 1,
    or None when the device name is not recognised.
    """
    devices = ["fridge", "air conditioner", "washing machine"]
    if device not in devices:
        return None
    test = DataSet('iawe.h5')
    test_elec = test.buildings[1].elec
    # FIX: removed the unused `test_mains` local (dead code).
    test_meter = test_elec.submeters()[device]
    # 2592000 s = 30 days, so the first row aggregates roughly a month.
    df = next(test_meter.load(ac_type='active', sample_period=2592000))
    prediction = df['power'].values[0]
    print(df.head())
    return prediction
def __init__(self, paths, bad_meters=None, timeframe=None,
             merge_shorter_gaps_then=None, remove_shorter_then=None,
             verbose=False):
    """ Creates an DatasetAnalysis object.

    Parameters
    ----------
    paths: str or [str]
        Paths to the datasets, which shall be analyzed.
    bad_meters: [str] (optional)
        Define some meters which shall be excluded as they are malicious.
    timeframe: pd.TimeFrame
        The region for which the analysis shall be performed.
        Todo: Should be made optional. Take whole timeframe then.
    merge_shorter_gaps_then: pd.Timedelta
        Merge sections which are separated by a gap smaller then this timedelta
    remove_shorter_then: pd.Timedelta
        Remove sections which are smaller then this timedelta
    verbose: bool
        Whether to return additional information.

    Raises
    ------
    ValueError
        If `timeframe` is None (not yet supported).
    """
    if timeframe is None:
        # FIX: raise a specific exception type instead of bare Exception
        # (ValueError subclasses Exception, so existing handlers still work).
        raise ValueError(
            "TimeFrame has to be set. None timeframe not yet supported.")
    # FIX: isinstance() instead of `type(...) is list` (idiomatic, accepts
    # list subclasses).
    if not isinstance(paths, list):
        paths = [paths]
    self.datasets = []
    for path in paths:
        if verbose:
            print("Load Dataset {0}.".format(path))
        self.datasets.append(DataSet(path))
    self.timeframe = timeframe
    self.bad_meters = bad_meters
    self._load_all_stats(timeframe, verbose=verbose,
                         merge_shorter_gaps_then=merge_shorter_gaps_then,
                         remove_shorter_then=remove_shorter_then)
def co(start_train, end_train, start_test, end_test, train_elec):
    """Train CombinatorialOptimisation, disaggregate the test window, store
    the windows as JSON and plot per-appliance F1 scores.

    NOTE(review): relies on a module-level `data` DataSet.
    """
    # Training phase on the training window.
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    co = CombinatorialOptimisation()
    co.train(train_elec, ac_type='active', physical_quantity='power',
             sample_period=1)

    # Disaggregation phase on the test window.
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_co_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    co.disaggregate(elec.mains(), output, ac_type='active',
                    physical_quantity='power', sample_period=1)
    output.close()

    # Persist the train/test windows next to the disaggregation output.
    dates_dict = {
        "start_train": start_train,
        "end_train": end_train,
        "start_test": start_test,
        "end_test": end_test
    }
    with open(disag_filename + ".json", 'w') as dates_file:
        json.dump(dates_dict, dates_file)

    # F1 score per appliance, shown as a horizontal bar chart.
    disag = DataSet(disag_filename)
    disag_elec = disag.buildings[1].elec
    disag_elec.plot()
    plt.title("CO")
    plt.show()
    f1 = f1_score(disag_elec, train_elec)
    f1.index = disag_elec.get_labels(f1.index)
    f1.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    plt.title("CO")
    plt.show()
def get_states(h5_files_path):
    """Train Hart's algorithm on building-1 mains and return
    [centroids, per-event state durations/powers]."""
    # Open the H5 file and grab the mains meter of building 1.
    mains = DataSet(h5_files_path).buildings[1].elec.mains()

    # Train Hart's event-pairing model.
    h = Hart85()
    h.train(mains)
    pairs = h.pair_df

    # Each paired on/off event yields a duration and an active-power level.
    states = pd.DataFrame(pairs['T2 Time'] - pairs['T1 Time'],
                          columns=['duration'])
    states['P'] = pairs['T1 Active']

    # Return the centroids (load models) together with the state table.
    return [h.centroids, states]
def mle(start_train, end_train, start_test, end_test, train_elec):
    """Train an MLE disaggregator, disaggregate the test window, store the
    windows as JSON and plot per-appliance F1 scores.

    NOTE(review): relies on a module-level `data` DataSet.
    """
    # Start training
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    mle = maximum_likelihood_estimation.MLE()
    mle.sample_period = "1s"
    mle.train(train_elec)

    # Start disaggregating
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_mle_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    mle.disaggregate(elec.mains(), output)
    output.close()

    dates_dict = {
        "start_train": start_train,
        "end_train": end_train,
        "start_test": start_test,
        "end_test": end_test
    }
    # write test and train timeframe into json file
    with open(disag_filename + ".json", 'w') as dates_file:
        json.dump(dates_dict, dates_file)

    disag = DataSet(disag_filename)
    disag_elec = disag.buildings[1].elec
    disag_elec.plot()
    # BUG FIX: the title must be set BEFORE plt.show() (it was set after, so
    # it never appeared), and this function runs MLE, not FHMM.
    plt.title("MLE")
    plt.show()

    # Calculate F1-Score
    f1 = f1_score(disag_elec, train_elec)
    f1.index = disag_elec.get_labels(f1.index)
    f1.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    # BUG FIX: label the plot with the model actually used (MLE, not FHMM).
    plt.title("MLE")
    plt.show()
def load_dataset(window_per_house, test_window, filename, meter_label,
                 train_building, test_building, **load_kwargs):
    """Load train/test mains and appliance power series.

    Training data come from every building listed in `train_building`, each
    restricted to its own window from `window_per_house`; test data come from
    `test_building` restricted to `test_window`.
    """
    train = DataSet(filename)
    test = DataSet(filename)
    test.set_window(*test_window[test_building])

    train_mainlist = []
    train_meterlist = []
    for building_id, building in train.buildings.items():
        if building_id not in train_building:
            continue
        # Apply the per-house training window before reading any series.
        train.set_window(*window_per_house[building_id])
        appliance_meter = building.elec[meter_label]
        mains_meter = building.elec.mains()
        train_mainlist.append(mains_meter.power_series_all_data(**load_kwargs))
        train_meterlist.append(appliance_meter.power_series_all_data(**load_kwargs))

    test_meterlist = test.buildings[test_building].elec[meter_label]
    test_mainlist = test.buildings[test_building].elec.mains()

    assert len(train_mainlist) == len(
        train_meterlist
    ), "The number of main and apliances meters must be equal"
    return train_meterlist, train_mainlist, test_meterlist, test_mainlist
def process_nilmtk_h5(data_source: str, users_data, utility_appliance):
    """Pre-process every NILMTK .h5 dataset under `data_source` into one table.

    When `users_data` does not exist, each .h5 file is pre-processed via
    `preprocess_nilmtk_to_df` and the combined result is cached in
    ./processed_input_data.pkl (HDF format despite the extension); otherwise
    the cache is read back.

    Returns a dict mapping house identifiers to their pre-processed series.
    """
    from nilmtk import DataSet
    from pathlib import Path
    import pandas as pd
    import numpy as np
    import time
    import glob
    data_source = glob.glob(data_source + '*.h5')
    all_db_houses = {}
    if not users_data.exists():
        print("Loading Datasets")
        for data_source_path in data_source:
            # NOTE(review): `path in "literal"` is a substring test of the
            # literal — this looks reversed; verify the intended skip logic.
            if data_source_path in "..\\data\\SynD.h5" or data_source_path in "..\\data\\iawe.h5":
                print('Skipping {}'.format(data_source_path))
                continue
            dt = DataSet(data_source_path)
            file_name = Path(data_source_path).name
            print("\n\n===>Pre-processing {} dataset".format(file_name))
            start = time.time()
            house_data_pair = preprocess_nilmtk_to_df(file_name, dt,
                                                      utility_appliance)
            print("=>Time to process {} data: {}s ".format(
                data_source_path, time.time() - start))
            all_db_houses.update(house_data_pair)
        # BUG FIX: np.float was removed in NumPy 1.24; it was an alias of the
        # builtin float, which is used here instead.
        df = pd.DataFrame(all_db_houses, dtype=float)
        df.to_hdf("./processed_input_data.pkl", key='df', mode='w')
    else:
        df = pd.read_hdf("./processed_input_data.pkl", key='df')
        all_db_houses = df.to_dict('series')
    return all_db_houses
def main():
    """Load the GJW dataset, inspect its metadata and plot mains energy stats."""
    # Load data
    gjw = DataSet("C:/Users/GJWood/nilm_gjw_data/HDF5/nilm_gjw_data.hdf5")
    print('loaded ' + str(len(gjw.buildings)) + ' buildings')

    # Examine metadata for house 1.
    building_number = 1
    print_dict(gjw.buildings[building_number].metadata)
    elec = gjw.buildings[building_number].elec
    print(elec.appliances)

    # List & plot coherent blocks of meter readings.
    mains = elec.mains()
    mains_good_sections = elec.mains().good_sections()
    # BUG FIX: 'Print' is undefined (NameError at runtime); use builtin print.
    print(elec.mains().good_sections())
    mains_good_sections.plot()

    # Examine the power data.
    print(mains.available_power_ac_types())
    mains_energy = mains.total_energy(sections=mains_good_sections)
    print(mains_energy)
    whole_house = nilmtk.global_meter_group.select()
    print(whole_house.select(building=1).total_energy())
    whole_house.plot()
# NOTE(review): the first lines look like the tail of download_dataset()
# (called below); the enclosing def is not in this chunk — confirm placement.
# Unpack the downloaded archive and prepare the working directories.
zip_ref = zipfile.ZipFile('dataset/ds.zip', 'r')
zip_ref.extractall('dataset')
zip_ref.close()
os.remove("dataset/ds.zip")
shutil.rmtree("dataset/disag_estimates", ignore_errors=True)
os.makedirs("dataset/trainsets", exist_ok=True)
print("Done downloading")

if __name__ == "__main__":
    # Usage: python gen.py <path to ukdale .h5>
    if len(sys.argv) < 2:
        print("Usage: python gen.py ukdale_path")
        exit()
    conf_files = os.listdir("appconf")
    ds = DataSet(sys.argv[1])
    for app in conf_files:
        # One JSON config per appliance under appconf/.
        filename = "appconf/{}".format(app)
        with open(filename) as data_file:
            conf = json.load(data_file)
        if not os.path.exists("dataset"):
            download_dataset()
        os.makedirs(conf['save_path'], exist_ok=True)
        # Create trainset for meter
        print(conf["nilmtk_key"])
        house_keys = conf['train_buildings']
        window_size = conf['lookback']
        # Pre-allocate training arrays: `train_size` windows per building.
        all_x_train = np.empty((train_size * len(house_keys), window_size, 1))
        all_y_train = np.empty((train_size * len(house_keys), ))
from flask import Flask, render_template # # 데이터 컨버터 (return : DataSet) # # --- # In[ ]: # 데이터 컨버트 # .dat ==> .h5 convert_redd('C:\\Users\\dlsrk\\Desktop\\nilm\\low_freq', 'C:\\Users\\dlsrk\\Desktop\\nilm\\data\\redd.h5') # .h5 데이터(컨버트된) read # redd = DataSet('C:\\Users\\Kim-Taesu\\Documents\\nilm\\data\\redd.h5') redd = DataSet('C:\\Users\\dlsrk\\Desktop\\nilm\\data\\redd.h5') # date load 함수 def getData(inputPath, convertOutputPath): convert_redd(inputPath, convertOutputPath) return DataSet(convertOutputPath) # # 시각화 데이터 준비 # # --- # ### 변수 # > buildings_count : 전체 빌딩 수 (int)
def getData(inputPath, convertOutputPath):
    """Convert a raw REDD directory to HDF5, then open and return the DataSet."""
    convert_redd(inputPath, convertOutputPath)
    converted = DataSet(convertOutputPath)
    return converted
from __future__ import print_function, division
import time
from matplotlib import rcParams
import matplotlib.pyplot as plt
from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from windowgrudisaggregator import WindowGRUDisaggregator
import metrics

print("========== OPEN DATASETS ============")
meterList = []
mainsList = []
# UK-DALE test set; the REDD alternative is kept commented for reference.
test = DataSet('ukdale.h5')
# test = DataSet('redd.h5')
# test.set_window(start='2016-04-01',end='2016-05-01')
test_building_list = [2, 3, 4, 5]  #[2,5]
sample_period = 6
meter_key = 'kettle'

# Each line of the info file: <h5 path>,<building>,<start>,<end>
# NOTE(review): the file handle is never closed in this chunk.
file = open('baseTrainSetsInfo_' + meter_key, 'r')
for line in file:
    toks = line.split(',')
    train = DataSet(toks[0])
    print(toks[2], '-', toks[3])
    train.set_window(start=toks[2], end=toks[3])
    train_elec = train.buildings[int(toks[1])].elec
    # One appliance meter and one mains meter per training window.
    meterList.append(train_elec.submeters()[meter_key])
    mainsList.append(train_elec.mains())

disaggregator = WindowGRUDisaggregator(window_size=100)
#initialize key map for building 1 kmap = Key_map() #set the disaggregated plot here disag_apl = 'fridge' disag_key = kmap.get_key(disag_apl) #First we must load in the converted REDD Dataset print ("Loading DataSet.....") #declare datastore and load converted HDF that stores REDD data r_datastore = HDFDataStore("C:/NILM/Data_Sets/redd_data.h5") #declare dataset object to work with and load datastore into Dataset r_dataset = DataSet() r_dataset.load(r_datastore) #output sucessfull loading of data to user print("DataSet Sucessfully Loaded!") #now we take the data and elminate all sections with no samples print("Conditioning Data... \n") #load the metergroup from building one (house1 in REDD) r_elec = r_dataset.buildings[1].elec print("\nConditioning Finished.") #now we must train the disaggregation model to ensure accuracy
from __future__ import print_function, division from nilmtk import DataSet, TimeFrame, MeterGroup import plot_config import seaborn as sns from matplotlib.dates import DateFormatter, HourLocator import matplotlib.pyplot as plt import pytz from os.path import join from pylab import rcParams rcParams.update({'figure.figsize': plot_config._mm_to_inches(180, 120)}) print("plotting good sections...") dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5') # dataset.set_window("2013-06-01", "2013-06-02") dataset.set_window(None, None) axes = dataset.plot_good_sections(color=plot_config.BLUE) for i, ax in enumerate(axes): plot_config.format_axes(ax, tick_size=2) ax.set_title('House {:d}'.format(i+1), x=0.05, y=.4, va='top') ax.set_ylabel('Meter' if i == 1 else '', rotation=0, ha='center', va='center', y=.4) plt.savefig(join(plot_config.FIG_DIR, '03_good_sections.eps'), bbox_inches='tight')
        # NOTE(review): fragment — this dict/list tail continues a network
        # definition whose beginning is not in this chunk.
        },
        {
            'type': DimshuffleLayer,
            'pattern': (0, 2, 1)  # back to (batch, time, features)
        }
    ]
    net = Net(**net_dict_copy)
    return net

os.chdir('/data/dk3810/figures/e446o/')
net = exp_o('e446o')
net.compile()
# Restore the parameters saved at iteration 50000.
net.load_params(50000, '/data/dk3810/figures/e446o/e446o.hdf5')

# UK-DALE building 1 over roughly thirteen months.
dataset = DataSet('/data/dk3810/ukdale.h5')
dataset.set_window("2013-06-01", "2014-07-01")
elec = dataset.buildings[1].elec
elec.use_alternative_mains()
mains = elec.mains().power_series_all_data()
washer = elec['washer dryer'].power_series_all_data()

# Disaggregate the first N samples and plot estimates vs mains vs washer.
N = 131072
estimates = disaggregate(mains.values[:N], net)
fig, axes = plt.subplots(3, 1, sharex=True)
axes[0].plot(mains[:N].index, estimates)
axes[1].plot(mains[:N].index, mains[:N])
axes[2].plot(washer[:N].index, washer[:N])
import plot_config
import seaborn as sns
from matplotlib.dates import DateFormatter, HourLocator
from datetime import timedelta
import matplotlib.pyplot as plt
import pytz
import pandas as pd
from os.path import join
from pylab import rcParams

rcParams.update({'figure.figsize': plot_config._mm_to_inches(180, 100)})

UNIT = 'kW'

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
# Timezone comes from the dataset's own metadata.
TZ_STRING = dataset.metadata['timezone']
TZ = pytz.timezone(TZ_STRING)
elec = dataset.buildings[1].elec
submeters = elec.meters_directly_downstream_of_mains()

# Select appliances used in top K plot
APPLIANCES = [
    'fridge freezer',
    'HTPC',
    'dish washer',
    'washer dryer',
    'kettle'
]

selected_meters = [submeters[appliance] for appliance in APPLIANCES]
remainder = []
# Collect meters matching none of the selected appliance types
# (the loop body continues beyond this chunk).
for meter in submeters.meters:
    for appliance in APPLIANCES:
        if meter.matches_appliances({'type': appliance}):
            break
from __future__ import print_function, division
from nilmtk import DataSet, HDFDataStore
from nilmtk.disaggregate import fhmm_exact
from nilmtk.metrics import f1_score
from os.path import join
import matplotlib.pyplot as plt
"""
This file replicates issue #376 (which should now be fixed)
https://github.com/nilmtk/nilmtk/issues/376
"""

data_dir = '/data'
building_number = 3
disag_filename = join(data_dir, 'disag-fhmm' + str(building_number) + '.h5')

data = DataSet(join(data_dir, 'redd.h5'))

print("Loading building " + str(building_number))
elec = data.buildings[building_number].elec

# Train an FHMM on the five highest-energy submeters.
top_train_elec = elec.submeters().select_top_k(k=5)
fhmm = fhmm_exact.FHMM()
fhmm.train(top_train_elec)

# Disaggregate mains into the output store.
output = HDFDataStore(disag_filename, 'w')
fhmm.disaggregate(elec.mains(), output)
output.close()

### f1score fhmm
disag = DataSet(disag_filename)
disag_elec = disag.buildings[building_number].elec
from __future__ import print_function, division
from nilmtk import DataSet

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')

# Per-house analysis window (start, end); None means unbounded on that side.
window_per_house = {1: ("2013-04-12", None),
                    2: ("2013-05-22", None),
                    3: (None, None),
                    4: (None, None),
                    5: (None, "2014-09-06")}

descriptions = []
# BUG FIX: dict.iteritems() is Python-2-only (AttributeError on Python 3);
# items() behaves the same on both versions.
for building_id, building in dataset.buildings.items():
    print("*********** House", building_id, "*************")
    dataset.set_window(*window_per_house[building_id])
    description = building.describe()
    descriptions.append(description)
    print(description)
    print()
print("Disaggregating=========================================================")
# Location-aware CO disaggregation at 60 s resolution, over the chosen window.
output = HDFDataStore(h5_disag, 'w')
loc.dataset.set_window(start=dataset_start_date_disag,
                       end=dataset_end_date_disag)
co.disaggregate(loc.elec.mains(), output, location_data=loc,
                baseline=vampire_power_in_original, resample_seconds=60)
output.close()

time_start_metrics = time.time()
print("\nTotal elapsed: %s seconds ---" % (time_start_metrics - start_time))
print("Section Disaggregation: %s seconds ---\n" %
      (time_start_metrics - time_start_disag))

#METRICS=======================================================================
print("Calculating metrics====================================================")
disag = DataSet(h5_disag)
disago = DataSet(h5_disag_redd_original)
# Reuse the location run's timezone for the original-CO results so both
# prediction sets are compared on the same clock.
disago.metadata['timezone'] = disag.metadata['timezone']
disago.set_window(start=dataset_start_date_disag, end=dataset_end_date_disag)
disag_elec = disag.buildings[1].elec
disago_elec = disago.buildings[1].elec
# Predictions from the original CO run and from the location-aware run.
disag_predictions_original = utils.get_disaggregation_predictions(
    disago_elec, vampire_power_in_original,
    start_date=dataset_start_date_disag, end_date=dataset_end_date_disag)
disag_predictions_location = utils.get_disaggregation_predictions(
    disag_elec, vampire_power_in_original,
    start_date=dataset_start_date_disag, end_date=dataset_end_date_disag)
class REDD_Data(object):
    '''
    REDD_Data Class is an object designed to abstract the lower level commands of the
    NILMTK software package, with focus on the use of REDD DataSet. Function is designed
    to allow rapid experimentation and disaggregation compared to attempting to set
    package up from scratch.

    This class requires the following for proper usage:
    - NILMTK package: https://github.com/nilmtk
    - REDD Dataset (converted to .h5): redd.csail.mit.edu
    - Various dependancies (that NILMTK also requires), most can be downloaded
      through Anaconda: continuum.io/downloads

    Parameters
    -----------
    in_filepath: Filepath of converted REDD dataset (in .h5 format)
    out_filepath: filepath to place output disaggregation dataset (in .h5 format)

    Attributes
    -----------
    km: Key_Map Object
        initializes the key_map object which will allow for the mapping of a meters
        appliance name to its specific .H5 key.
    dataStore: NILMTK HDFDataStore Object
        the HDFDataStore that will contain the converted REDD DataSet.
    dataSet: NILMTK DataSet Object
        the DataSet object that is generated from the REDD DataStore (self.dataStore)
    outDataStore: NILMTK HDFDataStore Object
        the HDFDataStore that will contain the disaggregated dataset.
    co: NILMTK CombinatorialOptimisation object
        the disaggregation model object that will be trained and will disaggregate
        the working dataset
    train_group: NILMTK MeterGroup object
        the MeterGroup object that is used to train the disaggregation model (self.co)
    '''

    def __init__(self, in_filepath, out_filepath):
        # Open the input store, build the DataSet from it, and prepare the
        # output store plus an untrained CO model.
        print("Loading DataStore and Generating Dataset...")
        self.km = {}
        self.dataStore = HDFDataStore(in_filepath)
        self.dataSet = DataSet()
        self.dataSet.load(self.dataStore)
        self.outDataStore = HDFDataStore(out_filepath, 'w')
        self.co = CombinatorialOptimisation()
        self.train_group = {}
        print("Data Properly Loaded!")

    def train_disag_model(self, building_inst, use_topk=False, k=5):
        '''
        Function trains the disaggregation model using a selected MeterGroup.

        Parameters
        -----------
        building_inst: the instance # of the building that you wish to grab the
            training group from.
        use_topk: true if you wish to only grab the top k most energy intensive
            appliance to train the model, false if you wish to use all appliances.
        k: the # of appliances you wish to use (if use_topk = True)
        '''
        print("Training CO Disaggregation Model using given metergroup...")
        # REDD has 6 houses, hence the 1-6 bound. NOTE(review): bitwise & works
        # here only because both comparisons yield bools; `and` would be clearer.
        if (building_inst <= 6) & (building_inst > 0):
            # Select appropiate meter group to train with
            if use_topk == True:
                self.train_group = self.dataSet.buildings[building_inst].elec.select_top_k(k)
            else:
                self.train_group = self.dataSet.buildings[building_inst].elec
            self.co.train(self.train_group)
            print("CO Disaggreation Model Sucessfully Trained!")
        else:
            print("Error: Please select a building_inst of 1-6.")
            print("Model unsucessfully trained.")

    def load_disag_model(self, filepath):
        '''
        Function loads the disaggregation model to a file.

        Parameters
        -----------
        filepath: exact filepath of the model file.
        '''
        print("Loading CO Disaggreation Model...")
        self.co.import_model(filepath)
        print("Model Sucessfully Loaded!")

    def save_disag_model(self, filepath):
        '''
        Function saves the disaggregation model to a file.

        Parameters
        -----------
        filepath: exact filepath of the model file.
        '''
        print("Saving CO Disaggregation Model...")
        self.co.export_model(filepath)
        print("Model Sucessfully Saved!")

    def disaggregate(self, building_inst):
        '''
        Function will disaggregate the mains MeterGroup of the passed building
        instance, and save this to the self.outDataStore object.

        Parameters
        -----------
        building_inst: instance # of the building mains you wish to disaggregate.
        '''
        print("Disaggregating Building Mains...")
        self.co.disaggregate(self.dataSet.buildings[building_inst].elec.mains(), self.outDataStore)
        print("Mains sucessfully disaggregated!")

    def close(self):
        '''
        Function closes all open DataStore's being used by the program.
        '''
        print("Closing DataStores...")
        self.dataStore.close()
        self.outDataStore.close()
        print("Output DataStores Sucessfully Closed")

    '''
    All Plot Functions below are a WORK IN PROGRESS!-----------------------------------
    Documentation will be provided upon completion.------------------------------------
    '''

    def plot_disag_apl(self, inst, appliance, t1="", t2=""):
        # Plot one disaggregated appliance series between timestamps t1 and t2.
        self.km = Key_Map(inst)
        plot_series(self.outDataStore.store.get(self.km.get_key(appliance))[t1: t2])
        plt.title("Disaggregated " + appliance.capitalize() + " Energy")
        plt.show()

    def show_plots(self):
        plt.show()

    def building_plot_all(self, building_inst, t1, t2):
        # Plot every appliance of one building over [t1, t2].
        self.dataSet.buildings[building_inst].elec.plot(t1, t2)
        plt.title("Building " + str(building_inst) + " Energy per Appliance")
        plt.ylabel('Power [W]')
        plt.xlabel('Hour')

    def plot_redd_mains_data(self, inst=1, t1="", t2=""):
        # REDD splits mains across two meters; plot their sum over [t1, t2].
        self.km = Key_Map(inst)
        series1 = self.dataStore.store.get(self.km.get_key('mains1'))[t1:t2]
        series2 = self.dataStore.store.get(self.km.get_key('mains2'))[t1:t2]
        plot_series(series1 + series2)
        plt.title("Building " + str(inst) + " Mains Energy")
        plt.show()
from __future__ import print_function, division
from nilmtk import DataSet
import plot_config
import seaborn as sns
import matplotlib.pyplot as plt
from os.path import join
from pylab import rcParams

print("plotting energy bar...")

# UK-DALE building 1, from April 2013 onwards.
dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2013-04-01", None)
elec = dataset.buildings[1].elec

# Group submeters by appliance type and keep only the top consumers.
submeters = elec.meters_directly_downstream_of_mains()
grouped = submeters.groupby('type')
top_k = grouped.select_top_k(group_remainder=False)
# Friendlier display name; ignore if the HTPC group is absent.
try:
    top_k['HTPC'].name = "Home theatre PC"
except KeyError:
    pass

############
# Plot
rcParams.update({'figure.figsize': plot_config._mm_to_inches(70, 90)})
ax = top_k.plot(kind='energy bar', mains=elec.mains())
sns.despine(ax=ax, bottom=True, left=True)
plt.tight_layout()
plt.draw()
from __future__ import print_function, division
from nilmtk import DataSet, TimeFrame
from nilmtk.elecmeter import ElecMeterID
import pandas as pd

ukdale = DataSet('/data/mine/vadeec/merged/ukdale.h5')

# TZ = 'Europe/London'
# ukdale.store.window = TimeFrame(pd.Timestamp("2014-01-01 00:00", tz=TZ),
#                                 pd.Timestamp("2014-01-02 00:00", tz=TZ))

# Restrict to April 2013 and pick meter 2 of building 1.
ukdale.set_window("2013-04-01", "2013-05-01")
elec = ukdale.buildings[1].elec
meter = elec[2]

# Exploratory snippets kept for reference:
# ukdale.plot_good_sections()
# best = meter._convert_physical_quantity_and_ac_type_to_cols(ac_type='best')
# elec2 = ukdale.buildings[2].elec
# elec.use_alternative_mains()
# elec2.use_alternative_mains()
# submeters2 = elec2.submeters()
# gen = submeters2.load()
# df = next(gen)
# gen = elec.load(verbose=True)
# df = gen.next()
# corr = elec.correlation_of_sum_of_submeters_with_mains(verbose=True)
# prop = elec.proportion_of_energy_submetered()
# to add: # 1) load REDD data from database (SQL interface)* # # *Cannot be implemented until database is setup in environment # Verify input appliance exists in building km = Key_Map(1) # verify a real appliance has been entered if km.is_in_map(disag_appliance) == False: sys.exit( "An incorrect appliance name has been entered. Please ensure the entered name is exactly correct." ) redd_data = DataSet("/home/mike/workspace/data/redd_data.h5") # load mains of the building building_mains = redd_data.buildings[1].elec.mains() #train disaggregation set co = CombinatorialOptimisation() training_set = redd_data.buildings[1].elec co.train(training_set) #set output datastore outputData = HDFDataStore("/home/mike/workspace/data/redd_output.h5", 'w') #disaggregate co.disaggregate(building_mains, outputData)
# NOTE(review): fragment — these first lines are the tail of a helper that
# assembles a device group; its `def` is not in this chunk. This is Python 2
# code (bare `print` statement below).
device_family.append(nilm.select_appliances_by_type("washer dryer")[0])
device_family.append(nilm.select_appliances_by_type("electric space heater")[0])
#top_devs = nilm.select_top_consuming_appliances_for_training(6, 5)
print device_family
return MeterGroup(device_family), device_family


def train_group(group):
    # Delegate training to the surrounding `nilm` helper at 60 s resolution.
    nilm.train_nilm_model(group, sample_period=60)


# Example at https://github.com/nilmtk/nilmtk/blob/master/docs/manual/user_guide/disaggregation_and_metrics.ipynb
train = DataSet('/home/andrea/Desktop/redd.h5')
test = DataSet('/home/andrea/Desktop/redd.h5')
# Train before 30 April 2011, test from that date onwards.
train.set_window(end="30-4-2011")
test.set_window(start="30-4-2011")
train_elect = train.buildings[1].elec
test_elec = test.buildings[1].elec

# FHMM on the 5 highest-consuming submeters.
best_devices = test_elec.submeters().select_top_k(k=5)
test_elec.mains().plot()
fhmm = fhmm_exact.FHMM()
fhmm.train(best_devices, sample_period=60)

# Save disaggregation to external dataset
from __future__ import print_function, division
import time
from matplotlib import rcParams
import matplotlib.pyplot as plt
from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.elecmeter import ElecMeterID
import metrics
from rnndisaggregator import RNNDisaggregator

print("========== OPEN DATASETS ============")
# REDD building 1: train before 30 April 2011, test from that date onwards.
train = DataSet('../../Datasets/REDD/redd.h5')
train.set_window(end="30-4-2011")
test = DataSet('../../Datasets/REDD/redd.h5')
test.set_window(start="30-4-2011")

train_building = 1
test_building = 1
sample_period = 6
meter_key = 'fridge'

train_elec = train.buildings[train_building].elec
test_elec = test.buildings[test_building].elec

# Fridge submeter as the target; the first mains meter as the input signal.
train_meter = train_elec.submeters()[meter_key]
train_mains = train_elec.mains().all_meters()[0]
test_mains = test_elec.mains().all_meters()[0]
rnn = RNNDisaggregator()

start = time.time()
print("========== TRAIN ============")
from __future__ import print_function, division
from nilmtk import DataSet, TimeFrame, MeterGroup
import plot_config
import seaborn as sns
from matplotlib.dates import DateFormatter, HourLocator
from matplotlib.ticker import MaxNLocator
import matplotlib.pyplot as plt
import pytz
from os.path import join
from pylab import rcParams

rcParams.update({'figure.figsize': plot_config._mm_to_inches(180, 100)})

print("plotting appliance power histograms...")

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2013-04-26", None)  # ignore tungsten kitchen lamps
elec = dataset.buildings[1].elec

# 3x3 grid: one power histogram per meter key below.
fig, axes = plt.subplots(3, 3)
meter_keys = ['fridge freezer', 'kettle', 'toaster', 'vacuum cleaner',
              'television', 'oven', 'laptop computer', 'computer monitor',
              ('light', 1)]
# Hand-tuned histogram x-ranges, one (min, max) pair per meter above.
kwargs_per_meter = {'range': [( 2, 275), (2200, 2460), (1480, 1650),
                              ( 400, 2200), ( 80, 140), (None, 60),
                              ( 2, 65), ( 30, 85), (35, 290)]}
axes = elec.plot_multiple(axes, meter_keys, 'plot_power_histogram',
                          kwargs_per_meter,
                          plot_kwargs={'color': plot_config.BLUE})
# Script: plot activity histograms for nine appliances of UK-DALE house 1,
# one stacked subplot per appliance.
from __future__ import print_function, division
from nilmtk import DataSet, TimeFrame, MeterGroup
import plot_config
import seaborn as sns
from matplotlib.ticker import MultipleLocator
import matplotlib.pyplot as plt
import pytz
from os.path import join
from pylab import rcParams

rcParams.update({'figure.figsize': plot_config._mm_to_inches(88, 150)})

print("plotting activity histograms...")
dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2013-03-01", None)  # "2013-08-01")

elec = dataset.buildings[1].elec
N = 9  # number of appliances / subplots
fig, axes = plt.subplots(N, 1)
meter_keys = ['boiler', 'kettle', 'toaster', 'oven', 'vacuum cleaner',
              'television', 'laptop computer', 'computer monitor',
              ('light', 1)]
axes = elec.plot_multiple(axes, meter_keys, 'plot_activity_histogram')

# Formatting: strip grids and y ticks for a compact stacked layout.
for i, ax in enumerate(axes):
    ax.grid(False)
    ax.set_yticks([])
    ax.set_ylabel('')
for b_id, building in building_chunk_items[home_group]: try: if b_id in existing_files_names: print("Skipping", b_id) continue print b_id out[b_id] = {} start = time.time() #cls_dict = {"Hart":Hart85()} cls_dict = {"CO": CombinatorialOptimisation(), "FHMM": FHMM(), "Hart": Hart85()} elec = building.elec mains = elec.mains() train = DataSet(ds_path) test = DataSet(ds_path) split_point = datetime.date(2013, 7, 16) train.set_window(end=split_point) #test.set_window(start=split_point) train_elec = train.buildings[b_id].elec test_elec = test.buildings[b_id].elec test_mains = test_elec.mains() # AC elec ac_elec_train = train_elec[('air conditioner', 1)] ac_elec_test = test_elec[('air conditioner', 1)] num_states_dict = {ac_elec_train: num_states}
def random_forest(dataset_path, train_building, train_start, train_end,
                  val_building, val_start, val_end, test_building, test_start,
                  test_end, meter_key, sample_period, n_estimators, criterion,
                  min_sample_split):
    """Train a RandomForestRegressor as a NILM disaggregator for one appliance.

    Opens train/val/test windows over the same NILMTK dataset file, aligns
    mains (X) with the appliance channel (y) on their common timestamps,
    fits the regressor on the training split and evaluates predictions on
    the validation and test splits.

    Returns:
        dict with keys 'val_metrics', 'test_metrics', 'time_taken' (string,
        seconds) and 'epochs' (always None for this model).
    """
    # Start tracking time.
    start = time.time()

    # One DataSet handle per split so each carries its own time window.
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    def _align(X, y):
        # Keep only timestamps present in BOTH mains and appliance series.
        # .loc replaces DataFrame.ix, which was removed in pandas 1.0.
        common = pd.Index(np.sort(list(set(X.index).intersection(set(y.index)))))
        return X.loc[common], y.loc[common]

    try:
        # REDD-style datasets: mains().all_meters() is available.
        X_train = next(train_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_train = next(
            train_elec[meter_key].load(sample_period=sample_period)).fillna(0)
        X_test = next(test_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_test = next(
            test_elec[meter_key].load(sample_period=sample_period)).fillna(0)
        X_val = next(val_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_val = next(
            val_elec[meter_key].load(sample_period=sample_period)).fillna(0)

        X_train, y_train = _align(X_train, y_train)
        X_test, y_test = _align(X_test, y_test)
        X_val, y_val = _align(X_val, y_val)

        # DataFrames -> numpy arrays.
        X_train = X_train.values
        y_train = y_train.values
        X_test = X_test.values
        y_test = y_test.values
        X_val = X_val.values
        y_val = y_val.values
    except AttributeError:
        # UKDALE-style datasets: use the power_series helpers instead.
        X_train = train_elec.mains().power_series_all_data(
            sample_period=sample_period).fillna(0)
        y_train = next(train_elec[meter_key].power_series(
            sample_period=sample_period)).fillna(0)
        X_test = test_elec.mains().power_series_all_data(
            sample_period=sample_period).fillna(0)
        y_test = next(test_elec[meter_key].power_series(
            sample_period=sample_period)).fillna(0)
        # BUGFIX: the validation series were never loaded in this branch,
        # so the later rf_regr.predict(X_val) raised NameError.
        X_val = val_elec.mains().power_series_all_data(
            sample_period=sample_period).fillna(0)
        y_val = next(val_elec[meter_key].power_series(
            sample_period=sample_period)).fillna(0)

        X_train, y_train = _align(X_train, y_train)
        X_test, y_test = _align(X_test, y_test)
        X_val, y_val = _align(X_val, y_val)

        # Series -> 2-D column arrays expected by scikit-learn.
        X_train = X_train.values.reshape(-1, 1)
        y_train = y_train.values.reshape(-1, 1)
        X_test = X_test.values.reshape(-1, 1)
        y_test = y_test.values.reshape(-1, 1)
        X_val = X_val.values.reshape(-1, 1)
        y_val = y_val.values.reshape(-1, 1)

    # Model settings and hyperparameters; random_state pinned for
    # reproducibility.
    rf_regr = RandomForestRegressor(n_estimators=n_estimators,
                                    criterion=criterion,
                                    min_samples_split=min_sample_split,
                                    random_state=0)
    rf_regr.fit(X_train, y_train)

    # Disaggregate (predict appliance power from mains power).
    y_val_predict = rf_regr.predict(X_val)
    y_test_predict = rf_regr.predict(X_test)

    # Metrics: single on/off state boundary at the appliance's on-power
    # threshold.
    on_power_threshold = train_elec[meter_key].on_power_threshold()
    me = Metrics(state_boundaries=[on_power_threshold])
    val_metrics_results_dict = Metrics.compute_metrics(me, y_val_predict,
                                                       y_val.flatten())
    test_metrics_results_dict = Metrics.compute_metrics(me, y_test_predict,
                                                        y_test.flatten())

    # End tracking time.
    time_taken = time.time() - start  # in seconds

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': None,
    }

    # Close Dataset files.
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
# Script: pick the named top-consuming appliances of UK-DALE house 1 and
# collect every other meter directly downstream of mains into `remainder`.
from nilmtk import DataSet, TimeFrame, MeterGroup
import plot_config
import seaborn as sns
from matplotlib.dates import DateFormatter, HourLocator
from datetime import timedelta
import matplotlib.pyplot as plt
import pytz
import pandas as pd
from os.path import join
from pylab import rcParams

rcParams.update({'figure.figsize': plot_config._mm_to_inches(180, 100)})

UNIT = 'kW'
dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
TZ_STRING = dataset.metadata['timezone']
TZ = pytz.timezone(TZ_STRING)

elec = dataset.buildings[1].elec
submeters = elec.meters_directly_downstream_of_mains()

# Select appliances used in top K plot
APPLIANCES = ['fridge freezer', 'HTPC', 'dish washer', 'washer dryer', 'kettle']
selected_meters = [submeters[appliance] for appliance in APPLIANCES]

# for/else: the else clause runs only when no APPLIANCES entry matched,
# i.e. the meter is not one of the selected appliances.
remainder = []
for meter in submeters.meters:
    for appliance in APPLIANCES:
        if meter.matches_appliances({'type': appliance}):
            break
    else:
        remainder.append(meter)
def fcnn(dataset_path, train_building, train_start, train_end, val_building, val_start, val_end, test_building, test_start, test_end, meter_key, sample_period, num_epochs, patience, num_layers, optimizer, learning_rate, dropout_prob, loss):
    """Train a fully-connected NN to map mains power to one appliance's power.

    Opens train/val/test windows over the same NILMTK dataset file, aligns
    mains (X) and appliance (y) series on shared timestamps, fits a Keras
    fully-connected model with early stopping, and returns a dict with
    'val_metrics', 'test_metrics', 'time_taken' and 'epochs'.
    """
    # Start tracking time
    start = time.time()
    # Prepare dataset and options
    dataset_path = dataset_path
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)
    train_building = train_building
    val_building = val_building
    test_building = test_building
    meter_key = meter_key
    sample_period = sample_period
    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec
    try:
        # REDD-style datasets: mains().all_meters() is available.
        X_train = next(train_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_train = next(
            train_elec[meter_key].load(sample_period=sample_period)).fillna(0)
        X_test = next(test_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_test = next(
            test_elec[meter_key].load(sample_period=sample_period)).fillna(0)
        X_val = next(val_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_val = next(
            val_elec[meter_key].load(sample_period=sample_period)).fillna(0)
        # Intersect between two dataframe - to make sure same training
        # instances in X and y.
        # NOTE(review): DataFrame.ix was removed in pandas 1.0 — this code
        # requires an old pandas; migrate to .loc when possible.
        # Train set
        intersect_index = pd.Index(
            np.sort(list(set(X_train.index).intersection(set(y_train.index)))))
        X_train = X_train.ix[intersect_index]
        y_train = y_train.ix[intersect_index]
        # Test set
        intersect_index = pd.Index(
            np.sort(list(set(X_test.index).intersection(set(y_test.index)))))
        X_test = X_test.ix[intersect_index]
        y_test = y_test.ix[intersect_index]
        # Val set
        intersect_index = pd.Index(
            np.sort(list(set(X_val.index).intersection(set(y_val.index)))))
        X_val = X_val.ix[intersect_index]
        y_val = y_val.ix[intersect_index]
        # Get values from numpy array
        X_train = X_train.values
        y_train = y_train.values
        X_test = X_test.values
        y_test = y_test.values
        X_val = X_val.values
        y_val = y_val.values
    except AttributeError:
        # UKDALE-style datasets: use the power_series helpers instead.
        # NOTE(review): this branch never loads X_val/y_val, so the later
        # fc_model.predict(X_val) will raise NameError — confirm and fix.
        X_train = train_elec.mains().power_series_all_data(
            sample_period=sample_period).fillna(0)
        y_train = next(train_elec[meter_key].power_series(
            sample_period=sample_period)).fillna(0)
        X_test = test_elec.mains().power_series_all_data(
            sample_period=sample_period).fillna(0)
        y_test = next(test_elec[meter_key].power_series(
            sample_period=sample_period)).fillna(0)
        # Intersect between two dataframe - to make sure same training
        # instances in X and y.
        # Train set
        intersect_index = pd.Index(
            np.sort(list(set(X_train.index).intersection(set(y_train.index)))))
        X_train = X_train.ix[intersect_index]
        y_train = y_train.ix[intersect_index]
        # Test set
        intersect_index = pd.Index(
            np.sort(list(set(X_test.index).intersection(set(y_test.index)))))
        X_test = X_test.ix[intersect_index]
        y_test = y_test.ix[intersect_index]
        # Get values from numpy array - Avoid server error.
        # Series -> 2-D column arrays expected by Keras.
        X_train = X_train.values.reshape(-1, 1)
        y_train = y_train.values.reshape(-1, 1)
        X_test = X_test.values.reshape(-1, 1)
        y_test = y_test.values.reshape(-1, 1)
    # Model settings and hyperparameters
    layers_array = array_layers(num_layers)
    fc_model = build_fc_model(layers_array, dropout_prob)
    # adam = Adam(lr = 1e-5)
    # `optimizer` arrives as a class and is instantiated here with the
    # requested learning rate.
    optimizer = optimizer(lr=learning_rate)
    fc_model.compile(loss=loss, optimizer=optimizer)
    # Early stopping when validation loss increases.
    earlystop = EarlyStopping(monitor='val_loss', min_delta=0,
                              patience=patience, verbose=0, mode='auto')
    # nb_epoch is the legacy (Keras 1.x) spelling of epochs.
    hist_fc_ = fc_model.fit(X_train, y_train, batch_size=512, verbose=1,
                            nb_epoch=num_epochs, validation_split=0.2,
                            shuffle=True, callbacks=[earlystop])
    # Get number of earlystop epochs (0 means training ran to completion).
    num_epochs = earlystop.stopped_epoch if earlystop.stopped_epoch != 0 else num_epochs
    # Disaggregate: predict appliance power for val and test mains.
    val_pred_fc = fc_model.predict(X_val).reshape(-1)
    test_pred_fc = fc_model.predict(X_test).reshape(-1)
    # Metrics: single on/off state boundary at the appliance's on-power
    # threshold.
    on_power_threshold = train_elec[meter_key].on_power_threshold()
    me = Metrics(state_boundaries=[on_power_threshold])
    val_metrics_results_dict = Metrics.compute_metrics(me, val_pred_fc,
                                                       y_val.flatten())
    test_metrics_results_dict = Metrics.compute_metrics(me, test_pred_fc,
                                                        y_test.flatten())
    # end tracking time
    end = time.time()
    time_taken = end - start  # in seconds
    # (A large commented-out verbose result-dict template from the original
    # was condensed away for readability.)
    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': num_epochs,
    }
    # Close Dataset files
    train.store.close()
    val.store.close()
    test.store.close()
    return model_result_data
def dae(dataset_path, train_building, train_start, train_end, test_building,
        test_start, test_end, val_building, val_start, val_end, meter_key,
        sample_period, num_epochs, patience, sequence_length, optimizer,
        learning_rate, loss):
    """Train a denoising-autoencoder NILM disaggregator and evaluate it.

    Opens train/val/test windows over the same dataset file, trains
    DAEDisaggregator on (train mains -> appliance submeter), disaggregates
    validation and test mains into temporary HDF5 stores, and computes
    the shared NILM metric suite on each.

    Returns:
        dict with keys 'val_metrics', 'test_metrics', 'time_taken' (string,
        seconds) and 'epochs' (actual epochs run, accounting for early stop).
    """
    # Start tracking time.
    start = time.time()

    # One DataSet handle per split so each carries its own time window.
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec
    train_meter = train_elec.submeters()[meter_key]
    try:
        # REDD-style datasets: mains().all_meters() is available.
        train_mains = train_elec.mains().all_meters()[0]
        val_mains = val_elec.mains().all_meters()[0]
        test_mains = test_elec.mains().all_meters()[0]
    except AttributeError:
        # UKDALE-style datasets: mains() is the meter itself.
        train_mains = train_elec.mains()
        # BUGFIX: val_mains was missing in this branch, so disaggregating
        # the validation mains below raised NameError.
        val_mains = val_elec.mains()
        test_mains = test_elec.mains()

    dae = DAEDisaggregator(sequence_length, patience, optimizer,
                           learning_rate, loss)
    dae.train(train_mains, train_meter, epochs=num_epochs,
              sample_period=sample_period)
    # Actual number of epochs run (0 from early stopping means "ran out").
    num_epochs = dae.stopped_epoch if dae.stopped_epoch != 0 else num_epochs

    # Disaggregate validation and test mains into temporary HDF5 stores.
    val_disag_filename = 'disag-out-val.h5'
    output = HDFDataStore(val_disag_filename, 'w')
    dae.disaggregate(val_mains, output, train_meter,
                     sample_period=sample_period)
    output.close()
    test_disag_filename = 'disag-out-test.h5'
    output = HDFDataStore(test_disag_filename, 'w')
    dae.disaggregate(test_mains, output, train_meter,
                     sample_period=sample_period)
    output.close()

    def _metrics_for(res_meter, gt_meter):
        # Collate the shared NILM metric suite for one predicted /
        # ground-truth meter pair.
        rpaf = metrics.recall_precision_accuracy_f1(res_meter, gt_meter)
        return {
            'recall_score': rpaf[0],
            'precision_score': rpaf[1],
            'accuracy_score': rpaf[2],
            'f1_score': rpaf[3],
            'mean_absolute_error': metrics.mean_absolute_error(res_meter, gt_meter),
            'mean_squared_error': metrics.mean_square_error(res_meter, gt_meter),
            'relative_error_in_total_energy': metrics.relative_error_total_energy(res_meter, gt_meter),
            'nad': metrics.nad(res_meter, gt_meter),
            'disaggregation_accuracy': metrics.disaggregation_accuracy(res_meter, gt_meter),
        }

    # Validation metrics.
    result_val = DataSet(val_disag_filename)
    res_elec_val = result_val.buildings[val_building].elec
    val_metrics_results_dict = _metrics_for(res_elec_val[meter_key],
                                            val_elec[meter_key])
    # Test metrics.
    result = DataSet(test_disag_filename)
    res_elec = result.buildings[test_building].elec
    test_metrics_results_dict = _metrics_for(res_elec[meter_key],
                                             test_elec[meter_key])

    # End tracking time.
    time_taken = time.time() - start  # in seconds

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': num_epochs,
    }

    # Close disaggregation output stores, then the source dataset files.
    result.store.close()
    result_val.store.close()
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
class NILM:
    """Convenience wrapper around NILMTK: dataset conversion/loading,
    exploration plots, and training/using a disaggregation model
    (FHMM or Combinatorial Optimisation)."""

    def __init__(self):
        pass

    def convert_dataset(self, folder, destination_file):
        """Convert a raw REDD dump into NILMTK's HDF5 format."""
        # convert_greend(folder, destination_file)
        convert_redd(folder, destination_file)

    def import_dataset(self, source_file, start_end):
        """Open three views of the dataset: full, train (< start_end)
        and test (>= start_end)."""
        self.ds = DataSet(source_file)
        self.ds_train = DataSet(source_file)
        self.ds_train.set_window(end=start_end)
        self.ds_test = DataSet(source_file)
        self.ds_test.set_window(start=start_end)

    def show_wiring(self, building_no):
        self.ds.buildings[building_no].elec.draw_wiring_graph()

    def show_available_devices(self, building_no):
        return self.ds.buildings[building_no].elec

    def show_available_data(self, building_no, device_id):
        return self.ds.buildings[building_no].elec[device_id].available_columns()  # .device["measurements"]

    def get_aggregated_power(self, building_no):
        return self.ds.buildings[building_no].elec.mains().power_series_all_data()  # .head()

    def get_device_power(self, building_no, device_id):
        """ Returns a generator over the power timeserie """
        return self.ds.buildings[building_no].elec[device_id].power_series()

    def get_energy_per_meter(self, building_no):
        # Active power only, from the training window.
        return self.ds_train.buildings[building_no].elec.submeters().energy_per_meter().loc['active']

    def get_total_energy_per_device(self, building_no, device_id):
        return self.ds.buildings[building_no].elec[device_id].total_energy()

    def plot_aggregated_power(self, building_no):
        self.ds.buildings[building_no].elec.mains().plot()

    def plot_meter_power(self, building_no, device_id):
        self.ds.buildings[building_no].elec[device_id].plot()

    def plot_all_meters(self, building_no):
        self.ds.buildings[building_no].elec.plot()

    def plot_appliance_states(self, building_no, device_id):
        self.ds.buildings[building_no].elec[device_id].plot_power_histogram()

    def plot_spectrum(self, building_no, device_id):
        self.ds.buildings[building_no].elec[device_id].plot_spectrum()

    def plot_appliance_usage(self, building_no, device_id):
        self.ds.buildings[building_no].elec[device_id].plot_activity_histogram()

    def select_appliances_by_id(self, building_no, names):
        pass

    def select_top_consuming_appliances_for_training(self, building_no, k=5):
        return self.ds.buildings[building_no].elec.submeters().select_top_k(k)

    def select_appliances_by_type(self, t):
        import nilmtk
        meters = nilmtk.global_meter_group.select_using_appliances(type=t).all_meters()
        # sort by energy consumption, largest first
        meters = sorted(meters, key=(lambda m: m.total_energy()[0]), reverse=True)
        return meters

    def create_nilm_model(self, m_type):
        # BUGFIX: was `m_type is "FHMM"` — identity comparison on string
        # literals is implementation-dependent (SyntaxWarning on Python
        # >= 3.8); use equality.
        if m_type == "FHMM":
            self.model = fhmm_exact.FHMM()
        elif m_type == "CombOpt":
            self.model = combinatorial_optimisation.CombinatorialOptimisation()

    def import_nilm_model(self, filepath, m_type):
        # Same `is` -> `==` fix as create_nilm_model.
        if m_type == "FHMM":
            self.model = fhmm_exact.FHMM()
            self.model.import_model(filepath)
        elif m_type == "CombOpt":
            self.model = combinatorial_optimisation.CombinatorialOptimisation()
            self.model.import_model(filepath)

    def train_nilm_model(self, top_devices, sample_period=None):
        if sample_period is None:
            self.model.train(top_devices)
        else:
            # NOTE(review): sample_period is passed positionally; confirm the
            # model's train() takes it as the second positional argument.
            self.model.train(top_devices, sample_period)

    def save_disaggregator(self, filepath):
        self.model.export_model(filepath)

    def disaggregate(self, aggregate_timeserie, output_file, sample_period):
        self.model.disaggregate(aggregate_timeserie, output_file, sample_period)

    def plot_f_score(self, disag_filename):
        plt.figure()
        from nilmtk.metrics import f1_score
        disag = DataSet(disag_filename)
        # NOTE(review): `building` and `test_elec` are free names here, not
        # attributes of self — they must exist at module level at call time.
        disag_elec = disag.buildings[building].elec
        f1 = f1_score(disag_elec, test_elec)
        f1.index = disag_elec.get_labels(f1.index)
        f1.plot(kind='barh')
        plt.ylabel('appliance')
        plt.xlabel('f-score')
        plt.title(type(self.model).__name__)
# Script: split the iAWE dataset in half by time — the midpoint timestamp
# of the non-NaN mains series becomes the train/test boundary.
from __future__ import print_function, division
from nilmtk import DataSet, HDFDataStore, TimeFrame
from os.path import join
import matplotlib.pyplot as plt
from matplotlib import rcParams
import math
from nilmtk.metrics import f1_score
# metrics is actually different; need to look at this
from nilmtk.metrics import rms_error_power
from nilmtk.metrics import mean_normalized_error_power
from nilmtk.disaggregate import fhmm_exact
# OK, only different in what is printed to screen (and this is diagonal covariance matrix)

building_number = 1
ds = DataSet('/nilmtk/data/iawe.h5')  # ('/nilmtk/data/ukdale.h5') #("/data/REDD/redd.h5")
print(ds.buildings)
# Separate handles so train/test can carry different windows.
train = DataSet('/nilmtk/data/iawe.h5')  # ('/nilmtk/data/ukdale.h5') #("/data/REDD/redd.h5")
test = DataSet('/nilmtk/data/iawe.h5')  # ('/nilmtk/data/ukdale.h5') #("/data/REDD/redd.h5")

elec = train.buildings[building_number].elec
mains = elec.mains()
df_all = mains.power_series_all_data()  # df_all has a bunch of NaNs
df_all_noNan = df_all.dropna()

# Midpoint timestamp of the valid samples, formatted "YYYY-MM-DD HH:MM:SS".
a = df_all_noNan.keys()
middleTime = a[int(math.floor(a.size/2))]
middleTimeStr = "%d-%02d-%02d %02d:%02d:%02d" % (middleTime.year, middleTime.month, middleTime.day, middleTime.hour, middleTime.minute, middleTime.second)
print(middleTimeStr)
train.set_window(end=middleTimeStr)
test.set_window(start=middleTimeStr)
K = int(sys.argv[3]) train_fraction = int(sys.argv[4]) / 100.0 print("*"*80) print("Arguments") print("Number states", num_states) print("Train fraction is ", train_fraction) print("Top k", K) out_file_name = "N%d_K%d_T%s" % (num_states, K, sys.argv[4]) OUTPUT_PATH = os.path.join(BASH_RUN, out_file_name) existing_files = glob.glob(OUTPUT_PATH+str("/*.h5")) existing_files_names = [int(x.split("/")[-1].split(".")[0]) for x in existing_files] ds = DataSet(ds_path) fridges = nilmtk.global_meter_group.select_using_appliances(type='fridge') fridges_id_building_id = {i: fridges.meters[i].building() for i in range(len(fridges.meters))} fridge_id_building_id_ser = pd.Series(fridges_id_building_id) from fridge_compressor_durations_optimised_jul_7 import compressor_powers fridge_ids_to_consider = compressor_powers.keys() building_ids_to_consider = fridge_id_building_id_ser[fridge_ids_to_consider] #sys.exit(0) def find_specific_appliance(appliance_name, appliance_instance, list_of_elecs):
def nilmtkECOfunc(dataset_loc, train_start, train_end, test_start, test_end, output_period):
    """Train Combinatorial Optimisation on an ECO building and disaggregate.

    Building index is hard-coded to 2. Returns a tuple:
    (disaggregated submeter DataFrame, aligned ground-truth DataFrame,
    trained CombinatorialOptimisation model, resampled training DataFrame).
    """
    #### configuration ####
    period_s = output_period  # output sample period in seconds
    building = 2  # NOTE(review): building index is hard-coded
    #### load ####
    total = DataSet(dataset_loc)
    train = DataSet(dataset_loc)
    test = DataSet(dataset_loc)
    train.set_window(start=train_start, end=train_end)
    test.set_window(start=test_start, end=test_end)
    print(train_start)
    print(train_end)
    print(test_start)
    print(test_end)
    #### get timeframe ####
    tf_total = total.buildings[building].elec.mains().get_timeframe()
    tf_train = train.buildings[building].elec.mains().get_timeframe()
    tf_test = test.buildings[building].elec.mains().get_timeframe()
    #### eletrical metergroup ####
    total_elec = total.buildings[building].elec
    train_elec = train.buildings[building].elec
    test_elec = test.buildings[building].elec
    #### training process ####
    start = time.time()
    from nilmtk.disaggregate import CombinatorialOptimisation
    co = CombinatorialOptimisation()
    co.train(train_elec, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")
    #### disaggregation process ####
    start = time.time()
    # NOTE(review): output path is hard-coded; successive runs overwrite it.
    disag_filename = '../dataset/ecob-b2-kall-co-1w:11-1m.h5'
    output = HDFDataStore(disag_filename, 'w')
    co.disaggregate(test_elec.mains(), output, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")
    output.close()
    disag_co = DataSet(disag_filename)
    disag_co_elec = disag_co.buildings[building].elec
    #### fraction energy assigned correctly ####
    #FTE_co_all = FTE_func(disag_co_elec, test_elec);
    #### total disaaggregation error ####
    #Te_co_all = total_disag_err(disag_co_elec, test_elec);
    #### creating dataframe from both disaggregated and ground truth metergroups
    disag_co_elec_df = disag_co_elec.dataframe_of_meters()
    disag_co_elec_df_nona = disag_co_elec_df.dropna()
    gt_full_df = test_elec.dataframe_of_meters()
    gt_full_df_nona = gt_full_df.dropna()
    # NOTE(review): DataFrame.ix was removed in pandas 1.0 — this function
    # requires an old pandas; migrate to .loc/.reindex when possible.
    gt_df_nona = gt_full_df_nona.ix[disag_co_elec_df_nona.index]
    #### jaccard ####
    #Ja_co_all = jaccard_similarity(disag_co_elec_df_nona, gt_df_nona, disag_co_elec.submeters().instance(), test_elec.instance());
    #print("FTE all", FTE_co_all);
    #print("TE all", Te_co_all);
    #print("Ja all", Ja_co_all);
    #### output ####
    # drop aggregated power (column 0 is the mains channel)
    disag_co_elec_submeter_df = disag_co_elec_df.drop(
        disag_co_elec_df.columns[[0]], axis=1)
    # disag_co_elec_submeter_df = disag_co_elec_df
    # drop the unwanted timestamp
    gt_df_aligned = gt_full_df.ix[disag_co_elec_submeter_df.index]
    # drop aggregated power
    gt_df_sub = gt_df_aligned.drop(gt_df_aligned.columns[[0, 1, 2]], axis=1)
    # train: resample to the output period and drop the mains columns
    train_elec_df = train_elec.dataframe_of_meters()
    train_elec_df_aligned = train_elec_df.resample(str(period_s) + 'S').asfreq()[0:]
    train_elec_df_aligned_drop = train_elec_df_aligned.drop(
        train_elec_df_aligned.columns[[0, 1, 2]], axis=1)
    return disag_co_elec_submeter_df, gt_df_sub, co, train_elec_df_aligned_drop
def fhmm(dataset_path, train_building, train_start, train_end, val_building, val_start, val_end, test_building, test_start, test_end, meter_key, sample_period):
    """Train an FHMM NILM model for one appliance and evaluate it.

    Opens train/val/test windows over the same dataset file, trains the
    FHMM on the chosen appliance plus mains, disaggregates the validation
    and test mains into temporary HDF5 stores, and returns a dict with
    'val_metrics', 'test_metrics', 'time_taken' and 'epochs' (always None
    for FHMM).
    """
    # Start tracking time
    start = time.time()
    # Prepare dataset and options
    dataset_path = dataset_path
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)
    train_building = train_building
    test_building = test_building
    meter_key = meter_key
    sample_period = sample_period
    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec
    # Train on the target appliance together with the mains channel.
    appliances = [meter_key]
    selected_meters = [train_elec[app] for app in appliances]
    selected_meters.append(train_elec.mains())
    selected = MeterGroup(selected_meters)
    fhmm = FHMM()
    fhmm.train(selected, sample_period=sample_period)
    # Disaggregate validation mains into a temporary HDF5 store.
    val_disag_filename = 'disag-out-val.h5'
    output = HDFDataStore(val_disag_filename, 'w')
    fhmm.disaggregate(val_elec.mains(), output_datastore=output)
    output.close()
    # Disaggregate test mains.
    test_disag_filename = 'disag-out-test.h5'
    output = HDFDataStore(test_disag_filename, 'w')
    fhmm.disaggregate(test_elec.mains(), output_datastore=output)
    output.close()
    # Validation metrics: predicted appliance channel vs ground truth.
    result_val = DataSet(val_disag_filename)
    res_elec_val = result_val.buildings[val_building].elec
    rpaf_val = metrics.recall_precision_accuracy_f1(res_elec_val[meter_key],
                                                    val_elec[meter_key])
    val_metrics_results_dict = {
        'recall_score': rpaf_val[0],
        'precision_score': rpaf_val[1],
        'accuracy_score': rpaf_val[2],
        'f1_score': rpaf_val[3],
        'mean_absolute_error': metrics.mean_absolute_error(res_elec_val[meter_key], val_elec[meter_key]),
        'mean_squared_error': metrics.mean_square_error(res_elec_val[meter_key], val_elec[meter_key]),
        'relative_error_in_total_energy': metrics.relative_error_total_energy(res_elec_val[meter_key], val_elec[meter_key]),
        'nad': metrics.nad(res_elec_val[meter_key], val_elec[meter_key]),
        'disaggregation_accuracy': metrics.disaggregation_accuracy(res_elec_val[meter_key], val_elec[meter_key])
    }
    # Test metrics.
    result = DataSet(test_disag_filename)
    res_elec = result.buildings[test_building].elec
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                test_elec[meter_key])
    test_metrics_results_dict = {
        'recall_score': rpaf[0],
        'precision_score': rpaf[1],
        'accuracy_score': rpaf[2],
        'f1_score': rpaf[3],
        'mean_absolute_error': metrics.mean_absolute_error(res_elec[meter_key], test_elec[meter_key]),
        'mean_squared_error': metrics.mean_square_error(res_elec[meter_key], test_elec[meter_key]),
        'relative_error_in_total_energy': metrics.relative_error_total_energy(res_elec[meter_key], test_elec[meter_key]),
        'nad': metrics.nad(res_elec[meter_key], test_elec[meter_key]),
        'disaggregation_accuracy': metrics.disaggregation_accuracy(res_elec[meter_key], test_elec[meter_key])
    }
    # end tracking time
    end = time.time()
    time_taken = end - start  # in seconds
    # (A large commented-out verbose result-dict template from the original
    # was condensed away for readability.)
    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': None,
    }
    # Close disaggregation output stores, then the source dataset files.
    result.store.close()
    result_val.store.close()
    train.store.close()
    val.store.close()
    test.store.close()
    return model_result_data
# Script: plot mains power histograms for every UK-DALE house, one row
# of the figure per house.
from __future__ import print_function, division
from nilmtk import DataSet, TimeFrame, MeterGroup
import plot_config
import seaborn as sns
from matplotlib.dates import DateFormatter, HourLocator
import matplotlib.pyplot as plt
import pytz
from os.path import join
from pylab import rcParams

rcParams.update({'figure.figsize': plot_config._mm_to_inches(88, 60)})

print("plotting histograms...")
dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
# dataset.set_window("2013-04-01", "2013-05-01")
dataset.set_window(None, None)  # use the full recording

axes = dataset.plot_mains_power_histograms(bins=500, range=(5, 500),
                                           plot_kwargs={'color': plot_config.BLUE})

# Formatting: strip grids/ticks; keep the x label only on the last (5th) row.
for i, ax in enumerate(axes):
    ax.grid(False)
    ax.set_yticks([])
    ax.set_ylabel("")
    plot_config.format_axes(ax, tick_size=2)
    sns.despine(ax=ax, left=True)
    ax.spines['bottom'].set_linewidth(0.2)
    ax.set_title('House {}'.format(i+1), y=.5, va='top', x=0.08)
    if i != 4:
        ax.set_xlabel('')