def load_appliance_activation(dataset: DataSet, start_date: datetime,
                              end_date: datetime, building: int,
                              selection: [object],
                              activation_series_parameters: [dict],
                              sample_period: int = 60):
    dataset.set_window(start=start_date.strftime("%Y-%m-%d %H:%M:%S"),
                       end=end_date.strftime("%Y-%m-%d %H:%M:%S"))
    elec_meter = dataset.buildings[building].elec
    result = []
    for i, s in enumerate(selection):
        min_off_duration = activation_series_parameters[i]["min_off"]
        min_on_duration = activation_series_parameters[i]["min_on"]
        on_power_threshold = activation_series_parameters[i]["on_power_threshold"]
        value = elec_meter[s].activation_series(
            min_off_duration=min_off_duration,
            min_on_duration=min_on_duration,
            on_power_threshold=on_power_threshold,
            sample_period=sample_period,
            border=0)
        result.append(value)
    return result
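# A minimal usage sketch for load_appliance_activation, assuming a local UK-DALE copy
# at "../data/ukdale.h5"; the path, appliance names and threshold values below are
# illustrative assumptions, not taken from the original code. It shows that
# activation_series_parameters is one dict per selected appliance, keyed by
# "min_off", "min_on" and "on_power_threshold".
from datetime import datetime
from nilmtk import DataSet

dataset = DataSet("../data/ukdale.h5")
activations = load_appliance_activation(
    dataset,
    start_date=datetime(2013, 4, 1),
    end_date=datetime(2013, 5, 1),
    building=1,
    selection=['kettle', 'fridge'],
    activation_series_parameters=[
        {"min_off": 0, "min_on": 12, "on_power_threshold": 2000},
        {"min_off": 12, "min_on": 60, "on_power_threshold": 50},
    ],
    sample_period=60)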
def disaggregate_original_co(h5_input, h5_output, dataset_start_date_disag,
                             dataset_end_date_disag, centroids=None):
    import nilmtk.disaggregate as original_nilmtk

    ds = DataSet(h5_input)
    elec = ds.buildings[1].elec
    vampire_power_used_in_original = elec.mains().vampire_power()

    # Train
    plain_co = original_nilmtk.CombinatorialOptimisation()
    plain_co.train(elec)

    # Modify centroids manually
    if centroids is not None:
        for i, model in enumerate(plain_co.model):
            instance = model['training_metadata'].instance()
            model['states'] = centroids[instance]

    # Disaggregate
    ds.set_window(start=dataset_start_date_disag, end=dataset_end_date_disag)
    elec = ds.buildings[1].elec
    output_plain_co = HDFDataStore(h5_output, 'w')
    plain_co.disaggregate(elec.mains(), output_plain_co)
    output_plain_co.close()

    return plain_co, vampire_power_used_in_original
def load_formatted_appliances(dataset: DataSet, start_date: datetime,
                              end_date: datetime, building: int,
                              selection: [object],
                              activation_series_parameters: [[int]],
                              sample_period: int = 60):
    dataset.set_window(start=start_date.strftime("%Y-%m-%d %H:%M:%S"),
                       end=end_date.strftime("%Y-%m-%d %H:%M:%S"))
    elec_meter = dataset.buildings[building].elec
    idx = pd.date_range(start_date, end_date - timedelta(seconds=sample_period),
                        freq=str(sample_period) + 'S')
    result = []
    total = None
    for i, s in enumerate(selection):
        min_off_duration = activation_series_parameters[i][0]
        min_on_duration = activation_series_parameters[i][1]
        on_power_threshold = activation_series_parameters[i][2]
        value = elec_meter[s].activation_series(
            min_off_duration=min_off_duration,
            min_on_duration=min_on_duration,
            on_power_threshold=on_power_threshold,
            sample_period=sample_period,
            border=0)
        data = pd.concat(value)
        data = data.reindex(idx, fill_value=0)
        result.append(data)
        if total is None:
            total = data
        else:
            total = total.add(data)
    return result, total
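# A minimal usage sketch for load_formatted_appliances (path and parameter values are
# illustrative assumptions). Unlike load_appliance_activation above, each entry of
# activation_series_parameters is a positional list [min_off, min_on,
# on_power_threshold], and the function also returns the element-wise sum of all
# selected appliance series, reindexed onto a common time grid.
from datetime import datetime
from nilmtk import DataSet

ukdale = DataSet("../data/ukdale.h5")
appliance_series, total_power = load_formatted_appliances(
    ukdale,
    start_date=datetime(2013, 4, 1),
    end_date=datetime(2013, 5, 1),
    building=1,
    selection=['kettle', 'fridge'],
    activation_series_parameters=[[0, 12, 2000], [12, 60, 50]],
    sample_period=60)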
def plot_zoomed_original_predicted_energy_consumption():
    """ Plots a zoomed time frame of the original prediction. """
    test = DataSet('../data/ukdale.h5')
    test.clear_cache()
    test.set_window(start="30-6-2013", end="15-7-2013")
    test_building = 1
    sample_period = 6
    meter_keys = ['kettle']
    test_elec = test.buildings[test_building].elec
    results_dir = '../results/UKDALE-ACROSS-BUILDINGS-RNN-lr=1e-05-2018-02-20-14-24-46'
    disag_filename = 'disag-out.h5'
    for key in meter_keys:
        # get predicted curve for the best epoch
        result = DataSet(os.path.join(results_dir, disag_filename))
        res_elec = result.buildings[test_building].elec
        predicted = res_elec[key]
        predicted = predicted.power_series(sample_period=sample_period)
        predicted = next(predicted)
        predicted.fillna(0, inplace=True)

        y1 = np.array(predicted)     # power
        x1 = np.arange(y1.shape[0])  # timestamps
        # The chosen time frame to zoom in
        x1 = x1[94000:102500]
        y1 = y1[94000:102500]

        ground_truth = test_elec[key]
        ground_truth = ground_truth.power_series(sample_period=sample_period)
        ground_truth = next(ground_truth)
        ground_truth.fillna(0, inplace=True)

        y2 = np.array(ground_truth)  # power
        x2 = np.arange(y2.shape[0])  # timestamps
        # The chosen time frame to zoom in
        x2 = x2[94000:102500]
        y2 = y2[94000:102500]

        fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True, sharey=True)
        ax1.plot(x1, y1, color='r', label='predicted')
        ax1.plot(x2, y2, color='b', label='ground truth')
        ax2.plot(x1, y1, color='r')
        ax3.plot(x2, y2, color='b')
        ax1.set_title('Appliance: {}'.format(key))
        plt.xticks(
            np.arange(94000, 102500, 2000),
            ('5-10-2013 12:00', '16:00', '20:00', '6-10-2013 00:00', '04:00'))
        fig.legend()
        fig.savefig(
            os.path.join(
                results_dir,
                'zoomed_original_predicted_vs_ground_truth_{}.png'.format(key)))
def benchmarks(house_id):
    redd_train = DataSet(REDD_FILE)
    redd_test = DataSet(REDD_FILE)

    # set up training and test sets
    redd_train.set_window(end=TRAIN_END)
    redd_test.set_window(start=TRAIN_END)

    # get top N_DEV devices
    house = redd_train.buildings[house_id]
    test_elec = redd_test.buildings[house_id].elec
    top_apps = house.elec.submeters().select_top_k(k=N_DEV)

    # store mains data
    test_mains = next(test_elec.mains().load())

    truth = {}
    predictions = {}

    # benchmark classifier 1
    co = CombinatorialOptimisation()
    start = time.time()
    print("*" * 20)
    print('Combinatorial Optimisation: ')
    print("*" * 20)
    co.train(top_apps, sample_period=SAMPLE_PERIOD)
    truth['CO'], predictions['CO'] = predict(co, test_elec, SAMPLE_PERIOD,
                                             redd_train.metadata['timezone'])
    end = time.time()
    print("Runtime: ", end - start)

    # benchmark classifier 2
    fhmm = FHMM()
    start = time.time()
    print("*" * 20)
    print('Factorial Hidden Markov Model: ')
    print("*" * 20)
    fhmm.train(top_apps, sample_period=SAMPLE_PERIOD)
    truth['FHMM'], predictions['FHMM'] = predict(fhmm, test_elec, SAMPLE_PERIOD,
                                                 redd_train.metadata['timezone'])
    end = time.time()
    print("Runtime: ", end - start)

    # add mains to truth
    truth['CO']['Main'] = test_mains
    truth['FHMM']['Main'] = test_mains

    return truth, predictions
def state_generator(dataset_loc, start_time, end_time, freq, co, appliance_pwr):
    building = 1
    label = []
    label_upper = []
    data = DataSet(dataset_loc)
    data.set_window(start=start_time, end=end_time)
    data_elec = data.buildings[building].elec
    for i in data_elec.submeters().instance():
        label.append(str(data_elec[i].label()).lower())
        label_upper.append(str(data_elec[i].label()).upper())
    states = get_states(co)
    app_state = get_state(states, label_upper, appliance_pwr)
    return app_state
class REDDloader:
    def __init__(self, window_selection: {}, appliance_selection: {},
                 order_appliances: [], sample_rate: int, improved: bool):
        self.dataset = DataSet("../data/redd.h5")
        self.window_selection = window_selection
        self.appliance_selection = appliance_selection
        self.order_appliances = order_appliances
        self.sample_rate = sample_rate
        self.improved = improved

    def load_house(self, house: int):
        print("loading house: " + str(house))
        selection = self.appliance_selection[house]
        window_start, window_end = self.window_selection[house]
        self.dataset.set_window(start=window_start, end=window_end)
        elec = self.dataset.buildings[house].elec
        train_appliances = dr.load_appliances_selection(
            elec, self.order_appliances, selection, self.sample_rate)
        train_total = dr.load_total_power_consumption(elec, selection,
                                                      self.sample_rate)
        signals = Signals(self.sample_rate, self.order_appliances,
                          breakpoint_classification, self.improved,
                          "temperature_redd.csv")
        signals.set_signals(train_appliances, train_total)
        return signals

    def concat_houses(self, houses_list, include_fake_breakpoint=False):
        _x1, _y1, _x2, _y2 = None, None, None, None
        for i in range(0, len(houses_list)):
            house = houses_list[i]
            signals = self.load_house(house)
            x1_part, y1_part, x2_part, y2_part = signals.get_prepared_data()
            if include_fake_breakpoint:
                x2_fake, y2_fake = dp.create_fake_breakpoints(signals)
                x2_part = np.concatenate((x2_part, x2_fake))
                y2_part = np.concatenate((y2_part, y2_fake))
            _x1 = x1_part if _x1 is None else np.concatenate((_x1, x1_part))
            _y1 = y1_part if _y1 is None else np.concatenate((_y1, y1_part))
            _x2 = x2_part if _x2 is None else np.concatenate((_x2, x2_part))
            _y2 = y2_part if _y2 is None else np.concatenate((_y2, y2_part))
        return _x1, _y1, _x2, _y2
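# A minimal usage sketch for REDDloader (the window dates, appliance selections and
# house numbers below are illustrative assumptions, not the original experiment
# settings). concat_houses stacks the prepared arrays of several houses into one
# training set.
loader = REDDloader(
    window_selection={1: ("2011-04-18", "2011-05-22"),
                      2: ("2011-04-18", "2011-05-22")},
    appliance_selection={1: ['fridge', 'microwave'],
                         2: ['fridge', 'microwave']},
    order_appliances=['fridge', 'microwave'],
    sample_rate=60,
    improved=True)
x1, y1, x2, y2 = loader.concat_houses([1, 2], include_fake_breakpoint=False)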
def occ_state_generator(dataset_loc, start_time, end_time, freq, co):
    building = 1
    label = []
    label_upper = []
    data = DataSet(dataset_loc)
    data.set_window(start=start_time, end=end_time)
    data_elec = data.buildings[building].elec
    for i in data_elec.submeters().instance():
        label.append(str(data_elec[i].label()).lower())
        label_upper.append(str(data_elec[i].label()).upper())
    train_elec_df = data_elec.dataframe_of_meters().resample(
        str(freq) + 'S').max().round(0)
    train_elec_df = train_elec_df.drop(train_elec_df.columns[[0]], axis=1)
    train_elec_df.columns = label
    states = get_states(co)
    occ, state = occ_state(states, label_upper, train_elec_df)
    return occ, state
def feature_generator(dataset_loc, start_time, end_time, building, freq, occ_data):
    label = []
    label_upper = []
    data = DataSet(dataset_loc)
    data.set_window(start=start_time, end=end_time)
    data_elec = data.buildings[building].elec
    for i in data_elec.submeters().instance():
        label.append(str(data_elec[i].label()).lower())
        label_upper.append(str(data_elec[i].label()).upper())
    test_elec_df = data_elec.dataframe_of_meters().resample(
        str(freq) + 'S').max().round(0)
    test_elec_df = test_elec_df.drop(test_elec_df.columns[[0]], axis=1)
    test_elec_df.columns = label
    test_elec_df.to_csv('feature_elec.csv')
    states = pd.DataFrame.from_csv('states.csv')
    result = room_feature(states, building, label_upper, occ_data)
    return result
def groupmix_rlo_generator(dataset_loc, start_time, end_time, freq, occupancy, co):
    building = 2
    label = []
    label_upper = []
    data = DataSet(dataset_loc)
    data.set_window(start=start_time, end=end_time)
    data_elec = data.buildings[building].elec
    for i in data_elec.submeters().instance():
        label.append(str(data_elec[i].label()).lower())
        label_upper.append(str(data_elec[i].label()).upper())
    train_elec_df = data_elec.dataframe_of_meters().resample(
        str(freq) + 'S').max().round(0)
    train_elec_df = train_elec_df.drop(train_elec_df.columns[[0, 1, 2]], axis=1)
    train_elec_df.columns = label
    states = get_states(co)
    group_mix, room_occ_num_people = groupmix_rlo(states, label_upper, occupancy,
                                                  train_elec_df)
    return group_mix, room_occ_num_people
def load_dataset(window_per_house, test_window, filename, meter_label,
                 train_building, test_building, **load_kwargs):
    # Load datasets
    train = DataSet(filename)
    test = DataSet(filename)
    # train.set_window(start=start_train, end=end_train)
    test.set_window(*test_window[test_building])

    # if only one house is used for training
    # train_y = train.buildings[train_building].elec[meter_label]
    # train_x = train.buildings[train_building].elec.mains()

    train_mainlist = []
    train_meterlist = []
    for building_id, building in train.buildings.items():
        if building_id in train_building:
            train.set_window(*window_per_house[building_id])
            y = building.elec[meter_label]
            x = building.elec.mains()
            train_mainlist.append(x.power_series_all_data(**load_kwargs))
            train_meterlist.append(y.power_series_all_data(**load_kwargs))

    # # multiple houses for training
    # train_meterlist = [train.buildings[i].elec[meter_label] for i in train_building]
    # train_mainlist = [train.buildings[i].elec.mains() for i in train_building]

    test_meterlist = test.buildings[test_building].elec[meter_label]
    test_mainlist = test.buildings[test_building].elec.mains()

    assert len(train_mainlist) == len(train_meterlist), \
        "The number of mains and appliance meters must be equal"

    return train_meterlist, train_mainlist, test_meterlist, test_mainlist
from __future__ import print_function, division
import time

from matplotlib import rcParams
import matplotlib.pyplot as plt

from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from daedisaggregator import DAEDisaggregator
import metrics

print("========== OPEN DATASETS ============")
train = DataSet(r'c:\dev\nilmtk\data\ukdale\ukdale.h5')
test = DataSet(r'c:\dev\nilmtk\data\ukdale\ukdale.h5')

train.set_window(start="13-4-2013", end="1-1-2014")
test.set_window(start="1-1-2014", end="30-3-2014")

train_building = 1
test_building = 1
sample_period = 6
meter_key = 'microwave'

train_elec = train.buildings[train_building].elec
test_elec = test.buildings[test_building].elec

train_meter = train_elec.submeters()[meter_key]
test_meter = test_elec.submeters()[meter_key]
train_mains = train_elec.mains()
test_mains = test_elec.mains()

dae = DAEDisaggregator(300)

start = time.time()
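# The snippet above stops right before training. A hedged sketch of the typical
# continuation, following the DAEDisaggregator train/disaggregate pattern used in the
# dae() function elsewhere in this collection; the epoch count and output filename
# are assumptions.
print("========== TRAIN ============")
dae.train(train_mains, train_meter, epochs=5, sample_period=sample_period)
print("Training runtime:", time.time() - start, "seconds")

print("========== DISAGGREGATE ============")
disag_filename = 'disag-out.h5'
output = HDFDataStore(disag_filename, 'w')
dae.disaggregate(test_mains, output, train_meter, sample_period=sample_period)
output.close()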
    return pd.Series(f1_scores)


import numpy.random
numpy.random.seed(42)

n = 10
print("....")

train = DataSet('C:/Users/20552/Desktop/HMM/HMM/NMLTK/iawe.h5')
test = DataSet('C:/Users/20552/Desktop/HMM/HMM/NMLTK/iawe.h5')
test_use = DataSet('C:/Users/20552/Desktop/HMM/HMM/NMLTK/iawe.h5')

building = 1
train.set_window(end="2013-07-13")
test.set_window(start="2013-07-13")
test_use.set_window(start="2013-07-13")

train_elec = train.buildings[1].elec
test_elec = test.buildings[1].elec
# test_use_elec = test_use.buildings[1].elec
print(test_elec)

# top_5_train_elec = train_elec.submeters().select_top_k(k=10)
top_5_train_elec = train_elec.submeters().select_top_k(k=5)

classifiers = {'CO': CombinatorialOptimisation(), 'FHMM': FHMM()}
classifiers = {'FHMM': FHMM()}
predictions = {}
sample_period = 5

for clf_name, clf in classifiers.items():
    print("*" * 20)
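    # The loop body is truncated above. A hedged sketch of the usual continuation,
    # following the standard nilmtk CO/FHMM benchmarking pattern (train on the
    # top-k submeters, disaggregate the test mains into an HDF5 store); the output
    # filename and the way predictions are collected are assumptions.
    print(clf_name)
    clf.train(top_5_train_elec, sample_period=sample_period)
    disag_filename = 'disag-out-{}.h5'.format(clf_name)
    output = HDFDataStore(disag_filename, 'w')
    clf.disaggregate(test_elec.mains(), output, sample_period=sample_period)
    output.close()
    predictions[clf_name] = DataSet(disag_filename).buildings[building].elec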
from __future__ import print_function, division
from nilmtk import DataSet

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')

window_per_house = {1: ("2013-04-12", None),
                    2: ("2013-05-22", None),
                    3: (None, None),
                    4: (None, None),
                    5: (None, "2014-09-06")}

descriptions = []
for building_id, building in dataset.buildings.iteritems():
    print("*********** House", building_id, "*************")
    dataset.set_window(*window_per_house[building_id])
    description = building.describe()
    descriptions.append(description)
    print(description)
    print()
filename = "appconf/{}".format(app) with open(filename) as data_file: conf = json.load(data_file) if not os.path.exists("dataset"): download_dataset() os.makedirs(conf['save_path'], exist_ok=True) # Create trainset for meter print(conf["nilmtk_key"]) house_keys = conf['train_buildings'] window_size = conf['lookback'] all_x_train = np.empty((train_size * len(house_keys), window_size, 1)) all_y_train = np.empty((train_size * len(house_keys), )) for i, building in enumerate(house_keys): ds.set_window(start=(ukdale_windows[building - 1])[0], end=(ukdale_windows[building - 1])[1]) elec = ds.buildings[building].elec meter = elec.submeters()[conf["nilmtk_key"]] mains = elec.mains() all_x, all_y = create_trainset(meter, mains, train_size, window_size) all_x = all_x all_y = all_y all_x_train[i * train_size:(i + 1) * train_size] = all_x all_y_train[i * train_size:(i + 1) * train_size] = all_y np.save('dataset/trainsets/X-{}'.format(conf['synth_key']), all_x_train) np.save('dataset/trainsets/Y-{}'.format(conf['synth_key']), all_y_train)
from __future__ import print_function, division
from nilmtk import DataSet, TimeFrame, MeterGroup
import plot_config
import seaborn as sns
from matplotlib.dates import DateFormatter, HourLocator
import matplotlib.pyplot as plt
import pytz
from os.path import join
from pylab import rcParams

rcParams.update({'figure.figsize': plot_config._mm_to_inches(180, 120)})

print("plotting good sections...")

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
# dataset.set_window("2013-06-01", "2013-06-02")
dataset.set_window(None, None)

axes = dataset.plot_good_sections(color=plot_config.BLUE)

for i, ax in enumerate(axes):
    plot_config.format_axes(ax, tick_size=2)
    ax.set_title('House {:d}'.format(i + 1), x=0.05, y=.4, va='top')
    ax.set_ylabel('Meter' if i == 1 else '', rotation=0,
                  ha='center', va='center', y=.4)

plt.savefig(join(plot_config.FIG_DIR, '03_good_sections.eps'), bbox_inches='tight')
def fcnn(dataset_path, train_building, train_start, train_end, val_building,
         val_start, val_end, test_building, test_start, test_end, meter_key,
         sample_period, num_epochs, patience, num_layers, optimizer,
         learning_rate, dropout_prob, loss):
    # Start tracking time
    start = time.time()

    # Prepare dataset and options
    dataset_path = dataset_path
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)
    train_building = train_building
    val_building = val_building
    test_building = test_building
    meter_key = meter_key
    sample_period = sample_period

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    try:
        # REDD
        X_train = next(train_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_train = next(
            train_elec[meter_key].load(sample_period=sample_period)).fillna(0)
        X_test = next(test_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_test = next(
            test_elec[meter_key].load(sample_period=sample_period)).fillna(0)
        X_val = next(val_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_val = next(
            val_elec[meter_key].load(sample_period=sample_period)).fillna(0)

        # Intersect the two dataframes - to make sure X and y contain the same
        # training instances
        # Train set
        intersect_index = pd.Index(
            np.sort(list(set(X_train.index).intersection(set(y_train.index)))))
        X_train = X_train.ix[intersect_index]
        y_train = y_train.ix[intersect_index]
        # Test set
        intersect_index = pd.Index(
            np.sort(list(set(X_test.index).intersection(set(y_test.index)))))
        X_test = X_test.ix[intersect_index]
        y_test = y_test.ix[intersect_index]
        # Val set
        intersect_index = pd.Index(
            np.sort(list(set(X_val.index).intersection(set(y_val.index)))))
        X_val = X_val.ix[intersect_index]
        y_val = y_val.ix[intersect_index]

        # Get values as numpy arrays
        X_train = X_train.values
        y_train = y_train.values
        X_test = X_test.values
        y_test = y_test.values
        X_val = X_val.values
        y_val = y_val.values
    except AttributeError:
        # UKDALE
        X_train = train_elec.mains().power_series_all_data(
            sample_period=sample_period).fillna(0)
        y_train = next(train_elec[meter_key].power_series(
            sample_period=sample_period)).fillna(0)
        X_test = test_elec.mains().power_series_all_data(
            sample_period=sample_period).fillna(0)
        y_test = next(test_elec[meter_key].power_series(
            sample_period=sample_period)).fillna(0)

        # Intersect the two dataframes - to make sure X and y contain the same
        # training instances
        # Train set
        intersect_index = pd.Index(
            np.sort(list(set(X_train.index).intersection(set(y_train.index)))))
        X_train = X_train.ix[intersect_index]
        y_train = y_train.ix[intersect_index]
        # Test set
        intersect_index = pd.Index(
            np.sort(list(set(X_test.index).intersection(set(y_test.index)))))
        X_test = X_test.ix[intersect_index]
        y_test = y_test.ix[intersect_index]

        # X_train = X_train.reshape(-1, 1)
        # y_train = y_train.reshape(-1, 1)
        # X_test = X_test.reshape(-1, 1)
        # y_test = y_test.reshape(-1, 1)

        # Get values as numpy arrays - avoid server error
        X_train = X_train.values.reshape(-1, 1)
        y_train = y_train.values.reshape(-1, 1)
        X_test = X_test.values.reshape(-1, 1)
        y_test = y_test.values.reshape(-1, 1)

    # Model settings and hyperparameters
    layers_array = array_layers(num_layers)
    fc_model = build_fc_model(layers_array, dropout_prob)
    # adam = Adam(lr = 1e-5)
    optimizer = optimizer(lr=learning_rate)
    fc_model.compile(loss=loss, optimizer=optimizer)

    # print("========== TRAIN ============")
    # checkpointer = ModelCheckpoint(filepath="results/fcnn-model-{}-{}epochs.h5".format(meter_key, num_epochs), verbose=0, save_best_only=True)
    # Early stopping when validation loss increases
    earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=patience,
                              verbose=0, mode='auto')
    hist_fc_ = fc_model.fit(X_train, y_train, batch_size=512, verbose=1,
                            nb_epoch=num_epochs, validation_split=0.2,
                            shuffle=True, callbacks=[earlystop])  # , checkpointer])

    # Get number of earlystop epochs
    num_epochs = earlystop.stopped_epoch if earlystop.stopped_epoch != 0 else num_epochs

    # print("========== DISAGGREGATE ============")
    val_pred_fc = fc_model.predict(X_val).reshape(-1)
    test_pred_fc = fc_model.predict(X_test).reshape(-1)

    # print("========== RESULTS ============")
    # me = Metrics(state_boundaries=[10])
    on_power_threshold = train_elec[meter_key].on_power_threshold()
    me = Metrics(state_boundaries=[on_power_threshold])
    val_metrics_results_dict = Metrics.compute_metrics(me, val_pred_fc,
                                                       y_val.flatten())
    test_metrics_results_dict = Metrics.compute_metrics(me, test_pred_fc,
                                                        y_test.flatten())

    # end tracking time
    end = time.time()
    time_taken = end - start  # in seconds

    # model_result_data = {
    #     'algorithm_name': 'FCNN',
    #     'datapath': dataset_path,
    #     'train_building': train_building,
    #     'train_start': str(train_start.date()) if train_start != None else None,
    #     'train_end': str(train_end.date()) if train_end != None else None,
    #     'test_building': test_building,
    #     'test_start': str(test_start.date()) if test_start != None else None,
    #     'test_end': str(test_end.date()) if test_end != None else None,
    #     'appliance': meter_key,
    #     'sampling_rate': sample_period,
    #
    #     'algorithm_info': {
    #         'options': {
    #             'epochs': num_epochs
    #         },
    #         'hyperparameters': {
    #             'sequence_length': None,
    #             'min_sample_split': None,
    #             'num_layers': num_layers
    #         },
    #         'profile': {
    #             'parameters': None
    #         }
    #     },
    #
    #     'metrics': metrics_results_dict,
    #
    #     'time_taken': format(time_taken, '.2f'),
    # }

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': num_epochs,
    }

    # Close Dataset files
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
train = DataSet('/nilmtk/data/iawe.h5')  # ('/nilmtk/data/ukdale.h5') #("/data/REDD/redd.h5")
test = DataSet('/nilmtk/data/iawe.h5')   # ('/nilmtk/data/ukdale.h5') #("/data/REDD/redd.h5")

elec = train.buildings[building_number].elec
mains = elec.mains()
df_all = mains.power_series_all_data()

# df_all has a bunch of NaNs
df_all_noNan = df_all.dropna()
a = df_all_noNan.keys()
middleTime = a[int(math.floor(a.size / 2))]
middleTimeStr = "%d-%02d-%02d %02d:%02d:%02d" % (
    middleTime.year, middleTime.month, middleTime.day,
    middleTime.hour, middleTime.minute, middleTime.second)
print(middleTimeStr)

train.set_window(end=middleTimeStr)
test.set_window(start=middleTimeStr)

train_elec = train.buildings[building_number].elec
test_elec = test.buildings[building_number].elec

top_train_elec = train_elec.submeters().select_top_k(k=5)
fhmm = fhmm_exact.FHMM()  # mk change this later to default
fhmm.train(top_train_elec, sample_period=60, resample=True)

outputAddress = "/nilmtk/data/iawe_449_3.h5"
output = HDFDataStore(outputAddress, 'w')
fhmm.disaggregate(test_elec.mains(), output, sample_period=60, resample=True)
output.close()
        {
            'type': DimshuffleLayer,
            'pattern': (0, 2, 1)  # back to (batch, time, features)
        }
    ]
    net = Net(**net_dict_copy)
    return net


os.chdir('/data/dk3810/figures/e446o/')
net = exp_o('e446o')
net.compile()
net.load_params(50000, '/data/dk3810/figures/e446o/e446o.hdf5')

dataset = DataSet('/data/dk3810/ukdale.h5')
dataset.set_window("2013-06-01", "2014-07-01")
elec = dataset.buildings[1].elec
elec.use_alternative_mains()
mains = elec.mains().power_series_all_data()
washer = elec['washer dryer'].power_series_all_data()

N = 131072
estimates = disaggregate(mains.values[:N], net)

fig, axes = plt.subplots(3, 1, sharex=True)
axes[0].plot(mains[:N].index, estimates)
axes[1].plot(mains[:N].index, mains[:N])
axes[2].plot(washer[:N].index, washer[:N])
"""
from __future__ import print_function, division
from nilmtk import DataSet, TimeFrame
from nilmtk.elecmeter import ElecMeterID
import pandas as pd

ukdale = DataSet('/data/mine/vadeec/merged/ukdale.h5')

# TZ = 'Europe/London'
# ukdale.store.window = TimeFrame(pd.Timestamp("2014-01-01 00:00", tz=TZ),
#                                 pd.Timestamp("2014-01-02 00:00", tz=TZ))
ukdale.set_window("2013-04-01", "2013-05-01")

elec = ukdale.buildings[1].elec
meter = elec[2]

# ukdale.plot_good_sections()
# best = meter._convert_physical_quantity_and_ac_type_to_cols(ac_type='best')

# elec2 = ukdale.buildings[2].elec
# elec.use_alternative_mains()
# elec2.use_alternative_mains()
# submeters2 = elec2.submeters()
# gen = submeters2.load()
# df = next(gen)

# gen = elec.load(verbose=True)
# df = gen.next()

# corr = elec.correlation_of_sum_of_submeters_with_mains(verbose=True)
# prop = elec.proportion_of_energy_submetered()
    if b_id in existing_files_names:
        print("Skipping", b_id)
        continue
    print(b_id)
    out[b_id] = {}
    start = time.time()

    # cls_dict = {"Hart": Hart85()}
    cls_dict = {"CO": CombinatorialOptimisation(),
                "FHMM": FHMM(),
                "Hart": Hart85()}

    elec = building.elec
    mains = elec.mains()

    train = DataSet(ds_path)
    test = DataSet(ds_path)
    split_point = datetime.date(2013, 7, 16)
    train.set_window(end=split_point)
    # test.set_window(start=split_point)
    train_elec = train.buildings[b_id].elec
    test_elec = test.buildings[b_id].elec
    test_mains = test_elec.mains()

    # AC elec
    ac_elec_train = train_elec[('air conditioner', 1)]
    ac_elec_test = test_elec[('air conditioner', 1)]

    num_states_dict = {ac_elec_train: num_states}

    # Finding top N appliances
    top_k_train_list = top_k_dict[str(b_id)][:K]
    print("Top %d list is " % (K), top_k_train_list)
co.disaggregate(loc.elec.mains(), output, location_data=loc,
                baseline=vampire_power_in_original, resample_seconds=60)
output.close()

time_start_metrics = time.time()
print("\nTotal elapsed: %s seconds ---" % (time_start_metrics - start_time))
print("Section Disaggregation: %s seconds ---\n" % (time_start_metrics - time_start_disag))

# METRICS =======================================================================
print("Calculating metrics====================================================")
disag = DataSet(h5_disag)
disago = DataSet(h5_disag_redd_original)
disago.metadata['timezone'] = disag.metadata['timezone']
disago.set_window(start=dataset_start_date_disag, end=dataset_end_date_disag)

disag_elec = disag.buildings[1].elec
disago_elec = disago.buildings[1].elec

disag_predictions_original = utils.get_disaggregation_predictions(
    disago_elec, vampire_power_in_original,
    start_date=dataset_start_date_disag,
    end_date=dataset_end_date_disag)
disag_predictions_location = utils.get_disaggregation_predictions(
    disag_elec, vampire_power_in_original,
    start_date=dataset_start_date_disag,
    end_date=dataset_end_date_disag)

mt = Metrics(co, gt, loc, disag_elec, disago_elec)
mt.calculate()
remainder = MeterGroup(remainder)
remainder.name = 'Other submeters'
selected_meters = MeterGroup(selected_meters[:2] + [remainder] + selected_meters[2:])
selected_meters['HTPC'].name = 'Home theatre PC'

# Reverse the colour palette so it matches top_5_energy
colors = sns.color_palette('deep')
colors.reverse()
colors = [colors[i] for i in [4, 2, 5, 1, 3, 0]]
sns.set_palette(colors)

# Set window
DATE = "2014-12-07"
next_day = pd.Timestamp(DATE) + timedelta(days=1)
dataset.set_window(DATE, next_day)

# Plot area
# Need to use a linewidth of 0 to prevent nasty things appearing
# in output.  Looks bad in plt.show() though!
ax, df = selected_meters.plot(kind='area', unit=UNIT, width=4000, threshold=5,
                              plot_kwargs={'linewidth': 0})

# Plot mains
ax = elec.mains().plot(ax=ax, unit=UNIT, width=10000,
                       plot_kwargs={'linewidth': 0.3,
                                    'color': 'grey',
                                    'label': 'Mains (active power)'})

# Prettify
ax.grid(False)
ax.set_ylim([0, 4])
""" import numpy as np import pandas as pd import matplotlib.pyplot as plt from nilmtk.disaggregate.hmm import test_hmm from nilmtk import DataSet, MeterGroup redd_file = r'E:\Dataset\REDD\rd.h5' if __name__ == '__main__': data_file = redd_file all_ds = DataSet(data_file) train_ds = DataSet(data_file) test_ds = DataSet(data_file) building = 1 train_ds.set_window(end='2011-04-30') test_ds.set_window(start='2011-05-08') all_elec = all_ds.buildings[1].elec train_elec = train_ds.buildings[1].elec test_elec = test_ds.buildings[1].elec test_hmm(train_elec, test_elec) print('over') print('over')
class NILM:
    def __init__(self):
        pass

    def convert_dataset(self, folder, destination_file):
        # convert_greend(folder, destination_file)
        convert_redd(folder, destination_file)

    def import_dataset(self, source_file, start_end):
        self.ds = DataSet(source_file)
        self.ds_train = DataSet(source_file)
        self.ds_train.set_window(end=start_end)
        self.ds_test = DataSet(source_file)
        self.ds_test.set_window(start=start_end)

    def show_wiring(self, building_no):
        self.ds.buildings[building_no].elec.draw_wiring_graph()

    def show_available_devices(self, building_no):
        return self.ds.buildings[building_no].elec

    def show_available_data(self, building_no, device_id):
        return self.ds.buildings[building_no].elec[device_id].available_columns()  # .device["measurements"]

    def get_aggregated_power(self, building_no):
        return self.ds.buildings[building_no].elec.mains().power_series_all_data()  # .head()

    def get_device_power(self, building_no, device_id):
        """ Returns a generator over the power timeseries """
        return self.ds.buildings[building_no].elec[device_id].power_series()

    def get_energy_per_meter(self, building_no):
        return self.ds_train.buildings[building_no].elec.submeters().energy_per_meter().loc['active']

    def get_total_energy_per_device(self, building_no, device_id):
        return self.ds.buildings[building_no].elec[device_id].total_energy()

    def plot_aggregated_power(self, building_no):
        self.ds.buildings[building_no].elec.mains().plot()

    def plot_meter_power(self, building_no, device_id):
        self.ds.buildings[building_no].elec[device_id].plot()

    def plot_all_meters(self, building_no):
        self.ds.buildings[building_no].elec.plot()

    def plot_appliance_states(self, building_no, device_id):
        self.ds.buildings[building_no].elec[device_id].plot_power_histogram()

    def plot_spectrum(self, building_no, device_id):
        self.ds.buildings[building_no].elec[device_id].plot_spectrum()

    def plot_appliance_usage(self, building_no, device_id):
        self.ds.buildings[building_no].elec[device_id].plot_activity_histogram()

    def select_appliances_by_id(self, building_no, names):
        pass

    def select_top_consuming_appliances_for_training(self, building_no, k=5):
        return self.ds.buildings[building_no].elec.submeters().select_top_k(k)

    def select_appliances_by_type(self, t):
        import nilmtk
        meters = nilmtk.global_meter_group.select_using_appliances(type=t).all_meters()
        # print([m.total_energy() for m in meters])
        # sort by energy consumption
        meters = sorted(meters, key=(lambda m: m.total_energy()[0]), reverse=True)
        # print([m.total_energy() for m in meters])
        return meters

    def create_nilm_model(self, m_type):
        if m_type == "FHMM":
            self.model = fhmm_exact.FHMM()
        elif m_type == "CombOpt":
            self.model = combinatorial_optimisation.CombinatorialOptimisation()

    def import_nilm_model(self, filepath, m_type):
        if m_type == "FHMM":
            self.model = fhmm_exact.FHMM()
            self.model.import_model(filepath)
        elif m_type == "CombOpt":
            self.model = combinatorial_optimisation.CombinatorialOptimisation()
            self.model.import_model(filepath)

    def train_nilm_model(self, top_devices, sample_period=None):
        if sample_period is None:
            self.model.train(top_devices)
        else:
            self.model.train(top_devices, sample_period)

    def save_disaggregator(self, filepath):
        self.model.export_model(filepath)

    def disaggregate(self, aggregate_timeserie, output_file, sample_period):
        self.model.disaggregate(aggregate_timeserie, output_file, sample_period)

    def plot_f_score(self, disag_filename):
        plt.figure()
        from nilmtk.metrics import f1_score
        disag = DataSet(disag_filename)
        disag_elec = disag.buildings[building].elec
        f1 = f1_score(disag_elec, test_elec)
        f1.index = disag_elec.get_labels(f1.index)
        f1.plot(kind='barh')
        plt.ylabel('appliance')
        plt.xlabel('f-score')
        plt.title(type(self.model).__name__)
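# A minimal usage sketch for the NILM wrapper class above (the REDD path, the split
# date, the building number and the model filename are illustrative assumptions). It
# mirrors the intended workflow: import the dataset, pick the top consumers, train an
# FHMM and save the fitted model.
nilm = NILM()
nilm.import_dataset('/home/user/redd.h5', start_end="30-4-2011")
top_devices = nilm.select_top_consuming_appliances_for_training(1, k=5)
nilm.create_nilm_model("FHMM")
nilm.train_nilm_model(top_devices, sample_period=60)
nilm.save_disaggregator('fhmm_model.pkl')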
def dae(dataset_path, train_building, train_start, train_end, test_building,
        test_start, test_end, val_building, val_start, val_end, meter_key,
        sample_period, num_epochs, patience, sequence_length, optimizer,
        learning_rate, loss):
    # Start tracking time
    start = time.time()

    # Prepare dataset and options
    # print("========== OPEN DATASETS ============")
    dataset_path = dataset_path
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)
    train_building = train_building
    test_building = test_building
    meter_key = meter_key
    sample_period = sample_period

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    train_meter = train_elec.submeters()[meter_key]
    try:
        train_mains = train_elec.mains().all_meters()[0]
        val_mains = val_elec.mains().all_meters()[0]
        test_mains = test_elec.mains().all_meters()[0]
    except AttributeError:
        train_mains = train_elec.mains()
        test_mains = test_elec.mains()

    dae = DAEDisaggregator(sequence_length, patience, optimizer, learning_rate, loss)

    # print("========== TRAIN ============")
    dae.train(train_mains, train_meter, epochs=num_epochs,
              sample_period=sample_period)

    # Get number of earlystop epochs
    num_epochs = dae.stopped_epoch if dae.stopped_epoch != 0 else num_epochs

    # dae.export_model("results/dae-model-{}-{}epochs.h5".format(meter_key, num_epochs))

    # print("========== DISAGGREGATE ============")
    # Validation
    val_disag_filename = 'disag-out-val.h5'
    output = HDFDataStore(val_disag_filename, 'w')
    dae.disaggregate(val_mains, output, train_meter, sample_period=sample_period)
    output.close()
    # Test
    test_disag_filename = 'disag-out-test.h5'
    output = HDFDataStore(test_disag_filename, 'w')
    dae.disaggregate(test_mains, output, train_meter, sample_period=sample_period)
    output.close()

    # print("========== RESULTS ============")
    # Validation
    result_val = DataSet(val_disag_filename)
    res_elec_val = result_val.buildings[val_building].elec
    rpaf_val = metrics.recall_precision_accuracy_f1(res_elec_val[meter_key],
                                                    val_elec[meter_key])
    val_metrics_results_dict = {
        'recall_score': rpaf_val[0],
        'precision_score': rpaf_val[1],
        'accuracy_score': rpaf_val[2],
        'f1_score': rpaf_val[3],
        'mean_absolute_error':
            metrics.mean_absolute_error(res_elec_val[meter_key], val_elec[meter_key]),
        'mean_squared_error':
            metrics.mean_square_error(res_elec_val[meter_key], val_elec[meter_key]),
        'relative_error_in_total_energy':
            metrics.relative_error_total_energy(res_elec_val[meter_key], val_elec[meter_key]),
        'nad': metrics.nad(res_elec_val[meter_key], val_elec[meter_key]),
        'disaggregation_accuracy':
            metrics.disaggregation_accuracy(res_elec_val[meter_key], val_elec[meter_key])
    }
    # Test
    result = DataSet(test_disag_filename)
    res_elec = result.buildings[test_building].elec
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                test_elec[meter_key])
    test_metrics_results_dict = {
        'recall_score': rpaf[0],
        'precision_score': rpaf[1],
        'accuracy_score': rpaf[2],
        'f1_score': rpaf[3],
        'mean_absolute_error':
            metrics.mean_absolute_error(res_elec[meter_key], test_elec[meter_key]),
        'mean_squared_error':
            metrics.mean_square_error(res_elec[meter_key], test_elec[meter_key]),
        'relative_error_in_total_energy':
            metrics.relative_error_total_energy(res_elec[meter_key], test_elec[meter_key]),
        'nad': metrics.nad(res_elec[meter_key], test_elec[meter_key]),
        'disaggregation_accuracy':
            metrics.disaggregation_accuracy(res_elec[meter_key], test_elec[meter_key])
    }

    # end tracking time
    end = time.time()
    time_taken = end - start  # in seconds

    # model_result_data = {
    #     'algorithm_name': 'DAE',
    #     'datapath': dataset_path,
    #     'train_building': train_building,
    #     'train_start': str(train_start.date()) if train_start != None else None,
    #     'train_end': str(train_end.date()) if train_end != None else None,
    #     'test_building': test_building,
    #     'test_start': str(test_start.date()) if test_start != None else None,
    #     'test_end': str(test_end.date()) if test_end != None else None,
    #     'appliance': meter_key,
    #     'sampling_rate': sample_period,
    #
    #     'algorithm_info': {
    #         'options': {
    #             'epochs': num_epochs
    #         },
    #         'hyperparameters': {
    #             'sequence_length': sequence_length,
    #             'min_sample_split': None,
    #             'num_layers': None
    #         },
    #         'profile': {
    #             'parameters': None
    #         }
    #     },
    #
    #     'metrics': metrics_results_dict,
    #
    #     'time_taken': format(time_taken, '.2f'),
    # }

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': num_epochs,
    }

    # Close disag_filename stores
    result.store.close()
    result_val.store.close()

    # Close Dataset files
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
print("========== OPEN DATASETS ============") meterList = [] mainsList = [] test = DataSet('ukdale.h5') # test = DataSet('redd.h5') # test.set_window(start='2016-04-01',end='2016-05-01') test_building_list = [2, 3, 4, 5] #[2,5] sample_period = 6 meter_key = 'kettle' file = open('baseTrainSetsInfo_' + meter_key, 'r') for line in file: toks = line.split(',') train = DataSet(toks[0]) print(toks[2], '-', toks[3]) train.set_window(start=toks[2], end=toks[3]) train_elec = train.buildings[int(toks[1])].elec meterList.append(train_elec.submeters()[meter_key]) mainsList.append(train_elec.mains()) disaggregator = WindowGRUDisaggregator(window_size=100) start = time.time() print("========== TRAIN ============") epochs = 0 epochsPerCheckpoint = 3 totalCheckpoints = 1 #3 # disaggregator.import_model("WGRU-MultiHouse-{}-{}epochs.h5".format(meter_key, # epochs)) for i in range(totalCheckpoints):
def nilmtkECOfunc(dataset_loc, train_start, train_end, test_start, test_end,
                  output_period):
    #### configuration ####
    period_s = output_period
    building = 2

    #### load ####
    total = DataSet(dataset_loc)
    train = DataSet(dataset_loc)
    test = DataSet(dataset_loc)
    train.set_window(start=train_start, end=train_end)
    test.set_window(start=test_start, end=test_end)
    print(train_start)
    print(train_end)
    print(test_start)
    print(test_end)

    #### get timeframe ####
    tf_total = total.buildings[building].elec.mains().get_timeframe()
    tf_train = train.buildings[building].elec.mains().get_timeframe()
    tf_test = test.buildings[building].elec.mains().get_timeframe()

    #### electrical metergroup ####
    total_elec = total.buildings[building].elec
    train_elec = train.buildings[building].elec
    test_elec = test.buildings[building].elec

    #### training process ####
    start = time.time()
    from nilmtk.disaggregate import CombinatorialOptimisation
    co = CombinatorialOptimisation()
    co.train(train_elec, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")

    #### disaggregation process ####
    start = time.time()
    disag_filename = '../dataset/ecob-b2-kall-co-1w:11-1m.h5'
    output = HDFDataStore(disag_filename, 'w')
    co.disaggregate(test_elec.mains(), output, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")
    output.close()

    disag_co = DataSet(disag_filename)
    disag_co_elec = disag_co.buildings[building].elec

    #### fraction of energy assigned correctly ####
    # FTE_co_all = FTE_func(disag_co_elec, test_elec)

    #### total disaggregation error ####
    # Te_co_all = total_disag_err(disag_co_elec, test_elec)

    #### creating dataframes from both disaggregated and ground-truth metergroups ####
    disag_co_elec_df = disag_co_elec.dataframe_of_meters()
    disag_co_elec_df_nona = disag_co_elec_df.dropna()
    gt_full_df = test_elec.dataframe_of_meters()
    gt_full_df_nona = gt_full_df.dropna()
    gt_df_nona = gt_full_df_nona.ix[disag_co_elec_df_nona.index]

    #### jaccard ####
    # Ja_co_all = jaccard_similarity(disag_co_elec_df_nona, gt_df_nona,
    #                                disag_co_elec.submeters().instance(),
    #                                test_elec.instance())

    # print("FTE all", FTE_co_all)
    # print("TE all", Te_co_all)
    # print("Ja all", Ja_co_all)

    #### output ####
    # drop aggregated power
    disag_co_elec_submeter_df = disag_co_elec_df.drop(
        disag_co_elec_df.columns[[0]], axis=1)
    # disag_co_elec_submeter_df = disag_co_elec_df
    # drop the unwanted timestamps
    gt_df_aligned = gt_full_df.ix[disag_co_elec_submeter_df.index]
    # drop aggregated power
    gt_df_sub = gt_df_aligned.drop(gt_df_aligned.columns[[0, 1, 2]], axis=1)

    # train
    train_elec_df = train_elec.dataframe_of_meters()
    train_elec_df_aligned = train_elec_df.resample(str(period_s) + 'S').asfreq()[0:]
    train_elec_df_aligned_drop = train_elec_df_aligned.drop(
        train_elec_df_aligned.columns[[0, 1, 2]], axis=1)

    return disag_co_elec_submeter_df, gt_df_sub, co, train_elec_df_aligned_drop
from nilmtk import DataSet, TimeFrame, MeterGroup
import plot_config
import seaborn as sns
from matplotlib.dates import DateFormatter, HourLocator
from matplotlib.ticker import MaxNLocator
import matplotlib.pyplot as plt
import pytz
from os.path import join
from pylab import rcParams

rcParams.update({'figure.figsize': plot_config._mm_to_inches(180, 100)})

print("plotting appliance power histograms...")

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2013-04-26", None)  # ignore tungsten kitchen lamps
elec = dataset.buildings[1].elec

fig, axes = plt.subplots(3, 3)
meter_keys = ['fridge freezer', 'kettle', 'toaster',
              'vacuum cleaner', 'television', 'oven',
              'laptop computer', 'computer monitor', ('light', 1)]
kwargs_per_meter = {'range': [(2, 275), (2200, 2460), (1480, 1650),
                              (400, 2200), (80, 140), (None, 60),
                              (2, 65), (30, 85), (35, 290)]}
axes = elec.plot_multiple(axes, meter_keys, 'plot_power_histogram',
                          kwargs_per_meter,
                          plot_kwargs={'color': plot_config.BLUE})

# Formatting
from __future__ import print_function, division
import time

from matplotlib import rcParams
import matplotlib.pyplot as plt

from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.elecmeter import ElecMeterID
import metrics
from rnndisaggregator import RNNDisaggregator

print("========== OPEN DATASETS ============")
train = DataSet('../../Datasets/REDD/redd.h5')
train.set_window(end="30-4-2011")
test = DataSet('../../Datasets/REDD/redd.h5')
test.set_window(start="30-4-2011")

train_building = 1
test_building = 1
sample_period = 6
meter_key = 'fridge'

train_elec = train.buildings[train_building].elec
test_elec = test.buildings[test_building].elec

train_meter = train_elec.submeters()[meter_key]
train_mains = train_elec.mains().all_meters()[0]
test_mains = test_elec.mains().all_meters()[0]

rnn = RNNDisaggregator()

start = time.time()
print("========== TRAIN ============")
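# The script is cut off after the TRAIN banner. A hedged sketch of the usual
# continuation, following the same train/disaggregate pattern as the GRU and DAE
# snippets in this collection; the epoch count and output filename are assumptions.
rnn.train(train_mains, train_meter, epochs=5, sample_period=sample_period)
print("Training runtime:", time.time() - start, "seconds")

print("========== DISAGGREGATE ============")
disag_filename = 'disag-out.h5'
output = HDFDataStore(disag_filename, 'w')
rnn.disaggregate(test_mains, output, train_meter, sample_period=sample_period)
output.close()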
else:
    FINE_TUNING = False

DATASET = '../data/UKDALE/ukdale.h5'
MODEL = '../data/UKDALE/model-dae-1024-' + APPLIANCE + 'ukdale.h5'
DISAG = '../data/UKDALE/disag-dae-1024-' + APPLIANCE + 'out.h5'
UKDALE_MODEL = '../data/UKDALE/model-dae-washing machine-ukdale.h5'

TRAIN_BUILDING = 1
TEST_BUILDING = 2
SEQUENCE = 1024
START_TEST = "2013-05-22"
END_TEST = "2013-09-24"

train = DataSet(DATASET)
train.set_window(start="2013-04-12", end="2015-07-01")  # Training data time window
train_elec = train.buildings[TRAIN_BUILDING].elec  # Get building 1 meters

dae = DAEDisaggregator(SEQUENCE, FINE_TUNING)

if FINE_TUNING:
    print("------ FINE TUNING ------")
    dae.fine_tuning(UKDALE_MODEL)

train_mains = train_elec.mains()  # The aggregated meter that provides the input
train_meter = train_elec.submeters()[APPLIANCE]  # The appliance submeter used as the training target

if TRAINING:
    print("------ TRAINING ------")
from __future__ import print_function, division
from nilmtk import DataSet, TimeFrame, MeterGroup
import plot_config
import seaborn as sns
from matplotlib.ticker import MultipleLocator
import matplotlib.pyplot as plt
import pytz
from os.path import join
from pylab import rcParams

rcParams.update({'figure.figsize': plot_config._mm_to_inches(88, 150)})

print("plotting activity histograms...")

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2013-03-01", None)  # "2013-08-01")
elec = dataset.buildings[1].elec

N = 9
fig, axes = plt.subplots(N, 1)
meter_keys = ['boiler', 'kettle', 'toaster', 'oven', 'vacuum cleaner',
              'television', 'laptop computer', 'computer monitor', ('light', 1)]
axes = elec.plot_multiple(axes, meter_keys, 'plot_activity_histogram')

# Formatting
for i, ax in enumerate(axes):
    ax.grid(False)
    ax.set_yticks([])
    ax.set_ylabel('')
from nilm_models.gru.grudisaggregator import GRUDisaggregator
from nilmtk.dataset_converters import convert_redd
import tensorflow as tf

print("Num GPUs Available: ",
      len(tf.config.experimental.list_physical_devices('GPU')))

cwd = Path.cwd()
dataset_path = '..\\..\\experiments\\data\\low_freq'
full_path = cwd.joinpath(dataset_path)
if not Path(r'..\\..\\experiments\\data\\redd.h5').exists():
    convert_redd(str(full_path), r'..\\..\\experiments\\data\\redd.h5')

redd = DataSet(r'..\\..\\experiments\\data\\redd.h5')
redd.set_window(end="30-4-2011")  # Use data only until 4/30/2011

train_elec = redd.buildings[1].elec
train_mains = train_elec.mains().all_meters()[0]  # The aggregated meter that provides the input
train_meter = train_elec.submeters()['fridge']

gru = GRUDisaggregator()

if not Path("model-redd5.h5").exists():
    gru.train(train_mains, train_meter, epochs=5, sample_period=1)
    gru.export_model("model-redd5.h5")
else:
    gru.import_model("model-redd5.h5")

test = DataSet(r'..\\..\\experiments\\data\\redd.h5')
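# The script stops after opening the test DataSet. A hedged sketch of the usual
# continuation; the test window, mains selection and output path are assumptions,
# mirroring the training setup above and the disaggregation pattern used elsewhere
# in this collection.
from nilmtk import HDFDataStore

test.set_window(start="30-4-2011")  # hold out everything after the training window
test_elec = test.buildings[1].elec
test_mains = test_elec.mains().all_meters()[0]

disag_filename = r'..\\..\\experiments\\data\\disag-out.h5'
output = HDFDataStore(disag_filename, 'w')
gru.disaggregate(test_mains, output, train_meter, sample_period=1)
output.close()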
def random_forest(dataset_path, train_building, train_start, train_end,
                  val_building, val_start, val_end, test_building, test_start,
                  test_end, meter_key, sample_period, n_estimators, criterion,
                  min_sample_split):
    # Start tracking time
    start = time.time()

    # Prepare dataset and options
    dataset_path = dataset_path
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)
    train_building = train_building
    val_building = val_building
    test_building = test_building
    meter_key = meter_key
    sample_period = sample_period

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    try:
        # REDD
        X_train = next(train_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_train = next(
            train_elec[meter_key].load(sample_period=sample_period)).fillna(0)
        X_test = next(test_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_test = next(
            test_elec[meter_key].load(sample_period=sample_period)).fillna(0)
        X_val = next(val_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_val = next(
            val_elec[meter_key].load(sample_period=sample_period)).fillna(0)

        # Intersect the two dataframes - to make sure X and y contain the same
        # training instances
        # Train set
        intersect_index = pd.Index(
            np.sort(list(set(X_train.index).intersection(set(y_train.index)))))
        X_train = X_train.ix[intersect_index]
        y_train = y_train.ix[intersect_index]
        # Test set
        intersect_index = pd.Index(
            np.sort(list(set(X_test.index).intersection(set(y_test.index)))))
        X_test = X_test.ix[intersect_index]
        y_test = y_test.ix[intersect_index]
        # Val set
        intersect_index = pd.Index(
            np.sort(list(set(X_val.index).intersection(set(y_val.index)))))
        X_val = X_val.ix[intersect_index]
        y_val = y_val.ix[intersect_index]

        # Get values as numpy arrays
        X_train = X_train.values
        y_train = y_train.values
        X_test = X_test.values
        y_test = y_test.values
        X_val = X_val.values
        y_val = y_val.values
    except AttributeError:
        # UKDALE
        X_train = train_elec.mains().power_series_all_data(
            sample_period=sample_period).fillna(0)
        y_train = next(train_elec[meter_key].power_series(
            sample_period=sample_period)).fillna(0)
        X_test = test_elec.mains().power_series_all_data(
            sample_period=sample_period).fillna(0)
        y_test = next(test_elec[meter_key].power_series(
            sample_period=sample_period)).fillna(0)

        # Intersect the two dataframes - to make sure X and y contain the same
        # training instances
        # Train set
        intersect_index = pd.Index(
            np.sort(list(set(X_train.index).intersection(set(y_train.index)))))
        X_train = X_train.ix[intersect_index]
        y_train = y_train.ix[intersect_index]
        # Test set
        intersect_index = pd.Index(
            np.sort(list(set(X_test.index).intersection(set(y_test.index)))))
        X_test = X_test.ix[intersect_index]
        y_test = y_test.ix[intersect_index]

        # X_train = X_train.reshape(-1, 1)
        # y_train = y_train.reshape(-1, 1)
        # X_test = X_test.reshape(-1, 1)
        # y_test = y_test.reshape(-1, 1)

        # Get values as numpy arrays - avoid server error
        X_train = X_train.values.reshape(-1, 1)
        y_train = y_train.values.reshape(-1, 1)
        X_test = X_test.values.reshape(-1, 1)
        y_test = y_test.values.reshape(-1, 1)

    # Model settings and hyperparameters
    min_samples_split = min_sample_split
    rf_regr = RandomForestRegressor(n_estimators=n_estimators,
                                    criterion=criterion,
                                    min_samples_split=min_samples_split,
                                    random_state=0)

    # print("========== TRAIN ============")
    rf_regr.fit(X_train, y_train)

    # print("========== DISAGGREGATE ============")
    y_val_predict = rf_regr.predict(X_val)
    y_test_predict = rf_regr.predict(X_test)

    # print("========== RESULTS ============")
    # me = Metrics(state_boundaries=[10])
    on_power_threshold = train_elec[meter_key].on_power_threshold()
    me = Metrics(state_boundaries=[on_power_threshold])
    val_metrics_results_dict = Metrics.compute_metrics(me, y_val_predict,
                                                       y_val.flatten())
    test_metrics_results_dict = Metrics.compute_metrics(me, y_test_predict,
                                                        y_test.flatten())

    # end tracking time
    end = time.time()
    time_taken = end - start  # in seconds

    # model_result_data = {
    #     'algorithm_name': 'Random Forest Regressor',
    #     'datapath': dataset_path,
    #     'train_building': train_building,
    #     'train_start': str(train_start.date()) if train_start != None else None,
    #     'train_end': str(train_end.date()) if train_end != None else None,
    #     'test_building': test_building,
    #     'test_start': str(test_start.date()) if test_start != None else None,
    #     'test_end': str(test_end.date()) if test_end != None else None,
    #     'appliance': meter_key,
    #     'sampling_rate': sample_period,
    #
    #     'algorithm_info': {
    #         'options': {
    #             'epochs': None
    #         },
    #         'hyperparameters': {
    #             'sequence_length': None,
    #             'min_sample_split': min_sample_split,
    #             'num_layers': None
    #         },
    #         'profile': {
    #             'parameters': None
    #         }
    #     },
    #
    #     'metrics': metrics_results_dict,
    #
    #     'time_taken': format(time_taken, '.2f'),
    # }

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': None,
    }

    # Close Dataset files
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
from __future__ import print_function, division
from nilmtk import DataSet
import plot_config
import seaborn as sns
import matplotlib.pyplot as plt
from os.path import join
from pylab import rcParams

print("plotting energy bar...")

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2013-04-01", None)
elec = dataset.buildings[1].elec
submeters = elec.meters_directly_downstream_of_mains()
grouped = submeters.groupby('type')
top_k = grouped.select_top_k(group_remainder=False)
try:
    top_k['HTPC'].name = "Home theatre PC"
except KeyError:
    pass

############
# Plot
rcParams.update({'figure.figsize': plot_config._mm_to_inches(70, 90)})
ax = top_k.plot(kind='energy bar', mains=elec.mains())
sns.despine(ax=ax, bottom=True, left=True)
plt.tight_layout()
plt.draw()
    # top_devs = nilm.select_top_consuming_appliances_for_training(6, 5)
    print(device_family)
    return MeterGroup(device_family), device_family


def train_group(group):
    nilm.train_nilm_model(group, sample_period=60)


# Example at https://github.com/nilmtk/nilmtk/blob/master/docs/manual/user_guide/disaggregation_and_metrics.ipynb
train = DataSet('/home/andrea/Desktop/redd.h5')
test = DataSet('/home/andrea/Desktop/redd.h5')
train.set_window(end="30-4-2011")
test.set_window(start="30-4-2011")
train_elect = train.buildings[1].elec
test_elec = test.buildings[1].elec

best_devices = test_elec.submeters().select_top_k(k=5)
test_elec.mains().plot()

fhmm = fhmm_exact.FHMM()
fhmm.train(best_devices, sample_period=60)

# Save disaggregation to external dataset
# output = HDFDataStore('/home/andrea/Desktop/nilmtk_tests/redd.disag-fhmm.h5', 'w')
"""
fhmm.disaggregate(test_elec.mains(), output, sample_period=60)
# matplotlib inline
rcParams['figure.figsize'] = (13, 6)

from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.disaggregate import CombinatorialOptimisation, FHMM

# dividing data into train and test sets
train = DataSet('ukdale.h5')
test = DataSet('ukdale.h5')

# defining the building of interest; change this to use more than one building
building = 2

# set window of interest; perhaps the timestamp is throwing an error
train.set_window(start="1-4-2013", end="30-8-2013")
test.set_window(start="16-9-2013", end="30-9-2013")

# to change to house 5
train_elec = train.buildings[building].elec
test_elec = test.buildings[building].elec

# selecting top appliances: kettle, fridge, washing machine, microwave and dish washer
# top_5_train_elec = train_elec.submeters().select_top_k(k=5)
top_5_train_elec = train_elec.submeters().select_using_appliances(
    type=['fridge', 'kettle', 'microwave', 'dish washer'])


# Training and disaggregation
def predict(clf, test_elec, sample_period, timezone):
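    # The body of predict() is missing from this snippet. A hedged sketch of the
    # helper in the spirit of the standard nilmtk disaggregation example:
    # disaggregate the test mains chunk by chunk, collect the matching ground
    # truth, and return timezone-aligned DataFrames. The exact chunk handling and
    # column layout are assumptions.
    pred = {}
    gt = {}
    for i, chunk in enumerate(test_elec.mains().load(sample_period=sample_period)):
        chunk_drop_na = chunk.dropna()
        pred[i] = clf.disaggregate_chunk(chunk_drop_na)
        gt[i] = {}
        for meter in test_elec.submeters().meters:
            # Ground truth for the same sampling rate as the predictions
            gt[i][meter] = next(meter.load(sample_period=sample_period))
        gt[i] = pd.DataFrame({k: v.squeeze() for k, v in gt[i].items() if len(v)},
                             index=next(iter(gt[i].values())).index).dropna()
    # Concatenate the chunks and convert the index to the dataset's timezone
    gt_overall = pd.concat(list(gt.values()))
    gt_overall.index = gt_overall.index.tz_convert(timezone)
    pred_overall = pd.concat(list(pred.values()))
    pred_overall.index = pred_overall.index.tz_convert(timezone)
    return gt_overall, pred_overall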
print("*"*80) print("Starting for ids %d and %d" % (f_id, b_id)) print("*"*80) start = time.time() out[f_id] = {} # Need to put it here to ensure that we have a new instance of the algorithm each time cls_dict = {"Hart": Hart85()} elec = ds.buildings[b_id].elec mains = elec.mains() fridge_instance = fridges.meters[f_id].appliances[0].identifier.instance # Dividing train, test train = DataSet(ds_path) test = DataSet(ds_path) split_point = elec.train_test_split(train_fraction=train_fraction).date() train.set_window(end=split_point) test.set_window(start=split_point) train_elec = train.buildings[b_id].elec test_elec = test.buildings[b_id].elec test_mains = test_elec.mains() # Fridge elec fridge_elec_train = train_elec[('fridge', fridge_instance)] fridge_elec_test = test_elec[('fridge', fridge_instance)] num_states_dict = {fridge_elec_train: num_states} # Finding top N appliances top_k_train_list = top_k_dict[str(f_id)][:K] print("Top %d list is " %(K), top_k_train_list)
def fhmm(dataset_path, train_building, train_start, train_end, val_building,
         val_start, val_end, test_building, test_start, test_end, meter_key,
         sample_period):
    # Start tracking time
    start = time.time()

    # Prepare dataset and options
    # print("========== OPEN DATASETS ============")
    dataset_path = dataset_path
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)
    train_building = train_building
    test_building = test_building
    meter_key = meter_key
    sample_period = sample_period

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    appliances = [meter_key]
    selected_meters = [train_elec[app] for app in appliances]
    selected_meters.append(train_elec.mains())
    selected = MeterGroup(selected_meters)

    fhmm = FHMM()

    # print("========== TRAIN ============")
    fhmm.train(selected, sample_period=sample_period)

    # print("========== DISAGGREGATE ============")
    # Validation
    val_disag_filename = 'disag-out-val.h5'
    output = HDFDataStore(val_disag_filename, 'w')
    fhmm.disaggregate(val_elec.mains(), output_datastore=output)
    output.close()
    # Test
    test_disag_filename = 'disag-out-test.h5'
    output = HDFDataStore(test_disag_filename, 'w')
    fhmm.disaggregate(test_elec.mains(), output_datastore=output)
    output.close()

    # print("========== RESULTS ============")
    # Validation
    result_val = DataSet(val_disag_filename)
    res_elec_val = result_val.buildings[val_building].elec
    rpaf_val = metrics.recall_precision_accuracy_f1(res_elec_val[meter_key],
                                                    val_elec[meter_key])
    val_metrics_results_dict = {
        'recall_score': rpaf_val[0],
        'precision_score': rpaf_val[1],
        'accuracy_score': rpaf_val[2],
        'f1_score': rpaf_val[3],
        'mean_absolute_error':
            metrics.mean_absolute_error(res_elec_val[meter_key], val_elec[meter_key]),
        'mean_squared_error':
            metrics.mean_square_error(res_elec_val[meter_key], val_elec[meter_key]),
        'relative_error_in_total_energy':
            metrics.relative_error_total_energy(res_elec_val[meter_key], val_elec[meter_key]),
        'nad': metrics.nad(res_elec_val[meter_key], val_elec[meter_key]),
        'disaggregation_accuracy':
            metrics.disaggregation_accuracy(res_elec_val[meter_key], val_elec[meter_key])
    }
    # Test
    result = DataSet(test_disag_filename)
    res_elec = result.buildings[test_building].elec
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                test_elec[meter_key])
    test_metrics_results_dict = {
        'recall_score': rpaf[0],
        'precision_score': rpaf[1],
        'accuracy_score': rpaf[2],
        'f1_score': rpaf[3],
        'mean_absolute_error':
            metrics.mean_absolute_error(res_elec[meter_key], test_elec[meter_key]),
        'mean_squared_error':
            metrics.mean_square_error(res_elec[meter_key], test_elec[meter_key]),
        'relative_error_in_total_energy':
            metrics.relative_error_total_energy(res_elec[meter_key], test_elec[meter_key]),
        'nad': metrics.nad(res_elec[meter_key], test_elec[meter_key]),
        'disaggregation_accuracy':
            metrics.disaggregation_accuracy(res_elec[meter_key], test_elec[meter_key])
    }

    # end tracking time
    end = time.time()
    time_taken = end - start  # in seconds

    # model_result_data = {
    #     'algorithm_name': 'FHMM',
    #     'datapath': dataset_path,
    #     'train_building': train_building,
    #     'train_start': str(train_start.date()) if train_start != None else None,
    #     'train_end': str(train_end.date()) if train_end != None else None,
    #     'test_building': test_building,
    #     'test_start': str(test_start.date()) if test_start != None else None,
    #     'test_end': str(test_end.date()) if test_end != None else None,
    #     'appliance': meter_key,
    #     'sampling_rate': sample_period,
    #
    #     'algorithm_info': {
    #         'options': {
    #             'epochs': None
    #         },
    #         'hyperparameters': {
    #             'sequence_length': None,
    #             'min_sample_split': None,
    #             'num_layers': None
    #         },
    #         'profile': {
    #             'parameters': None
    #         }
    #     },
    #
    #     'metrics': metrics_results_dict,
    #
    #     'time_taken': format(time_taken, '.2f'),
    # }

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': None,
    }

    # Close disag_filename stores
    result.store.close()
    result_val.store.close()

    # Close Dataset files
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
out[b_id] = {}
start = time.time()

# cls_dict = {"Hart": Hart85()}
cls_dict = {
    "CO": CombinatorialOptimisation(),
    "FHMM": FHMM(),
    "Hart": Hart85()
}

elec = building.elec
mains = elec.mains()

train = DataSet(ds_path)
test = DataSet(ds_path)
split_point = datetime.date(2013, 7, 16)
train.set_window(end=split_point)
# test.set_window(start=split_point)
train_elec = train.buildings[b_id].elec
test_elec = test.buildings[b_id].elec
test_mains = test_elec.mains()

# AC elec
ac_elec_train = train_elec[('air conditioner', 1)]
ac_elec_test = test_elec[('air conditioner', 1)]

num_states_dict = {ac_elec_train: num_states}

# Finding top N appliances
top_k_train_list = top_k_dict[str(b_id)][:K]
print("Top %d list is " % (K), top_k_train_list)

top_k_train_elec = MeterGroup([