Example #1
def disaggregate_original_co(h5_input, h5_output, dataset_start_date_disag, dataset_end_date_disag, centroids=None):
    from nilmtk import DataSet, HDFDataStore
    import nilmtk.disaggregate as original_nilmtk
    ds = DataSet(h5_input)
    elec = ds.buildings[1].elec
    
    vampire_power_used_in_original = elec.mains().vampire_power()

    #Train
    plain_co = original_nilmtk.CombinatorialOptimisation()
    plain_co.train(elec)
    
    #Modify centroids manually
    if centroids is not None:            
        for i, model in enumerate(plain_co.model):
            instance = model['training_metadata'].instance()
            model['states'] = centroids[instance]
    
    
    #Disaggregate
    ds.set_window(start=dataset_start_date_disag, end=dataset_end_date_disag)
    elec = ds.buildings[1].elec
    output_plain_co = HDFDataStore(h5_output, 'w')
    plain_co.disaggregate(elec.mains(), output_plain_co)
    output_plain_co.close()
       
    return plain_co, vampire_power_used_in_original
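A minimal usage sketch for the function above; the file paths and date strings are placeholder assumptions, not taken from the original:

co_model, vampire_power = disaggregate_original_co(
    'redd.h5',            # assumed input dataset path
    'disag-co-out.h5',    # assumed output path
    '2011-05-01',         # assumed disaggregation start date
    '2011-05-14')         # assumed disaggregation end date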
Example #2
	def import_dataset(self, source_file, start_end):
		self.ds = DataSet(source_file)
		
		self.ds_train = DataSet(source_file)
		self.ds_train.set_window(end=start_end)
		
		self.ds_test = DataSet(source_file)
		self.ds_test.set_window(start=start_end)
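A brief usage sketch, assuming this method belongs to the NILM class shown in Example #43 and that start_end is a date string accepted by DataSet.set_window; everything before the split date becomes the training window and everything after it the test window:

nilm = NILM()
nilm.import_dataset('redd.h5', '30-4-2011')  # path and split date are placeholders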
Example #3
def code():

    # CONVERT THE REDD DATASET TO NILMTK'S HDF5 FORMAT
    from nilmtk.dataset_converters import convert_redd
    convert_redd('/data/REDD/low_freq', '/data/REDD/redd.h5')

    # IMPORT HDF5 FORMAT INTO NILMTK
    from nilmtk import DataSet
    redd = DataSet('/data/REDD/redd.h5')
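A short follow-up sketch (not in the original) to confirm what was loaded; nothing here is REDD-specific:

print(redd.buildings.keys())      # building instances in the dataset
elec = redd.buildings[1].elec     # MeterGroup for house 1
print(elec.appliances)            # appliance metadata for that house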
Example #4
	def __init__(self, in_filepath, out_filepath):
		print("Loading DataStore and Generating Dataset...")
		self.km = {}
		self.dataStore = HDFDataStore(in_filepath)
		self.dataSet = DataSet()
		self.dataSet.load(self.dataStore)
		self.outDataStore = HDFDataStore(out_filepath,'w')
		self.co = CombinatorialOptimisation()
		self.train_group = {}
		print("Data Properly Loaded!")
Example #5
def getMeterTargetGenerator(b, train_meterRef):
    trainYDS = DataSet(dsPathY)
    print('Stack train: ',
          train_meterRef.get_timeframe().start.date(), " - ",
          train_meterRef.get_timeframe().end.date())
    # trainYDS.set_window(start=train_meter.get_timeframe().start.date(), end=train_meter.get_timeframe().end.date())
    trainY_elec = trainYDS.buildings[b].elec
    trainY_meter = trainY_elec.submeters()[meter_key]
    # print(trainY_meter.sample_period())
    trainYGen = align_two_meters(train_meterRef, trainY_meter)
    return trainYGen
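A hedged sketch of consuming the returned generator, assuming the module-level dsPathY, meter_key, and train_meterRef used above are defined; align_two_meters is expected to yield time-aligned chunks for the reference and target meters:

trainYGen = getMeterTargetGenerator(1, train_meterRef)  # building 1 assumed
for chunk in trainYGen:
    print(chunk.shape)  # each chunk is assumed to be a time-aligned DataFrame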
Example #6
def groupmix_rlo_generator(dataset_loc, start_time, end_time, freq, occupancy,
                           co):
    building = 2
    label = []
    label_upper = []
    data = DataSet(dataset_loc)
    data.set_window(start=start_time, end=end_time)
    data_elec = data.buildings[building].elec
    for i in data_elec.submeters().instance():
        label.append(str(data_elec[i].label()).lower())
        label_upper.append(str(data_elec[i].label()).upper())
    train_elec_df = data_elec.dataframe_of_meters().resample(
        str(freq) + 'S').max().round(0)
    train_elec_df = train_elec_df.drop(train_elec_df.columns[[0, 1, 2]],
                                       axis=1)
    train_elec_df.columns = label
    states = get_states(co)
    group_mix, room_occ_num_people = groupmix_rlo(states, label_upper,
                                                  occupancy, train_elec_df)
    return group_mix, room_occ_num_people
Example #7
def getStackTrainGenerators(b, train_meterRef):
    trainXGen_list = []
    for path in dsPathsList[b]:
        train = DataSet(path)
        train_elec = train.buildings[b].elec
        train_meter = train_elec.submeters()[meter_key]
        # print('Stack train: ', train_meter.get_timeframe().start.date(), " - ", train_meter.get_timeframe().end.date())
        # Align the 'train_meterRef' with the X file (smaller). it's also a way to read the X meters chunk-by-chunk
        aligned_meters = align_two_meters(train_meterRef, train_meter)
        trainXGen_list.append(aligned_meters)
    return trainXGen_list
Example #8
 def plot_f_score(self, disag_filename):
     plt.figure()
     from nilmtk.metrics import f1_score
     disag = DataSet(disag_filename)
     disag_elec = disag.buildings[building].elec
     f1 = f1_score(disag_elec, test_elec)
     f1.index = disag_elec.get_labels(f1.index)
     f1.plot(kind='barh')
     plt.ylabel('appliance')
     plt.xlabel('f-score')
     plt.title(type(self.model).__name__)
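Note that the method above relies on module-level building and test_elec variables; a sketch of the surrounding setup under that assumption (path and building number are placeholders):

building = 1                               # assumed building instance
test = DataSet('redd.h5')                  # assumed test dataset path
test_elec = test.buildings[building].elec  # ground truth compared by f1_score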
Example #9
def plot_zoomed_original_predicted_energy_consumption():
    """
    Plots a zoomed time frame of the original prediction.
    """
    test = DataSet('../data/ukdale.h5')
    test.clear_cache()
    test.set_window(start="30-6-2013", end="15-7-2013")

    test_building = 1
    sample_period = 6
    meter_keys = ['kettle']

    test_elec = test.buildings[test_building].elec

    results_dir = '../results/UKDALE-ACROSS-BUILDINGS-RNN-lr=1e-05-2018-02-20-14-24-46'
    disag_filename = 'disag-out.h5'

    for key in meter_keys:
        # get predicted curve for the best epoch
        result = DataSet(os.path.join(results_dir, disag_filename))
        res_elec = result.buildings[test_building].elec
        predicted = res_elec[key]
        predicted = predicted.power_series(sample_period=sample_period)
        predicted = next(predicted)
        predicted.fillna(0, inplace=True)
        y1 = np.array(predicted)  # power
        x1 = np.arange(y1.shape[0])  # timestamps
        # The chosen time frame to zoom in
        x1 = x1[94000:102500]
        y1 = y1[94000:102500]

        ground_truth = test_elec[key]
        ground_truth = ground_truth.power_series(sample_period=sample_period)
        ground_truth = next(ground_truth)
        ground_truth.fillna(0, inplace=True)
        y2 = np.array(ground_truth)  # power
        x2 = np.arange(y2.shape[0])  # timestamps
        # The chosen time frame to zoom in
        x2 = x2[94000:102500]
        y2 = y2[94000:102500]

        fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True, sharey=True)
        ax1.plot(x1, y1, color='r', label='predicted')
        ax1.plot(x2, y2, color='b', label='ground truth')
        ax2.plot(x1, y1, color='r')
        ax3.plot(x2, y2, color='b')
        ax1.set_title('Appliance: {}'.format(key))
        plt.xticks(
            np.arange(94000, 102500, 2000),
            ('5-10-2013 12:00', '16:00', '20:00', '6-10-2013 00:00', '04:00'))
        fig.legend()
        fig.savefig(
            os.path.join(
                results_dir,
                'zoomed_original_predicted_vs_ground_truth_{}.png'.format(
                    key)))
Example #10
 def test_load(self):
     filename = join(data_dir(), 'energy.h5')
     ds = DataSet(filename)
     elec = ds.buildings[1].elec
     df = next(elec.load())
     self.assertEqual(len(df), 13)
     df = next(elec.load(chunksize=5))
     self.assertEqual(len(df), 5)
     df = next(elec.load(physical_quantity='energy'))
     self.assertEqual(len(df), 13)
     self.assertEqual(df.columns.levels, [['energy'], ['reactive']])
     df = next(elec.load(ac_type='active'))
     self.assertEqual(df.columns.levels, [['power'], ['active']])
Example #11
def get_disaggregation(device, total_aggregate):
    devices = ["fridge", "air conditioner", "washing machine"]
    if device not in devices:
        return None

    test = DataSet('iawe.h5')
    test_elec = test.buildings[1].elec
    test_mains = test_elec.mains().all_meters()[0]
    test_meter = test_elec.submeters()[device]

    df = next(test_meter.load(ac_type='active', sample_period=2592000))
    prediction = df['power'].values[0]

    print(df.head())
    return prediction
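A hypothetical call to the function above. Note that total_aggregate is accepted but never used in the body, and the 2592000-second (30-day) sample period collapses the appliance load into a single value:

prediction = get_disaggregation('fridge', total_aggregate=None)
print(prediction)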
Example #12
    def __init__(self,
                 paths,
                 bad_meters=None,
                 timeframe=None,
                 merge_shorter_gaps_then=None,
                 remove_shorter_then=None,
                 verbose=False):
        """ Creates an DatasetAnalysis object.
        
        Parameters
        ----------
        paths: str or [str]
            Paths to the datasets, which shall be analyzed.
        bad_meters: [str] (optional)
            Define some meters which shall be excluded as they are
            malicious.
        timeframe: pd.TimeFrame
            The region for which the analysis shall be performed.
            Todo: Should be made optional. Take whole timeframe then.
        merge_shorter_gaps_then: pd.Timedelta
            Merge sections which are separated by a gap smaller then this timedelta
        remove_shorter_then: pd.Timedelta
            Remove sections which are smaller then this timedelta
        verbose: bool
            Whether to return additional information.
        """
        if timeframe is None:
            raise Exception(
                "TimeFrame has to be set. None timeframe not yet supported.")

        if not isinstance(paths, list):
            paths = [paths]

        self.datasets = []
        for path in paths:
            if verbose:
                print("Load Dataset {0}.".format(path))
            self.datasets.append(DataSet(path))

        self.timeframe = timeframe

        self.bad_meters = bad_meters

        self._load_all_stats(timeframe,
                             verbose=verbose,
                             merge_shorter_gaps_then=merge_shorter_gaps_then,
                             remove_shorter_then=remove_shorter_then)
Example #13
def co(start_train, end_train, start_test, end_test, train_elec):

    #Start training
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    co = CombinatorialOptimisation()
    co.train(train_elec,
             ac_type='active',
             physical_quantity='power',
             sample_period=1)

    #Start disaggregating
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_co_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    co.disaggregate(elec.mains(),
                    output,
                    ac_type='active',
                    physical_quantity='power',
                    sample_period=1)
    output.close()
    dates_dict = {
        "start_train": start_train,
        "end_train": end_train,
        "start_test": start_test,
        "end_test": end_test
    }
    # write test and train timeframe into json file
    with open(disag_filename + ".json", 'w') as dates_file:
        json.dump(dates_dict, dates_file)

    #Calculate F1-Score
    disag = DataSet(disag_filename)
    disag_elec = disag.buildings[1].elec
    disag_elec.plot()
    plt.title("CO")
    plt.show()

    f1 = f1_score(disag_elec, train_elec)
    f1.index = disag_elec.get_labels(f1.index)
    f1.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    plt.title("CO")
    plt.show()
Example #14
def get_states(h5_files_path):
    # Read H5 file
    localhome = DataSet(h5_files_path)

    # Get mains
    elec = localhome.buildings[1].elec
    mains = elec.mains()

    # Train Hart's model
    h = Hart85()
    h.train(mains)
    pairs = h.pair_df

    # Get states with duration
    states = pd.DataFrame(pairs['T2 Time'] - pairs['T1 Time'], columns=['duration'])
    states['P'] = pairs['T1 Active']

    # return centroids (load models)
    return [h.centroids, states]
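A hypothetical usage of get_states above; the path is a placeholder. Hart's algorithm pairs on/off transitions, so states holds one row per paired event:

centroids, states = get_states('redd.h5')
print(centroids)       # per-cluster power levels found by Hart85
print(states.head())   # duration and active power of each paired event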
Example #15
def mle(start_train, end_train, start_test, end_test, train_elec):

    # #Start training
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    mle = maximum_likelihood_estimation.MLE()
    mle.sample_period = "1s"
    mle.train(train_elec)

    #Start disaggregating
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_mle_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    mle.disaggregate(elec.mains(), output)
    output.close()
    dates_dict = {
        "start_train": start_train,
        "end_train": end_train,
        "start_test": start_test,
        "end_test": end_test
    }
    # write test and train timeframe into json file
    with open(disag_filename + ".json", 'w') as dates_file:
        json.dump(dates_dict, dates_file)

    disag = DataSet(disag_filename)
    disag_elec = disag.buildings[1].elec
    disag_elec.plot()
    plt.title("MLE")
    plt.show()

    #Calculate F1-Score
    f1 = f1_score(disag_elec, train_elec)
    f1.index = disag_elec.get_labels(f1.index)
    f1.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    plt.title("FHMM")
    plt.show()
Example #16
def load_dataset(window_per_house, test_window, filename, meter_label,
                 train_building, test_building, **load_kwargs):

    #Load datasets
    train = DataSet(filename)
    test = DataSet(filename)

    #train.set_window(start=start_train, end=end_train)
    test.set_window(*test_window[test_building])

    # if only one house is used for training
    # train_y = train.buildings[train_building].elec[meter_label]
    # train_x = train.buildings[train_building].elec.mains()

    train_mainlist = []
    train_meterlist = []
    for building_id, building in train.buildings.items():
        if building_id in train_building:
            train.set_window(*window_per_house[building_id])
            y = building.elec[meter_label]
            x = building.elec.mains()
            train_mainlist.append(x.power_series_all_data(**load_kwargs))
            train_meterlist.append(y.power_series_all_data(**load_kwargs))


#     # multiple houses for training
#     train_meterlist = [train.buildings[i].elec[meter_label] for i in train_building]
#     train_mainlist = [train.buildings[i].elec.mains() for i in train_building]

    test_meterlist = test.buildings[test_building].elec[meter_label]
    test_mainlist = test.buildings[test_building].elec.mains()

    assert len(train_mainlist) == len(
        train_meterlist
    ), "The number of mains and appliance meters must be equal"

    return train_meterlist, train_mainlist, test_meterlist, test_mainlist
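A hypothetical invocation of load_dataset above; the windows, building numbers, and meter label are placeholder assumptions:

windows = {1: ('2013-04-12', None), 2: ('2013-05-22', None)}  # per-house train windows
test_windows = {5: ('2014-06-01', '2014-09-06')}              # per-house test windows
train_meters, train_mains, test_meter, test_mains = load_dataset(
    windows, test_windows, 'ukdale.h5', 'kettle',
    train_building=[1, 2], test_building=5, sample_period=6)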
Example #17
def process_nilmtk_h5(data_source: str, users_data, utility_appliance):

    from nilmtk import DataSet
    from pathlib import Path
    import pandas as pd
    import numpy as np
    import time
    import glob

    data_source = glob.glob(data_source + '*.h5')

    all_db_houses = {}

    if not users_data.exists():
        print("Loading Datasets")
        for data_source_path in data_source:

            if data_source_path in ("..\\data\\SynD.h5", "..\\data\\iawe.h5"):
                print('Skipping {}'.format(data_source_path))
                continue

            dt = DataSet(data_source_path)
            file_name = Path(data_source_path).name
            print("\n\n===>Pre-processing {} dataset".format(file_name))

            start = time.time()
            house_data_pair = preprocess_nilmtk_to_df(file_name, dt, utility_appliance)
            print("=>Time to process {} data: {}s ".format(data_source_path, time.time() - start))

            all_db_houses.update(house_data_pair)

        df = pd.DataFrame(all_db_houses, dtype=float)
        df.to_hdf("./processed_input_data.pkl", key='df', mode='w')

    else:
        df = pd.read_hdf("./processed_input_data.pkl", key='df')
        all_db_houses = df.to_dict('series')
    return all_db_houses
Example #18
def main():
    #Load data
    gjw = DataSet("C:/Users/GJWood/nilm_gjw_data/HDF5/nilm_gjw_data.hdf5")
    print('loaded ' + str(len(gjw.buildings)) + ' buildings')
    
    #Examine metadata
    building_number = 1
    print_dict(gjw.buildings[building_number].metadata) #metadata for house
    elec = gjw.buildings[building_number].elec
    print(elec.appliances)
    
    #List & plot coherent blocks of meter readings
    mains = elec.mains()
    mains_good_sections = elec.mains().good_sections()
    print(elec.mains().good_sections())
    mains_good_sections.plot()
    
    #Examine the power data
    print(mains.available_power_ac_types())
    mains_energy = mains.total_energy(sections=mains_good_sections)
    print(mains_energy)
    whole_house = nilmtk.global_meter_group.select()
    print(whole_house.select(building=1).total_energy())
    whole_house.plot()
Example #19
    zip_ref = zipfile.ZipFile('dataset/ds.zip', 'r')
    zip_ref.extractall('dataset')
    zip_ref.close()
    os.remove("dataset/ds.zip")
    shutil.rmtree("dataset/disag_estimates", ignore_errors=True)
    os.makedirs("dataset/trainsets", exist_ok=True)
    print("Done downloading")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python gen.py ukdale_path")
        exit()

    conf_files = os.listdir("appconf")
    ds = DataSet(sys.argv[1])
    for app in conf_files:
        filename = "appconf/{}".format(app)
        with open(filename) as data_file:
            conf = json.load(data_file)

        if not os.path.exists("dataset"):
            download_dataset()
        os.makedirs(conf['save_path'], exist_ok=True)

        # Create trainset for meter
        print(conf["nilmtk_key"])
        house_keys = conf['train_buildings']
        window_size = conf['lookback']
        all_x_train = np.empty((train_size * len(house_keys), window_size, 1))
        all_y_train = np.empty((train_size * len(house_keys), ))
Example #20
from flask import Flask, render_template

# # Data converter (return : DataSet)
#
# ---

# In[ ]:

# Convert the data
# .dat ==> .h5
convert_redd('C:\\Users\\dlsrk\\Desktop\\nilm\\low_freq',
             'C:\\Users\\dlsrk\\Desktop\\nilm\\data\\redd.h5')
# read the converted .h5 data
# redd = DataSet('C:\\Users\\Kim-Taesu\\Documents\\nilm\\data\\redd.h5')
redd = DataSet('C:\\Users\\dlsrk\\Desktop\\nilm\\data\\redd.h5')


# data load function
def getData(inputPath, convertOutputPath):
    convert_redd(inputPath, convertOutputPath)
    return DataSet(convertOutputPath)


# # Prepare the data for visualization
#
# ---

# ### Variables

# > buildings_count : total number of buildings (int)
Example #22
from __future__ import print_function, division
import time

from matplotlib import rcParams
import matplotlib.pyplot as plt

from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from windowgrudisaggregator import WindowGRUDisaggregator
import metrics

print("========== OPEN DATASETS ============")
meterList = []
mainsList = []
test = DataSet('ukdale.h5')
# test = DataSet('redd.h5')
# test.set_window(start='2016-04-01',end='2016-05-01')
test_building_list = [2, 3, 4, 5]  #[2,5]
sample_period = 6
meter_key = 'kettle'

with open('baseTrainSetsInfo_' + meter_key, 'r') as f:
    for line in f:
        toks = line.split(',')
        train = DataSet(toks[0])
        print(toks[2], '-', toks[3])
        train.set_window(start=toks[2], end=toks[3])
        train_elec = train.buildings[int(toks[1])].elec
        meterList.append(train_elec.submeters()[meter_key])
        mainsList.append(train_elec.mains())

disaggregator = WindowGRUDisaggregator(window_size=100)
Example #23
#initialize key map for building 1
kmap = Key_map()

#set the disaggregated plot here
disag_apl = 'fridge'
disag_key = kmap.get_key(disag_apl)

#First we must load in the converted REDD Dataset
print ("Loading DataSet.....")

#declare datastore and load converted HDF that stores REDD data
r_datastore = HDFDataStore("C:/NILM/Data_Sets/redd_data.h5")

#declare dataset object to work with and load datastore into Dataset
r_dataset = DataSet()
r_dataset.load(r_datastore)

#output successful loading of data to user
print("DataSet Successfully Loaded!")

#now we take the data and eliminate all sections with no samples
print("Conditioning Data... \n")

#load the metergroup from building one (house1 in REDD)
r_elec = r_dataset.buildings[1].elec


print("\nConditioning Finished.")

#now we must train the disaggregation model to ensure accuracy
Example #24
from __future__ import print_function, division
from nilmtk import DataSet, TimeFrame, MeterGroup
import plot_config
import seaborn as sns
from matplotlib.dates import DateFormatter, HourLocator
import matplotlib.pyplot as plt
import pytz
from os.path import join

from pylab import rcParams
rcParams.update({'figure.figsize': plot_config._mm_to_inches(180, 120)})

print("plotting good sections...")

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
# dataset.set_window("2013-06-01", "2013-06-02") 
dataset.set_window(None, None) 

axes = dataset.plot_good_sections(color=plot_config.BLUE)

for i, ax in enumerate(axes):
    plot_config.format_axes(ax, tick_size=2)
    ax.set_title('House {:d}'.format(i+1), x=0.05, y=.4, va='top')    
    ax.set_ylabel('Meter' if i == 1 else '', 
                  rotation=0, ha='center', va='center', y=.4)


plt.savefig(join(plot_config.FIG_DIR, '03_good_sections.eps'), 
            bbox_inches='tight')
Example #25
        },
        {
            'type': DimshuffleLayer,
            'pattern': (0, 2, 1)  # back to (batch, time, features)
        }
    ]
    net = Net(**net_dict_copy)
    return net


os.chdir('/data/dk3810/figures/e446o/')
net = exp_o('e446o')
net.compile()
net.load_params(50000, '/data/dk3810/figures/e446o/e446o.hdf5')

dataset = DataSet('/data/dk3810/ukdale.h5')
dataset.set_window("2013-06-01", "2014-07-01")
elec = dataset.buildings[1].elec
elec.use_alternative_mains()
mains = elec.mains().power_series_all_data()
washer = elec['washer dryer'].power_series_all_data()

N = 131072
estimates = disaggregate(mains.values[:N], net)

fig, axes = plt.subplots(3, 1, sharex=True)
axes[0].plot(mains[:N].index, estimates)
axes[1].plot(mains[:N].index, mains[:N])
axes[2].plot(washer[:N].index, washer[:N])

Example #26
from nilmtk import DataSet
import plot_config
import seaborn as sns
from matplotlib.dates import DateFormatter, HourLocator
from datetime import timedelta
import matplotlib.pyplot as plt
import pytz
import pandas as pd
from os.path import join

from pylab import rcParams

rcParams.update({'figure.figsize': plot_config._mm_to_inches(180, 100)})

UNIT = 'kW'

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
TZ_STRING = dataset.metadata['timezone']
TZ = pytz.timezone(TZ_STRING)
elec = dataset.buildings[1].elec
submeters = elec.meters_directly_downstream_of_mains()

# Select appliances used in top K plot
APPLIANCES = [
    'fridge freezer', 'HTPC', 'dish washer', 'washer dryer', 'kettle'
]
selected_meters = [submeters[appliance] for appliance in APPLIANCES]
remainder = []
for meter in submeters.meters:
    for appliance in APPLIANCES:
        if meter.matches_appliances({'type': appliance}):
            break
    else:
        remainder.append(meter)
Example #27
from __future__ import print_function, division
from nilmtk import DataSet, HDFDataStore
from nilmtk.disaggregate import fhmm_exact
from nilmtk.metrics import f1_score
from os.path import join
import matplotlib.pyplot as plt
"""
This file replicates issue #376 (which should now be fixed)
https://github.com/nilmtk/nilmtk/issues/376
"""

data_dir = '/data'
building_number = 3
disag_filename = join(data_dir, 'disag-fhmm' + str(building_number) + '.h5')

data = DataSet(join(data_dir, 'redd.h5'))
print("Loading building " + str(building_number))
elec = data.buildings[building_number].elec

top_train_elec = elec.submeters().select_top_k(k=5)
fhmm = fhmm_exact.FHMM()
fhmm.train(top_train_elec)

output = HDFDataStore(disag_filename, 'w')
fhmm.disaggregate(elec.mains(), output)
output.close()

### f1score fhmm
disag = DataSet(disag_filename)
disag_elec = disag.buildings[building_number].elec
Example #28
from __future__ import print_function, division
from nilmtk import DataSet

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')

window_per_house = {1: ("2013-04-12", None), 
                    2: ("2013-05-22", None), 
                    3: (None, None), 
                    4: (None, None), 
                    5: (None, "2014-09-06")}

descriptions = []
for building_id, building in dataset.buildings.items():
    print("*********** House", building_id, "*************")
    dataset.set_window(*window_per_house[building_id])
    description = building.describe()
    descriptions.append(description)
    print(description)
    print()
Example #29
print("Disaggregating=========================================================")
output = HDFDataStore(h5_disag, 'w')
loc.dataset.set_window(start=dataset_start_date_disag, end=dataset_end_date_disag)
co.disaggregate(loc.elec.mains(), output, location_data=loc, baseline=vampire_power_in_original, resample_seconds=60)
output.close()

time_start_metrics = time.time()
print("\nTotal elapsed: %s seconds ---" % (time_start_metrics - start_time))
print("Section Disaggregation: %s seconds ---\n" % (time_start_metrics - time_start_disag))



#METRICS=======================================================================
print("Calculating metrics====================================================")
disag  = DataSet(h5_disag)
disago = DataSet(h5_disag_redd_original)

disago.metadata['timezone'] = disag.metadata['timezone']
disago.set_window(start=dataset_start_date_disag, end=dataset_end_date_disag)

disag_elec  = disag.buildings[1].elec
disago_elec = disago.buildings[1].elec

disag_predictions_original = utils.get_disaggregation_predictions(disago_elec,
                                          vampire_power_in_original, 
                                          start_date = dataset_start_date_disag, 
                                          end_date = dataset_end_date_disag)
disag_predictions_location = utils.get_disaggregation_predictions(disag_elec,
                                          vampire_power_in_original, 
                                          start_date = dataset_start_date_disag, 
                                          end_date = dataset_end_date_disag)                                          
Example #30
class REDD_Data(object):

	'''
	REDD_Data is an object designed to abstract the lower-level commands of
	the NILMTK software package, with a focus on the REDD dataset. It is
	designed to allow rapid experimentation and disaggregation compared to
	setting the package up from scratch.

	This class requires the following for proper usage:
	- NILMTK package: https://github.com/nilmtk
	- REDD dataset (converted to .h5): redd.csail.mit.edu
	- Various dependencies (that NILMTK also requires), most of which can be
	  installed through Anaconda: continuum.io/downloads


	Parameters
	-----------
	in_filepath:		Filepath of converted REDD dataset (in .h5 format)
	out_filepath:		filepath to place output disaggregation dataset (in .h5 format)

	Attributes
	-----------
	km: Key_Map Object
		initializes the key_map object which allows mapping a meter's
		appliance name to its specific .H5 key.

	dataStore: NILMTK HDFDataStore Object
		the HDFDataStore that will contain the converted REDD DataSet.

	dataSet: NILMTK DataSet Object
		the DataSet object that is generated from the REDD DataStore (self.dataStore)		

	outDataStore: NILMTK HDFDataStore Object
		the HDFDataStore that will contain the disaggregated dataset.

	co: NILMTK CombinatorialOptimisation object
		the disaggregation model object that will be trained and will disaggregate the 
		working dataset

	train_group: NILMTK MeterGroup object
		the MeterGroup object that is used to train the disaggregation model (self.co)

	'''
	def __init__(self, in_filepath, out_filepath):
		print("Loading DataStore and Generating Dataset...")
		self.km = {}
		self.dataStore = HDFDataStore(in_filepath)
		self.dataSet = DataSet()
		self.dataSet.load(self.dataStore)
		self.outDataStore = HDFDataStore(out_filepath,'w')
		self.co = CombinatorialOptimisation()
		self.train_group = {}
		print("Data Properly Loaded!")


	def train_disag_model(self, building_inst, use_topk=False, k=5):
		'''
		Function trains the disaggregation model using a selected MeterGroup.

		Parameters
		-----------

		building_inst: 	the instance # of the building that you wish to grab the 
					   	training group from.

		use_topk:		true if you wish to only use the top k most energy-intensive
						appliances to train the model, false if you wish to use all
						appliances.

		k:				the # of appliances you wish to use (if use_topk = True)

		'''

		print("Training CO Disaggregation Model using given metergroup...")

		if (building_inst <= 6) and (building_inst > 0):
			#Select appropriate meter group to train with
			if use_topk:
				self.train_group = self.dataSet.buildings[building_inst].elec.select_top_k(k)
			else:
				self.train_group = self.dataSet.buildings[building_inst].elec

			self.co.train(self.train_group)
			print("CO Disaggregation Model Successfully Trained!")

		else:
			print("Error: Please select a building_inst of 1-6.")
			print("Model unsuccessfully trained.")


	def load_disag_model(self, filepath):
		'''
		Function loads the disaggregation model from a file.

		Parameters
		-----------

		filepath:	exact filepath of the model file.

		'''
		print("Loading CO Disaggreation Model...")
		self.co.import_model(filepath)
		print("Model Sucessfully Loaded!")
		

	def save_disag_model(self,filepath):
		'''
		Function saves the disaggregation model to a file.

		Parameters
		-----------

		filepath:	exact filepath of the model file.

		'''
		print("Saving CO Disaggregation Model...")
		self.co.export_model(filepath)
		print("Model Sucessfully Saved!")


	def disaggregate(self,building_inst):
		'''
		Function will disaggregate the mains MeterGroup of the passed building 
		instance, and save this to the self.outDataStore object.

		Parameters
		-----------

		building_inst:	instance # of the building mains you wish to disaggregate.

		'''
		print("Disaggregating Building Mains...")		
		self.co.disaggregate(self.dataSet.buildings[building_inst].elec.mains(),self.outDataStore)
		print("Mains sucessfully disaggregated!")


	def close(self):
		'''
		Function closes all open DataStores being used by the program.

		'''
		print("Closing DataStores...")
		self.dataStore.close()
		self.outDataStore.close()
		print("Output DataStores Sucessfully Closed")
		

	'''
	All Plot Functions below are a WORK IN PROGRESS!-----------------------------------
	Documentation will be provided upon completion.------------------------------------

	'''
		

	def plot_disag_apl(self,inst,appliance,t1="",t2=""):
		self.km = Key_Map(inst)
		plot_series(self.outDataStore.store.get(self.km.get_key(appliance))[t1: t2])
		plt.title("Disaggregated " + appliance.capitalize()+" Energy") 
		plt.show()

	
	def show_plots(self):
		plt.show()


	def building_plot_all(self,building_inst,t1,t2):
		self.dataSet.buildings[building_inst].elec.plot(t1,t2)
		plt.title("Building "+str(building_inst)+" Energy per Appliance")
		plt.ylabel('Power [W]')
		plt.xlabel('Hour')


	def plot_redd_mains_data(self, inst=1, t1 = "", t2 = ""):
		self.km = Key_Map(inst)
		series1 = self.dataStore.store.get(self.km.get_key('mains1'))[t1:t2]
		series2 = self.dataStore.store.get(self.km.get_key('mains2'))[t1:t2]
		plot_series(series1 + series2)
		plt.title("Building "+str(inst)+" Mains Energy")
		plt.show()
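A hypothetical end-to-end use of the REDD_Data class above; both file paths are placeholders:

rd = REDD_Data('redd_data.h5', 'redd_output.h5')   # assumed input/output paths
rd.train_disag_model(building_inst=1, use_topk=True, k=5)
rd.disaggregate(building_inst=1)
rd.close()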
Example #31
from __future__ import print_function, division
from nilmtk import DataSet
import plot_config
import seaborn as sns
import matplotlib.pyplot as plt
from os.path import join
from pylab import rcParams

print("plotting energy bar...")

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2013-04-01", None)
elec = dataset.buildings[1].elec

submeters = elec.meters_directly_downstream_of_mains()
grouped = submeters.groupby('type')
top_k = grouped.select_top_k(group_remainder=False)
try:
    top_k['HTPC'].name = "Home theatre PC"
except KeyError:
    pass

############
# Plot
rcParams.update({'figure.figsize': plot_config._mm_to_inches(70, 90)})
ax = top_k.plot(kind='energy bar', mains=elec.mains())
sns.despine(ax=ax, bottom=True, left=True)

plt.tight_layout()

plt.draw()
Example #32
from __future__ import print_function, division
from nilmtk import DataSet, TimeFrame
from nilmtk.elecmeter import ElecMeterID
import pandas as pd

ukdale = DataSet('/data/mine/vadeec/merged/ukdale.h5')
# TZ = 'Europe/London'
# ukdale.store.window = TimeFrame(pd.Timestamp("2014-01-01 00:00", tz=TZ),
#                                 pd.Timestamp("2014-01-02 00:00", tz=TZ))

ukdale.set_window("2013-04-01", "2013-05-01")

elec = ukdale.buildings[1].elec
meter = elec[2]
# ukdale.plot_good_sections()

# best = meter._convert_physical_quantity_and_ac_type_to_cols(ac_type='best')
# elec2 = ukdale.buildings[2].elec
# elec.use_alternative_mains()
# elec2.use_alternative_mains()
# submeters2 = elec2.submeters()

# gen = submeters2.load()
# df = next(gen)

# gen = elec.load(verbose=True) 
# df = gen.next()
# corr = elec.correlation_of_sum_of_submeters_with_mains(verbose=True)

# prop = elec.proportion_of_energy_submetered()
Example #33
# to add:
#			1) load REDD data from database (SQL interface)*
#
#			*Cannot be implemented until database is setup in environment

# Verify input appliance exists in building
km = Key_Map(1)

# verify a real appliance has been entered
if km.is_in_map(disag_appliance) == False:
    sys.exit(
        "An incorrect appliance name has been entered. Please ensure the entered name is exactly correct."
    )

redd_data = DataSet("/home/mike/workspace/data/redd_data.h5")

# load mains of the building
building_mains = redd_data.buildings[1].elec.mains()

#train disaggregation set
co = CombinatorialOptimisation()
training_set = redd_data.buildings[1].elec
co.train(training_set)

#set output datastore
outputData = HDFDataStore("/home/mike/workspace/data/redd_output.h5", 'w')

#disaggregate
co.disaggregate(building_mains, outputData)
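A likely follow-up that the snippet omits: close the output datastore once disaggregation finishes.

outputData.close()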
Example #34
	device_family.append( nilm.select_appliances_by_type("washer dryer")[0] )
	device_family.append( nilm.select_appliances_by_type("electric space heater")[0] )

	#top_devs = nilm.select_top_consuming_appliances_for_training(6, 5)
	print(device_family)
	return MeterGroup(device_family), device_family
	
def train_group(group):
	nilm.train_nilm_model(group, sample_period=60)




# Example at https://github.com/nilmtk/nilmtk/blob/master/docs/manual/user_guide/disaggregation_and_metrics.ipynb

train = DataSet('/home/andrea/Desktop/redd.h5')
test = DataSet('/home/andrea/Desktop/redd.h5')

train.set_window(end="30-4-2011")
test.set_window(start="30-4-2011")

train_elect = train.buildings[1].elec
test_elec = test.buildings[1].elec
best_devices = test_elec.submeters().select_top_k(k=5)

test_elec.mains().plot()

fhmm = fhmm_exact.FHMM()
fhmm.train(best_devices, sample_period=60)

# Save disaggregation to external dataset
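The snippet cuts off before the save step; a hedged sketch of what typically follows in the linked NILMTK example, assuming HDFDataStore is imported and using a placeholder output path:

output = HDFDataStore('redd_disag_fhmm.h5', 'w')   # assumed output path
fhmm.disaggregate(test_elec.mains(), output, sample_period=60)
output.close()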
Example #35
from __future__ import print_function, division
import time

from matplotlib import rcParams
import matplotlib.pyplot as plt

from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.elecmeter import ElecMeterID
import metrics
from rnndisaggregator import RNNDisaggregator

print("========== OPEN DATASETS ============")
train = DataSet('../../Datasets/REDD/redd.h5')
train.set_window(end="30-4-2011")
test = DataSet('../../Datasets/REDD/redd.h5')
test.set_window(start="30-4-2011")

train_building = 1
test_building = 1
sample_period = 6
meter_key = 'fridge'
train_elec = train.buildings[train_building].elec
test_elec = test.buildings[test_building].elec

train_meter = train_elec.submeters()[meter_key]
train_mains = train_elec.mains().all_meters()[0]
test_mains = test_elec.mains().all_meters()[0]
rnn = RNNDisaggregator()

start = time.time()
print("========== TRAIN ============")
from __future__ import print_function, division
from nilmtk import DataSet, TimeFrame, MeterGroup
import plot_config
import seaborn as sns
from matplotlib.dates import DateFormatter, HourLocator
from matplotlib.ticker import MaxNLocator
import matplotlib.pyplot as plt
import pytz
from os.path import join

from pylab import rcParams
rcParams.update({'figure.figsize': plot_config._mm_to_inches(180, 100)})

print("plotting appliance power histograms...")

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2013-04-26", None) # ignore tungsten kitchen lamps
elec = dataset.buildings[1].elec

fig, axes = plt.subplots(3, 3)
meter_keys = ['fridge freezer', 'kettle', 'toaster', 
              'vacuum cleaner', 'television', 'oven',
              'laptop computer', 'computer monitor', ('light', 1)]
kwargs_per_meter = {'range': [(  2,  275), (2200, 2460), (1480, 1650), 
                              ( 400, 2200), (  80,  140), (None,   60),
                              (   2,   65), (  30,   85), (35, 290)]}

axes = elec.plot_multiple(axes, meter_keys, 'plot_power_histogram', 
                          kwargs_per_meter, 
                          plot_kwargs={'color': plot_config.BLUE})
Example #37
from __future__ import print_function, division
from nilmtk import DataSet, TimeFrame, MeterGroup
import plot_config
import seaborn as sns
from matplotlib.ticker import MultipleLocator
import matplotlib.pyplot as plt
import pytz
from os.path import join

from pylab import rcParams
rcParams.update({'figure.figsize': plot_config._mm_to_inches(88, 150)})

print("plotting activity histograms...")

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2013-03-01", None)#"2013-08-01")
elec = dataset.buildings[1].elec

N = 9
fig, axes = plt.subplots(N, 1)
meter_keys = ['boiler', 'kettle', 'toaster', 'oven',
              'vacuum cleaner', 'television', 
              'laptop computer', 'computer monitor', ('light', 1)]

axes = elec.plot_multiple(axes, meter_keys, 'plot_activity_histogram')

# Formatting
for i, ax in enumerate(axes):
    ax.grid(False)
    ax.set_yticks([])
    ax.set_ylabel('')
Example #38
for b_id, building in building_chunk_items[home_group]:

    try:
        if b_id in existing_files_names:
            print("Skipping", b_id)
            continue
        print(b_id)

        out[b_id] = {}
        start = time.time()
        #cls_dict = {"Hart":Hart85()}
        cls_dict = {"CO": CombinatorialOptimisation(), "FHMM": FHMM(), "Hart": Hart85()}
        elec = building.elec
        mains = elec.mains()

        train = DataSet(ds_path)
        test = DataSet(ds_path)
        split_point = datetime.date(2013, 7, 16)
        train.set_window(end=split_point)
        #test.set_window(start=split_point)
        train_elec = train.buildings[b_id].elec
        test_elec = test.buildings[b_id].elec
        test_mains = test_elec.mains()

        # AC elec
        ac_elec_train = train_elec[('air conditioner', 1)]
        ac_elec_test = test_elec[('air conditioner', 1)]

        num_states_dict = {ac_elec_train: num_states}

Example #39
def random_forest(dataset_path, train_building, train_start, train_end,
                  val_building, val_start, val_end, test_building, test_start,
                  test_end, meter_key, sample_period, n_estimators, criterion,
                  min_sample_split):

    # Start tracking time
    start = time.time()

    # Prepare dataset and options
    dataset_path = dataset_path
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)
    train_building = train_building
    val_building = val_building
    test_building = test_building
    meter_key = meter_key

    sample_period = sample_period

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    try:  # REDD
        X_train = next(train_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_train = next(
            train_elec[meter_key].load(sample_period=sample_period)).fillna(0)
        X_test = next(test_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_test = next(
            test_elec[meter_key].load(sample_period=sample_period)).fillna(0)
        X_val = next(val_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_val = next(
            val_elec[meter_key].load(sample_period=sample_period)).fillna(0)

        # Intersect the two dataframes' indices - to keep the same training instances in X and y
        # Train set
        intersect_index = pd.Index(
            np.sort(list(set(X_train.index).intersection(set(y_train.index)))))
        X_train = X_train.loc[intersect_index]
        y_train = y_train.loc[intersect_index]
        # Test set
        intersect_index = pd.Index(
            np.sort(list(set(X_test.index).intersection(set(y_test.index)))))
        X_test = X_test.loc[intersect_index]
        y_test = y_test.loc[intersect_index]
        # Val set
        intersect_index = pd.Index(
            np.sort(list(set(X_val.index).intersection(set(y_val.index)))))
        X_val = X_val.loc[intersect_index]
        y_val = y_val.loc[intersect_index]

        # Get values from numpy array
        X_train = X_train.values
        y_train = y_train.values
        X_test = X_test.values
        y_test = y_test.values
        X_val = X_val.values
        y_val = y_val.values
    except AttributeError:  # UKDALE
        X_train = train_elec.mains().power_series_all_data(
            sample_period=sample_period).fillna(0)
        y_train = next(train_elec[meter_key].power_series(
            sample_period=sample_period)).fillna(0)
        X_test = test_elec.mains().power_series_all_data(
            sample_period=sample_period).fillna(0)
        y_test = next(test_elec[meter_key].power_series(
            sample_period=sample_period)).fillna(0)

        # Intersect the two dataframes' indices - to keep the same training instances in X and y
        # Train set
        intersect_index = pd.Index(
            np.sort(list(set(X_train.index).intersection(set(y_train.index)))))
        X_train = X_train.loc[intersect_index]
        y_train = y_train.loc[intersect_index]
        # Test set
        intersect_index = pd.Index(
            np.sort(list(set(X_test.index).intersection(set(y_test.index)))))
        X_test = X_test.loc[intersect_index]
        y_test = y_test.loc[intersect_index]

        # X_train = X_train.reshape(-1, 1)
        # y_train = y_train.reshape(-1, 1)
        # X_test = X_test.reshape(-1, 1)
        # y_test = y_test.reshape(-1, 1)

        # Get values from numpy array - Avoid server error
        X_train = X_train.values.reshape(-1, 1)
        y_train = y_train.values.reshape(-1, 1)
        X_test = X_test.values.reshape(-1, 1)
        y_test = y_test.values.reshape(-1, 1)

    # Model settings and hyperparameters
    min_samples_split = min_sample_split
    rf_regr = RandomForestRegressor(n_estimators=n_estimators,
                                    criterion=criterion,
                                    min_samples_split=min_samples_split,
                                    random_state=0)

    # print("========== TRAIN ============")
    rf_regr.fit(X_train, y_train)

    # print("========== DISAGGREGATE ============")
    y_val_predict = rf_regr.predict(X_val)
    y_test_predict = rf_regr.predict(X_test)

    # print("========== RESULTS ============")
    # me = Metrics(state_boundaries=[10])
    on_power_threshold = train_elec[meter_key].on_power_threshold()
    me = Metrics(state_boundaries=[on_power_threshold])
    val_metrics_results_dict = Metrics.compute_metrics(me, y_val_predict,
                                                       y_val.flatten())
    test_metrics_results_dict = Metrics.compute_metrics(
        me, y_test_predict, y_test.flatten())

    # end tracking time
    end = time.time()

    time_taken = end - start  # in seconds

    # model_result_data = {
    #     'algorithm_name': 'Random Forest Regressor',
    #     'datapath': dataset_path,
    #     'train_building': train_building,
    #     'train_start': str(train_start.date()) if train_start != None else None ,
    #     'train_end': str(train_end.date()) if train_end != None else None ,
    #     'test_building': test_building,
    #     'test_start': str(test_start.date()) if test_start != None else None ,
    #     'test_end': str(test_end.date()) if test_end != None else None ,
    #     'appliance': meter_key,
    #     'sampling_rate': sample_period,
    #
    #     'algorithm_info': {
    #         'options': {
    #             'epochs': None
    #         },
    #         'hyperparameters': {
    #             'sequence_length': None,
    #             'min_sample_split': min_sample_split,
    #             'num_layers': None
    #         },
    #         'profile': {
    #             'parameters': None
    #         }
    #     },
    #
    #     'metrics':  metrics_results_dict,
    #
    #     'time_taken': format(time_taken, '.2f'),
    # }

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': None,
    }

    # Close Dataset files
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
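A hypothetical invocation of random_forest above; the dates, building numbers, and hyperparameter values are placeholder assumptions:

results = random_forest(
    'redd.h5', 1, '2011-04-18', '2011-04-30',   # train window
    1, '2011-04-30', '2011-05-07',              # validation window
    1, '2011-05-07', '2011-05-14',              # test window
    'fridge', 6,                                # meter key, sample period
    n_estimators=100, criterion='mse', min_sample_split=2)
print(results['test_metrics'])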
Example #40
from nilmtk import DataSet, TimeFrame, MeterGroup
import plot_config
import seaborn as sns
from matplotlib.dates import DateFormatter, HourLocator
from datetime import timedelta
import matplotlib.pyplot as plt
import pytz
import pandas as pd
from os.path import join

from pylab import rcParams
rcParams.update({'figure.figsize': plot_config._mm_to_inches(180, 100)})

UNIT = 'kW'

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
TZ_STRING = dataset.metadata['timezone']
TZ = pytz.timezone(TZ_STRING)
elec = dataset.buildings[1].elec
submeters = elec.meters_directly_downstream_of_mains()

# Select appliances used in top K plot
APPLIANCES = ['fridge freezer', 'HTPC', 'dish washer', 'washer dryer', 'kettle']
selected_meters = [submeters[appliance] for appliance in APPLIANCES]
remainder = []
for meter in submeters.meters:
    for appliance in APPLIANCES:
        if meter.matches_appliances({'type': appliance}):
            break
    else:
        remainder.append(meter)
Example #41
def fcnn(dataset_path, train_building, train_start, train_end, val_building,
         val_start, val_end, test_building, test_start, test_end, meter_key,
         sample_period, num_epochs, patience, num_layers, optimizer,
         learning_rate, dropout_prob, loss):

    # Start tracking time
    start = time.time()

    # Prepare dataset and options
    dataset_path = dataset_path
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)
    train_building = train_building
    val_building = val_building
    test_building = test_building
    meter_key = meter_key

    sample_period = sample_period

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    try:  # REDD
        X_train = next(train_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_train = next(
            train_elec[meter_key].load(sample_period=sample_period)).fillna(0)
        X_test = next(test_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_test = next(
            test_elec[meter_key].load(sample_period=sample_period)).fillna(0)
        X_val = next(val_elec.mains().all_meters()[0].load(
            sample_period=sample_period)).fillna(0)
        y_val = next(
            val_elec[meter_key].load(sample_period=sample_period)).fillna(0)

        # Intersect the two dataframes' indices - to keep the same training instances in X and y
        # Train set
        intersect_index = pd.Index(
            np.sort(list(set(X_train.index).intersection(set(y_train.index)))))
        X_train = X_train.loc[intersect_index]
        y_train = y_train.loc[intersect_index]
        # Test set
        intersect_index = pd.Index(
            np.sort(list(set(X_test.index).intersection(set(y_test.index)))))
        X_test = X_test.loc[intersect_index]
        y_test = y_test.loc[intersect_index]
        # Val set
        intersect_index = pd.Index(
            np.sort(list(set(X_val.index).intersection(set(y_val.index)))))
        X_val = X_val.loc[intersect_index]
        y_val = y_val.loc[intersect_index]

        # Get values from numpy array
        X_train = X_train.values
        y_train = y_train.values
        X_test = X_test.values
        y_test = y_test.values
        X_val = X_val.values
        y_val = y_val.values
    except AttributeError:  # UKDALE
        X_train = train_elec.mains().power_series_all_data(
            sample_period=sample_period).fillna(0)
        y_train = next(train_elec[meter_key].power_series(
            sample_period=sample_period)).fillna(0)
        X_test = test_elec.mains().power_series_all_data(
            sample_period=sample_period).fillna(0)
        y_test = next(test_elec[meter_key].power_series(
            sample_period=sample_period)).fillna(0)

        # Intersect the two dataframes' indices - to keep the same training instances in X and y
        # Train set
        intersect_index = pd.Index(
            np.sort(list(set(X_train.index).intersection(set(y_train.index)))))
        X_train = X_train.loc[intersect_index]
        y_train = y_train.loc[intersect_index]
        # Test set
        intersect_index = pd.Index(
            np.sort(list(set(X_test.index).intersection(set(y_test.index)))))
        X_test = X_test.loc[intersect_index]
        y_test = y_test.loc[intersect_index]

        # X_train = X_train.reshape(-1, 1)
        # y_train = y_train.reshape(-1, 1)
        # X_test = X_test.reshape(-1, 1)
        # y_test = y_test.reshape(-1, 1)

        # Get values from numpy array - Avoid server error
        X_train = X_train.values.reshape(-1, 1)
        y_train = y_train.values.reshape(-1, 1)
        X_test = X_test.values.reshape(-1, 1)
        y_test = y_test.values.reshape(-1, 1)

    # Model settings and hyperparameters
    layers_array = array_layers(num_layers)
    fc_model = build_fc_model(layers_array, dropout_prob)
    # adam = Adam(lr = 1e-5)
    optimizer = optimizer(lr=learning_rate)
    fc_model.compile(loss=loss, optimizer=optimizer)

    # print("========== TRAIN ============")
    #checkpointer = ModelCheckpoint(filepath="results/fcnn-model-{}-{}epochs.h5".format(meter_key, num_epochs), verbose=0, save_best_only=True)

    # Early stopping when validation loss increases
    earlystop = EarlyStopping(monitor='val_loss',
                              min_delta=0,
                              patience=patience,
                              verbose=0,
                              mode='auto')

    hist_fc_ = fc_model.fit(X_train,
                            y_train,
                            batch_size=512,
                            verbose=1,
                            nb_epoch=num_epochs,
                            validation_split=0.2,
                            shuffle=True,
                            callbacks=[earlystop])  #  , checkpointer])

    # Get number of earlystop epochs
    num_epochs = earlystop.stopped_epoch if earlystop.stopped_epoch != 0 else num_epochs

    # print("========== DISAGGREGATE ============")
    val_pred_fc = fc_model.predict(X_val).reshape(-1)
    test_pred_fc = fc_model.predict(X_test).reshape(-1)

    # print("========== RESULTS ============")
    # me = Metrics(state_boundaries=[10])
    on_power_threshold = train_elec[meter_key].on_power_threshold()
    me = Metrics(state_boundaries=[on_power_threshold])
    val_metrics_results_dict = Metrics.compute_metrics(me, val_pred_fc,
                                                       y_val.flatten())
    test_metrics_results_dict = Metrics.compute_metrics(
        me, test_pred_fc, y_test.flatten())

    # end tracking time
    end = time.time()

    time_taken = end - start  # in seconds

    # model_result_data = {
    #     'algorithm_name': 'FCNN',
    #     'datapath': dataset_path,
    #     'train_building': train_building,
    #     'train_start': str(train_start.date()) if train_start != None else None ,
    #     'train_end': str(train_end.date()) if train_end != None else None ,
    #     'test_building': test_building,
    #     'test_start': str(test_start.date()) if test_start != None else None ,
    #     'test_end': str(test_end.date()) if test_end != None else None ,
    #     'appliance': meter_key,
    #     'sampling_rate': sample_period,
    #
    #     'algorithm_info': {
    #         'options': {
    #             'epochs': num_epochs
    #         },
    #         'hyperparameters': {
    #             'sequence_length': None,
    #             'min_sample_split': None,
    #             'num_layers': num_layers
    #         },
    #         'profile': {
    #             'parameters': None
    #         }
    #     },
    #
    #     'metrics':  metrics_results_dict,
    #
    #     'time_taken': format(time_taken, '.2f'),
    # }

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': num_epochs,
    }

    # Close Dataset files
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
Example #42
def dae(dataset_path, train_building, train_start, train_end, test_building,
        test_start, test_end, val_building, val_start, val_end, meter_key,
        sample_period, num_epochs, patience, sequence_length, optimizer,
        learning_rate, loss):

    # Start tracking time
    start = time.time()

    # Prepare dataset and options
    # print("========== OPEN DATASETS ============")
    dataset_path = dataset_path
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)
    train_building = train_building
    test_building = test_building
    meter_key = meter_key

    sample_period = sample_period

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    train_meter = train_elec.submeters()[meter_key]
    try:
        train_mains = train_elec.mains().all_meters()[0]
        val_mains = val_elec.mains().all_meters()[0]
        test_mains = test_elec.mains().all_meters()[0]
    except AttributeError:
        train_mains = train_elec.mains()
        val_mains = val_elec.mains()  # without this, val_mains would be undefined below
        test_mains = test_elec.mains()

    dae = DAEDisaggregator(sequence_length, patience, optimizer, learning_rate,
                           loss)

    # print("========== TRAIN ============")
    dae.train(train_mains,
              train_meter,
              epochs=num_epochs,
              sample_period=sample_period)

    # Get number of earlystop epochs
    num_epochs = dae.stopped_epoch if dae.stopped_epoch != 0 else num_epochs

    #dae.export_model("results/dae-model-{}-{}epochs.h5".format(meter_key, num_epochs))

    # print("========== DISAGGREGATE ============")
    # Validation
    val_disag_filename = 'disag-out-val.h5'
    output = HDFDataStore(val_disag_filename, 'w')
    dae.disaggregate(val_mains,
                     output,
                     train_meter,
                     sample_period=sample_period)
    output.close()
    # Test
    test_disag_filename = 'disag-out-test.h5'
    output = HDFDataStore(test_disag_filename, 'w')
    dae.disaggregate(test_mains,
                     output,
                     train_meter,
                     sample_period=sample_period)
    output.close()

    # print("========== RESULTS ============")
    # Validation
    result_val = DataSet(val_disag_filename)
    res_elec_val = result_val.buildings[val_building].elec
    rpaf_val = metrics.recall_precision_accuracy_f1(res_elec_val[meter_key],
                                                    val_elec[meter_key])

    val_metrics_results_dict = {
        'recall_score': rpaf_val[0],
        'precision_score': rpaf_val[1],
        'accuracy_score': rpaf_val[2],
        'f1_score': rpaf_val[3],
        'mean_absolute_error': metrics.mean_absolute_error(
            res_elec_val[meter_key], val_elec[meter_key]),
        'mean_squared_error': metrics.mean_square_error(
            res_elec_val[meter_key], val_elec[meter_key]),
        'relative_error_in_total_energy': metrics.relative_error_total_energy(
            res_elec_val[meter_key], val_elec[meter_key]),
        'nad': metrics.nad(res_elec_val[meter_key], val_elec[meter_key]),
        'disaggregation_accuracy': metrics.disaggregation_accuracy(
            res_elec_val[meter_key], val_elec[meter_key])
    }
    # Test
    result = DataSet(test_disag_filename)
    res_elec = result.buildings[test_building].elec
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                test_elec[meter_key])

    test_metrics_results_dict = {
        'recall_score': rpaf[0],
        'precision_score': rpaf[1],
        'accuracy_score': rpaf[2],
        'f1_score': rpaf[3],
        'mean_absolute_error': metrics.mean_absolute_error(
            res_elec[meter_key], test_elec[meter_key]),
        'mean_squared_error': metrics.mean_square_error(
            res_elec[meter_key], test_elec[meter_key]),
        'relative_error_in_total_energy': metrics.relative_error_total_energy(
            res_elec[meter_key], test_elec[meter_key]),
        'nad': metrics.nad(res_elec[meter_key], test_elec[meter_key]),
        'disaggregation_accuracy': metrics.disaggregation_accuracy(
            res_elec[meter_key], test_elec[meter_key])
    }

    # end tracking time
    end = time.time()

    time_taken = end - start  # in seconds

    # model_result_data = {
    #     'algorithm_name': 'DAE',
    #     'datapath': dataset_path,
    #     'train_building': train_building,
    #     'train_start': str(train_start.date()) if train_start != None else None ,
    #     'train_end': str(train_end.date()) if train_end != None else None ,
    #     'test_building': test_building,
    #     'test_start': str(test_start.date()) if test_start != None else None ,
    #     'test_end': str(test_end.date()) if test_end != None else None ,
    #     'appliance': meter_key,
    #     'sampling_rate': sample_period,
    #
    #     'algorithm_info': {
    #         'options': {
    #             'epochs': num_epochs
    #         },
    #         'hyperparameters': {
    #             'sequence_length': sequence_length,
    #             'min_sample_split': None,
    #             'num_layers': None
    #         },
    #         'profile': {
    #             'parameters': None
    #         }
    #     },
    #
    #     'metrics':  metrics_results_dict,
    #
    #     'time_taken': format(time_taken, '.2f'),
    # }

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': num_epochs,
    }

    # Close disaggregation output stores
    result.store.close()
    result_val.store.close()

    # Close Dataset files
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
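
# Usage sketch for dae() above. The dataset path, buildings, dates and all
# hyperparameter values below are illustrative placeholders, not values taken
# from this document:
results = dae('redd.h5',
              train_building=1, train_start='2011-04-18', train_end='2011-05-01',
              test_building=1, test_start='2011-05-08', test_end='2011-05-15',
              val_building=1, val_start='2011-05-01', val_end='2011-05-08',
              meter_key='fridge', sample_period=6, num_epochs=50, patience=3,
              sequence_length=256, optimizer='adam', learning_rate=1e-3,
              loss='mse')
print(results['test_metrics']['f1_score'])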
Ejemplo n.º 43
0
class NILM:
	def __init__(self):
		pass
	
	def convert_dataset(self, folder, destination_file):
		#convert_greend(folder, destination_file)
		convert_redd(folder, destination_file)

	def import_dataset(self, source_file, start_end):
		self.ds = DataSet(source_file)
		
		self.ds_train = DataSet(source_file)
		self.ds_train.set_window(end=start_end)
		
		self.ds_test = DataSet(source_file)
		self.ds_test.set_window(start=start_end)
		
	def show_wiring(self, building_no):
		self.ds.buildings[building_no].elec.draw_wiring_graph()

	def show_available_devices(self, building_no):
		return self.ds.buildings[building_no].elec

	def show_available_data(self, building_no, device_id):
		return self.ds.buildings[building_no].elec[device_id].available_columns() #.device["measurements"]
		
	def get_aggregated_power(self, building_no):
		return self.ds.buildings[building_no].elec.mains().power_series_all_data() #.head()

	def get_device_power(self, building_no, device_id):
		""" 
		Returns a generator over the power timeserie
		"""
		return self.ds.buildings[building_no].elec[device_id].power_series()
		
	def get_energy_per_meter(self, building_no):
		return self.ds_train.buildings[building_no].elec.submeters().energy_per_meter().loc['active']

	def get_total_energy_per_device(self, building_no, device_id):
		return self.ds.buildings[building_no].elec[device_id].total_energy()
		
	def plot_aggregated_power(self, building_no):
		self.ds.buildings[building_no].elec.mains().plot()
		
	def plot_meter_power(self, building_no, device_id):
		self.ds.buildings[building_no].elec[device_id].plot()
		
	def plot_all_meters(self, building_no):
		self.ds.buildings[building_no].elec.plot()
	
	def plot_appliance_states(self, building_no, device_id):
		self.ds.buildings[building_no].elec[device_id].plot_power_histogram()
		
	def plot_spectrum(self, building_no, device_id):
		self.ds.buildings[building_no].elec[device_id].plot_spectrum()
		
	def plot_appliance_usage(self, building_no, device_id):
		self.ds.buildings[building_no].elec[device_id].plot_activity_histogram()
		
	def select_appliances_by_id(self, building_no, names):
		pass
		
	def select_top_consuming_appliances_for_training(self, building_no, k=5):
		return self.ds.buildings[building_no].elec.submeters().select_top_k(k)

	def select_appliances_by_type(self, t):
		import nilmtk
		meters = nilmtk.global_meter_group.select_using_appliances(type=t).all_meters()
		#print([m.total_energy() for m in meters])
		meters = sorted(meters, key=(lambda m: m.total_energy()[0]), reverse=True)   # sort by energy consumption
		#print([m.total_energy() for m in meters])
		return meters

	def create_nilm_model(self, m_type):
		if m_type is "FHMM":
			self.model = fhmm_exact.FHMM()
		elif m_type is "CombOpt":
			self.model = combinatorial_optimisation.CombinatorialOptimisation()
	
	def import_nilm_model(self, filepath, m_type):
		if m_type is "FHMM":
			self.model = fhmm_exact.FHMM()
			self.model.import_model(filepath)
		elif m_type is "CombOpt":
			self.model = combinatorial_optimisation.CombinatorialOptimisation()
			self.model.import_model(filepath)

	def train_nilm_model(self, top_devices, sample_period=None):
		if sample_period is None:
			self.model.train(top_devices)
		else:
			self.model.train(top_devices, sample_period=sample_period)
	
	def save_disaggregator(self, filepath):
		self.model.export_model(filepath)
	
	def disaggregate(self, aggregate_timeseries, output_file, sample_period):
		self.model.disaggregate(aggregate_timeseries, output_file, sample_period=sample_period)
		
	def plot_f_score(self, disag_filename, building_no, test_elec):
		from nilmtk.metrics import f1_score
		plt.figure()
		disag = DataSet(disag_filename)
		disag_elec = disag.buildings[building_no].elec
		f1 = f1_score(disag_elec, test_elec)
		f1.index = disag_elec.get_labels(f1.index)
		f1.plot(kind='barh')
		plt.ylabel('appliance')
		plt.xlabel('f-score')
		plt.title(type(self.model).__name__)
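
# Usage sketch for the NILM wrapper above. The dataset path, split date and
# building number are placeholders; HDFDataStore is assumed imported as in the
# other examples:
nilm = NILM()
nilm.import_dataset('/data/REDD/redd.h5', '2011-05-01')
top5 = nilm.select_top_consuming_appliances_for_training(1, k=5)
nilm.create_nilm_model("CombOpt")
nilm.train_nilm_model(top5, sample_period=60)
output = HDFDataStore('disag-out.h5', 'w')
nilm.disaggregate(nilm.ds_test.buildings[1].elec.mains(), output, 60)
output.close()
nilm.plot_f_score('disag-out.h5', 1, nilm.ds_test.buildings[1].elec)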
Ejemplo n.º 44
0
from __future__ import print_function, division
from nilmtk import DataSet, HDFDataStore, TimeFrame
from os.path import join
import matplotlib.pyplot as plt
from matplotlib import rcParams
import math
from nilmtk.metrics import f1_score # metrics is actually different; need to look at this
from nilmtk.metrics import rms_error_power
from nilmtk.metrics import mean_normalized_error_power
from nilmtk.disaggregate import fhmm_exact # OK, only different in what is printed to screen (and this is diagonal covariance matrix)

building_number = 1
ds = DataSet('/nilmtk/data/iawe.h5') #('/nilmtk/data/ukdale.h5') #("/data/REDD/redd.h5")
print(ds.buildings)

train = DataSet('/nilmtk/data/iawe.h5') #('/nilmtk/data/ukdale.h5') #("/data/REDD/redd.h5")
test = DataSet('/nilmtk/data/iawe.h5') #('/nilmtk/data/ukdale.h5') #("/data/REDD/redd.h5")

elec = train.buildings[building_number].elec

mains = elec.mains()
df_all = mains.power_series_all_data() #df_all has a bunch of NaNs
df_all_noNan = df_all.dropna()
a = df_all_noNan.keys()
middleTime = a[int(math.floor(a.size / 2))]
middleTimeStr = middleTime.strftime("%Y-%m-%d %H:%M:%S")

print(middleTimeStr)

train.set_window(end=middleTimeStr)
test.set_window(start=middleTimeStr)
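
# Quick sanity check of the half/half split above (prints the effective
# timeframes after the windows are applied):
print(train.buildings[building_number].elec.mains().get_timeframe())
print(test.buildings[building_number].elec.mains().get_timeframe())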
Ejemplo n.º 45
0
K = int(sys.argv[3])
train_fraction = int(sys.argv[4]) / 100.0

print("*"*80)
print("Arguments")

print("Number states", num_states)
print("Train fraction is ", train_fraction)
print("Top k", K)

out_file_name = "N%d_K%d_T%s" % (num_states, K, sys.argv[4])
OUTPUT_PATH = os.path.join(BASH_RUN, out_file_name)
existing_files = glob.glob(os.path.join(OUTPUT_PATH, "*.h5"))
existing_files_names = [int(os.path.basename(x).split(".")[0]) for x in existing_files]

ds = DataSet(ds_path)
fridges = nilmtk.global_meter_group.select_using_appliances(type='fridge')

fridges_id_building_id = {i: fridges.meters[i].building() for i in range(len(fridges.meters))}

fridge_id_building_id_ser = pd.Series(fridges_id_building_id)

from fridge_compressor_durations_optimised_jul_7 import compressor_powers

fridge_ids_to_consider = list(compressor_powers.keys())

building_ids_to_consider = fridge_id_building_id_ser[fridge_ids_to_consider]

#sys.exit(0)

def find_specific_appliance(appliance_name, appliance_instance, list_of_elecs):
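    # The original body is truncated here. A minimal sketch of such a lookup,
    # assuming each element of list_of_elecs is a MeterGroup and matching on
    # the appliance identifier (type, instance):
    for elec in list_of_elecs:
        for meter in elec.meters:
            for appliance in meter.appliances:
                if (appliance.identifier.type == appliance_name and
                        appliance.identifier.instance == appliance_instance):
                    return meter
    return None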
Ejemplo n.º 46
0
def nilmtkECOfunc(dataset_loc, train_start, train_end, test_start, test_end,
                  output_period):
    #### configuration ####
    period_s = output_period
    building = 2
    #### load ####
    total = DataSet(dataset_loc)
    train = DataSet(dataset_loc)
    test = DataSet(dataset_loc)
    train.set_window(start=train_start, end=train_end)
    test.set_window(start=test_start, end=test_end)
    print(train_start)
    print(train_end)
    print(test_start)
    print(test_end)
    #### get timeframe ####
    tf_total = total.buildings[building].elec.mains().get_timeframe()
    tf_train = train.buildings[building].elec.mains().get_timeframe()
    tf_test = test.buildings[building].elec.mains().get_timeframe()
    #### eletrical metergroup ####
    total_elec = total.buildings[building].elec
    train_elec = train.buildings[building].elec
    test_elec = test.buildings[building].elec
    #### training process ####
    start = time.time()
    from nilmtk.disaggregate import CombinatorialOptimisation
    co = CombinatorialOptimisation()
    co.train(train_elec, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")
    #### disaggregation process ####
    start = time.time()
    disag_filename = '../dataset/ecob-b2-kall-co-1w:11-1m.h5'
    output = HDFDataStore(disag_filename, 'w')
    co.disaggregate(test_elec.mains(), output, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")
    output.close()
    disag_co = DataSet(disag_filename)
    disag_co_elec = disag_co.buildings[building].elec
    #### fraction energy assigned correctly ####
    #FTE_co_all = FTE_func(disag_co_elec, test_elec);
    #### total disaaggregation error ####
    #Te_co_all = total_disag_err(disag_co_elec, test_elec);
    #### creating dataframe from both disaggregated and ground truth metergroups
    disag_co_elec_df = disag_co_elec.dataframe_of_meters()
    disag_co_elec_df_nona = disag_co_elec_df.dropna()
    gt_full_df = test_elec.dataframe_of_meters()
    gt_full_df_nona = gt_full_df.dropna()
    gt_df_nona = gt_full_df_nona.loc[disag_co_elec_df_nona.index]
    #### jaccard ####
    #Ja_co_all = jaccard_similarity(disag_co_elec_df_nona, gt_df_nona, disag_co_elec.submeters().instance(), test_elec.instance());
    #print("FTE all", FTE_co_all);
    #print("TE  all", Te_co_all);
    #print("Ja  all",  Ja_co_all);
    #### output ####
    # drop aggregated power
    disag_co_elec_submeter_df = disag_co_elec_df.drop(
        disag_co_elec_df.columns[[0]], axis=1)
    # disag_co_elec_submeter_df = disag_co_elec_df
    # drop the unwanted timestamp
    gt_df_aligned = gt_full_df.loc[disag_co_elec_submeter_df.index]
    # drop aggregated power
    gt_df_sub = gt_df_aligned.drop(gt_df_aligned.columns[[0, 1, 2]], axis=1)
    # train
    train_elec_df = train_elec.dataframe_of_meters()
    train_elec_df_aligned = train_elec_df.resample(str(period_s) +
                                                   'S').asfreq()
    train_elec_df_aligned_drop = train_elec_df_aligned.drop(
        train_elec_df_aligned.columns[[0, 1, 2]], axis=1)
    return disag_co_elec_submeter_df, gt_df_sub, co, train_elec_df_aligned_drop
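
# Usage sketch for nilmtkECOfunc() above (ECO dataset path and window dates
# are placeholders; output_period is the resampling period in seconds):
pred_df, gt_df, co_model, train_df = nilmtkECOfunc(
    '../dataset/eco.h5', '2012-06-01', '2012-06-08',
    '2012-06-08', '2012-06-15', output_period=60)
print(pred_df.head())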
Ejemplo n.º 47
0
def fhmm(dataset_path, train_building, train_start, train_end, val_building,
         val_start, val_end, test_building, test_start, test_end, meter_key,
         sample_period):

    # Start tracking time
    start = time.time()

    # Prepare dataset and options
    # print("========== OPEN DATASETS ============")
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    appliances = [meter_key]
    selected_meters = [train_elec[app] for app in appliances]
    selected_meters.append(train_elec.mains())
    selected = MeterGroup(selected_meters)

    fhmm = FHMM()

    # print("========== TRAIN ============")
    fhmm.train(selected, sample_period=sample_period)

    # print("========== DISAGGREGATE ============")
    # Validation
    val_disag_filename = 'disag-out-val.h5'
    output = HDFDataStore(val_disag_filename, 'w')
    fhmm.disaggregate(val_elec.mains(), output_datastore=output)
    output.close()
    # Test
    test_disag_filename = 'disag-out-test.h5'
    output = HDFDataStore(test_disag_filename, 'w')
    fhmm.disaggregate(test_elec.mains(), output_datastore=output)
    output.close()

    # print("========== RESULTS ============")
    # Validation
    result_val = DataSet(val_disag_filename)
    res_elec_val = result_val.buildings[val_building].elec
    rpaf_val = metrics.recall_precision_accuracy_f1(res_elec_val[meter_key],
                                                    val_elec[meter_key])

    val_metrics_results_dict = {
        'recall_score': rpaf_val[0],
        'precision_score': rpaf_val[1],
        'accuracy_score': rpaf_val[2],
        'f1_score': rpaf_val[3],
        'mean_absolute_error': metrics.mean_absolute_error(
            res_elec_val[meter_key], val_elec[meter_key]),
        'mean_squared_error': metrics.mean_square_error(
            res_elec_val[meter_key], val_elec[meter_key]),
        'relative_error_in_total_energy': metrics.relative_error_total_energy(
            res_elec_val[meter_key], val_elec[meter_key]),
        'nad': metrics.nad(res_elec_val[meter_key], val_elec[meter_key]),
        'disaggregation_accuracy': metrics.disaggregation_accuracy(
            res_elec_val[meter_key], val_elec[meter_key])
    }
    # Test
    result = DataSet(test_disag_filename)
    res_elec = result.buildings[test_building].elec
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                test_elec[meter_key])

    test_metrics_results_dict = {
        'recall_score': rpaf[0],
        'precision_score': rpaf[1],
        'accuracy_score': rpaf[2],
        'f1_score': rpaf[3],
        'mean_absolute_error': metrics.mean_absolute_error(
            res_elec[meter_key], test_elec[meter_key]),
        'mean_squared_error': metrics.mean_square_error(
            res_elec[meter_key], test_elec[meter_key]),
        'relative_error_in_total_energy': metrics.relative_error_total_energy(
            res_elec[meter_key], test_elec[meter_key]),
        'nad': metrics.nad(res_elec[meter_key], test_elec[meter_key]),
        'disaggregation_accuracy': metrics.disaggregation_accuracy(
            res_elec[meter_key], test_elec[meter_key])
    }

    # end tracking time
    end = time.time()

    time_taken = end - start  # in seconds

    # model_result_data = {
    #     'algorithm_name': 'FHMM',
    #     'datapath': dataset_path,
    #     'train_building': train_building,
    #     'train_start': str(train_start.date()) if train_start != None else None ,
    #     'train_end': str(train_end.date()) if train_end != None else None ,
    #     'test_building': test_building,
    #     'test_start': str(test_start.date()) if test_start != None else None ,
    #     'test_end': str(test_end.date()) if test_end != None else None ,
    #     'appliance': meter_key,
    #     'sampling_rate': sample_period,
    #
    #     'algorithm_info': {
    #         'options': {
    #             'epochs': None
    #         },
    #         'hyperparameters': {
    #             'sequence_length': None,
    #             'min_sample_split': None,
    #             'num_layers': None
    #         },
    #         'profile': {
    #             'parameters': None
    #         }
    #     },
    #
    #     'metrics':  metrics_results_dict,
    #
    #     'time_taken': format(time_taken, '.2f'),
    # }

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': None,
    }

    # Close disaggregation output stores
    result.store.close()
    result_val.store.close()

    # Close Dataset files
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
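
# Usage sketch for fhmm() above (placeholder path, buildings and dates,
# mirroring the dae() example earlier):
results = fhmm('redd.h5',
               train_building=1, train_start='2011-04-18', train_end='2011-05-01',
               val_building=1, val_start='2011-05-01', val_end='2011-05-08',
               test_building=1, test_start='2011-05-08', test_end='2011-05-15',
               meter_key='fridge', sample_period=60)
print(results['test_metrics'])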
Ejemplo n.º 48
0
from __future__ import print_function, division
from nilmtk import DataSet, TimeFrame, MeterGroup
import plot_config
import seaborn as sns
from matplotlib.dates import DateFormatter, HourLocator
import matplotlib.pyplot as plt
import pytz
from os.path import join

from pylab import rcParams
rcParams.update({'figure.figsize': plot_config._mm_to_inches(88, 60)})

print("plotting histograms...")

dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
#dataset.set_window("2013-04-01", "2013-05-01")
dataset.set_window(None, None)

axes = dataset.plot_mains_power_histograms(bins=500, range=(5, 500), 
                                           plot_kwargs={'color': plot_config.BLUE})

for i, ax in enumerate(axes):
    ax.grid(False)
    ax.set_yticks([])
    ax.set_ylabel("")
    plot_config.format_axes(ax, tick_size=2)
    sns.despine(ax=ax, left=True)
    ax.spines['bottom'].set_linewidth(0.2)    
    ax.set_title('House {}'.format(i+1), y=.5, va='top', x=0.08)
    if i != 4:
        ax.set_xlabel('')
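
# A minimal sketch to persist the histogram figure (the filename is an
# assumption, not from the original script):
plt.savefig('mains_power_histograms.pdf', bbox_inches='tight')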