Example #1
def disaggregate_original_co(h5_input, h5_output, dataset_start_date_disag, dataset_end_date_disag, centroids=None):
    # Imports assumed by this snippet (NILMTK's public API)
    from nilmtk import DataSet, HDFDataStore
    import nilmtk.disaggregate as original_nilmtk
    ds = DataSet(h5_input)
    elec = ds.buildings[1].elec
    
    vampire_power_used_in_original = elec.mains().vampire_power()

    #Train
    plain_co = original_nilmtk.CombinatorialOptimisation()
    plain_co.train(elec)
    
    #Modify centroids manually
    if centroids is not None:            
        for i, model in enumerate(plain_co.model):
            instance = model['training_metadata'].instance()
            model['states'] = centroids[instance]
    
    
    #Disaggregate
    ds.set_window(start=dataset_start_date_disag, end=dataset_end_date_disag)
    elec = ds.buildings[1].elec
    output_plain_co = HDFDataStore(h5_output, 'w')
    plain_co.disaggregate(elec.mains(), output_plain_co)
    output_plain_co.close()
       
    return plain_co, vampire_power_used_in_original
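
# Usage sketch: the file paths and date window below are hypothetical
# placeholders; the function returns the trained CO model and the vampire
# power it measured, which can be reused as a baseline elsewhere.
co_model, vampire_power = disaggregate_original_co(
    'redd.h5', 'disag-co.h5', '2011-04-25', '2011-05-02')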
def hart85(start_train, end_train, start_test, end_test, train_elec):

    #Start training
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    hart = hart_85.Hart85()
    hart.train(train_elec, sample_period=1)

    #Start disaggregating
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_hart85_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    hart.disaggregate(elec.mains(), output)
    output.close()

    disag = DataSet(disag_filename)
    disag_elec = disag.buildings[1].elec
    disag_elec.plot()
    plt.title("Hart85")
    plt.show()

    #Calculate F1-Score
    f1 = f1_score(disag_elec, train_elec)
    f1.index = disag_elec.get_labels(f1.index)
    f1.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    plt.title("Hart85")
    plt.show()
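
# Usage sketch: hart85() relies on a module-level DataSet named `data`; the
# path and the train/test windows below are hypothetical placeholders.
data = DataSet('./build/redd.h5')
hart85('2011-04-18', '2011-04-25', '2011-04-25', '2011-05-02',
       data.buildings[1].elec)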
Example #3
 def __init__(self, in_filepath, out_filepath):
     print("Loading DataStore and Generating Dataset...")
     self.km = {}
     self.dataStore = HDFDataStore(in_filepath)
     self.dataSet = DataSet()
     self.dataSet.load(self.dataStore)
     self.outDataStore = HDFDataStore(out_filepath, 'w')
     self.co = CombinatorialOptimisation()
     self.train_group = {}
     print("Data Properly Loaded!")
Example #5
    def disaggregate_building_to_file(self,
                                      building_idx,
                                      filename,
                                      model=None):
        if model is None:
            model = self.fit_a_model(building_idx)
        elec = self.get_elec_meter_data_of_a_building(building_idx)

        output = HDFDataStore(filename, 'w')
        model.disaggregate(elec.mains(), output)
        output.close()
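
# Usage sketch: assumes an instance of the enclosing class, which this
# fragment does not name; the building index and output filename are
# placeholders.
# experiment.disaggregate_building_to_file(1, 'disag-b1.h5')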
def co(start_train, end_train, start_test, end_test, train_elec):

    #Start training
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    co = CombinatorialOptimisation()
    co.train(train_elec,
             ac_type='active',
             physical_quantity='power',
             sample_period=1)

    #Start disaggregating
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_co_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    co.disaggregate(elec.mains(),
                    output,
                    ac_type='active',
                    physical_quantity='power',
                    sample_period=1)
    output.close()
    dates_dict = {
        "start_train": start_train,
        "end_train": end_train,
        "start_test": start_test,
        "end_test": end_test
    }
    # write test and train timeframe into json file
    with open(disag_filename + ".json", 'w') as dates_file:
        json.dump(dates_dict, dates_file)

    #Calculate F1-Score
    disag = DataSet(disag_filename)
    disag_elec = disag.buildings[1].elec
    disag_elec.plot()
    plt.title("CO")
    plt.show()

    f1 = f1_score(disag_elec, train_elec)
    f1.index = disag_elec.get_labels(f1.index)
    f1.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    plt.title("CO")
    plt.show()
def mle(start_train, end_train, start_test, end_test, train_elec):

    #Start training
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    mle = maximum_likelihood_estimation.MLE()
    mle.sample_period = "1s"
    mle.train(train_elec)

    #Start disaggregating
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_mle_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    mle.disaggregate(elec.mains(), output)
    output.close()
    dates_dict = {
        "start_train": start_train,
        "end_train": end_train,
        "start_test": start_test,
        "end_test": end_test
    }
    # write test and train timeframe into json file
    with open(disag_filename + ".json", 'w') as dates_file:
        json.dump(dates_dict, dates_file)

    disag = DataSet(disag_filename)
    disag_elec = disag.buildings[1].elec
    disag_elec.plot()
    plt.title("MLE")
    plt.show()

    #Calculate F1-Score
    f1 = f1_score(disag_elec, train_elec)
    f1.index = disag_elec.get_labels(f1.index)
    f1.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    plt.title("FHMM")
    plt.show()
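
# Usage sketch: co() and mle(), like hart85() above, rely on the module-level
# DataSet named `data` and share one signature; the windows below are
# hypothetical placeholders.
for benchmark in (co, mle):
    benchmark('2011-04-18', '2011-04-25', '2011-04-25', '2011-05-02',
              data.buildings[1].elec)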
class REDD_Data(object):

	'''
	REDD_Data is a class designed to abstract the lower-level commands of
	the NILMTK software package, with a focus on the REDD dataset. It is
	designed to allow rapid experimentation and disaggregation compared to
	setting the package up from scratch.

	This class requires the following for proper usage:
	- NILMTK package: https://github.com/nilmtk
	- REDD Dataset (converted to .h5): redd.csail.mit.edu
	- Various dependencies (that NILMTK also requires), most of which can be
	  downloaded through Anaconda: continuum.io/downloads


	Parameters
	-----------
	in_filepath:		filepath of the converted REDD dataset (in .h5 format)
	out_filepath:		filepath for the output disaggregation dataset (in .h5 format)

	Attributes
	-----------
	km: Key_Map Object
		initializes the Key_Map object which allows mapping a meter's
		appliance name to its specific .h5 key.

	dataStore: NILMTK HDFDataStore Object
		the HDFDataStore that will contain the converted REDD DataSet.

	dataSet: NILMTK DataSet Object
		the DataSet object that is generated from the REDD DataStore (self.dataStore)		

	outDataStore: NILMTK HDFDataStore Object
		the HDFDataStore that will contain the disaggregated dataset.

	co: NILMTK CombinatorialOptimisation object
		the disaggregation model object that will be trained and will disaggregate the 
		working dataset

	train_group: NILMTK MeterGroup object
		the MeterGroup object that is used to train the disaggregation model (self.co)

	'''
	def __init__ (self,in_filepath,out_filepath):
		print("Loading DataStore and Generating Dataset...")
		self.km = {}
		self.dataStore = HDFDataStore(in_filepath)
		self.dataSet = DataSet()
		self.dataSet.load(self.dataStore)
		self.outDataStore = HDFDataStore(out_filepath,'w')
		self.co = CombinatorialOptimisation()
		self.train_group = {}
		print("Data Properly Loaded!")


	def train_disag_model(self,building_inst, use_topk = False, k = 5):
		'''
		Function trains the disaggregation model using a selected MeterGroup.

		Parameters
		-----------

		building_inst: 	the instance # of the building that you wish to grab the 
					   	training group from.

		use_topk:		true if you wish to only grab the top k most energy intensive
						appliance to train the model, false if you wish to use all
						appliances.

		k:				the # of appliances you wish to use (if use_topk = True)

		'''

		print("Training CO Disaggregation Model using given metergroup...")

		if (building_inst <= 6) & (building_inst > 0): 
			#Select appropiate meter group to train with
			if use_topk == True:
				self.train_group = self.dataSet.buildings[building_inst].elec.select_top_k(k)
			else:
				self.train_group = self.dataSet.buildings[building_inst].elec

			self.co.train(self.train_group)
			print("CO Disaggreation Model Sucessfully Trained!")

		else:
			print("Error: Please select a building_inst of 1-6.")
			print("Model unsucessfully trained.")


	def load_disag_model(self, filepath):
		'''
		Function loads the disaggregation model from a file.

		Parameters
		-----------

		filepath:	exact filepath of the model file.

		'''
		print("Loading CO Disaggreation Model...")
		self.co.import_model(filepath)
		print("Model Sucessfully Loaded!")
		

	def save_disag_model(self,filepath):
		'''
		Function saves the disaggregation model to a file.

		Parameters
		-----------

		filepath:	exact filepath of the model file.

		'''
		print("Saving CO Disaggregation Model...")
		self.co.export_model(filepath)
		print("Model Sucessfully Saved!")


	def disaggregate(self,building_inst):
		'''
		Function will disaggregate the mains MeterGroup of the passed building 
		instance, and save this to the self.outDataStore object.

		Parameters
		-----------

		building_inst:	instance # of the building mains you wish to disaggregate.

		'''
		print("Disaggregating Building Mains...")		
		self.co.disaggregate(self.dataSet.buildings[building_inst].elec.mains(),self.outDataStore)
		print("Mains sucessfully disaggregated!")


	def close(self):
		'''
		Function closes all open DataStores used by the program.

		'''
		print("Closing DataStores...")
		self.dataStore.close()
		self.outDataStore.close()
		print("Output DataStores Sucessfully Closed")
		

	'''
	All Plot Functions below are a WORK IN PROGRESS!-----------------------------------
	Documentation will be provided upon completion.------------------------------------

	'''
		

	def plot_disag_apl(self,inst,appliance,t1="",t2=""):
		self.km = Key_Map(inst)
		plot_series(self.outDataStore.store.get(self.km.get_key(appliance))[t1: t2])
		plt.title("Disaggregated " + appliance.capitalize()+" Energy") 
		plt.show()

	
	def show_plots(self):
		plt.show()


	def building_plot_all(self,building_inst,t1,t2):
		self.dataSet.buildings[building_inst].elec.plot(t1,t2)
		plt.title("Building "+str(building_inst)+" Energy per Appliance")
		plt.ylabel('Power [W]')
		plt.xlabel('Hour')


	def plot_redd_mains_data(self, inst=1, t1 = "", t2 = ""):
		self.km = Key_Map(inst)
		series1 = self.dataStore.store.get(self.km.get_key('mains1'))[t1:t2]
		series2 = self.dataStore.store.get(self.km.get_key('mains2'))[t1:t2]
		plot_series(series1 + series2)
		plt.title("Building "+str(inst)+" Mains Energy")
		plt.show()
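
# Usage sketch for the class above: the paths are placeholders. Train on
# building 1's five most energy-intensive appliances, disaggregate its mains,
# then close both stores.
rd = REDD_Data('C:/NILM/Data_Sets/redd_data.h5', 'C:/NILM/Data/output.h5')
rd.train_disag_model(1, use_topk=True, k=5)
rd.disaggregate(1)
rd.close()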
 def setUpClass(cls):
     filename = join(data_dir(), 'energy.h5')
     cls.datastore = HDFDataStore(filename)
     ElecMeter.load_meter_devices(cls.datastore)
Example #10
    plt.clf()

# Define the buildings to be used for training and disaggregation
train_building = 1
disag_building = 1

## Dummy training and disaggregation

### Training
dum = DummyDisaggregator()
print('\n== dum.train(dataset.buildings[%d].elec)' % (train_building))
dum.train(dataset.buildings[train_building].elec)

### Disaggregation
dum_outfile = dataset_directory / ('%s-da-co.h5' % (dataset_name.lower()))
output = HDFDataStore(str(dum_outfile), 'w')
print('\n== dum.disaggregate(dataset.buildings[%d].mains(), output)' %
      (disag_building))
dum.disaggregate(dataset.buildings[disag_building].elec.mains(), output)
output.close()

### Results
print('\n== Plotting Dummy disaggregation results...')
da_data = DataSet(str(dum_outfile))
da_elec = da_data.buildings[disag_building].elec
ax = da_elec.plot()
ax.set_title("B%d Dummy disaggregation results" % (disag_building))
plt.savefig('results/%s__b%d__elec__dummy.png' %
            (dataset_name, disag_building))
plt.clf()
f1 = f1_score(da_elec, dataset.buildings[disag_building].elec)
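
# The F1 scores could then be plotted in the same style as the other figures
# in this script; get_labels() maps meter instances to readable names.
f1.index = da_elec.get_labels(f1.index)
f1.plot(kind='barh')
plt.xlabel('f-score')
plt.savefig('results/%s__b%d__f1__dummy.png' % (dataset_name, disag_building))
plt.clf()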
Example #11
def generate_vertices():
    """
    Predicts the power demand of the target appliance using the intermediate models which are exported during training.
    Generates a polygon from those predictions.
    """
    train = DataSet('../data/ukdale.h5')
    train.clear_cache()
    train.set_window(start="13-4-2013", end="31-7-2013")
    test = DataSet('../data/ukdale.h5')
    test.clear_cache()
    test.set_window(start='7-2-2014 08:00:00', end='7-3-2014')

    train_building = 1
    test_building = 5
    sample_period = 6
    meter_key = 'kettle'
    learning_rate = 1e-5

    train_elec = train.buildings[train_building].elec
    test_elec = test.buildings[test_building].elec

    train_meter = train_elec.submeters()[meter_key]
    test_mains = test_elec.mains()

    results_dir = '../results/UKDALE-ACROSS-BUILDINGS-RNN-lr=1e-05-2018-02-03-11-48-12'
    train_logfile = os.path.join(results_dir, 'training.log')
    val_logfile = os.path.join(results_dir, 'validation.log')
    rnn = RNNDisaggregator(train_logfile,
                           val_logfile,
                           learning_rate,
                           init=False)

    verts = []
    zs = []  # epochs
    for z in np.arange(10, 341, 10):

        # disaggregate model
        model = 'UKDALE-RNN-kettle-{}epochs.h5'.format(z)
        rnn.import_model(os.path.join(results_dir, model))
        disag_filename = 'disag-out-{}epochs.h5'.format(z)
        output = HDFDataStore(os.path.join(results_dir, disag_filename), 'w')
        results_file = os.path.join(results_dir,
                                    'results-{}epochs.txt'.format(z))
        rnn.disaggregate(test_mains,
                         output,
                         results_file,
                         train_meter,
                         sample_period=sample_period)
        os.remove(results_file)
        output.close()

        # get predicted curve for epoch=z
        result = DataSet(os.path.join(results_dir, disag_filename))
        res_elec = result.buildings[test_building].elec
        os.remove(os.path.join(results_dir, disag_filename))
        predicted = res_elec[meter_key]
        predicted = predicted.power_series(sample_period=sample_period)
        predicted = next(predicted)
        predicted.fillna(0, inplace=True)
        ys = np.array(predicted)  # power
        xs = np.arange(ys.shape[0])  # timestamps

        verts.append(list(zip(xs, ys)))  # add list of x-y-coordinates
        zs.append(z)

    ground_truth = test_elec[meter_key]
    ground_truth = ground_truth.power_series(sample_period=sample_period)
    ground_truth = next(ground_truth)
    ground_truth.fillna(0, inplace=True)
    ys = np.array(ground_truth)  # power
    xs = np.arange(ys.shape[0])  # timestamps

    verts.append(list(zip(xs, ys)))  # add list of x-y-coordinates
    zs.append(350)

    zs = np.asarray(zs)

    for i in range(len(verts)):
        verts[i].insert(0, [0, np.array([0])])
        verts[i].append([len(verts[i]), np.array([0])])

    pickle.dump(verts, open(os.path.join(results_dir, 'vertices.pkl'), 'wb'))
    pickle.dump(zs, open(os.path.join(results_dir, 'zs.pkl'), 'wb'))
    pickle.dump(ys, open(os.path.join(results_dir, 'ys.pkl'), 'wb'))
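
# Consumption sketch: one way to render the pickled vertices is matplotlib's
# polygon "waterfall" pattern (Poly3DCollection). The alpha, z-limit and the
# standalone imports are assumptions, not part of the original script.
import os
import pickle

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection

results_dir = '../results/UKDALE-ACROSS-BUILDINGS-RNN-lr=1e-05-2018-02-03-11-48-12'
with open(os.path.join(results_dir, 'vertices.pkl'), 'rb') as f:
    verts = pickle.load(f)
with open(os.path.join(results_dir, 'zs.pkl'), 'rb') as f:
    zs = pickle.load(f)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.add_collection3d(Poly3DCollection(verts, alpha=0.4), zs=zs, zdir='y')
ax.set_xlim3d(0, max(len(v) for v in verts))
ax.set_ylim3d(zs.min(), zs.max())
ax.set_zlim3d(0, 3000)  # kettle-scale watts; adjust to the data
ax.set_xlabel('time step')
ax.set_ylabel('epoch')
ax.set_zlabel('power (W)')
plt.show()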
Example #12
def get_disaggregation(device, total_aggregate):

    here = os.path.dirname(os.path.abspath(__file__))
    dataset_file = os.path.join(here, "dataset/iawe2.h5")

    devices = ["fridge", "air conditioner", "washing machine"]
    if device not in devices:
        return None

    total_seconds = 30 * 24 * 60
    val_per_second = float(total_aggregate) / (total_seconds)

    print(val_per_second)

    start = 0
    end = 0

    with h5py.File(dataset_file, "r+") as f1:
        table = f1["building1/elec/meter1/table"].value

        start = int(str(table[0][0])[:10])
        end = start + total_seconds
        print(end - start, total_seconds)

        # for i in range(total_seconds):
        #     # for j in range(7):
        #     print("Progress {:2.1%}".format(i / total_seconds), end="\r")
        #     table[i][1][2] = val_per_second + np.random.uniform(-1e-17,
        #  1e-17, 1)

        # f1["building1/elec/meter1/table"][...] = table
        # print(table)

    # start = datetime.fromtimestamp(start)
    end = datetime.fromtimestamp(end)

    # start = start.isoformat(' ', 'seconds')
    end = end.isoformat(' ', 'seconds')

    # print(start, end)

    test = DataSet(dataset_file)
    # test.set_window(start=start, end=end)
    test.set_window(end=end)
    test_elec = test.buildings[1].elec
    test_mains = test_elec.mains()[1]

    df = next(test_mains.load())
    print(df)

    test_meter = test_elec.submeters()[device]

    disag_filename = 'disag-out.h5'  # The filename of the resulting datastore
    output = HDFDataStore(disag_filename, 'w')

    disaggregator = ShortSeq2PointDisaggregator()
    model_file = os.path.join(
        here, "disag1/IAWE-RNN-h{}-{}-{}epochs.h5".format(1, device, 10))
    disaggregator.import_model(model_file)

    # anykey = input()
    # test_mains: The aggregated signal meter
    # output: The output datastore
    # train_meter: This is used in order to copy the metadata of the train
    # meter into the datastore
    disaggregator.disaggregate(test_mains, output, test_mains, sample_period=1)
    output.close()

    result = DataSet(disag_filename)
    res_elec = result.buildings[1].elec

    # prediction = res_elec[device]
    prediction = res_elec
    # df = next(prediction.load())
    # prediction = df["power"]["active"][0]

    return prediction
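
# Usage sketch: the device string must be one of the three supported names;
# the aggregate value is a hypothetical monthly total.
prediction = get_disaggregation("fridge", 150000)
if prediction is not None:
    print(prediction)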
Example #13
def dae(dataset_path, train_building, train_start, train_end, test_building,
        test_start, test_end, val_building, val_start, val_end, meter_key,
        sample_period, num_epochs, patience, sequence_length, optimizer,
        learning_rate, loss):

    # Start tracking time
    start = time.time()

    # Prepare dataset and options
    # print("========== OPEN DATASETS ============")
    dataset_path = dataset_path
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)
    train_building = train_building
    test_building = test_building
    meter_key = meter_key

    sample_period = sample_period

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    train_meter = train_elec.submeters()[meter_key]
    try:
        train_mains = train_elec.mains().all_meters()[0]
        val_mains = val_elec.mains().all_meters()[0]
        test_mains = test_elec.mains().all_meters()[0]
    except AttributeError:
        train_mains = train_elec.mains()
        val_mains = val_elec.mains()
        test_mains = test_elec.mains()

    dae = DAEDisaggregator(sequence_length, patience, optimizer, learning_rate,
                           loss)

    # print("========== TRAIN ============")
    dae.train(train_mains,
              train_meter,
              epochs=num_epochs,
              sample_period=sample_period)

    # Get number of earlystop epochs
    num_epochs = dae.stopped_epoch if dae.stopped_epoch != 0 else num_epochs

    #dae.export_model("results/dae-model-{}-{}epochs.h5".format(meter_key, num_epochs))

    # print("========== DISAGGREGATE ============")
    # Validation
    val_disag_filename = 'disag-out-val.h5'
    output = HDFDataStore(val_disag_filename, 'w')
    dae.disaggregate(val_mains,
                     output,
                     train_meter,
                     sample_period=sample_period)
    output.close()
    # Test
    test_disag_filename = 'disag-out-test.h5'
    output = HDFDataStore(test_disag_filename, 'w')
    dae.disaggregate(test_mains,
                     output,
                     train_meter,
                     sample_period=sample_period)
    output.close()

    # print("========== RESULTS ============")
    # Validation
    result_val = DataSet(val_disag_filename)
    res_elec_val = result_val.buildings[val_building].elec
    rpaf_val = metrics.recall_precision_accuracy_f1(res_elec_val[meter_key],
                                                    val_elec[meter_key])

    val_metrics_results_dict = {
        'recall_score':
        rpaf_val[0],
        'precision_score':
        rpaf_val[1],
        'accuracy_score':
        rpaf_val[2],
        'f1_score':
        rpaf_val[3],
        'mean_absolute_error':
        metrics.mean_absolute_error(res_elec_val[meter_key],
                                    val_elec[meter_key]),
        'mean_squared_error':
        metrics.mean_square_error(res_elec_val[meter_key],
                                  val_elec[meter_key]),
        'relative_error_in_total_energy':
        metrics.relative_error_total_energy(res_elec_val[meter_key],
                                            val_elec[meter_key]),
        'nad':
        metrics.nad(res_elec_val[meter_key], val_elec[meter_key]),
        'disaggregation_accuracy':
        metrics.disaggregation_accuracy(res_elec_val[meter_key],
                                        val_elec[meter_key])
    }
    # Test
    result = DataSet(test_disag_filename)
    res_elec = result.buildings[test_building].elec
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                test_elec[meter_key])

    test_metrics_results_dict = {
        'recall_score':
        rpaf[0],
        'precision_score':
        rpaf[1],
        'accuracy_score':
        rpaf[2],
        'f1_score':
        rpaf[3],
        'mean_absolute_error':
        metrics.mean_absolute_error(res_elec[meter_key], test_elec[meter_key]),
        'mean_squared_error':
        metrics.mean_square_error(res_elec[meter_key], test_elec[meter_key]),
        'relative_error_in_total_energy':
        metrics.relative_error_total_energy(res_elec[meter_key],
                                            test_elec[meter_key]),
        'nad':
        metrics.nad(res_elec[meter_key], test_elec[meter_key]),
        'disaggregation_accuracy':
        metrics.disaggregation_accuracy(res_elec[meter_key],
                                        test_elec[meter_key])
    }

    # end tracking time
    end = time.time()

    time_taken = end - start  # in seconds

    # model_result_data = {
    #     'algorithm_name': 'DAE',
    #     'datapath': dataset_path,
    #     'train_building': train_building,
    #     'train_start': str(train_start.date()) if train_start != None else None ,
    #     'train_end': str(train_end.date()) if train_end != None else None ,
    #     'test_building': test_building,
    #     'test_start': str(test_start.date()) if test_start != None else None ,
    #     'test_end': str(test_end.date()) if test_end != None else None ,
    #     'appliance': meter_key,
    #     'sampling_rate': sample_period,
    #
    #     'algorithm_info': {
    #         'options': {
    #             'epochs': num_epochs
    #         },
    #         'hyperparameters': {
    #             'sequence_length': sequence_length,
    #             'min_sample_split': None,
    #             'num_layers': None
    #         },
    #         'profile': {
    #             'parameters': None
    #         }
    #     },
    #
    #     'metrics':  metrics_results_dict,
    #
    #     'time_taken': format(time_taken, '.2f'),
    # }

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': num_epochs,
    }

    # Close disaggregation output stores
    result.store.close()
    result_val.store.close()

    # Close Dataset files
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
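
# Usage sketch: every argument below is a placeholder; the window strings use
# the same format the other examples pass to DataSet.set_window().
results = dae('ukdale.h5',
              train_building=1, train_start='13-4-2013', train_end='31-7-2013',
              test_building=1, test_start='1-8-2013', test_end='31-8-2013',
              val_building=1, val_start='1-9-2013', val_end='30-9-2013',
              meter_key='kettle', sample_period=6, num_epochs=50, patience=5,
              sequence_length=256, optimizer='adam', learning_rate=1e-3,
              loss='mse')
print(results['test_metrics'])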
Example #14
def nilmtkECOfunc(dataset_loc, train_start, train_end, test_start, test_end,
                  output_period):
    #### configuration ####
    period_s = output_period
    building = 2
    #### load ####
    total = DataSet(dataset_loc)
    train = DataSet(dataset_loc)
    test = DataSet(dataset_loc)
    train.set_window(start=train_start, end=train_end)
    test.set_window(start=test_start, end=test_end)
    print(train_start)
    print(train_end)
    print(test_start)
    print(test_end)
    #### get timeframe ####
    tf_total = total.buildings[building].elec.mains().get_timeframe()
    tf_train = train.buildings[building].elec.mains().get_timeframe()
    tf_test = test.buildings[building].elec.mains().get_timeframe()
    #### electrical metergroup ####
    total_elec = total.buildings[building].elec
    train_elec = train.buildings[building].elec
    test_elec = test.buildings[building].elec
    #### training process ####
    start = time.time()
    from nilmtk.disaggregate import CombinatorialOptimisation
    co = CombinatorialOptimisation()
    co.train(train_elec, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")
    #### disaggregation process ####
    start = time.time()
    disag_filename = '../dataset/ecob-b2-kall-co-1w:11-1m.h5'
    output = HDFDataStore(disag_filename, 'w')
    co.disaggregate(test_elec.mains(), output, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")
    output.close()
    disag_co = DataSet(disag_filename)
    disag_co_elec = disag_co.buildings[building].elec
    #### fraction energy assigned correctly ####
    #FTE_co_all = FTE_func(disag_co_elec, test_elec);
    #### total disaaggregation error ####
    #Te_co_all = total_disag_err(disag_co_elec, test_elec);
    #### creating dataframe from both disaggregated and ground truth metergroups
    disag_co_elec_df = disag_co_elec.dataframe_of_meters()
    disag_co_elec_df_nona = disag_co_elec_df.dropna()
    gt_full_df = test_elec.dataframe_of_meters()
    gt_full_df_nona = gt_full_df.dropna()
    gt_df_nona = gt_full_df_nona.loc[disag_co_elec_df_nona.index]  # .ix was removed in modern pandas
    #### jaccard ####
    #Ja_co_all = jaccard_similarity(disag_co_elec_df_nona, gt_df_nona, disag_co_elec.submeters().instance(), test_elec.instance());
    #print("FTE all", FTE_co_all);
    #print("TE  all", Te_co_all);
    #print("Ja  all",  Ja_co_all);
    #### output ####
    # drop aggregated power
    disag_co_elec_submeter_df = disag_co_elec_df.drop(
        disag_co_elec_df.columns[[0]], axis=1)
    # disag_co_elec_submeter_df = disag_co_elec_df
    # drop the unwanted timestamp
    gt_df_aligned = gt_full_df.loc[disag_co_elec_submeter_df.index]
    # drop aggregated power
    gt_df_sub = gt_df_aligned.drop(gt_df_aligned.columns[[0, 1, 2]], axis=1)
    # train
    train_elec_df = train_elec.dataframe_of_meters()
    train_elec_df_aligned = train_elec_df.resample(str(period_s) +
                                                   'S').asfreq()[0:]
    train_elec_df_aligned_drop = train_elec_df_aligned.drop(
        train_elec_df_aligned.columns[[0, 1, 2]], axis=1)
    return disag_co_elec_submeter_df, gt_df_sub, co, train_elec_df_aligned_drop
Example #15
        meter_key, epochs))
    end = time.time()
    print("Train =", end - start, "seconds.")

file = open('stackTrainSetsInfo_' + meter_key, 'r')
for line in file:
    toks = line.split(',')
    StackTrain = DataSet(toks[0])
    print(toks[2], '-', toks[3])
    StackTrain.set_window(start=toks[2], end=toks[3])
    test_elec = StackTrain.buildings[int(toks[1])].elec
    test_mains = test_elec.mains()

    print("========== DISAGGREGATE (stackTrain)============")
    disag_filename = "StackTrain-h" + toks[1] + ".h5"
    output = HDFDataStore(disag_filename, 'w')
    disaggregator.disaggregate(test_mains,
                               output,
                               test_elec[meter_key],
                               sample_period=sample_period)
    output.close()

for i in test_building_list:
    test_elec = test.buildings[i].elec
    test_mains = test_elec.mains()

    print("========== DISAGGREGATE ============")
    disag_filename = "StackTest-" + str(i) + ".h5"
    output = HDFDataStore(disag_filename, 'w')
    disaggregator.disaggregate(test_mains,
                               output,
Example #16
def nilmtkDREDfunc(dataset_loc, train_start, train_end, test_start, test_end,
                   output_period):
    #### configuration ####
    period_s = output_period
    building = 1
    #### load ####
    total = DataSet(dataset_loc)
    train = DataSet(dataset_loc)
    test = DataSet(dataset_loc)
    train.set_window(start=train_start, end=train_end)
    test.set_window(start=test_start, end=test_end)
    print(train_start)
    print(train_end)
    print(test_start)
    print(test_end)
    #### get timeframe ####
    tf_total = total.buildings[building].elec.mains().get_timeframe()
    tf_train = train.buildings[building].elec.mains().get_timeframe()
    tf_test = test.buildings[building].elec.mains().get_timeframe()
    #### electrical metergroup ####
    total_elec = total.buildings[building].elec
    train_elec = train.buildings[building].elec
    test_elec = test.buildings[building].elec
    #### training process ####
    start = time.time()
    from nilmtk.disaggregate import CombinatorialOptimisation
    co = CombinatorialOptimisation()
    co.train(train_elec, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")
    #### disaggregation process ####
    start = time.time()
    disag_filename = dataset_loc + 'DREDapp.h5'
    output = HDFDataStore(disag_filename, 'w')
    co.disaggregate(test_elec.mains(), output, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")
    output.close()
    disag_co = DataSet(disag_filename)
    disag_co_elec = disag_co.buildings[building].elec
    #### creating dataframe from both disaggregated and ground truth metergroups
    disag_co_elec_df = disag_co_elec.dataframe_of_meters()
    gt_full_df = test_elec.dataframe_of_meters()
    # drop NaNs (needed for the Jaccard similarity metric)
    disag_co_elec_df_nona = disag_co_elec_df.dropna()
    gt_full_df_nona = gt_full_df.dropna()
    # drop the unwanted timestamp
    gt_df_nona = gt_full_df_nona.loc[disag_co_elec_df_nona.index]  # .ix was removed in modern pandas
    #### output ####
    # drop aggregated power from output
    disag_co_elec_submeter_df = disag_co_elec_df.drop(
        disag_co_elec_df.columns[[0]], axis=1)
    # drop the unwanted timestamp on ground truth (take the sampled timestamp)
    gt_df_aligned = gt_full_df.loc[disag_co_elec_submeter_df.index]
    # drop aggregated power from ground truth
    gt_df_sub = gt_df_aligned.drop(gt_df_aligned.columns[[0]], axis=1)
    # train data frame, resample based in disaggregation period, drop the main power
    train_elec_df = train_elec.dataframe_of_meters()
    train_elec_df_aligned = train_elec_df.resample(str(period_s) +
                                                   'S').asfreq()[0:]
    train_elec_df_aligned_drop = train_elec_df_aligned.drop(
        train_elec_df_aligned.columns[[0]], axis=1)
    return disag_co_elec_submeter_df, gt_df_sub, co, train_elec_df_aligned_drop
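
# Usage sketch: nilmtkECOfunc() and nilmtkDREDfunc() share this calling
# pattern; the dataset path and window strings are placeholders.
pred_df, gt_df, co_model, train_df = nilmtkDREDfunc(
    '../dataset/DRED.h5',
    train_start='2015-07-05', train_end='2015-07-12',
    test_start='2015-07-12', test_end='2015-07-19',
    output_period=60)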
Example #17
print(
    "Calculating ground truth===============================================")
loc.dataset.set_window(start=dataset_start_date_disag,
                       end=dataset_end_date_disag)
gt = GroundTruth(loc, co, baseline=vampire_power_in_original)
gt.generate()

time_start_disag = time.time()
print("\nTotal elapsed: %s seconds ---" % (time_start_disag - start_time))
print("Section Ground truth  : %s seconds ---\n" %
      (time_start_disag - time_start_gt))

#DISAGGREGATION===============================================================
print(
    "Disaggregating=========================================================")
output = HDFDataStore(h5_disag, 'w')
loc.dataset.set_window(start=dataset_start_date_disag,
                       end=dataset_end_date_disag)
co.disaggregate(loc.elec.mains(),
                output,
                location_data=loc,
                baseline=vampire_power_in_original,
                resample_seconds=60)
output.close()

time_start_metrics = time.time()
print("\nTotal elapsed: %s seconds ---" % (time_start_metrics - start_time))
print("Section Disaggregation: %s seconds ---\n" %
      (time_start_metrics - time_start_disag))

#METRICS=======================================================================
Example #18
def test_all(path_to_directory):
    '''
    path_to_directory: Contains the h5 files on which the tests are supposed to be run
    '''

    check_directory_exists(path_to_directory)

    files = [f for f in listdir(path_to_directory)
             if isfile(join(path_to_directory, f)) and
             '.h5' in f and '.swp' not in f]
    files.sort()

    print ("Datasets collected and sorted. Processing...")


    try:
        for i, file in enumerate(files):
            current_file=DataSet(join(path_to_directory, file))
            
            print ("Printing metadata for current file...done.")
            print_dict(current_file.metadata)
            print (" Loading file # ", i, " : ", file, ". Please wait.")
            for building_number in range(1, len(current_file.buildings)+1):
    #Examine metadata for a single house
                elec=current_file.buildings[building_number].elec
                print ("The dataset being processed is : ", elec.dataset())
                print ("Metadata for current file: ")
                print_dict(current_file.buildings[building_number].metadata)
                print ("Appliance label information: ", elec.appliance_label())
                #print (elec.appliances)
                print ("Appliances:- ")
                for i in elec.appliances:
                    print (i)

                print ("Examining sub-metered appliances...")
                
                
                print ("Collecting stats on meters...Done.")
                print (elec._collect_stats_on_all_meters)
                
                print ("Timeframe: ", elec.get_timeframe())
                
                
                
                
                print ("Available power AC types: ", elec.available_power_ac_types())
                
                print ("Clearing cache...done.")
                elec.clear_cache()
                
                print ("Testing if there are meters from multiple buildings. Result returned by method: ", elec.contains_meters_from_multiple_buildings())
                
                # TODO: Find a better way to test the correlation function
                # print ("Testing the correlation function. ", elec.correlation(elec))
                
                
                print ("List of disabled meters: ", elec.disabled_meters)
                print ("Trying to determine the dominant appliance: ")
                try:
                    elec.dominant_appliance()
                except RuntimeError:
                    print ('''More than one dominant appliance in MeterGroup! (The dominant appliance per meter should be manually specified in the metadata. If it isn't and if there are multiple appliances for a meter then NILMTK assumes all appliances on that meter are dominant. NILMTK can't automatically distinguish between multiple appliances on the same meter (at least, not without using NILM!))''')
                    pass
                print ("Dropout rate: ", elec.dropout_rate())
                try:
                    print ("Calculating energy per meter:")
                    print (elec.energy_per_meter())
                
                    print ("Calculating total entropy")
                    print (elec.entropy())
                
                    print ("Calculating entropy per meter: ")
                    print (elec.entropy_per_meter())
                except ValueError:
                    print ("ValueError: Total size of array must remain unchanged.")
                    pass
                
                print ("Calculating fraction per meter.")
                print (elec.fraction_per_meter())

#print ("Average energy per period: ", elec.average_energy_per_period())
                
                
                print ("Executing functions...")
                lis=[]
                func=""
                '''for function in dir(elec):
                    try:
                        start=time.time()
                        if ("__" not in function or "dataframe_of_meters" not in function):
                            func=getattr(elec, function)
                        print ("Currently executing ", function, ". Please wait...")
                        print (func())
                        # print ("cProfile stats - printed")
                        # cProfile.run("func")
                        end=time.time()
                        print ("Time taken for the entire process : ", (end - start))
                    except AttributeError:
                        print ("Attribute error occured. ")
                    except TypeError:
                        lis.append(function)
                        print ("Warning: TypeError")
                        pass'''
                
                print ("Plotting wiring hierarchy of meters....")
                elec.draw_wiring_graph()
                ## DISAGGREGATION STARTS HERE
                appliance_type="unknown"
    #TODO : appliance_type should cycle through all appliances and check for each of them. For this, use a list.
                selected_appliance=nilmtk.global_meter_group.select_using_appliances(type=appliance_type)
                appliance_restricted = MeterGroup(selected_appliance.meters)
                if ((appliance_restricted.proportion_of_upstream_total_per_meter()) is not None):
                    proportion_per_appliance = appliance_restricted.proportion_of_upstream_total_per_meter()


                    proportion_per_appliance.plot(kind='bar');
                    plt.title('Appliance energy as proportion of total building energy');
                    plt.ylabel('Proportion');
                    plt.xlabel('Appliance (<appliance instance>, <building instance>, <dataset name>)');
                    selected_appliance.select(building=building_number).total_energy()
                    selected_appliance.select(building=1).plot();


                    appliance_restricted = MeterGroup(selected_appliance.meters)
                    daily_energy = pd.DataFrame([meter.average_energy_per_period(offset_alias='D')
                                     for meter in appliance_restricted.meters])

                    daily_energy.plot(kind='hist');
                    plt.title('Histogram of daily energy');
                    plt.xlabel('energy (kWh)');
                    plt.ylabel('Occurrences');
                    plt.legend().set_visible(False)
                    
                    current_file.store.window=TimeFrame(start='2012-04-01 00:00:00-05:00', end='2012-04-02 00:00:00-05:00')
                    #elec.plot();

                    fraction = elec.submeters().fraction_per_meter().dropna()

                    labels = elec.get_appliance_labels(fraction.index)
                    plt.figure(figsize=(8,8))
                    fraction.plot(kind='pie', labels=labels);

                    elec.select_using_appliances(category='heating')
                    elec.select_using_appliances(category='single-phase induction motor')


                    co = CombinatorialOptimisation()
                    co.train(elec)

                    for model in co.model:
                        print_dict(model)


                    disag_filename = join(data_dir, 'ampds-disag.h5')
                    output = HDFDataStore(disag_filename, 'w')
                    co.disaggregate(elec.mains(), output)
                    output.close()

                    disag = DataSet(disag_filename)
                    disag_elec = disag.buildings[building_number].elec

                    f1 = f1_score(disag_elec, elec)
                    f1.index = disag_elec.get_appliance_labels(f1.index)
                    f1.plot(kind='bar')
                    plt.xlabel('appliance');
                    plt.ylabel('f-score');
                    disag_elec.plot()

                    disag.store.close()
    except AttributeError:
        print ("AttributeError occured while executing. This means that the value returned by  proportion_per_appliance = appliance_restricted.proportion_of_upstream_total_per_meter() is None")
        pass
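
# Usage sketch: point the smoke test at a directory of converted .h5 datasets
# (the path is a placeholder).
test_all('/data/converted_h5')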
Example #19
def runExperiment(experiment: experimentInfo, metricsResFileName,
                  clearMetricsFile):
    dsPathsList_Test = experiment.dsList
    outFileName = experiment.outName
    test_building = experiment.building
    meter_key = experiment.meter_key
    pathOrigDS = experiment.pathOrigDS
    meterTH = experiment.meterTH
    print('House ', test_building)

    # Load a "complete" dataset to have the test's timerange
    test = DataSet(dsPathsList_Test[0])
    test_elec = test.buildings[test_building].elec
    # Will be used as the reference against which all meters are aligned
    testRef_meter = test_elec.submeters()[meter_key]

    # Align every test meter with testRef_meter as master
    test_series_list = []
    for path in dsPathsList_Test:
        test = DataSet(path)
        test_elec = test.buildings[test_building].elec
        test_meter = test_elec.submeters()[meter_key]
        # print('Stack test: ', test_meter.get_timeframe().start.date(), " - ", test_meter.get_timeframe().end.date())
        aligned_meters = align_two_meters(testRef_meter, test_meter)
        test_series_list.append(aligned_meters)

    # Init vars for the output
    MIN_CHUNK_LENGTH = 300  # Depends on the basemodels of the ensemble
    timeframes = []
    building_path = '/building{}'.format(test_meter.building())
    mains_data_location = building_path + '/elec/meter1'
    data_is_available = False
    disag_filename = outFileName
    output_datastore = HDFDataStore(disag_filename, 'w')

    run = True
    chunkDataForOutput = None
    # -- Used to hold data needed for saving the results with NILMTK (e.g. timeframes).
    # -- (If chunks had different sizes (not the case in the current implementation),
    #    the chunk with the smallest window size would have to be used to cover all the data.)

    while run:
        try:
            testX = []
            columnInd = 0
            # Get Next chunk of each series
            for testXGen in test_series_list:
                chunkALL = next(testXGen)
                # 'slave' is the meter needed ('master' is only for aligning)
                chunk = chunkALL['slave']
                chunk.fillna(0, inplace=True)
                if columnInd == 0:
                    chunkDataForOutput = chunk  # Use the 1st chunk found for its metadata
                if len(testX) == 0:
                    # Initialize the array that will hold all the series as columns
                    testX = np.zeros([len(chunk), len(test_series_list)])
                testX[:, columnInd] = chunk[:]
                columnInd += 1
            testX = scaler.transform(testX)
        except StopIteration:
            run = False
            break

        if len(chunkDataForOutput) < MIN_CHUNK_LENGTH:
            continue
        # print("New sensible chunk: {}".format(len(chunk)))

        startTime = chunkDataForOutput.index[0]
        endTime = chunkDataForOutput.index[-1]
        # print('Start:',startTime,'End:',endTime)
        # Timeframe info is needed for saving the output with NILMTK
        timeframes.append(TimeFrame(startTime, endTime))
        measurement = ('power', 'active')

        pred = clf.predict(testX)
        column = pd.Series(pred, index=chunkDataForOutput.index, name=0)
        appliance_powers_dict = {}
        appliance_powers_dict[0] = column
        appliance_power = pd.DataFrame(appliance_powers_dict)
        appliance_power[appliance_power < 0] = 0

        # Append prediction to output
        data_is_available = True
        cols = pd.MultiIndex.from_tuples([measurement])
        meter_instance = test_meter.instance()
        df = pd.DataFrame(appliance_power.values,
                          index=appliance_power.index,
                          columns=cols,
                          dtype="float32")
        key = '{}/elec/meter{}'.format(building_path, meter_instance)
        output_datastore.append(key, df)

        # Append aggregate data to output
        mains_df = pd.DataFrame(chunkDataForOutput,
                                columns=cols,
                                dtype="float32")
        # Note (For later): not 100% right. Should be mains. But it won't be used anywhere, so it doesn't matter in this case
        output_datastore.append(key=mains_data_location, value=mains_df)

    # Save metadata to output
    if data_is_available:

        disagr = Disaggregator()
        disagr.MODEL_NAME = 'Stacked model'

        disagr._save_metadata_for_disaggregation(
            output_datastore=output_datastore,
            sample_period=sample_period,
            measurement=measurement,
            timeframes=timeframes,
            building=test_meter.building(),
            meters=[test_meter])

    #======================== Calculate Metrics =====================================
    testYDS = DataSet(pathOrigDS)
    testYDS.set_window(start=test_meter.get_timeframe().start.date(),
                       end=test_meter.get_timeframe().end.date())
    testY_elec = testYDS.buildings[test_building].elec
    testY_meter = testY_elec.submeters()[meter_key]
    test_mains = testY_elec.mains()

    result = DataSet(disag_filename)
    res_elec = result.buildings[test_building].elec
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                testY_meter, meterTH, meterTH)
    relError = metrics.relative_error_total_energy(res_elec[meter_key],
                                                   testY_meter)
    MAE = metrics.mean_absolute_error(res_elec[meter_key], testY_meter)
    RMSE = metrics.RMSE(res_elec[meter_key], testY_meter)
    print("============ Recall: {}".format(rpaf[0]))
    print("============ Precision: {}".format(rpaf[1]))
    print("============ Accuracy: {}".format(rpaf[2]))
    print("============ F1 Score: {}".format(rpaf[3]))
    print("============ Relative error in total energy: {}".format(relError))
    print("============ Mean absolute error(in Watts): {}".format(MAE))
    print("=== For docs: {:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.4}".format(
        rpaf[0], rpaf[1], rpaf[2], rpaf[3], relError, MAE))
    # print("============ RMSE: {}".format(RMSE))
    # print("============ TECA: {}".format(metrics.TECA([res_elec[meter_key]],[testY_meter],test_mains)))

    resDict = {
        'model': 'TEST',
        'building': test_building,
        'Appliance': meter_key,
        'Appliance_Type': 2,
        'Recall': rpaf[0],
        'Precision': rpaf[1],
        'Accuracy': rpaf[2],
        'F1': rpaf[3],
        'relError': relError,
        'MAE': MAE,
        'RMSE': RMSE
    }
    metrics.writeResultsToCSV(resDict, metricsResFileName, clearMetricsFile)
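
# Usage sketch: experimentInfo is defined elsewhere in the original script; a
# namedtuple with these field names would satisfy the attribute accesses above
# (all values here are placeholders).
from collections import namedtuple

experimentInfo = namedtuple(
    'experimentInfo',
    ['dsList', 'outName', 'building', 'meter_key', 'pathOrigDS', 'meterTH'])
exp = experimentInfo(dsList=['disag-base1.h5', 'disag-base2.h5'],
                     outName='stack-out.h5', building=1, meter_key='kettle',
                     pathOrigDS='ukdale.h5', meterTH=10)
runExperiment(exp, 'metrics.csv', clearMetricsFile=True)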
co7.train(training_set7)
print("set 7 trained")
co8.train(training_set8)
print("set 8 trained")
co9.train(training_set9)
print("set 9 trained")

print("Algorithms trained!")
'''
Create 9 output files to hold disaggregated data.

'''

print("Creating output files...")

outData1 = HDFDataStore("C:/NILM/Data/Model_Train/output1.h5", 'w')
outData2 = HDFDataStore("C:/NILM/Data/Model_Train/output2.h5", 'w')
outData3 = HDFDataStore("C:/NILM/Data/Model_Train/output3.h5", 'w')
outData4 = HDFDataStore("C:/NILM/Data/Model_Train/output4.h5", 'w')
outData5 = HDFDataStore("C:/NILM/Data/Model_Train/output5.h5", 'w')
outData6 = HDFDataStore("C:/NILM/Data/Model_Train/output6.h5", 'w')
outData7 = HDFDataStore("C:/NILM/Data/Model_Train/output7.h5", 'w')
outData8 = HDFDataStore("C:/NILM/Data/Model_Train/output8.h5", 'w')
outData9 = HDFDataStore("C:/NILM/Data/Model_Train/output9.h5", 'w')

print("output files created!")
'''
Disaggregate building 1 data using each training set
'''

print("Disaggregating building 1 mains using each trained model...")
Example #21
def plot_zoomed_new_predicted_energy_consumption():
    """
    Predicts a new short window (of the given test set).
    """
    train = DataSet('../data/ukdale.h5')
    train.clear_cache()
    train.set_window(start="13-4-2013", end="31-7-2013")
    test = DataSet('../data/ukdale.h5')
    test.clear_cache()
    test.set_window(start='16-9-2013 17:00:00', end='16-9-2013 18:00:00')

    train_building = 1
    test_building = 1
    sample_period = 6
    meter_key = 'kettle'
    learning_rate = 1e-5
    best_epoch = 140

    train_elec = train.buildings[train_building].elec
    test_elec = test.buildings[test_building].elec

    train_meter = train_elec.submeters()[meter_key]
    test_mains = test_elec.mains()

    results_dir = '../results/UKDALE-RNN-lr=1e-05-2018-02-16-18-52-34'
    train_logfile = os.path.join(results_dir, 'training.log')
    val_logfile = os.path.join(results_dir, 'validation.log')
    rnn = RNNDisaggregator(train_logfile,
                           val_logfile,
                           learning_rate,
                           init=False)

    model = 'UKDALE-RNN-kettle-{}epochs.h5'.format(best_epoch)
    rnn.import_model(os.path.join(results_dir, model))
    disag_filename = 'disag-out-{}epochs.h5'.format(best_epoch)
    output = HDFDataStore(os.path.join(results_dir, disag_filename), 'w')
    results_file = os.path.join(results_dir,
                                'results-{}epochs.txt'.format(best_epoch))
    rnn.disaggregate(test_mains,
                     output,
                     results_file,
                     train_meter,
                     sample_period=sample_period)
    os.remove(results_file)
    output.close()

    # get predicted curve for the best epoch
    result = DataSet(os.path.join(results_dir, disag_filename))
    res_elec = result.buildings[test_building].elec
    os.remove(os.path.join(results_dir, disag_filename))
    predicted = res_elec[meter_key]
    predicted = predicted.power_series(sample_period=sample_period)
    predicted = next(predicted)
    predicted.fillna(0, inplace=True)
    y1 = np.array(predicted)  # power
    x1 = np.arange(y1.shape[0])  # timestamps

    ground_truth = test_elec[meter_key]
    ground_truth = ground_truth.power_series(sample_period=sample_period)
    ground_truth = next(ground_truth)
    ground_truth.fillna(0, inplace=True)
    y2 = np.array(ground_truth)  # power
    x2 = np.arange(y2.shape[0])  # timestamps

    fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True, sharey=True)
    ax1.plot(x1, y1, color='r', label='predicted')
    ax1.plot(x2, y2, color='b', label='ground truth')
    ax2.plot(x1, y1, color='r')
    ax3.plot(x2, y2, color='b')
    ax1.set_title('Appliance: {}'.format(meter_key))
    fig.legend()
    fig.savefig(
        os.path.join(results_dir, 'zoomed_new_predicted_vs_ground_truth.png'))
Example #22
0
class REDD_Data(object):
    '''
	REDD_Data Class is an object designed to abstract the lower level commands of
	the NILMTK software package, with focus on the use of REDD DataSet. Function is 
	designed to allow rapid experimentation and disaggregation compared to attempting 
	to set package up from scratch.

	This class requires the following for proper usage:
	- NILMTK package: https://github.com/nilmtk
	- REDD Dataset (converted to .h5): redd.csail.mit.edu
	- Various dependancies (that NILMTK also requires), most can be downloaded through
	  Anaconda: continuum.io/downloads


	Parameters
	-----------
	in_filepath:		Filepath of converted REDD dataset (in .h5 format)
	out_filepath:		filepath to place output disaggregation dataset (in .h5 format)

	Attributes
	-----------
	km: Key_Map Object
		initializes the key_map object which will allow for the mapping of a meters
		appliance name to its specific .H5 key.

	dataStore: NILMTK HDFDataStore Object
		the HDFDataStore that will contain the converted REDD DataSet.

	dataSet: NILMTK DataSet Object
		the DataSet object that is generated from the REDD DataStore (self.dataStore)		

	outDataStore: NILMTK HDFDataStore Object
		the HDFDataStore that will contain the disaggregated dataset.

	co: NILMTK CombinatorialOptimisation object
		the disaggregation model object that will be trained and will disaggregate the 
		working dataset

	train_group: NILMTK MeterGroup object
		the MeterGroup object that is used to train the disaggregation model (self.co)

	'''
    def __init__(self, in_filepath, out_filepath):
        print("Loading DataStore and Generating Dataset...")
        self.km = {}
        self.dataStore = HDFDataStore(in_filepath)
        self.dataSet = DataSet()
        self.dataSet.load(self.dataStore)
        self.outDataStore = HDFDataStore(out_filepath, 'w')
        self.co = CombinatorialOptimisation()
        self.train_group = {}
        print("Data Properly Loaded!")

    def train_disag_model(self, building_inst, use_topk=False, k=5):
        '''
		Function trains the disaggregation model using a selected MeterGroup.

		Parameters
		-----------

		building_inst: 	the instance # of the building that you wish to grab the 
					   	training group from.

		use_topk:		true if you wish to only grab the top k most energy intensive
						appliance to train the model, false if you wish to use all
						appliances.

		k:				the # of appliances you wish to use (if use_topk = True)

		'''

        print("Training CO Disaggregation Model using given metergroup...")

        if (building_inst <= 6) & (building_inst > 0):
            #Select appropiate meter group to train with
            if use_topk == True:
                self.train_group = self.dataSet.buildings[
                    building_inst].elec.select_top_k(k)
            else:
                self.train_group = self.dataSet.buildings[building_inst].elec

            self.co.train(self.train_group)
            print("CO Disaggreation Model Sucessfully Trained!")

        else:
            print("Error: Please select a building_inst of 1-6.")
            print("Model unsucessfully trained.")

    def load_disag_model(self, filepath):
        '''
		Function loads the disaggregation model to a file.

		Parameters
		-----------

		filepath:	exact filepath of the model file.

		'''
        print("Loading CO Disaggreation Model...")
        self.co.import_model(filepath)
        print("Model Sucessfully Loaded!")

    def save_disag_model(self, filepath):
        '''
		Function saves the disaggregation model to a file.

		Parameters
		-----------

		filepath:	exact filepath of the model file.

		'''
        print("Saving CO Disaggregation Model...")
        self.co.export_model(filepath)
        print("Model Sucessfully Saved!")

    def disaggregate(self, building_inst):
        '''
		Function will disaggregate the mains MeterGroup of the passed building 
		instance, and save this to the self.outDataStore object.

		Parameters
		-----------

		building_inst:	instance # of the building mains you wish to disaggregate.

		'''
        print("Disaggregating Building Mains...")
        self.co.disaggregate(
            self.dataSet.buildings[building_inst].elec.mains(),
            self.outDataStore)
        print("Mains sucessfully disaggregated!")

    def close(self):
        '''
		Function closes all open DataStore's being used by the program.

		'''
        print("Closing DataStores...")
        self.dataStore.close()
        self.outDataStore.close()
        print("Output DataStores Sucessfully Closed")

    '''
	All Plot Functions below are a WORK IN PROGRESS!-----------------------------------
	Documentation will be provided upon completion.------------------------------------

	'''

    def plot_disag_apl(self, inst, appliance, t1="", t2=""):
        self.km = Key_Map(inst)
        plot_series(
            self.outDataStore.store.get(self.km.get_key(appliance))[t1:t2])
        plt.title("Disaggregated " + appliance.capitalize() + " Energy")
        plt.show()

    def show_plots(self):
        plt.show()

    def building_plot_all(self, building_inst, t1, t2):
        self.dataSet.buildings[building_inst].elec.plot(t1, t2)
        plt.title("Building " + str(building_inst) + " Energy per Appliance")
        plt.ylabel('Power [W]')
        plt.xlabel('Hour')

    def plot_redd_mains_data(self, inst=1, t1="", t2=""):
        self.km = Key_Map(inst)
        series1 = self.dataStore.store.get(self.km.get_key('mains1'))[t1:t2]
        series2 = self.dataStore.store.get(self.km.get_key('mains2'))[t1:t2]
        plot_series(series1 + series2)
        plt.title("Building " + str(inst) + " Mains Energy")
        plt.show()
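
A minimal driver for the wrapper class above might look like the sketch
below. The class name CODisaggregator and every file path are hypothetical
stand-ins (the class's actual name does not appear in this excerpt):

#hypothetical usage sketch; class name and paths are assumptions
wrapper = CODisaggregator("C:/NILM/Data_Sets/redd_data.h5",
                          "C:/NILM/Data_Sets/redd_output.h5")
wrapper.train_disag_model(building_inst=1, use_topk=True, k=5)
wrapper.save_disag_model("C:/NILM/Models/co_model.h5")
wrapper.disaggregate(building_inst=1)
wrapper.plot_disag_apl(1, 'fridge', t1="2011-05-01 06:00",
                       t2="2011-05-01 18:00")
wrapper.close()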
Example #23
#same day but a smaller window for more refined data plots
t1 = "2011-05-01 06:00"
t2 = "2011-05-01 07:00"

#initialize key map for building 1 (the class is spelled Key_Map elsewhere in
#these examples and takes the building instance, so the original Key_map() is
#assumed to be a typo)
kmap = Key_Map(1)

#set the disaggregated plot here
disag_apl = 'fridge'
disag_key = kmap.get_key(disag_apl)

#First we must load in the converted REDD dataset
print("Loading DataSet...")

#declare datastore and load the converted HDF file that stores the REDD data
r_datastore = HDFDataStore("C:/NILM/Data_Sets/redd_data.h5")

#declare a dataset object to work with and load the datastore into it
r_dataset = DataSet()
r_dataset.load(r_datastore)

#report successful loading of the data to the user
print("DataSet Successfully Loaded!")

#now we take the data and eliminate all sections with no samples
print("Conditioning Data... \n")

#load the metergroup from building one (house1 in REDD)
r_elec = r_dataset.buildings[1].elec

Example #24
# See: https://github.com/nilmtk/nilmtk/issues/376

data_dir = '/data/REDD'
building_number = 3
disag_filename = join(data_dir, 'disag-fhmm' + str(building_number) + '.h5')

data = DataSet(join(data_dir, 'redd.h5'))
print("Loading building " + str(building_number))
elec = data.buildings[building_number].elec

top_train_elec = elec.submeters().select_top_k(k=5)
fhmm = fhmm_exact.FHMM()
fhmm.train(top_train_elec)

output = HDFDataStore(disag_filename, 'w')
fhmm.disaggregate(elec.mains(), output)
output.close()

### f1score fhmm
disag = DataSet(disag_filename)
disag_elec = disag.buildings[building_number].elec

f1 = f1_score(disag_elec, elec)
f1.index = disag_elec.get_labels(f1.index)
f1.plot(kind='barh')
plt.ylabel('appliance')
plt.xlabel('f-score')
plt.title("FHMM")
plt.savefig(join(data_dir, 'f1-fhmm' + str(building_number) + '.png'))
disag.store.close()
Example #25
if km.is_in_map(disag_appliance) == False:
    sys.exit(
        "An incorrect appliance name has been entered. Please ensure the entered name is exactly correct."
    )

redd_data = DataSet("C:/NILM/Data/REDD/redd.h5")

# load mains of the building
building_mains = redd_data.buildings[redd_building].elec.mains()

#train disaggregation set
co = CombinatorialOptimisation()
training_set = redd_data.buildings[redd_building].elec
co.train(training_set)

#set output datastore
outputData = HDFDataStore("C:/NILM/Data/Output/output.h5", 'w')

#disaggregate
co.disaggregate(building_mains, outputData)

# to add (see the sketch after this example):
#   1) get the meter instance number of the appliance selected
#   2) export that meter's series from the output datastore to a database
#      using SQL, within the t1-t2 parameters*
#
#   *Cannot be implemented until a database is set up in the environment

#Close open datastores
redd_data.store.close()
outputData.store.close()
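
The export step described in the notes above could use pandas' to_sql once a
database exists. The sketch below assumes a local SQLite file and a
hypothetical table name, reuses km, disag_appliance and the t1/t2 bounds from
the neighbouring examples, and would need to run before the datastores are
closed:

#hedged sketch of the planned SQL export (database and table names assumed)
import sqlite3

df = outputData.store[km.get_key(disag_appliance)][t1:t2]
#flatten nilmtk's usual ('power', 'active') MultiIndex columns for SQL
df.columns = ['_'.join(col) for col in df.columns]
with sqlite3.connect("C:/NILM/Data/Output/nilm.db") as conn:
    df.to_sql("appliance_power", conn, if_exists="replace")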
Example #26
print(middleTimeStr)

train.set_window(end=middleTimeStr)
test.set_window(start=middleTimeStr)

train_elec = train.buildings[building_number].elec
test_elec = test.buildings[building_number].elec

top_train_elec = train_elec.submeters().select_top_k(k=5)

fhmm = fhmm_exact.FHMM()  # TODO (mk): change this back to the default later
fhmm.train(top_train_elec, sample_period=60, resample=True)

outputAddress = "/nilmtk/data/iawe_449_3.h5"
output = HDFDataStore(outputAddress, 'w')
fhmm.disaggregate(test_elec.mains(), output, sample_period=60, resample=True)
output.close()

disag = DataSet(outputAddress) #load FHMM prediction
disag_elec = disag.buildings[building_number].elec
#disag_elec.plot() # plot all disaggregated data
f1 = f1_score(disag_elec, test_elec)
f1.index = disag_elec.get_labels(f1.index)
f1.plot(kind='barh')

disag.store.window = TimeFrame(start='2013-07-10 18:00:00-05:00', end='2013-07-17 04:00:00-05:00')
disag.buildings[building_number].elec.plot() # plot all disaggregated data


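
One way middleTimeStr might be derived (not shown in this excerpt): take the
midpoint of the recorded timeframe of the building's mains. A sketch, where
dataset_path is a hypothetical name for the file the train/test DataSets were
loaded from:

#sketch: choose the split point at the midpoint of the recorded data
full = DataSet(dataset_path)
tf = full.buildings[building_number].elec.mains().get_timeframe()
middleTimeStr = str(tf.start + (tf.end - tf.start) / 2)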
Example #27
# verify a real appliance has been entered
#if km.is_in_map(disag_appliance) == False:
#	sys.exit("An incorrect appliance name has been entered. Please ensure the entered name is exactly correct.")

redd_data = DataSet(redd_fp)

# load mains of the building
building_mains = redd_data.buildings[1].elec.mains()

#train disaggregation set
co = CombinatorialOptimisation()
training_set = redd_data.buildings[1].elec.select_top_k(15)
co.train(training_set)

#set output datastore
outputData = HDFDataStore(output_fp, 'w')

#disaggregate
co.disaggregate(building_mains, outputData)

print(outputData.store.keys())

#set sub-datastore for CSV outputs
output_csv_store = outputData.store[km.get_key(disag_appliance)]

mains1 = redd_data.store[km.get_key("mains1")]
mains2 = redd_data.store[km.get_key("mains2")]

mains1 = mains1.fillna(value=0)
#newer pandas returns a Resampler object here, so an explicit aggregation is
#required; .mean() reproduces the old default behaviour
mains1 = mains1.resample("1min").mean()
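
The conditioned series would presumably then be written out for the CSV step
the comments mention. A sketch with hypothetical output paths, assuming t1/t2
window bounds as defined in the neighbouring examples:

#sketch of the CSV export implied by the comments above (paths hypothetical)
mains1.to_csv("C:/NILM/Data/Output/mains1_1min.csv")
output_csv_store[t1:t2].fillna(0).to_csv(
    "C:/NILM/Data/Output/" + disag_appliance + "_disag.csv")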
Example #28
#opening condition restored to match the identical check in Example #25
if km.is_in_map(disag_appliance) == False:
    sys.exit(
        "An incorrect appliance name has been entered. Please ensure the entered name is exactly correct."
    )

redd_data = DataSet("/home/mike/workspace/data/redd_data.h5")

# load mains of the building
building_mains = redd_data.buildings[1].elec.mains()

#train disaggregation set
co = CombinatorialOptimisation()
training_set = redd_data.buildings[1].elec
co.train(training_set)

#set output datastore
outputData = HDFDataStore("/home/mike/workspace/data/redd_output.h5", 'w')

#disaggregate
co.disaggregate(building_mains, outputData)

#set sub-datastore for CSV output
output_csv_store = outputData.store[km.get_key(disag_appliance)]

#set date parameters
output_csv_store = output_csv_store[t1:t2]

#fill NA values with 0 for graphing
output_csv_store = output_csv_store.fillna(value=0)

#metrics processing ------------------------------------------------------------
#create dict to hold energy metrics (a sketch follows after this example)
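
A sketch of the energy-metrics dict hinted at above, computed from the
conditioned output series (assumes the stored frame has a single power
column; the key names are illustrative):

#energy metrics sketch (key names hypothetical)
energy_metrics = {
    'mean_power_w': float(output_csv_store.mean().iloc[0]),
    'peak_power_w': float(output_csv_store.max().iloc[0]),
    'sample_count': int(output_csv_store.count().iloc[0]),
}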
Example #29
def plot_prediction_over_epochs_ploty():
    """
    Predicts the power demand of the target appliance using the intermediate models which are exported during training.
    Plots the prediction curves using plotly.
    """
    train = DataSet('../data/ukdale.h5')
    train.clear_cache()
    train.set_window(start="13-4-2013", end="31-7-2013")
    test = DataSet('../data/ukdale.h5')
    test.clear_cache()
    test.set_window(start="23-7-2014 10:00:00", end="23-7-2014 11:00:00")

    train_building = 1
    test_building = 5
    sample_period = 6
    meter_key = 'kettle'
    learning_rate = 1e-5

    train_elec = train.buildings[train_building].elec
    test_elec = test.buildings[test_building].elec

    train_meter = train_elec.submeters()[meter_key]
    test_mains = test_elec.mains()

    results_dir = '../results/UKDALE-ACROSS-BUILDINGS-RNN-lr=1e-05-2018-02-03-11-48-12'
    train_logfile = os.path.join(results_dir, 'training.log')
    val_logfile = os.path.join(results_dir, 'validation.log')
    rnn = RNNDisaggregator(train_logfile,
                           val_logfile,
                           learning_rate,
                           init=False)

    data = []

    for i in range(10, 401, 10):
        # disaggregate model
        model = 'UKDALE-RNN-kettle-{}epochs.h5'.format(i)
        rnn.import_model(os.path.join(results_dir, model))
        disag_filename = 'disag-out-{}epochs.h5'.format(i)
        output = HDFDataStore(os.path.join(results_dir, disag_filename), 'w')
        results_file = os.path.join(results_dir,
                                    'results-{}epochs.txt'.format(i))
        rnn.disaggregate(test_mains,
                         output,
                         results_file,
                         train_meter,
                         sample_period=sample_period)
        os.remove(results_file)
        output.close()

        # plot predicted curve for epoch=i
        result = DataSet(os.path.join(results_dir, disag_filename))
        res_elec = result.buildings[test_building].elec
        os.remove(os.path.join(results_dir, disag_filename))
        predicted = res_elec[meter_key]
        predicted = predicted.power_series(sample_period=sample_period)
        predicted = next(predicted)
        predicted.fillna(0, inplace=True)
        power = predicted.tolist()
        length = len(power)
        timestamps = list(range(length))

        x = []
        y = []
        z = []
        ci = int(255 / 420 * i)  # ci = "color index"
        for j in range(length):
            x.append([timestamps[j], timestamps[j]])  # timestamps
            y.append([i, i + 5])  # epochs
            z.append([power[j], power[j]])  # power
        data.append(
            dict(
                z=z,
                x=x,
                y=y,
                colorscale=[[i, 'rgb(%d,%d,255)' % (ci, ci)]
                            for i in np.arange(0, 1.1, 0.1)],
                showscale=False,
                type='surface',
            ))

    # plot ground truth curve as the last curve
    ground_truth = test_elec[meter_key]
    ground_truth = ground_truth.power_series(sample_period=sample_period)
    ground_truth = next(ground_truth)
    ground_truth.fillna(0, inplace=True)
    power = ground_truth.tolist()
    length = len(power)
    timestamps = list(range(length))

    i = 410
    x = []
    y = []
    z = []
    ci = int(255 / 410 * i)  # ci = "color index"
    for j in range(length):
        x.append([timestamps[j], timestamps[j]])  # timestamps
        y.append([i, i + 5])  # epochs
        z.append([power[j], power[j]])  # power
    data.append(
        dict(
            z=z,
            x=x,
            y=y,
            colorscale=[[i, 'rgb(%d,%d,255)' % (ci, ci)]
                        for i in np.arange(0, 1.1, 0.1)],
            showscale=False,
            type='surface',
        ))

    layout = dict(title='prediction over epochs',
                  showlegend=False,
                  scene=dict(xaxis=dict(title='timestamps'),
                             yaxis=dict(title='epochs'),
                             zaxis=dict(title='power'),
                             camera=dict(eye=dict(x=-1.7, y=-1.7, z=0.5))))

    fig = dict(data=data, layout=layout)
    plotly.offline.plot(fig, filename='filled-3d-lines')
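
Calling the function renders the per-epoch prediction surfaces plus the
ground-truth curve into a single offline plotly figure (filename
'filled-3d-lines'); the intermediate model files under results_dir are
assumed to exist:

plot_prediction_over_epochs_ploty()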
Example #30
def fhmm(dataset_path, train_building, train_start, train_end, val_building,
         val_start, val_end, test_building, test_start, test_end, meter_key,
         sample_period):

    # Start tracking time
    start = time.time()

    # Prepare dataset and options
    # print("========== OPEN DATASETS ============")
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    appliances = [meter_key]
    selected_meters = [train_elec[app] for app in appliances]
    selected_meters.append(train_elec.mains())
    selected = MeterGroup(selected_meters)

    fhmm = FHMM()

    # print("========== TRAIN ============")
    fhmm.train(selected, sample_period=sample_period)

    # print("========== DISAGGREGATE ============")
    # Validation
    val_disag_filename = 'disag-out-val.h5'
    output = HDFDataStore(val_disag_filename, 'w')
    fhmm.disaggregate(val_elec.mains(), output_datastore=output)
    output.close()
    # Test
    test_disag_filename = 'disag-out-test.h5'
    output = HDFDataStore(test_disag_filename, 'w')
    fhmm.disaggregate(test_elec.mains(), output_datastore=output)
    output.close()

    # print("========== RESULTS ============")
    # Validation
    result_val = DataSet(val_disag_filename)
    res_elec_val = result_val.buildings[val_building].elec
    rpaf_val = metrics.recall_precision_accuracy_f1(res_elec_val[meter_key],
                                                    val_elec[meter_key])

    val_metrics_results_dict = {
        'recall_score': rpaf_val[0],
        'precision_score': rpaf_val[1],
        'accuracy_score': rpaf_val[2],
        'f1_score': rpaf_val[3],
        'mean_absolute_error': metrics.mean_absolute_error(
            res_elec_val[meter_key], val_elec[meter_key]),
        'mean_squared_error': metrics.mean_square_error(
            res_elec_val[meter_key], val_elec[meter_key]),
        'relative_error_in_total_energy': metrics.relative_error_total_energy(
            res_elec_val[meter_key], val_elec[meter_key]),
        'nad': metrics.nad(res_elec_val[meter_key], val_elec[meter_key]),
        'disaggregation_accuracy': metrics.disaggregation_accuracy(
            res_elec_val[meter_key], val_elec[meter_key])
    }
    # Test
    result = DataSet(test_disag_filename)
    res_elec = result.buildings[test_building].elec
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                test_elec[meter_key])

    test_metrics_results_dict = {
        'recall_score': rpaf[0],
        'precision_score': rpaf[1],
        'accuracy_score': rpaf[2],
        'f1_score': rpaf[3],
        'mean_absolute_error': metrics.mean_absolute_error(
            res_elec[meter_key], test_elec[meter_key]),
        'mean_squared_error': metrics.mean_square_error(
            res_elec[meter_key], test_elec[meter_key]),
        'relative_error_in_total_energy': metrics.relative_error_total_energy(
            res_elec[meter_key], test_elec[meter_key]),
        'nad': metrics.nad(res_elec[meter_key], test_elec[meter_key]),
        'disaggregation_accuracy': metrics.disaggregation_accuracy(
            res_elec[meter_key], test_elec[meter_key])
    }

    # end tracking time
    end = time.time()

    time_taken = end - start  # in seconds

    # model_result_data = {
    #     'algorithm_name': 'FHMM',
    #     'datapath': dataset_path,
    #     'train_building': train_building,
    #     'train_start': str(train_start.date()) if train_start != None else None ,
    #     'train_end': str(train_end.date()) if train_end != None else None ,
    #     'test_building': test_building,
    #     'test_start': str(test_start.date()) if test_start != None else None ,
    #     'test_end': str(test_end.date()) if test_end != None else None ,
    #     'appliance': meter_key,
    #     'sampling_rate': sample_period,
    #
    #     'algorithm_info': {
    #         'options': {
    #             'epochs': None
    #         },
    #         'hyperparameters': {
    #             'sequence_length': None,
    #             'min_sample_split': None,
    #             'num_layers': None
    #         },
    #         'profile': {
    #             'parameters': None
    #         }
    #     },
    #
    #     'metrics':  metrics_results_dict,
    #
    #     'time_taken': format(time_taken, '.2f'),
    # }

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': None,
    }

    # Close the disaggregation output files
    result.store.close()
    result_val.store.close()

    # Close Dataset files
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data
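
A hypothetical invocation of the benchmark above; the dataset path, building
numbers, date windows, and appliance are illustrative assumptions:

results = fhmm('../data/redd.h5',
               train_building=1, train_start='2011-04-19', train_end='2011-05-01',
               val_building=1, val_start='2011-05-01', val_end='2011-05-08',
               test_building=1, test_start='2011-05-08', test_end='2011-05-15',
               meter_key='fridge', sample_period=60)
print(results['test_metrics']['f1_score'], results['time_taken'])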
Example #31
validation = pd.read_csv(val_logfile)
#DataFrame.as_matrix() was removed in newer pandas; .values is the
#long-supported equivalent
epochs = np.array(validation.values[:, 0], dtype='int')
loss = np.array(validation.values[:, 1], dtype='float32')
argmin = np.argmin(loss)
best_epoch = epochs[argmin] + 1  # +1: log epochs are presumably 0-based, model filenames 1-based
rnn.import_model(
    os.path.join(results_dir,
                 "UKDALE-RNN-{}-{}epochs.h5".format(meter_key, best_epoch)))
test_loss = rnn.evaluate(test_mains, test_meter, sample_period=sample_period)
line = 'Test loss: {}'.format(test_loss)
with open(results_file, "a") as text_file:
    text_file.write(line + '\n')
print(line)

disag_filename = 'disag-out.h5'
output = HDFDataStore(os.path.join(results_dir, disag_filename), 'w')
rnn.disaggregate(test_mains,
                 output,
                 results_file,
                 train_meterlist[0],
                 sample_period=sample_period)
output.close()

print("========== PLOTS ============")
# plot train, validation and test loss
plot_loss(train_logfile, val_logfile, results_dir, best_epoch, test_loss)

# plot predicted energy consumption
result = DataSet(os.path.join(results_dir, disag_filename))
res_elec = result.buildings[test_building].elec
predicted = res_elec[meter_key]