def get_df_per_day(path_h5: str) -> pd.DataFrame:
    '''
    Given the path to the AMPds HDF5 file, return a DataFrame with the data
    split per day and per meter.

    Parameters
    ----------
    path_h5 : str
        Path to the AMPds .h5 file in NILMTK format. Building 1 is used.

    Returns
    -------
    pd.DataFrame
        Columns form a MultiIndex of (day, meter name); each column holds the
        samples of one meter for one day.
    '''
    ds = nilmtk.DataSet(path_h5)
    data_meters = list(map(get_array_and_label,
                           ds.buildings[1].elec.all_meters()))

    # Build the MultiIndex DataFrame described in the docstring.
    n_days = data_meters[0][1].shape[1]              # number of days
    days = pd.RangeIndex(0, n_days, name='sample')   # one index level
    names = list(map(lambda x: x[0], data_meters))   # the other level: meter name
    iterables = [names, days]
    columns = pd.MultiIndex.from_product(iterables, names=['name', 'day'])
    data = list(map(lambda x: x[1], data_meters))
    data = np.hstack(data)
    df = pd.DataFrame(data, columns=columns)
    df = df.swaplevel(axis=1).sort_index(axis=1)
    return df
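# Hypothetical usage sketch for get_df_per_day(); the file name and the day
# index below are placeholders, and the available meter names depend on what
# get_array_and_label() returns for each meter.
df = get_df_per_day('AMPds2.h5')
print(df.columns.names)   # ['day', 'name'] after the swaplevel above
day0 = df[0]              # every meter's samples for the first day
print(day0.describe())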
def __init__(self, activations, target_appliance, appliances, seq_length, filename, windows, sample_period, target_inclusion_prob=0.5, uniform_prob_of_selecting_each_building=True, allow_incomplete_target=True, include_incomplete_target_in_output=True, allow_multiple_target_activations_in_aggregate=False, include_multiple_targets_in_output=False, rng_seed=None): self.activations = deepcopy(activations) self.target_appliance = target_appliance self.appliances = appliances self.seq_length = seq_length self.filename = filename self.dataset = nilmtk.DataSet(self.filename) check_windows(windows) self.windows = windows self.sample_period = sample_period self.target_inclusion_prob = target_inclusion_prob self.uniform_prob_of_selecting_each_building = ( uniform_prob_of_selecting_each_building) self.allow_incomplete_target = allow_incomplete_target self.include_incomplete_target_in_output = ( include_incomplete_target_in_output) self.allow_multiple_target_activations_in_aggregate = ( allow_multiple_target_activations_in_aggregate) self.include_multiple_targets_in_output = ( include_multiple_targets_in_output) super(RealAggregateSource, self).__init__(rng_seed=rng_seed) self._load_mains_into_memory() self._remove_activations_with_no_mains() self._find_sections_with_no_target() self._compute_gap_probabilities()
def load_csvdata(self, data_path, numApp, typeLoad=0, num_sequences_per_batch=0,
                 target_inclusion_prob=0.5):
    '''
    Parameters:
        data_path
        numApp   indicates whether all the appliances should be read or just one of them
        typeLoad: 0 usual split in time (training | validation | test)
                  1 Kelly's load
                  2 combination with our own min ON
    Returns:
        totalX, totalY: two dictionaries with the split of the X and Y in
        training, validation and testing
    '''
    nilmkt_fileName = os.path.join(data_path, "redd.h5")
    if typeLoad == 1 and num_sequences_per_batch == 0:
        print("Need to provide number of sequences per batch with Kelly sampling")
        return
    if typeLoad == 1:
        numBatches = 1  # keep at 1 for now (1 batch with as many sequences as possible)
        assert self.windows['train'].keys() == self.windows['test'].keys()
        assert self.windows['val'].keys() == self.windows['test'].keys()
        for building_i, window in self.windows['train'].items():
            # Reformat windows to work with Kelly's code
            # WINDOWS = {'train': {1: ("2013-02-01", "2013-11-30")}}
            # kellyWindow = {'train': {building_i: (window[0], window[1])}}
            totalX, totalY, stdIn, stdTar = kgd.getNILMbatches(
                self.sample_period, nilmkt_fileName, target_inclusion_prob,
                self.windows, self.listAppliances, self.pTrain, self.pVal,
                self.pTest, num_sequences_per_batch, self.time_steps, numApp)
            print(totalX['train'].shape, totalX['val'].shape, totalX['test'].shape,
                  totalY['train'].shape, totalY['val'].shape, totalY['test'].shape)
            return totalX, totalY
    else:  # 0 or 2
        print("\tTypeLoad is 0 or 2")
        lenApps = len(self.listAppliances)
        shapeY = [0, self.time_steps, lenApps]  # (batch, seqLen, apps)
        dataset = nilmtk.DataSet(nilmkt_fileName)
        if numApp != -1:
            lenApps = 1
            shapeY = [0, self.time_steps]
        totalX = {'train': np.empty([0, self.time_steps]),
                  'val': np.empty([0, self.time_steps]),
                  'test': np.empty([0, self.time_steps])}
        totalY = {'train': np.empty(shapeY),
                  'val': np.empty(shapeY),
                  'test': np.empty(shapeY)}
        for building_i, window in self.windows.items():
            dataBuild = self.all_building_data(dataset, building_i, window)
            allSetsBuild = self.prepare_data(dataBuild, numApp, building_i, typeLoad)
            totalX['train'] = np.concatenate((totalX['train'], allSetsBuild[0]), axis=0)
            totalX['val'] = np.concatenate((totalX['val'], allSetsBuild[1]), axis=0)
            totalX['test'] = np.concatenate((totalX['test'], allSetsBuild[2]), axis=0)
            totalY['train'] = np.concatenate((totalY['train'], allSetsBuild[3]), axis=0)
            totalY['val'] = np.concatenate((totalY['val'], allSetsBuild[4]), axis=0)
            totalY['test'] = np.concatenate((totalY['test'], allSetsBuild[5]), axis=0)
        print(totalX['train'].shape, totalX['val'].shape, totalX['test'].shape,
              totalY['train'].shape, totalY['val'].shape, totalY['test'].shape)
        return totalX, totalY
def get_nilmtk_meters():
    HOUSE_1_APPLIANCES = [
        'fridge freezer',
        'washer dryer',
        'kettle',
        'dish washer',
        'microwave'
    ]
    ukdale = nilmtk.DataSet('/data/mine/vadeec/merged/ukdale.h5')
    ukdale.set_window("2013-04-12", "2013-05-12")
    elec = ukdale.buildings[1].elec
    meters = []
    for appliance in HOUSE_1_APPLIANCES:
        meter = elec[appliance]
        meters.append(meter)
    meters = nilmtk.MeterGroup(meters)
    return meters
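# Brief usage sketch for get_nilmtk_meters(); the sample period and the
# printed fields are assumptions, but the calls are standard NILMTK
# MeterGroup / ElecMeter methods already used elsewhere in this code.
meters = get_nilmtk_meters()
for meter in meters.meters:
    print(meter.appliances)  # appliance metadata for each submetered channel
    series = meter.power_series_all_data(sample_period=6)
    if series is not None:
        print(series.index[0], series.index[-1], len(series))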
def _load_data_into_memory(self):
    logger.info("Loading NILMTK data...")

    # Load dataset
    dataset = nilmtk.DataSet(self.filename)

    for fold, buildings_and_windows in self.windows.items():
        for building_i, window in buildings_and_windows.items():
            dataset.set_window(*window)
            elec = dataset.buildings[building_i].elec
            building_name = (dataset.metadata['name'] +
                             '_building_{}'.format(building_i))

            # Mains
            logger.info("Loading data for {}...".format(building_name))
            mains_meter = elec.mains()
            mains_good_sections = mains_meter.good_sections()
            appliance_meter = elec[self.target_appliance]
            good_sections = appliance_meter.good_sections(
                sections=mains_good_sections)

            def load_data(meter):
                return meter.power_series_all_data(
                    sample_period=self.sample_period).astype(
                        np.float32).dropna()
                # Alternative: also pass sections=good_sections to
                # power_series_all_data() above.

            mains_data = load_data(mains_meter)
            appliance_data = load_data(appliance_meter)
            df = pd.DataFrame(
                {'mains': mains_data, 'target': appliance_data},
                dtype=np.float32).dropna()
            del mains_data
            del appliance_data

            if not df.empty:
                self.data.setdefault(fold, {})[building_name] = df
                logger.info("Loaded data from building {} for fold {}"
                            " from {} to {}.".format(
                                building_name, fold, df.index[0], df.index[-1]))

    dataset.store.close()
    logger.info("Done loading NILMTK data.")
def _load_mains_into_memory(self):
    logger.info("Loading NILMTK mains...")

    # Load dataset
    dataset = nilmtk.DataSet(self.filename)
    self.mains = {}
    self.mains_good_sections = {}
    for fold, buildings_and_windows in self.windows.items():
        for building_i, window in buildings_and_windows.items():
            dataset.set_window(*window)
            elec = dataset.buildings[building_i].elec
            building_name = (dataset.metadata['name'] +
                             '_building_{}'.format(building_i))
            logger.info("Loading mains for {}...".format(building_name))

            mains_meter = elec.mains()
            good_sections = mains_meter.good_sections()
            mains_data = mains_meter.power_series_all_data(
                sample_period=self.sample_period,
                sections=good_sections).dropna()

            def set_mains_data(dictionary, data):
                dictionary.setdefault(fold, {})[building_name] = data

            if not mains_data.empty:
                set_mains_data(self.mains, mains_data)
                set_mains_data(self.mains_good_sections, good_sections)
                logger.info("Loaded mains data from building {} for fold {}"
                            " from {} to {}."
                            .format(building_name, fold,
                                    mains_data.index[0], mains_data.index[-1]))

    dataset.store.close()
    logger.info("Done loading NILMTK mains data.")
def _load_mains_into_memory(self):
    logger.info("Loading NILMTK mains...")

    # Load dataset
    dataset = nilmtk.DataSet(self.filename, self.format)
    self.mains = {}
    self.mains_good_sections = {}
    self.target = {}
    for fold in self.fold:
        window = self.windows[fold][self.building_id]
        dataset.set_window(*window)
        elec = dataset.buildings[self.building_id].elec
        self.building_name = (dataset.metadata['name'] +
                              '_building_{}'.format(self.building_id))
        logger.info("Loading mains for {}...".format(self.building_name))

        mains_meter = elec.mains()
        mains_data = mains_meter.power_series_all_data(
            sample_period=self.sample_period)

        # np.array() with no argument raises a TypeError, so use an empty
        # array as the default value instead.
        target_data = defaultdict(lambda: np.array([]))
        for label in self.appliances:
            target_data[label] = elec[label].power_series_all_data(
                sample_period=self.sample_period)

        def set_mains_data(dictionary, data):
            dictionary.setdefault(fold, {})[self.building_name] = data

        if not mains_data.empty and len(target_data.keys()):
            set_mains_data(self.mains, mains_data)
            set_mains_data(self.target, target_data)
            logger.info(
                "Loaded mains data from building {} for fold {} from {} to {}."
                .format(self.building_name, fold,
                        mains_data.index[0], mains_data.index[-1]))
        else:
            print('no available data')

    dataset.store.close()
    logger.info("Done loading NILMTK mains data.")
def load_data_from_nilmtk_datasets(windows, dataset_paths, appliances, target_appliance_name, sample_period): data = {} data_good_sections = {} logger.info("Loading NILMTK data...") for dataset_name, folds in windows.items(): # Load dataset dataset = nilmtk.DataSet(dataset_paths[dataset_name]) for fold, buildings_and_windows in folds.items(): for building_i, windows_for_building in buildings_and_windows.items(): dataset.set_window(None, None) elec = dataset.buildings[building_i].elec building_name = ( dataset.metadata['name'] + '_building_{}'.format(building_i)) logger.info( "Loading data for {}...".format(building_name)) mains_meter = elec.mains() good_sections = get_effective_good_sections(mains_meter) appliance_aliases = appliances[dataset_name][target_appliance_name] appliance_meters = [] for meter in elec.meters: if meter.is_site_meter(): continue if len(meter.appliances) == 1: appliancetype = meter.appliances[0].type['type'] if appliancetype in appliance_aliases: appliance_meters.append(meter) else: append_meter = False for a in meter.appliances: if a.type['type'] in appliance_aliases: append_meter = True if append_meter: appliance_meters.append(meter) print(meter.appliances) if not appliance_meters: logger.info( "No {} found in {}".format(target_appliance_name, building_name)) continue if len(appliance_meters) > 1: appliance_metergroup = nilmtk.MeterGroup(meters=appliance_meters) else: appliance_metergroup = appliance_meters[0] data_good_sections.setdefault(fold, {})[building_name] = good_sections def load_data(meter): df = meter.power_series_all_data( sample_period=sample_period ) if df is not None: return df.astype(np.float32).dropna() else: return None dfs = [] for window in windows_for_building: if dataset_name == "ECO": dataset.store.window = TimeFrame(start=window[0], end=window[1], tz='GMT') else: if window is None: ipdb.set_trace() # Something has gone wrong...see what happend! dataset.set_window(*window) # does not work for ECO #ipdb.set_trace() mains_data = load_data(mains_meter) appliance_data = load_data(appliance_metergroup) if (mains_data is None) or (appliance_data is None): continue df = pd.DataFrame( {'mains': mains_data, 'target': appliance_data}, dtype=np.float32).dropna() del mains_data del appliance_data if not df.empty: dfs.append(df) df = pd.concat(dfs, axis=0) dfs = [] for gs in good_sections: dfslice = gs.slice(df) if not dfslice.empty: dfs.append(dfslice) df = pd.concat(dfs, axis=0) if not df.empty: data.setdefault(fold, {})[building_name] = df logger.info( "Loaded data from building {} for fold {}" " from {} to {}." .format(building_name, fold, df.index[0], df.index[-1])) dataset.store.close() logger.info("Done loading NILMTK data.") return data, data_good_sections
from pylab import rcParams
import matplotlib.pyplot as plt
import nilmtk as ntk
import nilmtk.disaggregate as ntkd
import nilmtk.metrics as ntkm

rcParams['figure.figsize'] = (14, 6)
plt.style.use('ggplot')

# CONSTANTS=====================================================================
# Pick the path that matches your machine; only the active assignment is used.
# h5_path = '/home/t7/Dropbox/Documents/TUDelft/Thesis/Datasets/DRED/DRED.h5'
h5_path = r'C:\Users\davwang\Desktop\nilmtk\nilmtk\dataset_converters\dred\DRED.h5'

# Load Data=====================================================================
dred = ntk.DataSet(h5_path)
# dred.set_window(start=None, end='2015-07-10 00:00:00')
elec = dred.buildings[1].elec
mains = elec.mains()

# Train=========================================================================
co = ntkd.CombinatorialOptimisation()
co.train(elec)

# Disaggregate==================================================================
# Write to a separate output file; appending the suffix directly to h5_path
# would produce a malformed path like '...DRED.h5outputDRED.h5'.
output = ntk.HDFDataStore(h5_path.replace('DRED.h5', 'outputDRED.h5'), 'w')
co.disaggregate(mains, output)
output.close()

# Metrics=======================================================================
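# The metrics section above is empty. A minimal, hedged sketch of what could
# follow: re-open the disaggregated output as a NILMTK dataset and compare it
# against the submetered ground truth with nilmtk.metrics.f1_score. The output
# path mirrors the one written above and is an assumption.
disag = ntk.DataSet(h5_path.replace('DRED.h5', 'outputDRED.h5'))
disag_elec = disag.buildings[1].elec
f1 = ntkm.f1_score(disag_elec, elec)   # pd.Series of F1 per predicted meter
f1.plot(kind='barh')
plt.xlabel('F1 score')
plt.show()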
def load_nilmtk_activations(appliances, filename, sample_period, windows): """ Parameters ---------- appliances : list of strings filename : string sample_period : int windows : dict Structure example: { 'train': {<building_i>: <window>}, 'unseen_activations_of_seen_appliances': {<building_i>: <window>}, 'unseen_appliances': {<building_i>: <window>} } Returns ------- all_activations : dict Structure example: {<train | unseen_appliances | unseen_activations_of_seen_appliances>: { <appliance>: { <building_name>: [<activations>] }}} Each activation is a pd.Series with DatetimeIndex and the following metadata attributes: building, appliance, fold. """ logger.info("Loading NILMTK activations...") # Sanity check check_windows(windows) # Load dataset dataset = nilmtk.DataSet(filename) all_activations = {} for fold, buildings_and_windows in list(windows.items()): activations_for_fold = {} for building_i, window in list(buildings_and_windows.items()): dataset.set_window(*window) elec = dataset.buildings[building_i].elec building_name = (dataset.metadata['name'] + '_building_{}'.format(building_i)) for appliance in appliances: logger.info("Loading {} for {}...".format( appliance, building_name)) # Get meter for appliance try: meter = elec[appliance] except KeyError as exception: logger.info(building_name + " has no " + appliance + ". Full exception: {}".format(exception)) continue # Get activations_for_fold and process them meter_activations = meter.get_activations( sample_period=sample_period) meter_activations = [ activation.astype(np.float32) for activation in meter_activations ] # Attach metadata for activation in meter_activations: activation._metadata = copy(activation._metadata) activation._metadata.extend( ["building", "appliance", "fold"]) activation.building = building_name activation.appliance = appliance activation.fold = fold # Save if meter_activations: activations_for_fold.setdefault( appliance, {})[building_name] = meter_activations logger.info("Loaded {} {} activations from {}.".format( len(meter_activations), appliance, building_name)) all_activations[fold] = activations_for_fold dataset.store.close() logger.info("Done loading NILMTK activations.") return all_activations
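# Usage sketch for load_nilmtk_activations() above. The path, building
# numbers and date windows are illustrative placeholders, not values taken
# from this code.
windows = {
    'train': {1: ("2013-04-12", "2013-05-12")},
    'unseen_activations_of_seen_appliances': {1: ("2013-05-12", "2013-06-12")},
    'unseen_appliances': {2: ("2013-05-22", "2013-06-22")},
}
activations = load_nilmtk_activations(
    appliances=['kettle', 'fridge'],
    filename='ukdale.h5',
    sample_period=6,
    windows=windows)
kettle_train = activations['train']['kettle']   # {building_name: [pd.Series, ...]}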
def _load_data_into_memory(self):
    logger.info("Loading NILMTK data...")

    # Load dataset
    dataset = nilmtk.DataSet(self.filename)

    for fold, buildings_and_windows in self.windows.items():
        for building_i, window in buildings_and_windows.items():
            dataset.set_window(*window)
            elec = dataset.buildings[building_i].elec
            """appliances = elec.get_labels(list(elec.identifier.meters))
            meter_complete = True
            for appliance_name in self.appliances:
                if appliance_name.title() not in appliances:
                    meter_complete = False
            if not meter_complete:
                continue"""
            building_name = (dataset.metadata['name'] +
                             '_building_{}'.format(building_i))

            # Mains
            logger.info("Loading data for {}...".format(building_name))
            mains_meter = elec.mains()
            good_sections = mains_meter.good_sections()
            good_sections = elec[self.target_appliance].good_sections(
                sections=good_sections)
            if len(good_sections) < 1:
                continue

            def load_data(meter):
                return meter.power_series_all_data(
                    sample_period=self.sample_period,
                    sections=good_sections)

            # np.array() with no argument raises a TypeError; use an empty
            # array as the default value instead.
            power_series_data = defaultdict(lambda: np.array([]))
            power_series_data['mains'] = load_data(mains_meter)
            main_index = power_series_data['mains'].index

            is_valid = True
            for appliance_name in self.appliances:
                appliance_meter = elec[appliance_name]
                power_series_data[appliance_name] = load_data(appliance_meter)
                if power_series_data[appliance_name] is None:
                    is_valid = False
                    break
                power_series_data[appliance_name] = (
                    power_series_data[appliance_name].loc[main_index])
                appliance_index = power_series_data[appliance_name].index
                main_index = main_index.intersection(appliance_index)
            if not is_valid:
                continue

            for meter in power_series_data.keys():
                power_series_data[meter] = power_series_data[meter].astype(
                    np.float32).loc[main_index].values
            for meter in power_series_data.keys():
                if power_series_data[meter].shape != power_series_data['mains'].shape:
                    is_valid = False
                    break
            if not is_valid:
                continue

            df = pd.DataFrame(power_series_data, dtype=np.float32).dropna()
            if not df.empty:
                self.data.setdefault(fold, {})[building_name] = df
                logger.info("Loaded data from building {} for fold {}"
                            " from {} to {}.".format(
                                building_name, fold, df.index[0], df.index[-1]))

    dataset.store.close()
    logger.info("Done loading NILMTK data.")
def load_csvdata(self, data_path, numApp, typeLoad=0, num_sequences_per_batch=0,
                 target_inclusion_prob=0.5):
    '''
    Parameters:
        data_path
        numApp   indicates whether all the appliances should be read or just one of them
        typeLoad: 0 usual split in time (training | validation | test)
                  1 Kelly's load
                  2 combination with our own min ON
    Returns:
        totalX, totalY: two dictionaries with the split of the X and Y in
        training, validation and testing
    '''
    nilmkt_fileName = os.path.join(data_path, "ukdale.h5")
    if typeLoad == 1 and num_sequences_per_batch == 0:
        print("Need to provide number of sequences per batch with Kelly sampling")
        return
    if typeLoad == 1:
        numBatches = 1  # keep at 1 for now (1 batch with as many sequences as possible)
        assert self.windows['train'].keys() == self.windows['test'].keys()
        assert self.windows['val'].keys() == self.windows['test'].keys()
        for building_i, window in self.windows['train'].items():
            # Reformat windows to work with Kelly's code
            # WINDOWS = {'train': {1: ("2013-02-01", "2013-11-30")}}
            # kellyWindow = {'train': {building_i: (window[0], window[1])}}
            if numApp != -1:
                truFileName = (data_path + "/pickles/" + str(building_i) + '_' +
                               self.listAppliances[numApp] + '_' +
                               str(self.sample_period) + '_' +
                               str(num_sequences_per_batch) + '_' +
                               window[0] + '_' + window[1])
            else:
                truFileName = (data_path + "/pickles/" + str(building_i) + '_' +
                               'all' + '_' + str(self.sample_period) + '_' +
                               str(num_sequences_per_batch) + '_' +
                               window[0] + '_' + window[1])
            try:
                total = pickle.load(open(truFileName + "_building_k.pickle", "rb"))
                totalX = total[0]
                totalY = total[1]
            except (OSError, IOError) as e:
                totalX, totalY, stdIn, stdTar = kgd.getNILMbatches(
                    self.sample_period, nilmkt_fileName, target_inclusion_prob,
                    self.windows, self.listAppliances, self.pTrain, self.pVal,
                    self.pTest, num_sequences_per_batch, self.time_steps, numApp)
                # This assumes you have a "pickles" directory at the same
                # level as this file.
                with open(truFileName + "_building_k.pickle", 'wb') as fX:
                    pickle.dump([totalX, totalY], fX)
            print(totalX['train'].shape, totalX['val'].shape, totalX['test'].shape,
                  totalY['train'].shape, totalY['val'].shape, totalY['test'].shape)
            return totalX, totalY
    else:  # 0 or 2
        lenApps = len(self.listAppliances)
        shapeY = [0, self.time_steps, lenApps]  # (batch, seqLen, apps)
        dataset = nilmtk.DataSet(nilmkt_fileName)
        if numApp != -1:
            lenApps = 1
            shapeY = [0, self.time_steps]
        totalX = {'train': np.empty([0, self.time_steps]),
                  'val': np.empty([0, self.time_steps]),
                  'test': np.empty([0, self.time_steps])}
        totalY = {'train': np.empty(shapeY),
                  'val': np.empty(shapeY),
                  'test': np.empty(shapeY)}
        for building_i, window in self.windows.items():
            if numApp != -1:
                truFileName = (data_path + "/pickles/" + str(building_i) + '_' +
                               self.listAppliances[numApp] + '_' +
                               str(self.stride_input) + '_' +
                               window[0] + '_' + window[1])
            else:
                truFileName = (data_path + "/pickles/" + str(building_i) + '_' +
                               'all' + '_' + str(self.stride_input) + '_' +
                               window[0] + '_' + window[1])
            try:
                dataBuild = pickle.load(open(truFileName + "_building.pickle", "rb"))
            except (OSError, IOError) as e:
                dataBuild = self.all_building_data(dataset, building_i, window)
                # This assumes you have a "pickles" directory at the same
                # level as this file.
                with open(truFileName + "_building.pickle", 'wb') as fX:
                    pickle.dump(dataBuild, fX)
            allSetsBuild = self.prepare_data(dataBuild, numApp, building_i, typeLoad)
            totalX['train'] = np.concatenate((totalX['train'], allSetsBuild[0]), axis=0)
            totalX['val'] = np.concatenate((totalX['val'], allSetsBuild[1]), axis=0)
            totalX['test'] = np.concatenate((totalX['test'], allSetsBuild[2]), axis=0)
            totalY['train'] = np.concatenate((totalY['train'], allSetsBuild[3]), axis=0)
            totalY['val'] = np.concatenate((totalY['val'], allSetsBuild[4]), axis=0)
            totalY['test'] = np.concatenate((totalY['test'], allSetsBuild[5]), axis=0)
        print(totalX['train'].shape, totalX['val'].shape, totalX['test'].shape,
              totalY['train'].shape, totalY['val'].shape, totalY['test'].shape)
        return totalX, totalY
from __future__ import print_function, division
import numpy as np
import pandas as pd
import nilmtk

dataset = nilmtk.DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2014-01-01", "2014-06-01")
elec = dataset.buildings[1].elec
washer = elec['washer dryer']
washer_activations = washer.get_activations()
activation = washer_activations[4]
activation = activation.clip(lower=0, upper=2068)
activation.name = 'watts'

PERIOD = 6
activation.index = np.arange(0, len(activation) * PERIOD, PERIOD)

segment_indicies = np.where(np.abs(activation.diff()) > 1200)[0]
segment_indicies = np.concatenate((segment_indicies, [len(activation)]))
smoothed = pd.Series(0, index=activation.index, name='watts')
prev_i = 0
for i in segment_indicies:
    smoothed.iloc[prev_i:i] = activation.iloc[prev_i:i].mean()
    prev_i = i


def to_int(data):
    return data.round().astype(int)
def __init__(self, **config):
    if 'filename' not in config.keys():
        self.dataSet = nilmtk.DataSet("ukdale.h5")
    else:
        # Key name matches the check above ('filename').
        self.dataSet = nilmtk.DataSet(config['filename'])
    if 'startTime' not in config.keys() or 'endTime' not in config.keys():
        self.dataSet.set_window("2012-11-01", "2015-01-31")
    else:
        self.dataSet.set_window(config['startTime'], config['endTime'])
    if 'trainBuildings' not in config.keys():
        self.trainBuildings = [1, 3, 4, 5]
    else:
        self.trainBuildings = config['trainBuildings']
    if 'testBuildings' not in config.keys():
        self.testBuildings = [2]
    else:
        self.testBuildings = config['testBuildings']
    if 'applications' not in config.keys():
        raise KeyError("please input applications")
    self.applications = config['applications']
    if 'targetapplication' not in config.keys():
        raise KeyError("please input targetapplication")
    self.targetApplication = config['targetapplication']
    if 'randSeed' not in config.keys():
        randSeed = 0
    else:
        randSeed = config['randSeed']
    self.otherApplications = [i for i in self.applications
                              if i not in [self.targetApplication]]
    self.allBuildings = set(self.trainBuildings + self.testBuildings)
    self.window = 599
    self.inputSeqs = []
    self.targetSeqs = []
    self.rng = np.random.RandomState(randSeed)

    activationConfig = {
        'fridge': {'min_off_duration': 18,   # 12 in paper
                   'min_on_duration': 60,
                   'on_power_threshold': 50,
                   'sample_period': 6},
        'kettle': {'min_off_duration': 18,   # 0 in paper
                   'min_on_duration': 12,
                   'on_power_threshold': 2000,
                   'sample_period': 6},
        'washing machine': {'min_off_duration': 160,
                            'min_on_duration': 1800,
                            'on_power_threshold': 20,
                            'sample_period': 6},
        'microwave': {'min_off_duration': 30,
                      'min_on_duration': 12,
                      'on_power_threshold': 200,
                      'sample_period': 6},
        'dish washer': {'min_off_duration': 1800,
                        'min_on_duration': 1800,
                        'on_power_threshold': 10,
                        'sample_period': 6},
    }

    self.elecMains = {}
    self.goodSections = {}
    for building in self.allBuildings:
        self.goodSections[building] = self.dataSet.buildings[
            building].elec.mains().good_sections()
        self.elecMains[building] = self.dataSet.buildings[
            building].elec.mains().power_series_all_data(
                sample_period=6,
                sections=self.goodSections[building]).dropna()

    self.numApp = {}
    self.elecApp = {}
    self.activationsApp = {}
    self.activationAppSections = {}
    for app in self.applications:
        self.elecApp[app] = {}
        self.activationsApp[app] = {}
        self.numApp[app] = 0
        self.activationAppSections[app] = {}
        for building in self.allBuildings:
            try:
                self.elecApp[app][building] = self.dataSet.buildings[
                    building].elec[app].power_series_all_data(
                        sample_period=6).dropna()
                self.activationsApp[app][building] = self.dataSet.buildings[
                    building].elec[app].get_activations(
                        **activationConfig[app])
                self.activationsApp[app][building] = [
                    activation.astype(np.float32)
                    for activation in self.activationsApp[app][building]]
                self.numApp[app] += len(self.activationsApp[app][building])
                self.activationAppSections[app][building] = TimeFrameGroup()
                for activation in self.activationsApp[app][building]:
                    self.activationAppSections[app][building].append(
                        TimeFrame(activation.index[0], activation.index[-1]))
            except KeyError as exception:
                logger.info(str(building) + " has no " + app +
                            ". Full exception: {}".format(exception))
                continue
    logger.info("Done loading NILMTK data.")

    # Drop target activations whose surrounding mains window is missing or
    # has too few samples.
    for building in self.allBuildings:
        activationsToRemove = []
        try:
            activations = self.activationsApp[self.targetApplication][building]
            mains = self.elecMains[building]
            for i, activation in enumerate(activations):
                activationDuration = (activation.index[-1] - activation.index[0])
                start = (activation.index[0] - activationDuration)
                end = (activation.index[-1] + activationDuration)
                if start < mains.index[0] or end > mains.index[-1]:
                    activationsToRemove.append(i)
                else:
                    mainsForAct = mains[start:end]
                    if not self._hasSufficientSamples(start, end, mainsForAct):
                        activationsToRemove.append(i)
            activationsToRemove.reverse()
            for i in activationsToRemove:
                activations.pop(i)
            self.activationsApp[self.targetApplication][building] = activations
        except KeyError as exception:
            continue

    # Find good mains sections that contain no target activation.
    self.sectionsWithNoTarget = {}
    for building in self.allBuildings:
        try:
            activationsTarget = self.activationsApp[
                self.targetApplication][building]
            mainGoodSections = self.goodSections[building]
            mains = self.elecMains[building]
            gapsBetweenActivations = TimeFrameGroup()
            prev = mains.index[0]
            for activation in activationsTarget:
                try:
                    p2 = prev
                    gapsBetweenActivations.append(
                        TimeFrame(prev, activation.index[0]))
                    prev = activation.index[-1]
                    p1 = activation.index[0]
                except ValueError:
                    logger.debug("----------------------")
                    logger.debug(p1)
                    logger.debug(p2)
                    logger.debug(activation.index[0])
                    logger.debug(activation.index[-1])
            gapsBetweenActivations.append(TimeFrame(prev, mains.index[-1]))
            intersection = gapsBetweenActivations.intersection(mainGoodSections)
            intersection = intersection.remove_shorter_than(6 * self.window)
            self.sectionsWithNoTarget[building] = intersection
        except KeyError:
            continue
def __setstate__(self, dict):
    self.__dict__ = dict
    # Re-open the NILMTK dataset, which is not carried through pickling.
    self.dataset = nilmtk.DataSet(dict['filename'])
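# The matching __getstate__ is not shown here. A plausible, hedged sketch:
# the open HDF5 store held by nilmtk.DataSet cannot be pickled, so it is
# dropped when pickling and re-opened by __setstate__ above. Attribute names
# other than 'dataset' and 'filename' are taken from __setstate__.
def __getstate__(self):
    state = self.__dict__.copy()
    state.pop('dataset', None)   # the open nilmtk.DataSet is not picklable
    return state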
def load_nilmtk_activations( dataset_paths, target_appliance_name, appliance_names, on_power_threshold, min_on_duration, min_off_duration, sample_period, windows, sanity_check=1 ): """ Parameters ---------- windows : dict Structure example: { 'UKDALE': { 'train': {<building_i>: <window>}, 'unseen_activations_of_seen_appliances': {<building_i>: <window>}, 'unseen_appliances': {<building_i>: <window>} } } Returns ------- all_activations : dict Structure example: {<train | unseen_appliances | unseen_activations_of_seen_appliances>: { <appliance>: { <building_name>: [<activations>] }}} Each activation is a pd.Series with DatetimeIndex and the following metadata attributes: building, appliance, fold. """ logger.info("Loading NILMTK activations...") if sanity_check: # Sanity check for dataset in windows: check_windows(windows[dataset]) all_activations = {} for dataset_name, folds in windows.items(): # Load dataset dataset = nilmtk.DataSet(dataset_paths[dataset_name]) appliance_aliases = appliance_names[dataset_name][target_appliance_name] for fold, buildings_and_windows in folds.items(): logger.info( "Loading activations for fold {}.....".format(fold)) for building_i, windows_for_building in buildings_and_windows.items(): #dataset.set_window(*window) elec = dataset.buildings[building_i].elec building_name = ( dataset.metadata['name'] + '_building_{}'.format(building_i)) appliance_meters = [] for meter in elec.meters: if meter.is_site_meter(): continue append_meter = False for a in meter.appliances: if a.type['type'] in appliance_aliases: append_meter = True if append_meter: appliance_meters.append(meter) print(meter.appliances) if not appliance_meters: logger.info( "No {} found in {}".format(target_appliance_name, building_name)) continue #if appliance_meters: if len(appliance_meters) > 1: meter = nilmtk.MeterGroup(meters=appliance_meters) else: meter = appliance_meters[0] logger.info( "Loading {} for {}...".format(target_appliance_name, building_name)) meter_activations = [] for window in windows_for_building: if dataset_name == "ECO": dataset.store.window = TimeFrame(start=window[0], end=window[1], tz='GMT') else: dataset.set_window(*window) # does not work for ECO # Get activations_for_fold and process them meter_activations_for_building = meter.get_activations( sample_period=sample_period, min_off_duration=min_off_duration, min_on_duration=min_on_duration, on_power_threshold=on_power_threshold, resample_kwargs={'fill_method': 'ffill', 'how': 'mean', 'limit': 20}) #meter_activations_for_building = [activation.astype(np.float32) # for activation in meter_activations_for_building] meter_activations.extend(meter_activations_for_building) # Attach metadata #for activation in meter_activations: # activation._metadata = copy(activation._metadata) # activation._metadata.extend( # ["building", "appliance", "fold"]) # activation.building = building_name # activation.appliance = appliance # activation.fold = fold # Save if meter_activations: all_activations.setdefault( fold, {}).setdefault( target_appliance_name, {})[building_name] = meter_activations logger.info( "Loaded {} {} activations from {}." .format(len(meter_activations), target_appliance_name, building_name)) dataset.store.close() logger.info("Done loading NILMTK activations.") return all_activations
def load_nilmtk_activations(appliances, filename, sample_period, windows, on_power_thresholds=None, min_on_durations=None, min_off_durations=None, sanity_check=1): """ Parameters ---------- appliances : list of strings filename : string sample_period : int windows : dict Structure example: { 'train': {<building_i>: <window>}, 'unseen_activations_of_seen_appliances': {<building_i>: <window>}, 'unseen_appliances': {<building_i>: <window>} } Returns ------- all_activations : dict Structure example: {<train | unseen_appliances | unseen_activations_of_seen_appliances>: { <appliance>: { <building_name>: [<activations>] }}} Each activation is a pd.Series with DatetimeIndex and the following metadata attributes: building, appliance, fold. """ logger.info("Loading NILMTK activations...") # check whether optional parameters are provided and if so, are of the same length as `appliances` # if not provided build a list of the same size as `appliances` and fill it with None entries. if (on_power_thresholds is not None) and (len(on_power_thresholds) != len(appliances)): raise ValueError("`on_power_thresholds` must have the same size as `appliances` ") elif on_power_thresholds is None: on_power_thresholds = [None for i in range(len(appliances))] if (min_on_durations is not None) and (len(min_on_durations) != len(appliances)): raise ValueError("`min_on_durations` must have the same size as `appliances` ") elif min_on_durations is None: min_on_durations = [None for i in range(len(appliances))] if (min_off_durations is not None) and (len(min_off_durations) != len(appliances)): raise ValueError("`min_off_durations` must have the same size as `appliances` ") elif min_off_durations is None: min_off_durations = [None for i in range(len(appliances))] if sanity_check: # Sanity check check_windows(windows) # Load dataset dataset = nilmtk.DataSet(filename) all_activations = {} for fold, buildings_and_windows in windows.items(): logger.info( "Loading activations for fold {}.....".format(fold)) activations_for_fold = {} for building_i, window in buildings_and_windows.items(): dataset.set_window(*window) elec = dataset.buildings[building_i].elec building_name = ( dataset.metadata['name'] + '_building_{}'.format(building_i)) for i, appliance in enumerate(appliances): logger.info( "Loading {} for {}...".format(appliance, building_name)) # Get meter for appliance try: meter = elec[appliance] except KeyError as exception: logger.info(building_name + " has no " + appliance + ". Full exception: {}".format(exception)) continue # Get activations_for_fold and process them meter_activations = meter.get_activations( sample_period=sample_period, min_off_duration=min_off_durations[i], min_on_duration=min_on_durations[i], on_power_threshold=on_power_thresholds[i]) meter_activations = [activation.astype(np.float32) for activation in meter_activations] # Attach metadata for activation in meter_activations: activation._metadata = copy(activation._metadata) activation._metadata.extend( ["building", "appliance", "fold"]) activation.building = building_name activation.appliance = appliance activation.fold = fold # Save if meter_activations: activations_for_fold.setdefault( appliance, {})[building_name] = meter_activations logger.info( "Loaded {} {} activations from {}." .format(len(meter_activations), appliance, building_name)) all_activations[fold] = activations_for_fold dataset.store.close() logger.info("Done loading NILMTK activations.") return all_activations
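# Usage sketch for the variant above. The per-appliance thresholds roughly
# follow common UK-DALE settings but are illustrative only, as are the path,
# building numbers and date windows; the keyword lists are positionally
# aligned with `appliances`.
activations = load_nilmtk_activations(
    appliances=['kettle', 'washing machine'],
    filename='ukdale.h5',
    sample_period=6,
    windows={'train': {1: ("2013-04-12", "2013-05-12")},
             'unseen_activations_of_seen_appliances': {1: ("2013-05-12", "2013-06-12")},
             'unseen_appliances': {2: ("2013-05-22", "2013-06-22")}},
    on_power_thresholds=[2000, 20],
    min_on_durations=[12, 1800],
    min_off_durations=[0, 160])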
# Weather data store
WEATHER_DATA_STORE = os.path.expanduser("~/git/nilm-actionable/data/hvac/weather_2013.h5")
weather_data_df = pd.HDFStore(WEATHER_DATA_STORE)["/weather"]

df = pd.read_csv(os.path.join(script_path, "../../data/total/survey_2013.csv"))

cols = ['programmable_thermostat_currently_programmed',
        'temp_summer_weekday_workday',
        'temp_summer_weekday_morning',
        'temp_summer_weekday_evening',
        'temp_summer_sleeping_hours_hours']

from copy import deepcopy
cols_plus_data_id = deepcopy(cols)
cols_plus_data_id.insert(0, "dataid")

df = df[cols_plus_data_id].dropna()
survey_homes = df.dataid.values

ds = nilmtk.DataSet(os.path.expanduser("~/wikienergy-2013.h5"))

nilmtk_to_dataid = {num: building.metadata["original_name"]
                    for num, building in ds.buildings.items()}
dataid_to_nilmtk = {v: k for k, v in nilmtk_to_dataid.items()}

function_map = {"binary": fcn2min_time_fixed_binary,
                "minutes": fcn2min_time_fixed}

results = {}
for folder in to_consider[:]:
    results[folder] = {}
    output = {"binary": {}, "minutes": {}}
    algo = folder.split("_")[-1]
    print(algo, folder)
from __future__ import print_function
import sip
import nilmtk
import matplotlib.pyplot as plt
import pandas as pd

dataset = nilmtk.DataSet('ukdale.h5')
# dataset.set_window(start="6-4-2013")
# dataset.set_window(end="30-1-2013")
# dataset.set_window(start="6-11-2014", end="13-11-2014")
BUILDING = 2
elec = dataset.buildings[BUILDING].elec

gt = {}
sample_period = 6
for i, chunk in enumerate(elec.mains().load(sample_period=sample_period)):
    chunk_drop_na = chunk.dropna()
    gt[i] = {}
    for meter in elec.submeters().select_using_appliances(type=[
            'kettle', 'fridge', 'microwave', 'dish washer', 'washing machine'
    ]).meters:
        # Only use the meters that we trained on (this saves time!)
        gt[i][meter] = next(meter.load(sample_period=sample_period))
    gt[i] = pd.DataFrame({k: v.squeeze() for k, v in gt[i].items()},
                         index=list(gt[i].values())[0].index).dropna()