def test_jointly(self,d):
    """Load test mains (and, unless site_only, appliance submeters) for every
    building listed in `d`, then run prediction per building.

    d: dict keyed by dataset name; each value holds 'path' and a 'buildings'
       dict mapping building id -> {'start_time', 'end_time'}.
    Side effects: sets self.test_mains / self.test_submeters / self.storing_key
    and calls self.call_predict once per building.
    """
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ",dataset, " dataset")
        test=DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            # Restrict the loaded data to the configured time window.
            test.set_window(start=d[dataset]['buildings'][building]['start_time'],end=d[dataset]['buildings'][building]['end_time'])
            # NOTE(review): ac_type is hard-coded to 'apparent' here while the
            # sibling variants use self.power['mains'] — confirm intentional.
            test_mains=next(test.buildings[building].elec.mains().load(physical_quantity='power', ac_type='apparent', sample_period=self.sample_period))
            if self.DROP_ALL_NANS and self.site_only:
                # Site-only run: drop NaNs from mains alone (no submeters).
                test_mains, _= self.dropna(test_mains,[])
            if self.site_only != True:
                # Load one DataFrame per target appliance.
                appliance_readings=[]
                for appliance in self.appliances:
                    test_df=next((test.buildings[building].elec[appliance].load(physical_quantity='power', ac_type=self.power['appliance'], sample_period=self.sample_period)))
                    appliance_readings.append(test_df)
                if self.DROP_ALL_NANS:
                    # Align mains and appliance frames on NaN-free timestamps.
                    test_mains , appliance_readings = self.dropna(test_mains,appliance_readings)
                if self.artificial_aggregate:
                    # Replace measured mains with the sum of appliance readings.
                    print ("Creating an Artificial Aggregate")
                    test_mains = pd.DataFrame(np.zeros(appliance_readings[0].shape),index = appliance_readings[0].index,columns=appliance_readings[0].columns)
                    for app_reading in appliance_readings:
                        test_mains+=app_reading
                for i, appliance_name in enumerate(self.appliances):
                    self.test_submeters.append((appliance_name,[appliance_readings[i]]))
            self.test_mains = [test_mains]
            # Key used to store per-building results, e.g. "REDD_1".
            self.storing_key = str(dataset) + "_" + str(building)
            self.call_predict(self.classifiers, test.metadata["timezone"])
def test_jointly(self, d):
    """Load test mains and appliance submeters for every building in `d`
    and run prediction per building.

    d: dict keyed by dataset name; each value holds 'path' and a 'buildings'
       dict mapping building id -> {'start_time', 'end_time'}.
    Side effects: sets self.test_mains / self.test_submeters and calls
    self.call_predict once per building.
    """
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        test = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            # Restrict the loaded data to the configured time window.
            test.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            test_mains = next(test.buildings[building].elec.mains().load(
                physical_quantity='power',
                ac_type=self.power['mains'],
                sample_period=self.sample_period))
            # Load one DataFrame per target appliance.
            appliance_readings = []
            for appliance in self.appliances:
                test_df = next((test.buildings[building].elec.submeters(
                ).select_using_appliances(type=appliance).load(
                    physical_quantity='power',
                    ac_type=self.power['appliance'],
                    sample_period=self.sample_period)))
                appliance_readings.append(test_df)
            if self.DROP_ALL_NANS:
                # BUG FIX: previously this passed `test_df` (the *last
                # appliance's* readings) instead of the mains frame, and then
                # published that appliance frame as self.test_mains.
                test_mains, appliance_readings = self.dropna(
                    test_mains, appliance_readings)
            self.test_mains = [test_mains]
            for i, appliance_name in enumerate(self.appliances):
                self.test_submeters.append(
                    (appliance_name, [appliance_readings[i]]))
            self.call_predict(self.classifiers)
def test_jointly(self, d):
    """Load test mains (all AC power types), derive active/apparent/reactive
    columns, then load appliance submeters and run prediction per building.

    Reactive power is derived as sqrt(apparent^2 - active^2) since it is not
    measured directly.  Sets self.test_mains / self.test_submeters /
    self.storing_key and calls self.call_predict once per building.
    """
    # Store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        test = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            # Restrict the loaded data to the configured time window.
            test.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            # all_type_power is defined elsewhere in this module; it requests
            # every available AC power type at once.
            test_mains = next(test.buildings[building].elec.mains().load(
                physical_quantity='power',
                ac_type=all_type_power,
                sample_period=self.sample_period))
            # Flatten the ('power', <ac_type>) MultiIndex columns and derive
            # reactive power from the apparent/active pair.
            test_mains['active'] = test_mains['power']['active']
            test_mains['apparent'] = test_mains['power']['apparent']
            test_mains['reactive'] = np.sqrt(
                test_mains['power']['apparent']**2 -
                test_mains['power']['active']**2)
            test_mains.drop(['power'], axis=1, inplace=True)
            # Keep only the power types requested for mains.
            test_mains = test_mains[self.power['mains']]
            '''
            train_df = next(train.buildings[building].elec.mains().load(physical_quantity='power', ac_type= all_type_power,sample_period = self.sample_period)) train_df['active'] = train_df['power']['active'] train_df['apparent'] = train_df['power']['apparent'] train_df['reactive'] = np.sqrt(train_df['power']['apparent']**2 - train_df['power']['active']**2) train_df.drop(['power'], axis = 1,inplace = True) train_df = train_df[self.power['mains']]
            '''
            # Load one DataFrame per target appliance.
            appliance_readings = []
            for appliance in self.appliances:
                test_df = next(
                    (test.buildings[building].elec[appliance].load(
                        physical_quantity='power',
                        ac_type=self.power['appliance'],
                        sample_period=self.sample_period)))
                appliance_readings.append(test_df)
            if self.DROP_ALL_NANS:
                # Align mains and appliance frames on NaN-free timestamps.
                test_mains, appliance_readings = self.dropna(
                    test_mains, appliance_readings)
            if self.artificial_aggregate:
                # Replace measured mains with the sum of appliance readings.
                print("Creating an Artificial Aggregate")
                test_mains = pd.DataFrame(
                    np.zeros(appliance_readings[0].shape),
                    index=appliance_readings[0].index,
                    columns=appliance_readings[0].columns)
                for app_reading in appliance_readings:
                    test_mains += app_reading
            self.test_mains = [test_mains]
            for i, appliance_name in enumerate(self.appliances):
                self.test_submeters.append(
                    (appliance_name, [appliance_readings[i]]))
            # Key used to store per-building results, e.g. "REDD_1".
            self.storing_key = str(dataset) + "_" + str(building)
            self.call_predict(self.classifiers)
def train_jointly(self, clf, d):
    """Load train mains and appliance submeters for every building in `d`,
    keep them as per-building lists, and fit `clf` once on all of them.

    clf: disaggregation model exposing partial_fit(mains, submeters).
    d:   dict keyed by dataset name; each value holds 'path' and a
         'buildings' dict mapping building id -> {'start_time', 'end_time'}.
    """
    # This function has a few issues, which should be addressed soon
    print("............... Loading Data for training ...................")
    # store the train_main readings for all buildings
    self.train_mains = []
    # One list of per-building DataFrames per target appliance.
    self.train_submeters = [[] for i in range(len(self.appliances))]
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        train = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            print("Loading building ... ", building)
            # Restrict the loaded data to the configured time window.
            train.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            train_df = next(train.buildings[building].elec.mains().load(
                physical_quantity='power',
                ac_type=self.power['mains'],
                sample_period=self.sample_period))
            # Keep only the first power column.
            train_df = train_df[[list(train_df.columns)[0]]]
            appliance_readings = []
            for appliance_name in self.appliances:
                appliance_df = next(
                    train.buildings[building].elec[appliance_name].load(
                        physical_quantity='power',
                        ac_type=self.power['appliance'],
                        sample_period=self.sample_period))
                # Keep only the first power column.
                appliance_df = appliance_df[[
                    list(appliance_df.columns)[0]
                ]]
                appliance_readings.append(appliance_df)
            if self.DROP_ALL_NANS:
                # Align mains and appliance frames on NaN-free timestamps.
                train_df, appliance_readings = self.dropna(
                    train_df, appliance_readings)
            if self.artificial_aggregate:
                # Replace measured mains with the sum of appliance readings.
                print("Creating an Artificial Aggregate")
                train_df = pd.DataFrame(
                    np.zeros(appliance_readings[0].shape),
                    index=appliance_readings[0].index,
                    columns=appliance_readings[0].columns)
                for app_reading in appliance_readings:
                    train_df += app_reading
            self.train_mains.append(train_df)
            for i, appliance_name in enumerate(self.appliances):
                self.train_submeters[i].append(appliance_readings[i])
    # Repackage submeters as [(appliance_name, [df, df, ...]), ...].
    appliance_readings = []
    for i, appliance_name in enumerate(self.appliances):
        appliance_readings.append(
            (appliance_name, self.train_submeters[i]))
    self.train_submeters = appliance_readings
    clf.partial_fit(self.train_mains, self.train_submeters)
def train_jointly(self, clf, d):
    """Load train mains and appliance submeters for every building in `d`,
    concatenate them across buildings into single DataFrames, and fit `clf`.

    clf: disaggregation model exposing partial_fit(mains, submeters).
    d:   dict keyed by dataset name; each value holds 'path' and a
         'buildings' dict mapping building id -> {'start_time', 'end_time'}.
    NOTE(review): DataFrame.append was removed in pandas 2.x — this code
    requires an older pandas.
    """
    # This function has a few issues, which should be addressed soon
    print("............... Loading Data for training ...................")
    # store the train_main readings for all buildings
    self.train_mains = pd.DataFrame()
    # One accumulator DataFrame per target appliance.
    self.train_submeters = [
        pd.DataFrame() for i in range(len(self.appliances))
    ]
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        train = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            print("Loading building ... ", building)
            # Restrict the loaded data to the configured time window.
            train.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            train_df = next(train.buildings[building].elec.mains().load(
                physical_quantity='power',
                ac_type=self.power['mains'],
                sample_period=self.sample_period))
            appliance_readings = []
            for appliance_name in self.appliances:
                appliance_df = next(
                    train.buildings[building].elec.submeters(
                    ).select_using_appliances(type=appliance_name).load(
                        physical_quantity='power',
                        ac_type=self.power['appliance'],
                        sample_period=self.sample_period))
                appliance_readings.append(appliance_df)
            if self.DROP_ALL_NANS:
                # Align mains and appliance frames on NaN-free timestamps.
                train_df, appliance_readings = self.dropna(
                    train_df, appliance_readings)
            # Concatenate this building's data onto the global accumulators.
            self.train_mains = self.train_mains.append(train_df)
            for i, appliance_name in enumerate(self.appliances):
                self.train_submeters[i] = self.train_submeters[i].append(
                    appliance_readings[i])
    # Repackage submeters as [(appliance_name, [df]), ...].
    appliance_readings = []
    for i, appliance_name in enumerate(self.appliances):
        appliance_readings.append(
            (appliance_name, [self.train_submeters[i]]))
    self.train_mains = [self.train_mains]
    self.train_submeters = appliance_readings
    clf.partial_fit(self.train_mains, self.train_submeters)
def train_chunk_wise(self, clf, d, current_epoch):
    """
    This function loads the data from buildings and datasets with the
    specified chunk size and trains on each of them.

    clf:           disaggregation model exposing
                   partial_fit(mains, submeters, current_epoch).
    d:             dict keyed by dataset name; each value holds 'path' and
                   a 'buildings' dict with per-building time windows.
    current_epoch: epoch index forwarded to clf.partial_fit.
    """
    for dataset in d:
        # Loading the dataset
        print("Loading data for ",dataset, " dataset")
        for building in d[dataset]['buildings']:
            # Loading the building
            train=DataSet(d[dataset]['path'])
            print("Loading building ... ",building)
            # Restrict the loaded data to the configured time window.
            train.set_window(start=d[dataset]['buildings'][building]['start_time'],end=d[dataset]['buildings'][building]['end_time'])
            # Chunked iterators over mains and each appliance channel.
            mains_iterator = train.buildings[building].elec.mains().load(chunksize = self.chunk_size, physical_quantity='power', ac_type = self.power['mains'], sample_period=self.sample_period)
            appliance_iterators = [train.buildings[building].elec[app_name].load(chunksize = self.chunk_size, physical_quantity='power', ac_type=self.power['appliance'], sample_period=self.sample_period) for app_name in self.appliances]
            print(train.buildings[building].elec.mains())
            for chunk_num,chunk in enumerate (train.buildings[building].elec.mains().load(chunksize = self.chunk_size, physical_quantity='power', ac_type = self.power['mains'], sample_period=self.sample_period)):
                # Loading the chunk for the specified building.
                # Dummy outer loop: `chunk` itself is unused; it only drives
                # the iteration until the mains chunks are exhausted, while
                # the parallel iterators below supply the actual data.
                print("Starting enumeration..........")
                train_df = next(mains_iterator)
                appliance_readings = []
                for i in appliance_iterators:
                    try:
                        appliance_df = next(i)
                    except StopIteration:
                        # An exhausted appliance iterator contributes an
                        # empty frame for this chunk.
                        appliance_df = pd.DataFrame()
                    appliance_readings.append(appliance_df)
                if self.DROP_ALL_NANS:
                    # Align mains and appliance frames on NaN-free timestamps.
                    train_df, appliance_readings = self.dropna(train_df, appliance_readings)
                if self.artificial_aggregate:
                    # Replace measured mains with the sum of appliance readings.
                    print ("Creating an Artificial Aggregate")
                    train_df = pd.DataFrame(np.zeros(appliance_readings[0].shape),index = appliance_readings[0].index,columns=appliance_readings[0].columns)
                    for app_reading in appliance_readings:
                        train_df+=app_reading
                train_appliances = []
                for cnt,i in enumerate(appliance_readings):
                    train_appliances.append((self.appliances[cnt],[i]))
                self.train_mains = [train_df]
                self.train_submeters = train_appliances
                # Incremental fit on this chunk.
                clf.partial_fit(self.train_mains, self.train_submeters, current_epoch)
    print("...............Finished the Training Process ...................")
def test_chunk_wise(self,d):
    """Load test data chunk-by-chunk for every building in `d` and run
    prediction on each chunk.

    d: dict keyed by dataset name; each value holds 'path' and a 'buildings'
       dict with per-building time windows.
    Side effects: sets self.test_mains / self.test_submeters /
    self.storing_key per chunk and calls self.call_predict.
    """
    print("...............Started the Testing Process ...................")
    for dataset in d:
        print("Loading data for ",dataset, " dataset")
        for building in d[dataset]['buildings']:
            test=DataSet(d[dataset]['path'])
            # Restrict the loaded data to the configured time window.
            test.set_window(start=d[dataset]['buildings'][building]['start_time'],end=d[dataset]['buildings'][building]['end_time'])
            # Chunked iterators over mains and each appliance channel.
            mains_iterator = test.buildings[building].elec.mains().load(chunksize = self.chunk_size, physical_quantity='power', ac_type = self.power['mains'], sample_period=self.sample_period)
            appliance_iterators = [test.buildings[building].elec[app_name].load(chunksize = self.chunk_size, physical_quantity='power', ac_type=self.power['appliance'], sample_period=self.sample_period) for app_name in self.appliances]
            # Dummy outer loop: `chunk` is unused; it only drives iteration
            # until the mains chunks are exhausted.
            for chunk_num,chunk in enumerate (test.buildings[building].elec.mains().load(chunksize = self.chunk_size, physical_quantity='power', ac_type = self.power['mains'], sample_period=self.sample_period)):
                test_df = next(mains_iterator)
                appliance_readings = []
                for i in appliance_iterators:
                    try:
                        appliance_df = next(i)
                    except StopIteration:
                        # Exhausted appliance iterator -> empty frame.
                        appliance_df = pd.DataFrame()
                    appliance_readings.append(appliance_df)
                if self.DROP_ALL_NANS:
                    # Align mains and appliance frames on NaN-free timestamps.
                    test_df, appliance_readings = self.dropna(test_df, appliance_readings)
                if self.artificial_aggregate:
                    # Replace measured mains with the sum of appliance readings.
                    print ("Creating an Artificial Aggregate")
                    test_df = pd.DataFrame(np.zeros(appliance_readings[0].shape),index = appliance_readings[0].index,columns=appliance_readings[0].columns)
                    for app_reading in appliance_readings:
                        test_df+=app_reading
                test_appliances = []
                for cnt,i in enumerate(appliance_readings):
                    test_appliances.append((self.appliances[cnt],[i]))
                self.test_mains = [test_df]
                self.test_submeters = test_appliances
                print("Results for Dataset {dataset} Building {building} Chunk {chunk_num}".format(dataset=dataset,building=building,chunk_num=chunk_num))
                # Key used to store per-chunk results, e.g. "REDD_1_0".
                self.storing_key = str(dataset) + "_" + str(building) + "_" + str(chunk_num)
                self.call_predict(self.classifiers, test.metadata['timezone'])
def load_datasets(self):
    """End-to-end driver: load train mains and submeters across all
    configured datasets/buildings, fit the classifiers, then load test data
    per building and run prediction.

    Reads self.train_datasets_dict / self.test_datasets_dict; mutates
    self.train_mains, self.train_submeters, self.test_mains,
    self.test_submeters.
    NOTE(review): DataFrame.append was removed in pandas 2.x — this code
    requires an older pandas.
    """
    self.store_classifier_instances()
    d = self.train_datasets_dict
    print("............... Loading Data for training ...................")
    # store the train_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        train = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            print("Loading building ... ", building)
            # Restrict the loaded data to the configured time window.
            train.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            # Concatenate this building's mains onto the accumulator.
            self.train_mains = self.train_mains.append(
                next(train.buildings[building].elec.mains().load(
                    physical_quantity='power',
                    ac_type=self.power['mains'],
                    sample_period=self.sample_period)))
    # store train submeters reading
    train_buildings = pd.DataFrame()
    for appliance in self.appliances:
        train_df = pd.DataFrame()
        print("For appliance .. ", appliance)
        for dataset in d:
            print("Loading data for ", dataset, " dataset")
            train = DataSet(d[dataset]['path'])
            for building in d[dataset]['buildings']:
                print("Loading building ... ", building)
                # store data for submeters
                train.set_window(
                    start=d[dataset]['buildings'][building]['start_time'],
                    end=d[dataset]['buildings'][building]['end_time'])
                # Concatenate this building's appliance readings.
                train_df = train_df.append(
                    next(train.buildings[building].elec.submeters().
                         select_using_appliances(type=appliance).load(
                             physical_quantity='power',
                             ac_type=self.power['appliance'],
                             sample_period=self.sample_period)))
        self.train_submeters.append((appliance, [train_df]))
    # create instance of the training methods
    # train models
    # store data for mains
    self.train_mains = [self.train_mains]
    self.call_partial_fit()
    d = self.test_datasets_dict
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        test = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            test.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            self.test_mains = (next(
                test.buildings[building].elec.mains().load(
                    physical_quantity='power',
                    ac_type=self.power['mains'],
                    sample_period=self.sample_period)))
            # Fresh submeter list per building; prediction runs per building.
            self.test_submeters = []
            for appliance in self.appliances:
                test_df = next((test.buildings[building].elec.submeters(
                ).select_using_appliances(type=appliance).load(
                    physical_quantity='power',
                    ac_type=self.power['appliance'],
                    sample_period=self.sample_period)))
                self.test_submeters.append((appliance, [test_df]))
            self.test_mains = [self.test_mains]
            self.call_predict(self.classifiers)
def test_jointly(self, d):
    """Load test mains and submeters split into good sections (gap-free
    spans shared by mains and each appliance meter), optionally convert
    submeter power to on/off states, then run prediction per building.

    Uses module-level helpers get_sections_df / get_sections_df_2 /
    get_activations and the module-level `config` dict.
    Side effects: sets self.sec_dict, self.test_mains, self.test_submeters,
    self.storing_key and calls self.call_predict once per building.
    """
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        test = DataSet(d[dataset]['path'])
        # building -> list of (start, end) section boundaries.
        self.sec_dict = {}
        for building in d[dataset]['buildings']:
            self.test_mains = []
            self.test_submeters = [[] for i in range(len(self.appliances))]
            # Restrict the loaded data to the configured time window.
            test.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            test_meter = test.buildings[building].elec.mains()
            good_sections = test.buildings[building].elec.mains(
            ).good_sections()
            # self.test_sections = good_sections
            main_df = next(
                test_meter.load(physical_quantity='power',
                                ac_type=self.power['mains'],
                                sample_period=self.sample_period))
            main_df_list = get_sections_df(main_df, good_sections)
            # train_df
            appliance_readings = []
            for appliance_name in self.appliances:
                app_meter = test.buildings[building].elec[appliance_name]
                # Sections are computed once per building, from the first
                # appliance's good sections intersected with the mains'.
                if building not in self.sec_dict:
                    self.sec_dict[building] = get_sections_df_2(
                        good_sections, app_meter.good_sections())
                app_df = next(
                    app_meter.load(physical_quantity='power',
                                   ac_type=self.power['appliance'],
                                   sample_period=self.sample_period))
                # Slice mains and appliance data to the shared sections.
                main_df_list = [
                    main_df[sec[0]:sec[1]]
                    for sec in self.sec_dict[building]
                ]
                app_df_list = [
                    app_df[sec[0]:sec[1]]
                    for sec in self.sec_dict[building]
                ]
                appliance_readings.append(app_df_list)
            if self.DROP_ALL_NANS:
                # Align mains and appliance frames on NaN-free timestamps.
                main_df_list, appliance_readings = self.dropna(
                    main_df_list, appliance_readings)
            if self.artificial_aggregate:
                # NOTE(review): appliance_readings holds *lists* of section
                # DataFrames here, so this branch indexes a list with .shape
                # — verify it is ever exercised in this variant.
                print("Creating an Artificial Aggregate")
                test_mains = pd.DataFrame(
                    np.zeros(appliance_readings[0].shape),
                    index=appliance_readings[0].index,
                    columns=appliance_readings[0].columns)
                for app_reading in appliance_readings:
                    test_mains += app_reading
            print("Test Jointly")
            self.test_mains = (main_df_list)
            test_submeters = appliance_readings.copy()
            for j, appliance_name in enumerate(self.appliances):
                if self.isState:
                    # Convert power series to activation states using the
                    # per-appliance threshold from `config`.
                    for i, app_df in enumerate(appliance_readings[j]):
                        _, test_submeters[j][i] = get_activations(
                            app_df, config['threshold'][appliance_name])
                self.test_submeters[j] = (appliance_name, test_submeters[j])
            # Key used to store per-building results, e.g. "REDD_1".
            self.storing_key = str(dataset) + "_" + str(building)
            self.call_predict(self.classifiers, building)
return colors # Maps from human-readable name to path DATASETS = OrderedDict() DATASETS['REDD'] = join(DATASET_PATH, 'redd/low_freq') # DATASETS['Smart*'] = join(DATASET_PATH, 'smart') #DATASETS['Pecan Street'] = join(DATASET_PATH, 'pecan_1min') # DATASETS['AMPds'] = join(DATASET_PATH, 'ampds') DATASETS['iAWE'] = join(DATASET_PATH, 'iawe') DATASETS['UKPD'] = '/data/mine/vadeec/h5_cropped' if LOAD_DATASETS: electrics = {} proportions = {} for dataset_name, dataset_path in DATASETS.iteritems(): dataset = DataSet() print("Loading", dataset_path) dataset.load_hdf5(dataset_path, [1]) building = dataset.buildings[1] electric = building.utility.electric electrics[dataset_name] = electric proportions[dataset_name] = proportion_per_appliance(electric, merge=True) def pretty_name_appliance_names(appliance_name_list): names = [name.replace('_', ' ') for name in appliance_name_list] names = [name[0].upper() + name[1:] for name in names] for old, new in [('Htpc', 'Home theatre PC'), ('Boiler', 'Gas boiler'), ('Air conditioner', 'Air conditioning')]: try:
# Script fragment: build the LaTeX column headers for the dataset-statistics
# table, then load each dataset and (for Smart*) filter implausible power
# values before computing stats.  Python 2 (dict.iteritems).
COLUMNS['uptime'] = """Mains up-time\\\\per building\\\\(days)"""
COLUMNS['proportion_up'] = """Percentage\\\\up-time"""
# COLUMNS['prop_timeslices'] = ("""% timeslices\\\\where energy\\\\"""
#                               """submetered > 70%""")
for key, value in COLUMNS.iteritems():
    if OUTPUT_LATEX:
        # Wrap each header in the LaTeX specialcell/bold macros.
        COLUMNS[key] = """\textbf{\specialcell[h]{""" + value + """}}"""
    else:
        # Plain-text output: use the key itself as the column label.
        COLUMNS[key] = key
# One row per dataset, one column per statistic.
stats_df = pd.DataFrame(index=DATASETS.keys(), columns=COLUMNS.values())
for ds_name, ds_path in DATASETS.iteritems():
    if LOAD_DATASETS:
        dataset = DataSet()
        print("##################################################")
        print("Loading", ds_path)
        dataset.load_hdf5(ds_path)
        if ds_name == "Smart*":
            # Smart* needs extra cleaning: drop readings above 20 kW.
            building = dataset.buildings[1]
            building = prepb.filter_out_implausible_values(
                building, Measurement('power', 'active'),
                max_threshold=20000)
            #electric = building.utility.electric
            #electric.crop('2012-06-05', '2012-06-10')
            #building = prepb.filter_channels_with_less_than_x_samples(
            #    building, 100)
            dataset.buildings[1] = building
            print("Preprocessed!")
# Script: load the iAWE dataset and run the standard nilmtk (v0.1-era)
# preprocessing pipeline on building 1.
import nilmtk.preprocessing.electricity.building as prepb
import nilmtk.preprocessing.electricity.single as prep
from nilmtk.stats.electricity.building import plot_missing_samples_using_bitmap
from nilmtk.sensors.electricity import Measurement
from nilmtk.dataset import DataSet

dataset = DataSet()
dataset.load_hdf5("/home/nipun/Dropbox/nilmtk_datasets/iawe")
building = dataset.buildings[1]

# 1. sum together split mains and DualSupply appliances
building.utility.electric = building.utility.electric.sum_split_supplies()

# optional. (required for iAWE) remove samples where voltage outside range
# Fixing implausible voltage values (keep 160-260 V)
building = prepb.filter_out_implausible_values(
    building, Measurement('voltage', ''), 160, 260)

# optional. (required for iAWE) Note that this will remove motor as it does
# not have any data in this period
building = prepb.filter_datetime(
    building, '7-13-2013', '8-4-2013')

# 2. downsample mains, circuits and appliances to 1-minute resolution
building = prepb.downsample(building, rule='1T')

# 3. Fill large gaps in appliances with zeros and forward-fill small gaps
building = prepb.fill_appliance_gaps(building)

# optional. (required for iAWE)
building = prepb.filter_top_k_appliances(building, k=6) return building def preprocess_pecan(building, freq): building = prepb.downsample(building, rule=freq) building = prepb.filter_top_k_appliances(building, k=6) return building preprocess_map = {'iawe': preprocess_iawe, 'redd/low_freq': preprocess_redd, 'ampds': preprocess_ampds, 'pecan_1min': preprocess_pecan} # <codecell> dataset_name = "iawe" dataset = DataSet() dataset.load_hdf5(os.path.join(base_path, dataset_name)) print("Loaded {}".format(dataset_name)) for freq in frequencies: print("*" * 80) print("Loading {}".format(freq)) building = dataset.buildings[1] building = preprocess_map[dataset_name](building, freq) print("Number of appliance left = {}".format( len(building.utility.electric.appliances.keys()))) print("Dividing data into test and train") train, test = train_test_split(building, train_size=0.5) for disaggregator_name, disaggregator in disaggregators.iteritems(): # Train t1 = time.time() disaggregator.train(train, disagg_features=[DISAGG_FEATURE])
# Script: load the REDD low-frequency dataset and run the standard nilmtk
# (v0.1-era) preprocessing pipeline on building 1.  The iAWE-specific
# voltage/date filters are kept commented out for reference.
import nilmtk.preprocessing.electricity.building as prepb
import nilmtk.preprocessing.electricity.single as prep
from nilmtk.stats.electricity.building import plot_missing_samples_using_bitmap
from nilmtk.sensors.electricity import Measurement
from nilmtk.dataset import DataSet

dataset = DataSet()
dataset.load_hdf5("/home/nipun/Dropbox/nilmtk_datasets/redd/low_freq")
# dataset.load_hdf5("/home/nipun/Dropbox/nilmtk_datasets/iawe")
building = dataset.buildings[1]

# 1. sum together split mains and DualSupply appliances
building.utility.electric = building.utility.electric.sum_split_supplies()

# optional. (required for iAWE) remove samples where voltage outside range
# Fixing implausible voltage values
# building = prepb.filter_out_implausible_values(
#     building, Measurement('voltage', ''), 160, 260)

# optional. (required for iAWE) Note that this will remove motor as it does
# not have any data in this period
# building = prepb.filter_datetime(
#     building, '7-13-2013', '8-4-2013')

# 2. downsample mains, circuits and appliances to 1-minute resolution
building = prepb.downsample(building, rule='1T')

# 3. Fill large gaps in appliances with zeros and forward-fill small gaps
building = prepb.fill_appliance_gaps(building)
else: LOAD_DATASET = False DATASET_PATH = expanduser('~/Dropbox/Data/nilmtk_datasets/redd/low_freq') N_APPLIANCES = 3 LATEX_PDF_OUTPUT_FILENAME = expanduser('~/PhD/writing/papers/e_energy_2014/' 'nilmtk_e_energy_2014/figures/' 'lost_samples.pdf') plt.close('all') latexify(columns=1, fig_height=1.5) fig = plt.figure() ax = fig.add_subplot(111) if LOAD_DATASET: dataset = DataSet() print('Loading', DATASET_PATH) dataset.load_hdf5(DATASET_PATH) electric = dataset.buildings[1].utility.electric electric_cropped = deepcopy(electric) electric_cropped.appliances = {k:v for k,v in electric.appliances.items()[:N_APPLIANCES]} bstats.plot_missing_samples_using_bitmap(electric_cropped, ax=ax, cmap=plt.cm.Greys, gain=5) format_axes(ax) xlim = ax.get_xlim() ax.set_title('') plt.tight_layout() for spine in ['top', 'right']: ax.spines[spine].set_visible(True) ax.spines[spine].set_color(SPINE_COLOR)
def train_jointly(self, clf, d):
    """Load train mains and submeters split into good sections (gap-free
    spans shared by mains and each appliance meter), optionally convert
    submeter power to on/off states, and fit `clf` on all sections.

    Uses module-level helpers get_sections_df_2 / get_activations and the
    module-level `config` dict.  Populates self.train_mains as a flat list
    of section DataFrames and self.train_submeters as
    [(appliance_name, [section_df, ...]), ...].
    """
    # This function has a few issues, which should be addressed soon
    print("............... Loading Data for training ...................")
    # store the train_main readings for all buildings
    self.train_mains = []
    self.train_submeters = [[] for i in range(len(self.appliances))]
    # building -> list of (start, end) section boundaries.
    self.sec_dict = {}
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        train = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            print("Loading building ... ", building)
            # Restrict the loaded data to the configured time window.
            train.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            main_meter = train.buildings[building].elec.mains()
            good_sections = train.buildings[building].elec.mains(
            ).good_sections()
            main_df = next(
                main_meter.load(physical_quantity='power',
                                ac_type=self.power['mains'],
                                sample_period=self.sample_period))
            # train_df = train_df[[list(train_df.columns)[0]]]
            # main_df_list = get_sections_df(main_df, good_sections)
            # train_df
            appliance_readings = []
            for appliance_name in self.appliances:
                app_meter = train.buildings[building].elec[appliance_name]
                app_df = next(
                    app_meter.load(physical_quantity='power',
                                   ac_type=self.power['appliance'],
                                   sample_period=self.sample_period))
                # app_df_list = get_sections_df(app_df, good_sections)
                # Sections are computed once per building, from the first
                # appliance's good sections intersected with the mains'.
                if building not in self.sec_dict:
                    self.sec_dict[building] = get_sections_df_2(
                        good_sections, app_meter.good_sections())
                # Slice mains and appliance data to the shared sections.
                main_df_list = [
                    main_df[sec[0]:sec[1]]
                    for sec in self.sec_dict[building]
                ]
                app_df_list = [
                    app_df[sec[0]:sec[1]]
                    for sec in self.sec_dict[building]
                ]
                appliance_readings.append(
                    app_df_list)  # appliance_readings->app_df_list->app_df
            if self.DROP_ALL_NANS:
                # Align mains and appliance frames on NaN-free timestamps.
                main_df_list, appliance_readings = self.dropna(
                    main_df_list, appliance_readings)
            # Train_list: [pd[sec],pd[sec]..]
            if self.artificial_aggregate:
                # NOTE(review): appliance_readings holds *lists* of section
                # DataFrames here, so this branch indexes a list with .shape
                # — verify it is ever exercised in this variant.
                print("Creating an Artificial Aggregate")
                train_df = pd.DataFrame(
                    np.zeros(appliance_readings[0].shape),
                    index=appliance_readings[0].index,
                    columns=appliance_readings[0].columns)
                for app_reading in appliance_readings:
                    train_df += app_reading
            print("Train Jointly")
            self.train_mains += main_df_list  # [[sec],[sec]...]]
            test_submeters = None  # (no-op placeholder removed)
            train_submeters = appliance_readings.copy()
            for j, appliance_name in enumerate(self.appliances):
                if self.isState:
                    # Convert power series to activation states using the
                    # per-appliance threshold from `config`.
                    for i, app_df in enumerate(appliance_readings[j]):
                        _, train_submeters[j][i] = get_activations(
                            app_df, config['threshold'][appliance_name])
                self.train_submeters[j] += train_submeters[j]
    # Repackage submeters as [(appliance_name, [section_df, ...]), ...].
    appliance_readings = []
    for i, appliance_name in enumerate(self.appliances):
        appliance_readings.append(
            (appliance_name, self.train_submeters[i]))
    self.train_submeters = appliance_readings  # [(app_name, [[sec],[sec]...])...]
    clf.partial_fit(self.train_mains, self.train_submeters)
# Script: load the REDD low-frequency dataset (nilmtk v0.1 HDF5 layout) and
# grab building 1's electric data for interactive use.
from __future__ import print_function
from nilmtk.dataset import DataSet
from os.path import expanduser

H5_DIR = expanduser('~/Dropbox/Data/nilmtk_datasets/redd/low_freq/')

dataset = DataSet()
print('loading', H5_DIR)
dataset.load_hdf5(H5_DIR)
# Electric utility data for building 1 (mains + appliances).
electric = dataset.buildings[1].utility.electric
def load_datasets_chunks(self):
    """Chunk-wise driver: stream train data chunk-by-chunk and fit the
    classifiers incrementally, then stream test data chunk-by-chunk and
    run prediction on each chunk.

    Reads self.train_datasets_dict / self.test_datasets_dict; mutates
    self.train_mains, self.train_submeters, self.test_mains,
    self.test_submeters.
    """
    self.store_classifier_instances()
    d = self.train_datasets_dict
    print(
        "............... Loading Data for preprocessing ..................."
    )
    # store the train_main readings for all buildings
    print(
        "............... Loading Train_Mains for preprocessing ..................."
    )
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        for building in d[dataset]['buildings']:
            train = DataSet(d[dataset]['path'])
            print("Loading building ... ", building)
            # Restrict the loaded data to the configured time window.
            train.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            # Chunked iterators over mains and each appliance channel.
            mains_iterator = train.buildings[building].elec.mains().load(
                chunksize=self.chunk_size,
                physical_quantity='power',
                ac_type=self.power['mains'],
                sample_period=self.sample_period)
            print(self.appliances)
            appliance_iterators = [
                train.buildings[building].elec.select_using_appliances(
                    type=app_name).load(chunksize=self.chunk_size,
                                        physical_quantity='power',
                                        ac_type=self.power['appliance'],
                                        sample_period=self.sample_period)
                for app_name in self.appliances
            ]
            print(train.buildings[building].elec.mains())
            # Dummy outer loop: `chunk` is unused; it only drives iteration
            # until the mains chunks are exhausted, while the parallel
            # iterators above supply the actual data.
            for chunk_num, chunk in enumerate(
                    train.buildings[building].elec.mains().load(
                        chunksize=self.chunk_size,
                        physical_quantity='power',
                        ac_type=self.power['mains'],
                        sample_period=self.sample_period)):
                print("starting enumeration..........")
                train_df = next(mains_iterator)
                appliance_readings = []
                for it in appliance_iterators:
                    try:
                        appliance_df = next(it)
                    except StopIteration:
                        # BUG FIX: previously `pass`, which appended either
                        # an undefined name (NameError on the first
                        # exhausted iterator) or the *previous* iterator's
                        # stale frame.  Use an empty frame, consistent with
                        # train_chunk_wise / test_chunk_wise.
                        appliance_df = pd.DataFrame()
                    appliance_readings.append(appliance_df)
                if self.DROP_ALL_NANS:
                    # Align mains and appliance frames on NaN-free stamps.
                    train_df, appliance_readings = self.dropnans(
                        train_df, appliance_readings)
                if self.FILL_ALL_NANS:
                    train_df, appliance_readings = self.fillnans(
                        train_df, appliance_readings)
                if self.artificial_aggregate:
                    # Replace measured mains with the sum of appliances.
                    print("Creating an Artificial Aggregate")
                    train_df = pd.DataFrame(
                        np.zeros(appliance_readings[0].shape),
                        index=appliance_readings[0].index,
                        columns=appliance_readings[0].columns)
                    for app_reading in appliance_readings:
                        train_df += app_reading
                train_appliances = []
                for cnt, reading in enumerate(appliance_readings):
                    train_appliances.append((self.appliances[cnt], [reading]))
                self.train_mains = [train_df]
                self.train_submeters = train_appliances
                # Incremental fit on this chunk.
                self.call_partial_fit()
    print(
        "...............Finished Loading Train mains and Appliances for preprocessing ..................."
    )
    # store train submeters reading
    d = self.test_datasets_dict
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        for building in d[dataset]['buildings']:
            test = DataSet(d[dataset]['path'])
            test.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            mains_iterator = test.buildings[building].elec.mains().load(
                chunksize=self.chunk_size,
                physical_quantity='power',
                ac_type=self.power['mains'],
                sample_period=self.sample_period)
            appliance_iterators = [
                test.buildings[building].elec.select_using_appliances(
                    type=app_name).load(chunksize=self.chunk_size,
                                        physical_quantity='power',
                                        ac_type=self.power['appliance'],
                                        sample_period=self.sample_period)
                for app_name in self.appliances
            ]
            # Dummy outer loop, as in the training half above.
            for chunk_num, chunk in enumerate(
                    test.buildings[building].elec.mains().load(
                        chunksize=self.chunk_size,
                        physical_quantity='power',
                        ac_type=self.power['mains'],
                        sample_period=self.sample_period)):
                test_df = next(mains_iterator)
                appliance_readings = []
                for it in appliance_iterators:
                    try:
                        appliance_df = next(it)
                    except StopIteration:
                        # BUG FIX: same stale/undefined-frame bug as above.
                        appliance_df = pd.DataFrame()
                    appliance_readings.append(appliance_df)
                if self.DROP_ALL_NANS:
                    test_df, appliance_readings = self.dropnans(
                        test_df, appliance_readings)
                if self.FILL_ALL_NANS:
                    test_df, appliance_readings = self.fillnans(
                        test_df, appliance_readings)
                if self.artificial_aggregate:
                    print("Creating an Artificial Aggregate......")
                    test_df = pd.DataFrame(
                        np.zeros(appliance_readings[0].shape),
                        index=appliance_readings[0].index,
                        columns=appliance_readings[0].columns)
                    for app_reading in appliance_readings:
                        test_df += app_reading
                test_appliances = []
                for cnt, reading in enumerate(appliance_readings):
                    test_appliances.append((self.appliances[cnt], [reading]))
                self.test_mains = test_df
                self.test_submeters = test_appliances
                print("Dataset %s Building %s chunk %s" %
                      (dataset, building, chunk_num))
                self.test_mains = [self.test_mains]
                self.call_predict(self.classifiers)
def test_jointly(self, d):
    """Load test mains (and, unless site-only, submeter readings) for every
    building in every dataset of *d*, then run prediction per building.

    Parameters
    ----------
    d : dict
        ``dataset_name -> {'path': ..., 'buildings': {id: {'start_time',
        'end_time'}}}``; presumably ``self.test_datasets_dict`` — TODO
        confirm against the caller.

    Side effects: sets ``self.buildings``, ``self.pkmap1``, ``self.pkkeys1``,
    ``self.test_mains``, ``self.storing_key``, appends to
    ``self.test_submeters``, and calls ``self.call_predict`` once per building.
    """
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        test = DataSet(d[dataset]['path'])
        self.buildings = d[dataset]['buildings']
        # Per-building PKMap objects / key tables; filled only when self.do_pkm is truthy.
        self.pkmap1 = {k: None for k in self.buildings}
        self.pkkeys1 = {k: None for k in self.buildings}
        for building in self.buildings:
            # Restrict loading to this building's configured test window.
            test.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            # next(...) takes the first chunk yielded by the mains loader.
            test_mains = next(test.buildings[building].elec.mains().load(
                physical_quantity='power',
                ac_type=self.power['mains'],
                sample_period=self.sample_period))
            print('test len is {}'.format(len(test_mains.index)))
            if self.DROP_ALL_NANS and self.site_only:
                # Site-only run: clean the mains alone (no submeters to align with).
                test_mains, _ = self.dropna(test_mains, [])
            if not self.site_only:
                # One DataFrame per target appliance, in self.appliances order.
                appliance_readings = []
                for appliance in self.appliances:
                    test_df = next(
                        (test.buildings[building].elec[appliance].load(
                            physical_quantity='power',
                            ac_type=self.power['appliance'],
                            sample_period=self.sample_period)))
                    appliance_readings.append(test_df)
                if self.DROP_ALL_NANS:
                    # Drop NaNs, aligning mains with all appliance readings.
                    test_mains, appliance_readings = self.dropna(
                        test_mains, appliance_readings)
                if self.artificial_aggregate:
                    # Replace measured mains by the sum of the submeter readings.
                    print("Creating an Artificial Aggregate")
                    test_mains = pd.DataFrame(
                        np.zeros(appliance_readings[0].shape),
                        index=appliance_readings[0].index,
                        columns=appliance_readings[0].columns)
                    for app_reading in appliance_readings:
                        test_mains += app_reading
                # NOTE(review): self.test_submeters is appended to but never
                # reset here, so readings accumulate across buildings/datasets
                # — confirm this is intended.
                for i, appliance_name in enumerate(self.appliances):
                    self.test_submeters.append(
                        (appliance_name, [appliance_readings[i]]))
                if self.do_pkm:
                    p1 = PKMap(test.buildings[building],
                               no_count=True,
                               sample_period=self.sample_period)
                    # Keep only 'active'-power rows overlapping the test mains index.
                    d1 = p1.data0['active'].loc[test_mains.index].dropna()
                    p1.data0['active'] = d1
                    self.pkmap1[building] = p1
                    # Threshold at 11 (presumably watts — TODO confirm) to get on/off states.
                    d2 = d1 > 11
                    print('counting pk_keys ...', end='\r')
                    # Encode each timestamp's on/off pattern as a '0'/'1' string key.
                    self.pkkeys1[building] = pd.DataFrame(
                        np.array([
                            ''.join([str(int(u)) for u in k])
                            for k in d2.itertuples(index=False)
                        ]),
                        index=d1.index,
                        columns=appliance_readings[0].columns)
            self.test_mains = [test_mains]
            # Key under which this building's results are stored downstream.
            self.storing_key = str(dataset) + "_" + str(building)
            self.call_predict(self.classifiers, test.metadata["timezone"])
def train_jointly(self, clf, d):
    """Load training mains and submeter readings for every building in *d*
    and fit *clf* once on the accumulated data via ``partial_fit``.

    Parameters
    ----------
    clf : object
        Disaggregation model exposing ``partial_fit(mains, submeters)``.
    d : dict
        ``dataset_name -> {'path': ..., 'buildings': {id: {'start_time',
        'end_time'}}}``.

    Side effects: sets ``self.train_mains``, ``self.train_submeters``,
    ``self.buildings``, ``self.pkmap0``, ``self.pkkeys0``.
    """
    # This function has a few issues, which should be addressed soon
    print("............... Loading Data for training ...................")
    # store the train_main readings for all buildings
    self.train_mains = []
    # One list per appliance; each collects that appliance's per-building DataFrames.
    self.train_submeters = [[] for i in range(len(self.appliances))]
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        train = DataSet(d[dataset]['path'])
        self.buildings = d[dataset]['buildings']
        # Per-building PKMap objects / key tables; filled only when self.do_pkm is truthy.
        self.pkmap0 = {k: None for k in self.buildings}
        self.pkkeys0 = {k: None for k in self.buildings}
        for building in d[dataset]['buildings']:
            print("Loading building ... ", building)
            # Restrict loading to this building's configured training window.
            train.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            # First chunk of mains power at the requested AC type / sample period.
            train_df = next(train.buildings[building].elec.mains().load(
                physical_quantity='power',
                ac_type=self.power['mains'],
                sample_period=self.sample_period))
            # Keep only the first power column of the mains reading.
            train_df = train_df[[list(train_df.columns)[0]]]
            appliance_readings = []
            for appliance_name in self.appliances:
                appliance_df = next(
                    train.buildings[building].elec[appliance_name].load(
                        physical_quantity='power',
                        ac_type=self.power['appliance'],
                        sample_period=self.sample_period))
                # Keep only the first power column of the appliance reading.
                appliance_df = appliance_df[[
                    list(appliance_df.columns)[0]
                ]]
                appliance_readings.append(appliance_df)
            if self.DROP_ALL_NANS:
                # Drop NaNs, aligning mains with all appliance readings.
                train_df, appliance_readings = self.dropna(
                    train_df, appliance_readings)
            if self.artificial_aggregate:
                # Replace measured mains by the sum of the submeter readings.
                print("Creating an Artificial Aggregate")
                train_df = pd.DataFrame(
                    np.zeros(appliance_readings[0].shape),
                    index=appliance_readings[0].index,
                    columns=appliance_readings[0].columns)
                for app_reading in appliance_readings:
                    train_df += app_reading
            self.train_mains.append(train_df)
            for i, appliance_name in enumerate(self.appliances):
                self.train_submeters[i].append(appliance_readings[i])
            if self.do_pkm:
                p0 = PKMap(train.buildings[building],
                           no_count=True,
                           sample_period=self.sample_period)
                # Keep only 'active'-power rows overlapping the training mains index.
                d1 = p0.data0['active'].loc[train_df.index].dropna()
                p0.data0['active'] = d1
                self.pkmap0[building] = p0
                # Threshold at 11 (presumably watts — TODO confirm) to get on/off states.
                d2 = d1 > 11
                print('counting pk_keys0 ...', end='\r')
                # Encode each timestamp's on/off pattern as a '0'/'1' string key.
                self.pkkeys0[building] = pd.DataFrame(
                    np.array([
                        ''.join([str(int(u)) for u in k])
                        for k in d2.itertuples(index=False)
                    ]),
                    index=d1.index,
                    columns=appliance_readings[0].columns)
                # do_pkm doubles as a BM() target when given as int/str.
                if isinstance(self.do_pkm, int) or isinstance(
                        self.do_pkm, str):
                    self.pkmap0[building].BM(obj=self.do_pkm, no_show=True)
    # Repackage the per-appliance lists as (name, [DataFrames]) pairs,
    # the shape partial_fit expects.
    appliance_readings = []
    for i, appliance_name in enumerate(self.appliances):
        appliance_readings.append(
            (appliance_name, self.train_submeters[i]))
    self.train_submeters = appliance_readings
    clf.partial_fit(self.train_mains, self.train_submeters)
"""Indian AC"""
# Example script using the legacy nilmtk v0.1 API: load the iAWE dataset,
# clean it, and extract the air conditioner's active-power series.
DATA_PATH = "/home/nipun/Dropbox/nilmtk_datasets/iawe/"
FIG_SAVE_PATH = "/home/nipun/Desktop/abc.pdf"
from nilmtk.dataset import DataSet
import nilmtk.preprocessing.electricity.building as prepb
import nilmtk.preprocessing.electricity.single as prep
from nilmtk.sensors.electricity import Measurement
import matplotlib.pyplot as plt

ds = DataSet()
ds.load_hdf5(DATA_PATH)

# First building
building = ds.buildings[1]

# 1. sum together split mains and DualSupply appliances
building.utility.electric = building.utility.electric.sum_split_supplies()

# optional. (required for iAWE) remove samples where voltage outside range
# Fixing implausible voltage values (keep only 160-260 V)
building = prepb.filter_out_implausible_values(
    building, Measurement('voltage', ''), 160, 260)

# optional. (required for iAWE) Note that this will remove motor as it does not have
# any data in this period
building = prepb.filter_datetime(
    building, '7-13-2013', '8-4-2013')

# Air conditioner instance 1; ac_power is its active-power column.
ac = building.utility.electric.appliances[('air conditioner', 1)]
ac_power = ac[('power', 'active')]
# Example script: load the preprocessed AMPds HDF5 store under a memory cap,
# keep only the top appliances, and split the data into train/test sets.
import time
import pandas as pd
import matplotlib.pyplot as plt
import resource

PATH = '/home/nipun/Desktop/AMPds/'
EXPORT_PATH = '/home/nipun/Desktop/temp/ampds/'
# Feature used for disaggregation downstream.
DISAGG_FEATURE = Measurement('power', 'active')

# Setting the limits to 5 GB RAM usage.
# FIX: the original used Python 2 long literals (1048576L, -1L), which are a
# syntax error on Python 3; plain ints have identical values on both versions
# (-1 is resource.RLIM_INFINITY, i.e. no hard limit).
megs = 5000
resource.setrlimit(resource.RLIMIT_AS, (megs * 1048576, -1))

# Loading data from HDF5 store
dataset = DataSet()
t1 = time.time()
dataset.load_hdf5(EXPORT_PATH)
t2 = time.time()
print("Runtime to import from HDF5 = {:.2f}".format(t2 - t1))

# Experiment on first (and only) building
b = dataset.buildings[1]

# Filtering to include only top 8 appliances
# NOTE(review): comment says "top 8" but k=3 is passed — confirm which is intended.
b = filter_top_k_appliances(b, 3)

# Dividing the data into train and test
train, test = train_test_split(b)
# Again subdivide data into train, test for testing on even smaller data
def load_datasets(self):
    """Stream train and test data chunk-by-chunk from the configured
    datasets, align mains with submeter readings per chunk, preprocess each
    chunk, and persist it via ``self.store_preprocessed_data``.

    Reads ``self.train_datasets_dict`` / ``self.test_datasets_dict``; no
    return value — all output goes through ``store_preprocessed_data``.
    """
    d = self.train_datasets_dict
    print("............... Loading Data for preprocessing ...................")
    # store the train_main readings for all buildings
    print("............... Loading Train_Mains for preprocessing ...................")
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        for building in d[dataset]['buildings']:
            train = DataSet(d[dataset]['path'])
            print("Loading building ... ", building)
            # Restrict loading to this building's configured training window.
            train.set_window(start=d[dataset]['buildings'][building]['start_time'], end=d[dataset]['buildings'][building]['end_time'])
            # Chunked iterator over mains power.
            mains_iterator = train.buildings[building].elec.mains().load(chunksize=self.chunk_size, physical_quantity='power', ac_type=self.power['mains'], sample_period=self.sample_period)
            print(self.appliances)
            # One chunked iterator per target appliance, advanced in lockstep
            # with mains_iterator below.
            appliance_iterators = [train.buildings[building].elec.select_using_appliances(type=app_name).load(chunksize=self.chunk_size, physical_quantity='power', ac_type=self.power['appliance'], sample_period=self.sample_period) for app_name in self.appliances]
            # Dummy loop: a second, throwaway mains load only counts chunks so
            # the body can drive mains_iterator/appliance_iterators with next().
            for chunk_num, chunk in enumerate(train.buildings[building].elec.mains().load(chunksize=self.chunk_size, physical_quantity='power', ac_type=self.power['mains'], sample_period=self.sample_period)):
                train_df = next(mains_iterator)
                train_df = train_df.dropna(axis=0)
                appliance_readings = []
                # ix accumulates the timestamps common to mains and every appliance.
                ix = train_df.index
                for i in appliance_iterators:
                    appliance_df = next(i)
                    appliance_df = appliance_df.dropna(axis=0)
                    appliance_readings.append(appliance_df)
                    ix = ix.intersection(appliance_df.index)
                train_df = train_df.loc[ix]  # Choosing the common timestamps
                for i in range(len(appliance_readings)):
                    appliance_readings[i] = appliance_readings[i].loc[ix]  # Choosing the Common timestamps
                # Pair each reading with its appliance name for preprocessing.
                train_appliances = []
                for cnt, i in enumerate(appliance_readings):
                    train_appliances.append((self.appliances[cnt], i))
                train_df, train_appliances = self.preprocess_HDF5(train_df, train_appliances, method='train')
                self.store_preprocessed_data('train', train_df, train_appliances, dataset, building, chunk_num)
    print("...............Finished Loading Train mains and Appliances for preprocessing ...................")
    # store train submeters reading
    d = self.test_datasets_dict
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        for building in d[dataset]['buildings']:
            test = DataSet(d[dataset]['path'])
            # Restrict loading to this building's configured test window.
            test.set_window(start=d[dataset]['buildings'][building]['start_time'], end=d[dataset]['buildings'][building]['end_time'])
            mains_iterator = test.buildings[building].elec.mains().load(chunksize=self.chunk_size, physical_quantity='power', ac_type=self.power['mains'], sample_period=self.sample_period)
            #print (self.appliances)
            appliance_iterators = [test.buildings[building].elec.select_using_appliances(type=app_name).load(chunksize=self.chunk_size, physical_quantity='power', ac_type=self.power['appliance'], sample_period=self.sample_period) for app_name in self.appliances]
            # Same dummy-loop pattern as the training half above.
            for chunk_num, chunk in enumerate(test.buildings[building].elec.mains().load(chunksize=self.chunk_size, physical_quantity='power', ac_type=self.power['mains'], sample_period=self.sample_period)):
                test_df = next(mains_iterator)
                test_df = test_df.dropna(axis=0)
                appliance_readings = []
                ix = test_df.index
                for i in appliance_iterators:
                    appliance_df = next(i)
                    appliance_df = appliance_df.dropna(axis=0)
                    appliance_readings.append(appliance_df)
                    ix = ix.intersection(appliance_df.index)
                test_df = test_df.loc[ix]  # Choosing the common timestamps
                for i in range(len(appliance_readings)):
                    appliance_readings[i] = appliance_readings[i].loc[ix]  # Choosing the Common timestamps
                test_appliances = []
                for cnt, i in enumerate(appliance_readings):
                    test_appliances.append((self.appliances[cnt], i))
                # NOTE(review): unlike the train half, submeters are not
                # preprocessed here (submeters=None) and preprocess_HDF5
                # returns only the mains frame — confirm intended.
                test_df = self.preprocess_HDF5(test_df, submeters=None, method='test')
                print(test_df.shape, test_appliances[0][1].shape)
                self.store_preprocessed_data('test', test_df, test_appliances, dataset, building, chunk_num)