def test_jointly(self,d):
    """Load the test mains (and, unless running site-only, the appliance
    submeters) for every building listed in *d*, then predict per building.

    d maps dataset-name -> {'path': ..., 'buildings': {id: {'start_time',
    'end_time'}}}.  Side effects: repopulates self.test_mains,
    self.test_submeters and self.storing_key, then calls self.call_predict.
    """
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ",dataset, " dataset")
        test=DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            # Restrict the dataset to this building's configured time window.
            test.set_window(start=d[dataset]['buildings'][building]['start_time'],end=d[dataset]['buildings'][building]['end_time'])
            # NOTE(review): ac_type is hard-coded to 'apparent' here, unlike the
            # appliance loads below which honour self.power — confirm intentional.
            test_mains=next(test.buildings[building].elec.mains().load(physical_quantity='power', ac_type='apparent', sample_period=self.sample_period))
            if self.DROP_ALL_NANS and self.site_only:
                # Site-only run: clean the mains alone (no submeters to align with).
                test_mains, _= self.dropna(test_mains,[])
            if self.site_only != True:
                appliance_readings=[]
                # One DataFrame per requested appliance, same window/sample period.
                for appliance in self.appliances:
                    test_df=next((test.buildings[building].elec[appliance].load(physical_quantity='power', ac_type=self.power['appliance'], sample_period=self.sample_period)))
                    appliance_readings.append(test_df)
                if self.DROP_ALL_NANS:
                    # Joint NaN drop keeps mains and submeters index-aligned.
                    test_mains , appliance_readings = self.dropna(test_mains,appliance_readings)
                if self.artificial_aggregate:
                    # Replace the measured mains by the sum of the submeter readings.
                    print ("Creating an Artificial Aggregate")
                    test_mains = pd.DataFrame(np.zeros(appliance_readings[0].shape),index = appliance_readings[0].index,columns=appliance_readings[0].columns)
                    for app_reading in appliance_readings:
                        test_mains+=app_reading
                for i, appliance_name in enumerate(self.appliances):
                    self.test_submeters.append((appliance_name,[appliance_readings[i]]))
            self.test_mains = [test_mains]
            # Key used when persisting/reporting results for this dataset+building.
            self.storing_key = str(dataset) + "_" + str(building)
            self.call_predict(self.classifiers, test.metadata["timezone"])
def test_jointly(self, d):
    """Load test mains and appliance submeters for every building in *d*
    and run prediction building by building.

    Fix: the NaN-dropping step and the stored mains previously used
    ``test_df`` — which at that point held the *last appliance's* readings —
    instead of ``test_mains``; the aggregate mains were loaded and then
    silently discarded.
    """
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        test = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            # Restrict the dataset to this building's configured time window.
            test.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            test_mains = next(test.buildings[building].elec.mains().load(
                physical_quantity='power', ac_type=self.power['mains'],
                sample_period=self.sample_period))
            appliance_readings = []
            # One DataFrame per requested appliance.
            for appliance in self.appliances:
                test_df = next((test.buildings[building].elec.submeters(
                ).select_using_appliances(type=appliance).load(
                    physical_quantity='power',
                    ac_type=self.power['appliance'],
                    sample_period=self.sample_period)))
                appliance_readings.append(test_df)
            if self.DROP_ALL_NANS:
                # Drop NaNs on the *mains* (not the last appliance frame) so
                # mains and submeters remain index-aligned.
                test_mains, appliance_readings = self.dropna(
                    test_mains, appliance_readings)
            self.test_mains = [test_mains]
            for i, appliance_name in enumerate(self.appliances):
                self.test_submeters.append(
                    (appliance_name, [appliance_readings[i]]))
            self.call_predict(self.classifiers)
def test_jointly(self, d):
    """Load test mains with every available power type, derive the
    active/apparent/reactive columns, then load appliance submeters and
    predict per building.

    Reactive power is not recorded directly, so it is synthesised from the
    power triangle: Q = sqrt(S^2 - P^2).
    """
    # Store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        test = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            test.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            # all_type_power is a module-level name (defined elsewhere in this
            # file) — presumably the full list of ac_types; verify.
            test_mains = next(test.buildings[building].elec.mains().load(
                physical_quantity='power', ac_type=all_type_power,
                sample_period=self.sample_period))
            # Flatten the ('power', ac_type) columns into top-level columns.
            test_mains['active'] = test_mains['power']['active']
            test_mains['apparent'] = test_mains['power']['apparent']
            test_mains['reactive'] = np.sqrt(
                test_mains['power']['apparent']**2 -
                test_mains['power']['active']**2)
            test_mains.drop(['power'], axis=1, inplace=True)
            # Keep only the ac_types configured for the mains.
            test_mains = test_mains[self.power['mains']]
            '''
            train_df = next(train.buildings[building].elec.mains().load(physical_quantity='power', ac_type= all_type_power,sample_period = self.sample_period))
            train_df['active'] = train_df['power']['active']
            train_df['apparent'] = train_df['power']['apparent']
            train_df['reactive'] = np.sqrt(train_df['power']['apparent']**2 - train_df['power']['active']**2)
            train_df.drop(['power'], axis = 1,inplace = True)
            train_df = train_df[self.power['mains']]
            '''
            appliance_readings = []
            # One DataFrame per requested appliance.
            for appliance in self.appliances:
                test_df = next(
                    (test.buildings[building].elec[appliance].load(
                        physical_quantity='power',
                        ac_type=self.power['appliance'],
                        sample_period=self.sample_period)))
                appliance_readings.append(test_df)
            if self.DROP_ALL_NANS:
                # Joint NaN drop keeps mains and submeters index-aligned.
                test_mains, appliance_readings = self.dropna(
                    test_mains, appliance_readings)
            if self.artificial_aggregate:
                # Replace measured mains by the sum of the submeter readings.
                print("Creating an Artificial Aggregate")
                test_mains = pd.DataFrame(
                    np.zeros(appliance_readings[0].shape),
                    index=appliance_readings[0].index,
                    columns=appliance_readings[0].columns)
                for app_reading in appliance_readings:
                    test_mains += app_reading
            self.test_mains = [test_mains]
            for i, appliance_name in enumerate(self.appliances):
                self.test_submeters.append(
                    (appliance_name, [appliance_readings[i]]))
            # Key used when persisting/reporting results for this run.
            self.storing_key = str(dataset) + "_" + str(building)
            self.call_predict(self.classifiers)
def train_jointly(self, clf, d):
    """Load training mains and appliance submeters for every building in *d*
    and fit *clf* once on the accumulated data.

    Side effects: rebuilds self.train_mains (one mains DataFrame per
    building) and self.train_submeters as [(appliance_name, [df, ...])].
    """
    # This function has a few issues, which should be addressed soon
    print("............... Loading Data for training ...................")
    # store the train_main readings for all buildings
    self.train_mains = []
    self.train_submeters = [[] for i in range(len(self.appliances))]
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        train = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            print("Loading building ... ", building)
            # Restrict to this building's configured time window.
            train.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            train_df = next(train.buildings[building].elec.mains().load(
                physical_quantity='power', ac_type=self.power['mains'],
                sample_period=self.sample_period))
            # Keep only the first power column of the mains reading.
            train_df = train_df[[list(train_df.columns)[0]]]
            appliance_readings = []
            for appliance_name in self.appliances:
                appliance_df = next(
                    train.buildings[building].elec[appliance_name].load(
                        physical_quantity='power',
                        ac_type=self.power['appliance'],
                        sample_period=self.sample_period))
                # Keep only the first power column of this submeter.
                appliance_df = appliance_df[[
                    list(appliance_df.columns)[0]
                ]]
                appliance_readings.append(appliance_df)
            if self.DROP_ALL_NANS:
                # Joint NaN drop keeps mains and submeters index-aligned.
                train_df, appliance_readings = self.dropna(
                    train_df, appliance_readings)
            if self.artificial_aggregate:
                # Replace measured mains by the sum of the submeter readings.
                print("Creating an Artificial Aggregate")
                train_df = pd.DataFrame(
                    np.zeros(appliance_readings[0].shape),
                    index=appliance_readings[0].index,
                    columns=appliance_readings[0].columns)
                for app_reading in appliance_readings:
                    train_df += app_reading
            self.train_mains.append(train_df)
            for i, appliance_name in enumerate(self.appliances):
                self.train_submeters[i].append(appliance_readings[i])
    # Repackage per-appliance lists as (name, [df, ...]) for the model API.
    appliance_readings = []
    for i, appliance_name in enumerate(self.appliances):
        appliance_readings.append(
            (appliance_name, self.train_submeters[i]))
    self.train_submeters = appliance_readings
    clf.partial_fit(self.train_mains, self.train_submeters)
def train_jointly(self, clf, d):
    """Concatenate mains and per-appliance readings across every building in
    *d* into single DataFrames and fit *clf* on the result.

    Fix: ``DataFrame.append`` was deprecated in pandas 1.4 and removed in
    pandas 2.0; accumulation now uses ``pd.concat``, which is row-wise
    equivalent.
    """
    # This function has a few issues, which should be addressed soon
    print("............... Loading Data for training ...................")
    # store the train_main readings for all buildings
    self.train_mains = pd.DataFrame()
    self.train_submeters = [
        pd.DataFrame() for i in range(len(self.appliances))
    ]
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        train = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            print("Loading building ... ", building)
            # Restrict to this building's configured time window.
            train.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            train_df = next(train.buildings[building].elec.mains().load(
                physical_quantity='power', ac_type=self.power['mains'],
                sample_period=self.sample_period))
            appliance_readings = []
            for appliance_name in self.appliances:
                appliance_df = next(
                    train.buildings[building].elec.submeters(
                    ).select_using_appliances(type=appliance_name).load(
                        physical_quantity='power',
                        ac_type=self.power['appliance'],
                        sample_period=self.sample_period))
                appliance_readings.append(appliance_df)
            if self.DROP_ALL_NANS:
                # Joint NaN drop keeps mains and submeters index-aligned.
                train_df, appliance_readings = self.dropna(
                    train_df, appliance_readings)
            # DataFrame.append was removed in pandas 2.0 -> use pd.concat.
            self.train_mains = pd.concat([self.train_mains, train_df])
            for i, appliance_name in enumerate(self.appliances):
                self.train_submeters[i] = pd.concat(
                    [self.train_submeters[i], appliance_readings[i]])
    # Repackage as [(appliance_name, [DataFrame])] for the disaggregator API.
    appliance_readings = []
    for i, appliance_name in enumerate(self.appliances):
        appliance_readings.append(
            (appliance_name, [self.train_submeters[i]]))
    self.train_mains = [self.train_mains]
    self.train_submeters = appliance_readings
    clf.partial_fit(self.train_mains, self.train_submeters)
def train_chunk_wise(self, clf, d, current_epoch):
    """
    This function loads the data from buildings and datasets with the
    specified chunk size and trains on each of them.
    """
    for dataset in d:
        # Loading the dataset
        print("Loading data for ",dataset, " dataset")
        for building in d[dataset]['buildings']:
            # Loading the building
            train=DataSet(d[dataset]['path'])
            print("Loading building ... ",building)
            train.set_window(start=d[dataset]['buildings'][building]['start_time'],end=d[dataset]['buildings'][building]['end_time'])
            # Chunked readers for the mains and for each appliance submeter.
            mains_iterator = train.buildings[building].elec.mains().load(chunksize = self.chunk_size, physical_quantity='power', ac_type = self.power['mains'], sample_period=self.sample_period)
            appliance_iterators = [train.buildings[building].elec[app_name].load(chunksize = self.chunk_size, physical_quantity='power', ac_type=self.power['appliance'], sample_period=self.sample_period) for app_name in self.appliances]
            print(train.buildings[building].elec.mains())
            for chunk_num,chunk in enumerate (train.buildings[building].elec.mains().load(chunksize = self.chunk_size, physical_quantity='power', ac_type = self.power['mains'], sample_period=self.sample_period)):
                # Loading the chunk for the specified building.
                # Dummy loop executed at the outer level, just for looping
                # until the chunked mains stream ends; the data actually used
                # below comes from mains_iterator / appliance_iterators.
                print("Starting enumeration..........")
                train_df = next(mains_iterator)
                appliance_readings = []
                for i in appliance_iterators:
                    try:
                        appliance_df = next(i)
                    except StopIteration:
                        # An exhausted submeter contributes an empty frame.
                        appliance_df = pd.DataFrame()
                    appliance_readings.append(appliance_df)
                if self.DROP_ALL_NANS:
                    # Joint NaN drop keeps mains and submeters index-aligned.
                    train_df, appliance_readings = self.dropna(train_df, appliance_readings)
                if self.artificial_aggregate:
                    # Replace measured mains by the sum of the submeter readings.
                    print ("Creating an Artificial Aggregate")
                    train_df = pd.DataFrame(np.zeros(appliance_readings[0].shape),index = appliance_readings[0].index,columns=appliance_readings[0].columns)
                    for app_reading in appliance_readings:
                        train_df+=app_reading
                train_appliances = []
                for cnt,i in enumerate(appliance_readings):
                    train_appliances.append((self.appliances[cnt],[i]))
                self.train_mains = [train_df]
                self.train_submeters = train_appliances
                # Incremental fit on this chunk.
                clf.partial_fit(self.train_mains, self.train_submeters, current_epoch)
    print("...............Finished the Training Process ...................")
def test_chunk_wise(self, d):
    """Chunk-by-chunk testing: stream mains and appliance readings for every
    building in *d* in chunks of ``self.chunk_size`` and predict on each
    chunk in turn."""
    print("...............Started the Testing Process ...................")

    def _next_chunk(reader):
        # An exhausted appliance stream contributes an empty frame so the
        # remaining mains chunks can still be processed.
        try:
            return next(reader)
        except StopIteration:
            return pd.DataFrame()

    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        buildings_cfg = d[dataset]['buildings']
        for building in buildings_cfg:
            test = DataSet(d[dataset]['path'])
            test.set_window(start=buildings_cfg[building]['start_time'],
                            end=buildings_cfg[building]['end_time'])
            elec = test.buildings[building].elec
            mains_reader = elec.mains().load(
                chunksize=self.chunk_size, physical_quantity='power',
                ac_type=self.power['mains'],
                sample_period=self.sample_period)
            appliance_readers = [
                elec[name].load(chunksize=self.chunk_size,
                                physical_quantity='power',
                                ac_type=self.power['appliance'],
                                sample_period=self.sample_period)
                for name in self.appliances
            ]
            # This second mains stream only paces the loop until the chunked
            # mains are exhausted; the data used below comes from
            # mains_reader / appliance_readers.
            pacing_stream = elec.mains().load(
                chunksize=self.chunk_size, physical_quantity='power',
                ac_type=self.power['mains'],
                sample_period=self.sample_period)
            for chunk_num, _chunk in enumerate(pacing_stream):
                mains_chunk = next(mains_reader)
                appliance_chunks = [_next_chunk(r) for r in appliance_readers]
                if self.DROP_ALL_NANS:
                    # Joint NaN drop keeps mains and submeters index-aligned.
                    mains_chunk, appliance_chunks = self.dropna(
                        mains_chunk, appliance_chunks)
                if self.artificial_aggregate:
                    # Replace measured mains by the sum of the submeter chunks.
                    print ("Creating an Artificial Aggregate")
                    mains_chunk = pd.DataFrame(
                        np.zeros(appliance_chunks[0].shape),
                        index=appliance_chunks[0].index,
                        columns=appliance_chunks[0].columns)
                    for app_chunk in appliance_chunks:
                        mains_chunk += app_chunk
                self.test_mains = [mains_chunk]
                self.test_submeters = [
                    (name, [chunk])
                    for name, chunk in zip(self.appliances, appliance_chunks)
                ]
                print("Results for Dataset {dataset} Building {building} Chunk {chunk_num}".format(dataset=dataset,building=building,chunk_num=chunk_num))
                self.storing_key = str(dataset) + "_" + str(building) + "_" + str(chunk_num)
                self.call_predict(self.classifiers, test.metadata['timezone'])
def load_datasets(self):
    """Load training and test data for every configured dataset/building,
    fit the classifiers on the accumulated training data, then predict
    building by building.

    Fixes: ``DataFrame.append`` (removed in pandas 2.0) replaced with
    ``pd.concat``; dropped the unused ``train_buildings`` local.
    """
    self.store_classifier_instances()
    d = self.train_datasets_dict
    print("............... Loading Data for training ...................")
    # store the train_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        train = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            print("Loading building ... ", building)
            train.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            # DataFrame.append was removed in pandas 2.0 -> use pd.concat.
            self.train_mains = pd.concat([
                self.train_mains,
                next(train.buildings[building].elec.mains().load(
                    physical_quantity='power',
                    ac_type=self.power['mains'],
                    sample_period=self.sample_period))
            ])
    # store train submeters reading
    for appliance in self.appliances:
        train_df = pd.DataFrame()
        print("For appliance .. ", appliance)
        for dataset in d:
            print("Loading data for ", dataset, " dataset")
            train = DataSet(d[dataset]['path'])
            for building in d[dataset]['buildings']:
                print("Loading building ... ", building)
                # store data for submeters
                train.set_window(
                    start=d[dataset]['buildings'][building]['start_time'],
                    end=d[dataset]['buildings'][building]['end_time'])
                train_df = pd.concat([
                    train_df,
                    next(train.buildings[building].elec.submeters().
                         select_using_appliances(type=appliance).load(
                             physical_quantity='power',
                             ac_type=self.power['appliance'],
                             sample_period=self.sample_period))
                ])
        self.train_submeters.append((appliance, [train_df]))
    # create instance of the training methods; train models
    # store data for mains
    self.train_mains = [self.train_mains]
    self.call_partial_fit()
    d = self.test_datasets_dict
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        test = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            test.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            self.test_mains = (next(
                test.buildings[building].elec.mains().load(
                    physical_quantity='power',
                    ac_type=self.power['mains'],
                    sample_period=self.sample_period)))
            self.test_submeters = []
            for appliance in self.appliances:
                test_df = next((test.buildings[building].elec.submeters(
                ).select_using_appliances(type=appliance).load(
                    physical_quantity='power',
                    ac_type=self.power['appliance'],
                    sample_period=self.sample_period)))
                self.test_submeters.append((appliance, [test_df]))
            self.test_mains = [self.test_mains]
            self.call_predict(self.classifiers)
def load_datasets_chunks(self):
    """Chunk-wise preprocessing driver: stream mains and appliance data for
    every configured dataset/building in chunks of ``self.chunk_size``,
    partially fit on each training chunk, then predict on each test chunk.

    Fix: ``except StopIteration: pass`` left ``appliance_df`` unbound
    (NameError on the first exhausted appliance) or stale from the previous
    iterator; an exhausted stream now contributes an empty DataFrame, as the
    other chunk-wise loaders in this file do.
    """
    self.store_classifier_instances()
    d = self.train_datasets_dict
    print(
        "............... Loading Data for preprocessing ..................."
    )
    # store the train_main readings for all buildings
    print(
        "............... Loading Train_Mains for preprocessing ..................."
    )
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        for building in d[dataset]['buildings']:
            train = DataSet(d[dataset]['path'])
            print("Loading building ... ", building)
            train.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            # Chunked readers for the mains and for each appliance submeter.
            mains_iterator = train.buildings[building].elec.mains().load(
                chunksize=self.chunk_size,
                physical_quantity='power',
                ac_type=self.power['mains'],
                sample_period=self.sample_period)
            print(self.appliances)
            appliance_iterators = [
                train.buildings[building].elec.select_using_appliances(
                    type=app_name).load(chunksize=self.chunk_size,
                                        physical_quantity='power',
                                        ac_type=self.power['appliance'],
                                        sample_period=self.sample_period)
                for app_name in self.appliances
            ]
            print(train.buildings[building].elec.mains())
            # Dummy loop over a second mains stream: it only paces the
            # iteration until the chunked mains are exhausted; the data used
            # below comes from mains_iterator / appliance_iterators.
            for chunk_num, chunk in enumerate(
                    train.buildings[building].elec.mains().load(
                        chunksize=self.chunk_size,
                        physical_quantity='power',
                        ac_type=self.power['mains'],
                        sample_period=self.sample_period)):
                print("starting enumeration..........")
                train_df = next(mains_iterator)
                appliance_readings = []
                for it in appliance_iterators:
                    try:
                        appliance_df = next(it)
                    except StopIteration:
                        # Exhausted submeter: contribute an empty frame
                        # instead of leaving appliance_df unbound/stale.
                        appliance_df = pd.DataFrame()
                    appliance_readings.append(appliance_df)
                if self.DROP_ALL_NANS:
                    train_df, appliance_readings = self.dropnans(
                        train_df, appliance_readings)
                if self.FILL_ALL_NANS:
                    train_df, appliance_readings = self.fillnans(
                        train_df, appliance_readings)
                if self.artificial_aggregate:
                    # Replace measured mains by the sum of the submeters.
                    print("Creating an Artificial Aggregate")
                    train_df = pd.DataFrame(
                        np.zeros(appliance_readings[0].shape),
                        index=appliance_readings[0].index,
                        columns=appliance_readings[0].columns)
                    for app_reading in appliance_readings:
                        train_df += app_reading
                train_appliances = []
                for cnt, app_df in enumerate(appliance_readings):
                    train_appliances.append(
                        (self.appliances[cnt], [app_df]))
                self.train_mains = [train_df]
                self.train_submeters = train_appliances
                self.call_partial_fit()
    print(
        "...............Finished Loading Train mains and Appliances for preprocessing ..................."
    )
    # store train submeters reading
    d = self.test_datasets_dict
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        for building in d[dataset]['buildings']:
            test = DataSet(d[dataset]['path'])
            test.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            mains_iterator = test.buildings[building].elec.mains().load(
                chunksize=self.chunk_size,
                physical_quantity='power',
                ac_type=self.power['mains'],
                sample_period=self.sample_period)
            appliance_iterators = [
                test.buildings[building].elec.select_using_appliances(
                    type=app_name).load(chunksize=self.chunk_size,
                                        physical_quantity='power',
                                        ac_type=self.power['appliance'],
                                        sample_period=self.sample_period)
                for app_name in self.appliances
            ]
            for chunk_num, chunk in enumerate(
                    test.buildings[building].elec.mains().load(
                        chunksize=self.chunk_size,
                        physical_quantity='power',
                        ac_type=self.power['mains'],
                        sample_period=self.sample_period)):
                test_df = next(mains_iterator)
                appliance_readings = []
                for it in appliance_iterators:
                    try:
                        appliance_df = next(it)
                    except StopIteration:
                        # Same fix as in the training loop above.
                        appliance_df = pd.DataFrame()
                    appliance_readings.append(appliance_df)
                if self.DROP_ALL_NANS:
                    test_df, appliance_readings = self.dropnans(
                        test_df, appliance_readings)
                if self.FILL_ALL_NANS:
                    test_df, appliance_readings = self.fillnans(
                        test_df, appliance_readings)
                if self.artificial_aggregate:
                    # Replace measured mains by the sum of the submeters.
                    print("Creating an Artificial Aggregate......")
                    test_df = pd.DataFrame(
                        np.zeros(appliance_readings[0].shape),
                        index=appliance_readings[0].index,
                        columns=appliance_readings[0].columns)
                    for app_reading in appliance_readings:
                        test_df += app_reading
                test_appliances = []
                for cnt, app_df in enumerate(appliance_readings):
                    test_appliances.append(
                        (self.appliances[cnt], [app_df]))
                self.test_mains = test_df
                self.test_submeters = test_appliances
                print("Dataset %s Building %s chunk %s" %
                      (dataset, building, chunk_num))
                self.test_mains = [self.test_mains]
                self.call_predict(self.classifiers)
def test_jointly(self, d):
    """Load the test mains (and, unless site-only, the appliance submeters)
    for every building in *d*, optionally build PKMap state keys, then
    predict per building.

    Side effects: repopulates self.test_mains, self.test_submeters,
    self.buildings, self.pkmap1, self.pkkeys1 and self.storing_key.
    """
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        test = DataSet(d[dataset]['path'])
        self.buildings = d[dataset]['buildings']
        # Per-building PKMap objects and per-timestamp on/off key strings.
        self.pkmap1 = {k: None for k in self.buildings}
        self.pkkeys1 = {k: None for k in self.buildings}
        for building in self.buildings:
            test.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            test_mains = next(test.buildings[building].elec.mains().load(
                physical_quantity='power', ac_type=self.power['mains'],
                sample_period=self.sample_period))
            print('test len is {}'.format(len(test_mains.index)))
            if self.DROP_ALL_NANS and self.site_only:
                # Site-only run: clean the mains alone.
                test_mains, _ = self.dropna(test_mains, [])
            if not self.site_only:
                appliance_readings = []
                # One DataFrame per requested appliance.
                for appliance in self.appliances:
                    test_df = next(
                        (test.buildings[building].elec[appliance].load(
                            physical_quantity='power',
                            ac_type=self.power['appliance'],
                            sample_period=self.sample_period)))
                    appliance_readings.append(test_df)
                if self.DROP_ALL_NANS:
                    # Joint NaN drop keeps mains and submeters index-aligned.
                    test_mains, appliance_readings = self.dropna(
                        test_mains, appliance_readings)
                if self.artificial_aggregate:
                    # Replace measured mains by the sum of the submeters.
                    print("Creating an Artificial Aggregate")
                    test_mains = pd.DataFrame(
                        np.zeros(appliance_readings[0].shape),
                        index=appliance_readings[0].index,
                        columns=appliance_readings[0].columns)
                    for app_reading in appliance_readings:
                        test_mains += app_reading
                for i, appliance_name in enumerate(self.appliances):
                    self.test_submeters.append(
                        (appliance_name, [appliance_readings[i]]))
                if self.do_pkm:
                    # Build the PKMap restricted to the surviving mains index.
                    p1 = PKMap(test.buildings[building],
                               no_count=True,
                               sample_period=self.sample_period)
                    d1 = p1.data0['active'].loc[test_mains.index].dropna()
                    p1.data0['active'] = d1
                    self.pkmap1[building] = p1
                    # Threshold at 11 (watts, presumably — confirm units) to
                    # derive a boolean on/off matrix per appliance.
                    d2 = d1 > 11
                    print('counting pk_keys ...', end='\r')
                    # Encode each row of on/off flags as a bit-string key.
                    self.pkkeys1[building] = pd.DataFrame(
                        np.array([
                            ''.join([str(int(u)) for u in k])
                            for k in d2.itertuples(index=False)
                        ]),
                        index=d1.index,
                        columns=appliance_readings[0].columns)
            self.test_mains = [test_mains]
            # Key used when persisting/reporting results for this run.
            self.storing_key = str(dataset) + "_" + str(building)
            self.call_predict(self.classifiers, test.metadata["timezone"])
def train_jointly(self, clf, d):
    """Load training mains and appliance submeters for every building in *d*,
    optionally build PKMap state keys, and fit *clf* on the accumulated data.

    Side effects: rebuilds self.train_mains / self.train_submeters and
    populates self.buildings, self.pkmap0, self.pkkeys0.
    """
    # This function has a few issues, which should be addressed soon
    print("............... Loading Data for training ...................")
    # store the train_main readings for all buildings
    self.train_mains = []
    self.train_submeters = [[] for i in range(len(self.appliances))]
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        train = DataSet(d[dataset]['path'])
        self.buildings = d[dataset]['buildings']
        # Per-building PKMap objects and per-timestamp on/off key strings.
        self.pkmap0 = {k: None for k in self.buildings}
        self.pkkeys0 = {k: None for k in self.buildings}
        for building in d[dataset]['buildings']:
            print("Loading building ... ", building)
            train.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            train_df = next(train.buildings[building].elec.mains().load(
                physical_quantity='power', ac_type=self.power['mains'],
                sample_period=self.sample_period))
            # Keep only the first power column of the mains reading.
            train_df = train_df[[list(train_df.columns)[0]]]
            appliance_readings = []
            for appliance_name in self.appliances:
                appliance_df = next(
                    train.buildings[building].elec[appliance_name].load(
                        physical_quantity='power',
                        ac_type=self.power['appliance'],
                        sample_period=self.sample_period))
                # Keep only the first power column of this submeter.
                appliance_df = appliance_df[[
                    list(appliance_df.columns)[0]
                ]]
                appliance_readings.append(appliance_df)
            if self.DROP_ALL_NANS:
                # Joint NaN drop keeps mains and submeters index-aligned.
                train_df, appliance_readings = self.dropna(
                    train_df, appliance_readings)
            if self.artificial_aggregate:
                # Replace measured mains by the sum of the submeter readings.
                print("Creating an Artificial Aggregate")
                train_df = pd.DataFrame(
                    np.zeros(appliance_readings[0].shape),
                    index=appliance_readings[0].index,
                    columns=appliance_readings[0].columns)
                for app_reading in appliance_readings:
                    train_df += app_reading
            self.train_mains.append(train_df)
            for i, appliance_name in enumerate(self.appliances):
                self.train_submeters[i].append(appliance_readings[i])
            if self.do_pkm:
                # Build the PKMap restricted to the surviving mains index.
                p0 = PKMap(train.buildings[building],
                           no_count=True,
                           sample_period=self.sample_period)
                d1 = p0.data0['active'].loc[train_df.index].dropna()
                p0.data0['active'] = d1
                self.pkmap0[building] = p0
                # Threshold at 11 (watts, presumably — confirm units) to get
                # a boolean on/off matrix per appliance.
                d2 = d1 > 11
                print('counting pk_keys0 ...', end='\r')
                # Encode each row of on/off flags as a bit-string key.
                self.pkkeys0[building] = pd.DataFrame(
                    np.array([
                        ''.join([str(int(u)) for u in k])
                        for k in d2.itertuples(index=False)
                    ]),
                    index=d1.index,
                    columns=appliance_readings[0].columns)
                # do_pkm may carry a target object (int or str) for BM().
                if isinstance(self.do_pkm, int) or isinstance(
                        self.do_pkm, str):
                    self.pkmap0[building].BM(obj=self.do_pkm, no_show=True)
    # Repackage per-appliance lists as (name, [df, ...]) for the model API.
    appliance_readings = []
    for i, appliance_name in enumerate(self.appliances):
        appliance_readings.append(
            (appliance_name, self.train_submeters[i]))
    self.train_submeters = appliance_readings
    clf.partial_fit(self.train_mains, self.train_submeters)
def test_jointly(self, d):
    """Section-aware test loader: split mains and appliance readings into
    their common good sections, optionally convert submeters to on/off
    states, then predict per building.

    Side effects: repopulates self.test_mains (list of per-section frames),
    self.test_submeters, self.sec_dict and self.storing_key.
    """
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        test = DataSet(d[dataset]['path'])
        # building -> common good-section boundaries (mains vs. appliance).
        self.sec_dict = {}
        for building in d[dataset]['buildings']:
            self.test_mains = []
            self.test_submeters = [[] for i in range(len(self.appliances))]
            test.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            test_meter = test.buildings[building].elec.mains()
            good_sections = test.buildings[building].elec.mains(
            ).good_sections()
            # self.test_sections = good_sections
            main_df = next(
                test_meter.load(physical_quantity='power',
                                ac_type=self.power['mains'],
                                sample_period=self.sample_period))
            # NOTE(review): this value is overwritten inside the appliance
            # loop below using sec_dict; it only survives if self.appliances
            # is empty — confirm whether the initial split is still needed.
            main_df_list = get_sections_df(main_df, good_sections)
            # train_df
            appliance_readings = []
            for appliance_name in self.appliances:
                app_meter = test.buildings[building].elec[appliance_name]
                # Intersect mains and appliance good sections once per building.
                if building not in self.sec_dict:
                    self.sec_dict[building] = get_sections_df_2(
                        good_sections, app_meter.good_sections())
                app_df = next(
                    app_meter.load(physical_quantity='power',
                                   ac_type=self.power['appliance'],
                                   sample_period=self.sample_period))
                # Slice both streams into the shared sections.
                main_df_list = [
                    main_df[sec[0]:sec[1]]
                    for sec in self.sec_dict[building]
                ]
                app_df_list = [
                    app_df[sec[0]:sec[1]]
                    for sec in self.sec_dict[building]
                ]
                appliance_readings.append(app_df_list)
            if self.DROP_ALL_NANS:
                # Joint NaN drop keeps mains and submeters index-aligned.
                main_df_list, appliance_readings = self.dropna(
                    main_df_list, appliance_readings)
            if self.artificial_aggregate:
                # NOTE(review): appliance_readings[0] is a *list* of frames in
                # this sectioned path, so .shape/.index would raise — this
                # branch looks broken/dead here; confirm before enabling.
                print("Creating an Artificial Aggregate")
                test_mains = pd.DataFrame(
                    np.zeros(appliance_readings[0].shape),
                    index=appliance_readings[0].index,
                    columns=appliance_readings[0].columns)
                for app_reading in appliance_readings:
                    test_mains += app_reading
            print("Test Jointly")
            self.test_mains = (main_df_list)
            test_submeters = appliance_readings.copy()
            for j, appliance_name in enumerate(self.appliances):
                if self.isState:
                    # Convert power traces to activation/state series.
                    for i, app_df in enumerate(appliance_readings[j]):
                        _, test_submeters[j][i] = get_activations(
                            app_df, config['threshold'][appliance_name])
                self.test_submeters[j] = (appliance_name,
                                          test_submeters[j])
            # Key used when persisting/reporting results for this run.
            self.storing_key = str(dataset) + "_" + str(building)
            self.call_predict(self.classifiers, building)
def train_jointly(self, clf, d):
    """Section-aware training loader: split mains and appliance readings into
    their common good sections, optionally convert submeters to on/off
    states, accumulate across buildings and fit *clf*.

    Side effects: rebuilds self.train_mains (flat list of per-section
    frames), self.train_submeters and self.sec_dict.
    """
    # This function has a few issues, which should be addressed soon
    print("............... Loading Data for training ...................")
    # store the train_main readings for all buildings
    self.train_mains = []
    self.train_submeters = [[] for i in range(len(self.appliances))]
    # building -> common good-section boundaries (mains vs. appliance).
    self.sec_dict = {}
    for dataset in d:
        print("Loading data for ", dataset, " dataset")
        train = DataSet(d[dataset]['path'])
        for building in d[dataset]['buildings']:
            print("Loading building ... ", building)
            train.set_window(
                start=d[dataset]['buildings'][building]['start_time'],
                end=d[dataset]['buildings'][building]['end_time'])
            main_meter = train.buildings[building].elec.mains()
            good_sections = train.buildings[building].elec.mains(
            ).good_sections()
            main_df = next(
                main_meter.load(physical_quantity='power',
                                ac_type=self.power['mains'],
                                sample_period=self.sample_period))
            # train_df = train_df[[list(train_df.columns)[0]]]
            # main_df_list = get_sections_df(main_df, good_sections)
            # train_df
            appliance_readings = []
            for appliance_name in self.appliances:
                app_meter = train.buildings[building].elec[appliance_name]
                app_df = next(
                    app_meter.load(physical_quantity='power',
                                   ac_type=self.power['appliance'],
                                   sample_period=self.sample_period))
                # app_df_list = get_sections_df(app_df, good_sections)
                # Intersect mains and appliance good sections once per building.
                if building not in self.sec_dict:
                    self.sec_dict[building] = get_sections_df_2(
                        good_sections, app_meter.good_sections())
                # Slice both streams into the shared sections.
                # NOTE(review): main_df_list is (re)bound inside this loop and
                # used after it — with an empty self.appliances this would
                # raise NameError; confirm appliances is always non-empty.
                main_df_list = [
                    main_df[sec[0]:sec[1]]
                    for sec in self.sec_dict[building]
                ]
                app_df_list = [
                    app_df[sec[0]:sec[1]]
                    for sec in self.sec_dict[building]
                ]
                appliance_readings.append(
                    app_df_list)  # appliance_readings->app_df_list->app_df
            if self.DROP_ALL_NANS:
                # Joint NaN drop keeps mains and submeters index-aligned.
                main_df_list, appliance_readings = self.dropna(
                    main_df_list, appliance_readings)
            # Ttrain_list: [pd[sec],pd[sec]..]
            if self.artificial_aggregate:
                # NOTE(review): appliance_readings[0] is a *list* of frames in
                # this sectioned path, so .shape/.index would raise — this
                # branch looks broken/dead here; confirm before enabling.
                print("Creating an Artificial Aggregate")
                train_df = pd.DataFrame(
                    np.zeros(appliance_readings[0].shape),
                    index=appliance_readings[0].index,
                    columns=appliance_readings[0].columns)
                for app_reading in appliance_readings:
                    train_df += app_reading
            print("Train Jointly")
            self.train_mains += main_df_list  # [[sec],[sec]...]]
            train_submeters = appliance_readings.copy()
            for j, appliance_name in enumerate(self.appliances):
                if self.isState:
                    # Convert power traces to activation/state series.
                    for i, app_df in enumerate(appliance_readings[j]):
                        _, train_submeters[j][i] = get_activations(
                            app_df, config['threshold'][appliance_name])
                self.train_submeters[j] += train_submeters[j]
    # Repackage per-appliance section lists as (name, [df, ...]) tuples.
    appliance_readings = []
    for i, appliance_name in enumerate(self.appliances):
        appliance_readings.append(
            (appliance_name, self.train_submeters[i]))
    self.train_submeters = appliance_readings  # [(app_name, [[sec],[sec]...])...]
    clf.partial_fit(self.train_mains, self.train_submeters)
def load_datasets(self):
    """Chunk-wise preprocessing: stream mains and appliance readings for
    every configured dataset/building, align each chunk on the timestamps
    common to all streams, run preprocess_HDF5 and persist the result
    chunk by chunk via store_preprocessed_data.
    """
    d=self.train_datasets_dict
    print("............... Loading Data for preprocessing ...................")
    # store the train_main readings for all buildings
    print("............... Loading Train_Mains for preprocessing ...................")
    for dataset in d:
        print("Loading data for ",dataset, " dataset")
        for building in d[dataset]['buildings']:
            train=DataSet(d[dataset]['path'])
            print("Loading building ... ",building)
            train.set_window(start=d[dataset]['buildings'][building]['start_time'],end=d[dataset]['buildings'][building]['end_time'])
            # Chunked readers for the mains and for each appliance submeter.
            mains_iterator = train.buildings[building].elec.mains().load(chunksize = self.chunk_size, physical_quantity='power', ac_type = self.power['mains'], sample_period=self.sample_period)
            print (self.appliances)
            appliance_iterators = [train.buildings[building].elec.select_using_appliances(type=app_name).load(chunksize = self.chunk_size, physical_quantity='power', ac_type=self.power['appliance'], sample_period=self.sample_period) for app_name in self.appliances]
            for chunk_num,chunk in enumerate (train.buildings[building].elec.mains().load(chunksize = self.chunk_size, physical_quantity='power', ac_type = self.power['mains'], sample_period=self.sample_period)):
                # Dummy loop for executing on outer level.
                # Just for looping till end of a chunk; the data used below
                # comes from mains_iterator / appliance_iterators.
                train_df = next(mains_iterator)
                train_df = train_df.dropna(axis=0)
                appliance_readings = []
                ix = train_df.index
                # NOTE(review): unlike the other chunk-wise loaders in this
                # file, next(i) here is not guarded against StopIteration; an
                # appliance stream shorter than the mains stream would raise.
                for i in appliance_iterators:
                    appliance_df = next(i)
                    appliance_df = appliance_df.dropna(axis=0)
                    appliance_readings.append(appliance_df)
                    # Keep only timestamps present in every stream.
                    ix = ix.intersection(appliance_df.index)
                train_df = train_df.loc[ix] # Choosing the common timestamps
                for i in range(len(appliance_readings)):
                    appliance_readings[i] = appliance_readings[i].loc[ix] # Choosing the Common timestamps
                train_appliances = []
                for cnt,i in enumerate(appliance_readings):
                    train_appliances.append((self.appliances[cnt],i))
                # Training preprocessing transforms both mains and submeters.
                train_df, train_appliances = self.preprocess_HDF5(train_df, train_appliances,method='train')
                self.store_preprocessed_data('train', train_df, train_appliances, dataset, building, chunk_num)
    print("...............Finished Loading Train mains and Appliances for preprocessing ...................")
    # store train submeters reading
    d=self.test_datasets_dict
    # store the test_main readings for all buildings
    for dataset in d:
        print("Loading data for ",dataset, " dataset")
        for building in d[dataset]['buildings']:
            test=DataSet(d[dataset]['path'])
            test.set_window(start=d[dataset]['buildings'][building]['start_time'],end=d[dataset]['buildings'][building]['end_time'])
            mains_iterator = test.buildings[building].elec.mains().load(chunksize = self.chunk_size, physical_quantity='power', ac_type = self.power['mains'], sample_period=self.sample_period)
            #print (self.appliances)
            appliance_iterators = [test.buildings[building].elec.select_using_appliances(type=app_name).load(chunksize = self.chunk_size, physical_quantity='power', ac_type=self.power['appliance'], sample_period=self.sample_period) for app_name in self.appliances]
            for chunk_num,chunk in enumerate (test.buildings[building].elec.mains().load(chunksize = self.chunk_size, physical_quantity='power', ac_type = self.power['mains'], sample_period=self.sample_period)):
                test_df = next(mains_iterator)
                test_df = test_df.dropna(axis=0)
                appliance_readings = []
                ix = test_df.index
                # NOTE(review): same unguarded next(i) as in the training loop.
                for i in appliance_iterators:
                    appliance_df = next(i)
                    appliance_df = appliance_df.dropna(axis=0)
                    appliance_readings.append(appliance_df)
                    # Keep only timestamps present in every stream.
                    ix = ix.intersection(appliance_df.index)
                test_df = test_df.loc[ix] # Choosing the common timestamps
                for i in range(len(appliance_readings)):
                    appliance_readings[i] = appliance_readings[i].loc[ix] # Choosing the Common timestamps
                test_appliances = []
                for cnt,i in enumerate(appliance_readings):
                    test_appliances.append((self.appliances[cnt],i))
                # Test-time preprocessing transforms the mains only.
                test_df= self.preprocess_HDF5(test_df, submeters=None,method='test')
                print (test_df.shape, test_appliances[0][1].shape)
                self.store_preprocessed_data('test', test_df, test_appliances, dataset, building, chunk_num)