def load_training_data(root_folder, dataset_name):
    """
    Load (or generate) FTN training data and slice it into TS windows.

    Relies on module-level globals that must be set before calling:
        EVs        : list of HVAC external variable names
        MVs        : list of HVAC manipulated variable names
        isOnOff    : dict, True for on/off-switch controlled MVs (PAA-smoothed)
        i_step     : increment step size for the TS window t
        T          : overall TS length
        t          : TS window size
        start_step : crop out data before this time step
        n_seg      : number of segments for PAA conversion
    `np`, `gen_FTN_data` and `PAA` must also be in scope.

    Parameters
    ----------
    root_folder : str
        Root folder path of all datasets.
    dataset_name : str
        Folder name of the dataset.

    Returns
    -------
    (weather_ts, MVs_ts) : tuple of numpy arrays
        weather_ts : (n_days * n_samples, t) array in NT format.
        MVs_ts     : (n_MVs, n_days * n_samples, t) array.

    Loads FTN data (pickle files) from "root_folder/dataset_name".
    If none are found they are generated with gen_FTN_data and cached.
    """
    import os
    import math
    import pickle

    filePath = os.path.join(root_folder, dataset_name)
    os.chdir(filePath)  # NOTE(review): CWD side effect kept from original
    fileNames = [fname for fname in os.listdir(filePath)
                 if fname.endswith('.csv')]  # keep only csv data files

    ev_pickle = os.path.join(filePath, 'EV_FTN.pickle')
    mv_pickle = os.path.join(filePath, 'MV_FTN.pickle')

    if not os.path.exists(ev_pickle):
        print('No FTN files found, generating one...')
        EV_FTN = gen_FTN_data(EVs, range(len(fileNames)), filePath,
                              fileNames, n_sec=20)
        MV_FTN = gen_FTN_data(MVs, range(len(fileNames)), filePath,
                              fileNames, n_sec=20)
        print('Saving FTN files...')
        with open(ev_pickle, 'wb') as f:
            pickle.dump(EV_FTN, f)
        with open(mv_pickle, 'wb') as f:
            pickle.dump(MV_FTN, f)
    else:
        print('FTN files found, loading FTN files')
        with open(ev_pickle, 'rb') as f:
            EV_FTN = pickle.load(f)
        with open(mv_pickle, 'rb') as f:
            MV_FTN = pickle.load(f)

    weather_df = EV_FTN['WeatherData.y']
    # Data cleaning: crop out the warm-up period before start_step
    weather_df = weather_df.iloc[start_step:]

    # Number of sliding windows per day (e.g. T=360*22, t=360*6, i_step=6*60)
    n_samples = math.floor((T - t) / i_step) + 1

    # BUG FIX: this allocation was commented out in the original, so the
    # assignment `weather_ts[row, :] = ts` below raised NameError.
    weather_ts = np.empty((n_samples * weather_df.shape[1], t))  # NT format
    days = weather_df.columns
    for day_count, day in enumerate(days):
        for count, i in enumerate(range(0, T - t + 1, i_step)):
            # .loc is label based; after iloc truncation the integer index
            # labels still start at start_step — presumably; TODO confirm
            ts = weather_df[day].loc[start_step + i:start_step + i + t - 1]
            ts = ts.values.reshape(1, t)  # column vector -> row vector
            row = day_count * n_samples + count
            weather_ts[row, :] = ts

    # Convert on/off switch controlled MVs to PAA (n_seg is a global)
    for key in MVs:
        if isOnOff[key]:
            MV = np.array(MV_FTN[key].T)  # TN to NT format
            N = MV.shape[0]
            for i in range(N):
                # replace each day column with its PAA-smoothed version
                MV_FTN[key][MV_FTN[key].columns[i]] = PAA(
                    MV[i], n_seg, original_length=True, print_=False)

    MVs_ts = np.empty(
        (len(MV_FTN), n_samples * MV_FTN[list(MV_FTN.keys())[0]].shape[1], t))
    for MV_index, key in enumerate(MV_FTN):
        MV_df = MV_FTN[key].iloc[start_step:]  # truncate warm-up period
        MV_ts = np.empty((n_samples * MV_df.shape[1], t))  # NT format
        days = MV_df.columns
        for day_count, day in enumerate(days):
            for count, i in enumerate(range(0, T - t + 1, i_step)):
                ts = MV_df[day].loc[start_step + i:start_step + i + t - 1]
                ts = ts.values.reshape(1, t)
                row = day_count * n_samples + count
                MV_ts[row, :] = ts
        print('{} finished {}/{}'.format(key, MV_index + 1, len(MV_FTN)))
        MVs_ts[MV_index] = MV_ts

    return (weather_ts, MVs_ts)
def load_test_data(root_folder, testdata_name):
    """
    Load (or generate) FTN test data and slice it into TS windows.

    Relies on module-level globals that must be set before calling:
        EVs        : list of HVAC external variable names
        MVs        : list of HVAC manipulated variable names
        isOnOff    : dict, True for on/off-switch controlled MVs (PAA-smoothed)
        i_step     : increment step size for the TS window t
        T          : overall TS length
        t          : TS window size
        start_step : crop out data before this time step (same as training)
        n_samples  : number of TS windows per day
        n_seg      : number of segments for PAA conversion
    `np`, `gen_FTN_data` and `PAA` must also be in scope.

    Parameters
    ----------
    root_folder : str
        Root folder path of all datasets.
    testdata_name : str
        Folder name of the testing dataset.

    Returns
    -------
    (test_EVs_ts, test_MVs_ts) : tuple of numpy arrays
        Divided TS EVs / MVs data, NT format per variable.
    """
    import os
    import pickle

    filePath = os.path.join(root_folder, testdata_name)
    os.chdir(filePath)  # NOTE(review): CWD side effect kept from original
    fileNames = [fname for fname in os.listdir(filePath)
                 if fname.endswith('.csv')]  # keep only csv data files

    ev_pickle = os.path.join(filePath, 'EV_FTN.pickle')
    mv_pickle = os.path.join(filePath, 'MV_FTN.pickle')

    if not os.path.exists(ev_pickle):
        print('No FTN files found, generating one...')
        test_EV_FTN = gen_FTN_data(EVs, range(len(fileNames)), filePath,
                                   fileNames, n_sec=10)
        test_MV_FTN = gen_FTN_data(MVs, range(len(fileNames)), filePath,
                                   fileNames, n_sec=10)
        print('Saving FTN files...')
        with open(ev_pickle, 'wb') as f:
            pickle.dump(test_EV_FTN, f)
        with open(mv_pickle, 'wb') as f:
            pickle.dump(test_MV_FTN, f)
    else:
        print('FTN files found, loading FTN files')
        with open(ev_pickle, 'rb') as f:
            test_EV_FTN = pickle.load(f)
        with open(mv_pickle, 'rb') as f:
            test_MV_FTN = pickle.load(f)
        # BUG FIX: the original formatted an undefined name
        # `new_fault_dataset` here, raising NameError whenever cached
        # pickles were loaded; use the function argument instead.
        print('{} FTN files loaded'.format(testdata_name))
    print('Testing dataset loaded')

    # Data cleaning: crop out the warm-up period (same start_step as training)
    for FTN in (test_EV_FTN, test_MV_FTN):
        for key in FTN:
            FTN[key] = FTN[key].iloc[start_step:]

    def _divide(df):
        # Slice one already-truncated FTN dataframe (one column per day)
        # into an NT-format matrix of sliding windows.
        out = np.empty((n_samples * df.shape[1], t))
        for day_count, day in enumerate(df.columns):
            for count, i in enumerate(range(0, T - t + 1, i_step)):
                # .loc is label based: index labels still start at start_step
                ts = df[day].loc[start_step + i:start_step + i + t - 1]
                out[day_count * n_samples + count, :] = ts.values.reshape(1, t)
        return out

    test_EVs_ts = np.empty(
        (len(test_EV_FTN),
         n_samples * test_EV_FTN[list(test_EV_FTN.keys())[0]].shape[1], t))
    for test_EV_index, key in enumerate(test_EV_FTN):
        test_EVs_ts[test_EV_index] = _divide(test_EV_FTN[key])
        print('{} finished {}/{}'.format(key, test_EV_index + 1,
                                         len(test_EV_FTN)))

    # Convert on/off switch controlled MVs to PAA (n_seg is a global)
    for key in MVs:
        if isOnOff[key]:
            MV = np.array(test_MV_FTN[key].T)  # TN to NT format
            for i in range(MV.shape[0]):
                test_MV_FTN[key][test_MV_FTN[key].columns[i]] = PAA(
                    MV[i], n_seg, original_length=True, print_=False)

    test_MVs_ts = np.empty(
        (len(test_MV_FTN),
         n_samples * test_MV_FTN[list(test_MV_FTN.keys())[0]].shape[1], t))
    for test_MV_index, key in enumerate(test_MV_FTN):
        test_MVs_ts[test_MV_index] = _divide(test_MV_FTN[key])
        print('{} finished {}/{}'.format(key, test_MV_index + 1,
                                         len(test_MV_FTN)))

    return (test_EVs_ts, test_MVs_ts)
def load_test_data(testdata_name, root_folder='N:\\HVAC_ModelicaModel_Data\\'):
    """
    Legacy test-data loader (superseded variant with its own pickle names).

    Relies on module-level globals set beforehand: EVs, MVs, i_step, T, t,
    start_step, n_samples; `np` and `gen_FTN_data` must be in scope.

    Parameters
    ----------
    testdata_name : str
        Folder name of the testing dataset.
    root_folder : str, optional
        Data root. Defaults to the historical hard-coded network drive so
        existing callers are unaffected; new callers can override it.

    Returns
    -------
    (test_EVs_ts, test_MVs_ts) : tuple of numpy arrays
        Divided TS EVs / MVs data, NT format per variable.
    """
    import os
    import pickle

    # String concatenation kept: the default root already ends with '\\'.
    filePath = root_folder + testdata_name
    os.chdir(filePath)
    # BUG FIX: the original used every file in the folder, so once the
    # pickle caches (or any stray non-CSV file) existed they were counted
    # as data files, corrupting range(len(fileNames)). Filter to .csv like
    # the other loaders do.
    fileNames = [fname for fname in os.listdir(filePath)
                 if fname.endswith('.csv')]

    if not os.path.exists(filePath + '\\test_EV_FTN.pickle'):
        # Generate FTN files and cache them for subsequent runs
        test_EV_FTN = gen_FTN_data(EVs, range(len(fileNames)), filePath,
                                   fileNames)
        test_MV_FTN = gen_FTN_data(MVs, range(len(fileNames)), filePath,
                                   fileNames)
        with open(filePath + '\\test_EV_FTN.pickle', 'wb') as f:
            pickle.dump(test_EV_FTN, f)
        with open(filePath + '\\test_MV_FTN.pickle', 'wb') as f:
            pickle.dump(test_MV_FTN, f)
    else:
        # Load cached FTN files
        with open(filePath + '\\test_EV_FTN.pickle', 'rb') as f:
            test_EV_FTN = pickle.load(f)
        with open(filePath + '\\test_MV_FTN.pickle', 'rb') as f:
            test_MV_FTN = pickle.load(f)

    # Same workflow as the training data: truncate, then divide into windows.
    for FTN in (test_EV_FTN, test_MV_FTN):
        for key in FTN:
            FTN[key] = FTN[key].iloc[start_step:]

    test_EVs_ts = np.empty(
        (len(test_EV_FTN),
         n_samples * test_EV_FTN[list(test_EV_FTN.keys())[0]].shape[1], t))
    for test_EV_index, key in enumerate(test_EV_FTN):
        test_EV_df = test_EV_FTN[key]
        test_EV_ts = np.empty((n_samples * test_EV_df.shape[1], t))  # NT
        for day_count, day in enumerate(test_EV_df.columns):
            for count, i in enumerate(range(0, T - t + 1, i_step)):
                # .loc is label based: labels still start at start_step
                ts = test_EV_df[day].loc[start_step + i:start_step + i + t - 1]
                test_EV_ts[day_count * n_samples + count, :] = \
                    ts.values.reshape(1, t)
        print('{} finished {}/{}'.format(key, test_EV_index + 1,
                                         len(test_EV_FTN)))
        test_EVs_ts[test_EV_index] = test_EV_ts

    test_MVs_ts = np.empty(
        (len(test_MV_FTN),
         n_samples * test_MV_FTN[list(test_MV_FTN.keys())[0]].shape[1], t))
    for test_MV_index, key in enumerate(test_MV_FTN):
        test_MV_df = test_MV_FTN[key]
        test_MV_ts = np.empty((n_samples * test_MV_df.shape[1], t))  # NT
        for day_count, day in enumerate(test_MV_df.columns):
            for count, i in enumerate(range(0, T - t + 1, i_step)):
                ts = test_MV_df[day].loc[start_step + i:start_step + i + t - 1]
                test_MV_ts[day_count * n_samples + count, :] = \
                    ts.values.reshape(1, t)
        print('{} finished {}/{}'.format(key, test_MV_index + 1,
                                         len(test_MV_FTN)))
        test_MVs_ts[test_MV_index] = test_MV_ts

    return (test_EVs_ts, test_MVs_ts)
''' # intiailze stacked_MV_FTN = dict() stacked_class_labels = dict() # stack data for index,dataset_name in enumerate(training_data_list): filePath = root_folder + dataset_name os.chdir(filePath) fileNames = os.listdir(filePath) fileNames = [fname for fname in fileNames if fname[-4:]=='.csv'] # check if it's a csv file # Check if FTN files exist, if not, generate one: if not os.path.exists(filePath + '\\EV_FTN.pickle'): print('No FTN files found, generating one...') # Generate FTN files EV_FTN = gen_FTN_data(EVs,range(len(fileNames)),filePath,fileNames,n_sec=10) MV_FTN = gen_FTN_data(MVs,range(len(fileNames)),filePath,fileNames,n_sec=10) print('Saving FTN files...') # Save FTN files import pickle with open(filePath + '\\EV_FTN.pickle','wb') as f: pickle.dump(EV_FTN,f) with open(filePath + '\\MV_FTN.pickle','wb') as f: pickle.dump(MV_FTN,f) else: print('FTN files found, loading FTN files') # Load FTN files ----------------------------------------------------- import pickle with open(filePath + '\\EV_FTN.pickle','rb') as f: EV_FTN = pickle.load(f)
def load_test_data(root_folder, testdata_name):
    """
    Build windowed test tensors for EVs and MVs from one dataset folder.

    Uses the same module-level configuration as the training loader, which
    must be set beforehand: EVs, MVs, i_step, T, t, start_step, n_samples.
    `np` and `gen_FTN_data` must also be in scope.

    Parameters
    ----------
    root_folder : str
        Root folder path of all datasets.
    testdata_name : str
        Folder name of the testing dataset.

    Returns
    -------
    (test_EVs_ts, test_MVs_ts) : tuple of numpy arrays
        Divided TS EVs / MVs data, NT format per variable.

    FTN pickles are loaded from "root_folder/testdata_name" when present,
    otherwise generated with gen_FTN_data and cached there.
    """
    import pickle

    filePath = root_folder + testdata_name
    os.chdir(filePath)
    fileNames = [entry for entry in os.listdir(filePath)
                 if entry.endswith('.csv')]

    if os.path.exists(filePath + '\\EV_FTN.pickle'):
        print('FTN files found, loading FTN files')
        with open(filePath + '\\EV_FTN.pickle', 'rb') as fh:
            test_EV_FTN = pickle.load(fh)
        with open(filePath + '\\MV_FTN.pickle', 'rb') as fh:
            test_MV_FTN = pickle.load(fh)
    else:
        print('No FTN files found, generating one...')
        test_EV_FTN = gen_FTN_data(EVs, range(len(fileNames)), filePath,
                                   fileNames)
        test_MV_FTN = gen_FTN_data(MVs, range(len(fileNames)), filePath,
                                   fileNames)
        print('Saving FTN files...')
        with open(filePath + '\\EV_FTN.pickle', 'wb') as fh:
            pickle.dump(test_EV_FTN, fh)
        with open(filePath + '\\MV_FTN.pickle', 'wb') as fh:
            pickle.dump(test_MV_FTN, fh)

    # Crop out everything before start_step (same cleaning as training data)
    for table in (test_EV_FTN, test_MV_FTN):
        for name in table:
            table[name] = table[name].iloc[start_step:]

    def _windows(df):
        # Turn one per-day dataframe into an (n_samples * n_days, t) matrix
        # of sliding windows, NT format.
        out = np.empty((n_samples * df.shape[1], t))
        for d_idx, col in enumerate(df.columns):
            series = df[col]
            for w_idx, offset in enumerate(range(0, T - t + 1, i_step)):
                # label-based slice; index labels still begin at start_step
                lo = start_step + offset
                seg = series.loc[lo:lo + t - 1].values.reshape(1, t)
                out[d_idx * n_samples + w_idx, :] = seg
        return out

    n_ev = len(test_EV_FTN)
    first_ev = test_EV_FTN[list(test_EV_FTN.keys())[0]]
    test_EVs_ts = np.empty((n_ev, n_samples * first_ev.shape[1], t))
    for pos, name in enumerate(test_EV_FTN):
        test_EVs_ts[pos] = _windows(test_EV_FTN[name])
        print('{} finished {}/{}'.format(name, pos + 1, n_ev))

    n_mv = len(test_MV_FTN)
    first_mv = test_MV_FTN[list(test_MV_FTN.keys())[0]]
    test_MVs_ts = np.empty((n_mv, n_samples * first_mv.shape[1], t))
    for pos, name in enumerate(test_MV_FTN):
        test_MVs_ts[pos] = _windows(test_MV_FTN[name])
        print('{} finished {}/{}'.format(name, pos + 1, n_mv))

    return (test_EVs_ts, test_MVs_ts)