def load_training_data(root_folder, dataset_name):
    '''
    Load one training dataset and cut it into sliding time-series windows.

    global parameters:
        EVs: list of HVAC external variable names
        MVs: list of HVAC manipulated variable names
        isOnOff: mapping indexed by MV name; MVs with a truthy entry are
            replaced by their PAA result before windowing
        i_step: increment step size for TS window t
        T: Overall TS length
        t: TS window size
        start_step: data cleaning start step, crop out data before this time step
        n_seg: number of segments for PAA conversion
    global parameters should be set before using this function.
    n_samples is derived locally as (T - t) // i_step + 1, and FTN
    generation always passes n_sec=20 to gen_FTN_data.
    -------------------------------------------------------------------------------
    inputs:
        root_folder: root folder path of all datasets. It is concatenated
            directly with dataset_name, so it must end with a path separator.
        dataset_name: folder name of dataset
    outputs:
        weather_ts: windowed 'WeatherData.y' data, numpy array in NT format,
            shape (n_samples * n_columns, t)
        MVs_ts: corresponding windowed MV data, numpy array of shape
            (n_MVs, n_samples * n_columns, t)
    side effects:
        - changes the process working directory to the dataset folder
        - writes EV_FTN.pickle and MV_FTN.pickle into the dataset folder
          when either one is missing
        - prints progress messages
    -------------------------------------------------------------------------------
    Loads FTN data (pickle files) from the "root_folder + dataset_name" path.
    If the FTN files are not both present, they are generated with
    gen_FTN_data from the .csv files in that folder and saved.
    The FTN data are then used to generate weather_ts and MVs_ts.
    '''
    import os
    import pickle

    filePath = root_folder + dataset_name
    os.chdir(filePath)
    fileNames = [fname for fname in os.listdir(filePath)
                 if fname.endswith('.csv')]  # keep csv files only

    # os.path.join instead of a literal '\\' so the cache files land inside
    # the dataset folder on every platform.
    ev_pickle = os.path.join(filePath, 'EV_FTN.pickle')
    mv_pickle = os.path.join(filePath, 'MV_FTN.pickle')

    # Both caches are needed below, so regenerate unless both are present.
    if os.path.exists(ev_pickle) and os.path.exists(mv_pickle):
        print('FTN files found, loading FTN files')
        # NOTE: pickle.load can execute arbitrary code stored in the file;
        # only point this function at dataset folders you trust.
        with open(ev_pickle, 'rb') as f:
            EV_FTN = pickle.load(f)
        with open(mv_pickle, 'rb') as f:
            MV_FTN = pickle.load(f)
    else:
        print('No FTN files found, generating one...')
        EV_FTN = gen_FTN_data(EVs,
                              range(len(fileNames)),
                              filePath,
                              fileNames,
                              n_sec=20)
        MV_FTN = gen_FTN_data(MVs,
                              range(len(fileNames)),
                              filePath,
                              fileNames,
                              n_sec=20)

        print('Saving FTN files...')
        with open(ev_pickle, 'wb') as f:
            pickle.dump(EV_FTN, f)
        with open(mv_pickle, 'wb') as f:
            pickle.dump(MV_FTN, f)

    # Number of windows of length t that fit in T when stepping by i_step.
    n_samples = (T - t) // i_step + 1

    def _split_windows(frame):
        # Cut every column of an already-truncated frame into windows of
        # length t; rows are ordered column by column (NT format).
        windows = np.empty((n_samples * frame.shape[1], t))
        for day_count, day in enumerate(frame.columns):
            series = frame[day]
            for count, offset in enumerate(range(0, T - t + 1, i_step)):
                first = start_step + offset
                # .loc is label based and inclusive at both ends; the labels
                # still start at start_step because iloc kept the old index.
                window = series.loc[first:first + t - 1]
                # reshape also guards against a window shorter than t
                windows[day_count * n_samples + count, :] = \
                    window.values.reshape(1, t)
        return windows

    # Weather: drop the rows before start_step, then window.
    weather_ts = _split_windows(EV_FTN['WeatherData.y'].iloc[start_step:])

    # Replace on/off switch controlled MVs by their PAA result. This runs
    # on the full-length data, before truncation.
    for key in MVs:
        if isOnOff[key]:
            frame = MV_FTN[key]
            per_column = np.array(frame.T)  # TN to NT format
            for column, values in zip(list(frame.columns), per_column):
                frame[column] = PAA(values,
                                    n_seg,
                                    original_length=True,
                                    print_=False)

    # The first MV fixes the expected number of columns for all MVs.
    first_key = list(MV_FTN)[0]
    MVs_ts = np.empty(
        (len(MV_FTN), n_samples * MV_FTN[first_key].shape[1], t))

    for MV_index, key in enumerate(MV_FTN):
        MV_ts = _split_windows(MV_FTN[key].iloc[start_step:])
        print('{} finished {}/{}'.format(key, MV_index + 1, len(MV_FTN)))
        MVs_ts[MV_index] = MV_ts

    return (weather_ts, MVs_ts)
def load_test_data(root_folder, testdata_name):
    '''
    Load one testing dataset and cut it into sliding time-series windows.

    global parameters:
        EVs: list of HVAC external variable names
        MVs: list of HVAC manipulated variable names
        isOnOff: mapping indexed by MV name; MVs with a truthy entry are
            replaced by their PAA result before windowing
        i_step: increment step size for TS window t
        T: Overall TS length
        t: TS window size
        start_step: data cleaning start step, crop out data before this time step
        n_samples: number of windows per column; the window loop produces
            (T - t) // i_step + 1 of them, so it should equal that value
        n_seg: number of segments for PAA conversion
    global parameters should be set before using this function.
    FTN generation always passes n_sec=10 to gen_FTN_data.
    -------------------------------------------------------------------------------
    inputs:
        root_folder: root folder path of all datasets. It is concatenated
            directly with testdata_name, so it must end with a path separator.
        testdata_name: folder name of testing dataset
    outputs:
        test_EVs_ts: windowed EV data, numpy array of shape
            (n_EVs, n_samples * n_columns, t)
        test_MVs_ts: corresponding windowed MV data, numpy array of shape
            (n_MVs, n_samples * n_columns, t)
    side effects:
        - changes the process working directory to the dataset folder
        - writes EV_FTN.pickle and MV_FTN.pickle into the dataset folder
          when either one is missing
        - prints progress messages
    -------------------------------------------------------------------------------
    Loads FTN data (pickle files) from the "root_folder + testdata_name" path.
    If the FTN files are not both present, they are generated with
    gen_FTN_data from the .csv files in that folder and saved.
    The FTN data are then used to generate test_EVs_ts and test_MVs_ts.
    '''
    import pickle

    # ------------------------------------------------------------------------------
    # Load testing dataset:
    # ------------------------------------------------------------------------------
    filePath = root_folder + testdata_name
    os.chdir(filePath)
    fileNames = [fname for fname in os.listdir(filePath)
                 if fname.endswith('.csv')]  # keep csv files only

    # os.path.join instead of a literal '\\' so the cache files land inside
    # the dataset folder on every platform.
    ev_pickle = os.path.join(filePath, 'EV_FTN.pickle')
    mv_pickle = os.path.join(filePath, 'MV_FTN.pickle')

    # Both caches are needed below, so regenerate unless both are present.
    if os.path.exists(ev_pickle) and os.path.exists(mv_pickle):
        print('FTN files found, loading FTN files')
        # NOTE: pickle.load can execute arbitrary code stored in the file;
        # only point this function at dataset folders you trust.
        with open(ev_pickle, 'rb') as f:
            test_EV_FTN = pickle.load(f)
        with open(mv_pickle, 'rb') as f:
            test_MV_FTN = pickle.load(f)
    else:
        print('No FTN files found, generating one...')
        test_EV_FTN = gen_FTN_data(EVs,
                                   range(len(fileNames)),
                                   filePath,
                                   fileNames,
                                   n_sec=10)
        test_MV_FTN = gen_FTN_data(MVs,
                                   range(len(fileNames)),
                                   filePath,
                                   fileNames,
                                   n_sec=10)

        print('Saving FTN files...')
        with open(ev_pickle, 'wb') as f:
            pickle.dump(test_EV_FTN, f)
        with open(mv_pickle, 'wb') as f:
            pickle.dump(test_MV_FTN, f)

    # Report the dataset actually loaded (the parameter, not an unrelated
    # module-level name).
    print('{} FTN files loaded'.format(testdata_name))
    print('Testing dataset loaded')

    # Data cleaning: drop the rows before start_step, same as training data.
    for FTN in (test_EV_FTN, test_MV_FTN):
        for key in FTN:
            FTN[key] = FTN[key].iloc[start_step:]

    def _split_windows(frame):
        # Cut every column of an already-truncated frame into windows of
        # length t; rows are ordered column by column (NT format).
        windows = np.empty((n_samples * frame.shape[1], t))
        for day_count, day in enumerate(frame.columns):
            series = frame[day]
            for count, offset in enumerate(range(0, T - t + 1, i_step)):
                first = start_step + offset
                # .loc is label based and inclusive at both ends; the labels
                # still start at start_step because iloc kept the old index.
                window = series.loc[first:first + t - 1]
                # reshape also guards against a window shorter than t
                windows[day_count * n_samples + count, :] = \
                    window.values.reshape(1, t)
        return windows

    def _stack_variables(FTN):
        # Window every variable of FTN and stack the results along axis 0.
        # The first variable fixes the expected number of columns.
        first_key = list(FTN)[0]
        stacked = np.empty(
            (len(FTN), n_samples * FTN[first_key].shape[1], t))
        for index, key in enumerate(FTN):
            windows = _split_windows(FTN[key])
            print('{} finished {}/{}'.format(key, index + 1, len(FTN)))
            stacked[index] = windows
        return stacked

    test_EVs_ts = _stack_variables(test_EV_FTN)

    # Replace on/off switch controlled MVs by their PAA result. Unlike the
    # truncation above, this happens after the data were cropped.
    for key in MVs:
        if isOnOff[key]:
            frame = test_MV_FTN[key]
            per_column = np.array(frame.T)  # TN to NT format
            for column, values in zip(list(frame.columns), per_column):
                frame[column] = PAA(
                    values, n_seg, original_length=True, print_=False)

    test_MVs_ts = _stack_variables(test_MV_FTN)

    return (test_EVs_ts, test_MVs_ts)
예제 #3
0
def load_test_data(testdata_name, root_folder='N:\\HVAC_ModelicaModel_Data\\'):
    '''
    Load one testing dataset and cut it into sliding time-series windows.

    global parameters:
        EVs: list of variable names passed to gen_FTN_data for the EV data
        MVs: list of variable names passed to gen_FTN_data for the MV data
        i_step: increment step size for TS window t
        T: Overall TS length
        t: TS window size
        start_step: data cleaning start step, rows before it are dropped
        n_samples: number of windows per column; the window loop produces
            (T - t) // i_step + 1 of them, so it should equal that value
    global parameters should be set before using this function
    -------------------------------------------------------------------------------
    inputs:
        testdata_name: folder name of testing dataset
        root_folder: root folder path of all datasets. It is concatenated
            directly with testdata_name, so it must end with a path
            separator. Defaults to the previously hard-coded N: drive folder.
    outputs:
        test_EVs_ts: windowed EV data, numpy array of shape
            (n_EVs, n_samples * n_columns, t)
        test_MVs_ts: corresponding windowed MV data, numpy array of shape
            (n_MVs, n_samples * n_columns, t)
    side effects:
        - changes the process working directory to the dataset folder
        - writes test_EV_FTN.pickle and test_MV_FTN.pickle into the dataset
          folder when either one is missing
        - prints one progress line per variable
    -------------------------------------------------------------------------------
    Loads FTN data (pickle files) from the "root_folder + testdata_name" path.
    If the FTN files are not both present, they are generated with
    gen_FTN_data from the .csv files in that folder and saved.
    '''
    import pickle

    filePath = root_folder + testdata_name
    os.chdir(filePath)
    # Only csv files are data; anything else in the folder (for example the
    # pickle caches written below) must not be handed to gen_FTN_data.
    fileNames = [fname for fname in os.listdir(filePath)
                 if fname.endswith('.csv')]

    # os.path.join instead of a literal '\\' so the cache files land inside
    # the dataset folder on every platform.
    ev_pickle = os.path.join(filePath, 'test_EV_FTN.pickle')
    mv_pickle = os.path.join(filePath, 'test_MV_FTN.pickle')

    # Both caches are needed below, so regenerate unless both are present.
    if os.path.exists(ev_pickle) and os.path.exists(mv_pickle):
        # NOTE: pickle.load can execute arbitrary code stored in the file;
        # only point this function at dataset folders you trust.
        with open(ev_pickle, 'rb') as f:
            test_EV_FTN = pickle.load(f)
        with open(mv_pickle, 'rb') as f:
            test_MV_FTN = pickle.load(f)
    else:
        test_EV_FTN = gen_FTN_data(EVs, range(len(fileNames)), filePath,
                                   fileNames)
        test_MV_FTN = gen_FTN_data(MVs, range(len(fileNames)), filePath,
                                   fileNames)
        with open(ev_pickle, 'wb') as f:
            pickle.dump(test_EV_FTN, f)
        with open(mv_pickle, 'wb') as f:
            pickle.dump(test_MV_FTN, f)

    # Data cleaning: drop the rows before start_step, same as training data.
    for FTN in (test_EV_FTN, test_MV_FTN):
        for key in FTN:
            FTN[key] = FTN[key].iloc[start_step:]

    def _split_windows(frame):
        # Cut every column of an already-truncated frame into windows of
        # length t; rows are ordered column by column (NT format).
        windows = np.empty((n_samples * frame.shape[1], t))
        for day_count, day in enumerate(frame.columns):
            series = frame[day]
            for count, offset in enumerate(range(0, T - t + 1, i_step)):
                first = start_step + offset
                # .loc is label based and inclusive at both ends; the labels
                # still start at start_step because iloc kept the old index.
                window = series.loc[first:first + t - 1]
                # reshape also guards against a window shorter than t
                windows[day_count * n_samples + count, :] = \
                    window.values.reshape(1, t)
        return windows

    def _stack_variables(FTN):
        # Window every variable of FTN and stack the results along axis 0.
        # The first variable fixes the expected number of columns.
        first_key = list(FTN)[0]
        stacked = np.empty(
            (len(FTN), n_samples * FTN[first_key].shape[1], t))
        for index, key in enumerate(FTN):
            windows = _split_windows(FTN[key])
            print('{} finished {}/{}'.format(key, index + 1, len(FTN)))
            stacked[index] = windows
        return stacked

    test_EVs_ts = _stack_variables(test_EV_FTN)
    test_MVs_ts = _stack_variables(test_MV_FTN)

    return (test_EVs_ts, test_MVs_ts)
'''
# initialize
stacked_MV_FTN = dict()
stacked_class_labels = dict()
# stack data
for index,dataset_name in enumerate(training_data_list):
    filePath = root_folder + dataset_name
    os.chdir(filePath)
    fileNames = os.listdir(filePath)
    fileNames = [fname for fname in fileNames if fname[-4:]=='.csv'] # check if it's a csv file
    
    # Check if FTN files exist, if not, generate one:
    if not os.path.exists(filePath + '\\EV_FTN.pickle'):
        print('No FTN files found, generating one...')
        # Generate FTN files
        EV_FTN = gen_FTN_data(EVs,range(len(fileNames)),filePath,fileNames,n_sec=10)
        MV_FTN = gen_FTN_data(MVs,range(len(fileNames)),filePath,fileNames,n_sec=10)
        
        print('Saving FTN files...')
        # Save FTN files
        import pickle
        with open(filePath + '\\EV_FTN.pickle','wb') as f:
            pickle.dump(EV_FTN,f)
        with open(filePath + '\\MV_FTN.pickle','wb') as f:
            pickle.dump(MV_FTN,f)
    else:
        print('FTN files found, loading FTN files')
        # Load FTN files -----------------------------------------------------
        import pickle
        with open(filePath + '\\EV_FTN.pickle','rb') as f:
            EV_FTN = pickle.load(f)
예제 #5
0
def load_test_data(root_folder,testdata_name):
    '''
    Load one testing dataset and cut it into sliding time-series windows.

    global parameters:
        EVs: list of HVAC external variable names
        MVs: list of HVAC manipulated variable names
        i_step: increment step size for TS window t
        T: Overall TS length
        t: TS window size
        start_step: data cleaning start step, crop out data before this time step
        n_samples: number of windows per column; the window loop produces
            (T - t) // i_step + 1 of them, so it should equal that value
    global parameters should be set before using this function
    -------------------------------------------------------------------------------
    inputs:
        root_folder: root folder path of all datasets. It is concatenated
            directly with testdata_name, so it must end with a path separator.
        testdata_name: folder name of testing dataset
    outputs:
        test_EVs_ts: windowed EV data, numpy array of shape
            (n_EVs, n_samples * n_columns, t)
        test_MVs_ts: corresponding windowed MV data, numpy array of shape
            (n_MVs, n_samples * n_columns, t)
    side effects:
        - changes the process working directory to the dataset folder
        - writes EV_FTN.pickle and MV_FTN.pickle into the dataset folder
          when either one is missing
        - prints progress messages
    -------------------------------------------------------------------------------
    Loads FTN data (pickle files) from the "root_folder + testdata_name" path.
    If the FTN files are not both present, they are generated with
    gen_FTN_data from the .csv files in that folder and saved.
    The FTN data are then used to generate test_EVs_ts and test_MVs_ts.
    '''
    import pickle

    filePath = root_folder + testdata_name
    os.chdir(filePath)
    fileNames = [fname for fname in os.listdir(filePath)
                 if fname.endswith('.csv')]  # keep csv files only

    # os.path.join instead of a literal '\\' so the cache files land inside
    # the dataset folder on every platform.
    ev_pickle = os.path.join(filePath, 'EV_FTN.pickle')
    mv_pickle = os.path.join(filePath, 'MV_FTN.pickle')

    # Both caches are needed below, so regenerate unless both are present.
    if os.path.exists(ev_pickle) and os.path.exists(mv_pickle):
        print('FTN files found, loading FTN files')
        # NOTE: pickle.load can execute arbitrary code stored in the file;
        # only point this function at dataset folders you trust.
        with open(ev_pickle, 'rb') as f:
            test_EV_FTN = pickle.load(f)
        with open(mv_pickle, 'rb') as f:
            test_MV_FTN = pickle.load(f)
    else:
        print('No FTN files found, generating one...')
        test_EV_FTN = gen_FTN_data(EVs, range(len(fileNames)), filePath,
                                   fileNames)
        test_MV_FTN = gen_FTN_data(MVs, range(len(fileNames)), filePath,
                                   fileNames)

        print('Saving FTN files...')
        with open(ev_pickle, 'wb') as f:
            pickle.dump(test_EV_FTN, f)
        with open(mv_pickle, 'wb') as f:
            pickle.dump(test_MV_FTN, f)

    # Data cleaning: drop the rows before start_step, same as training data.
    for FTN in (test_EV_FTN, test_MV_FTN):
        for key in FTN:
            FTN[key] = FTN[key].iloc[start_step:]

    def _split_windows(frame):
        # Cut every column of an already-truncated frame into windows of
        # length t; rows are ordered column by column (NT format).
        windows = np.empty((n_samples * frame.shape[1], t))
        for day_count, day in enumerate(frame.columns):
            series = frame[day]
            for count, offset in enumerate(range(0, T - t + 1, i_step)):
                first = start_step + offset
                # .loc is label based and inclusive at both ends; the labels
                # still start at start_step because iloc kept the old index.
                window = series.loc[first:first + t - 1]
                # reshape also guards against a window shorter than t
                windows[day_count * n_samples + count, :] = \
                    window.values.reshape(1, t)
        return windows

    def _stack_variables(FTN):
        # Window every variable of FTN and stack the results along axis 0.
        # The first variable fixes the expected number of columns.
        first_key = list(FTN)[0]
        stacked = np.empty(
            (len(FTN), n_samples * FTN[first_key].shape[1], t))
        for index, key in enumerate(FTN):
            windows = _split_windows(FTN[key])
            print('{} finished {}/{}'.format(key, index + 1, len(FTN)))
            stacked[index] = windows
        return stacked

    test_EVs_ts = _stack_variables(test_EV_FTN)
    test_MVs_ts = _stack_variables(test_MV_FTN)

    return(test_EVs_ts,test_MVs_ts)