def main():
    look_back = 20     # number of previous timesteps used for training
    n_columns = 15     # total number of columns
    n_labels = 6       # number of labels (pollutants)
    split_ratio = 0.8  # train/test data split ratio

    file_list_train = glob.glob('preprocessed_data/train/*.csv')
    file_list_test = glob.glob('preprocessed_data/test/*.csv')
    file = open('results/Single_MLP_2.txt', 'w')

    sum_Smape = 0
    sum_Smape_PM25 = 0
    sum_Smape_PM10 = 0
    sum_Smape_NO2 = 0
    sum_Smape_CO = 0
    sum_Smape_O3 = 0
    sum_Smape_SO2 = 0

    import time

    for i in range(len(file_list_train)):
        # Load and scale the train/test file pair for station i. Plain local
        # variables replace the original locals()['...' + str(i)] pattern,
        # which is fragile (writes to locals() inside a function are not
        # guaranteed to create variables) and unnecessary here, since each
        # value is only used within the current iteration.
        dataset_train, scaled_train, scaler_train = helper_funcs.load_dataset(file_list_train[i])
        dataset_test, scaled_test, scaler_test = helper_funcs.load_dataset(file_list_test[i])

        # split into train and test sets of (X, y) windows
        train_X, train_y = helper_funcs.split_dataset(scaled_train, look_back, n_columns, n_labels)
        test_X, test_y = helper_funcs.split_dataset(scaled_test, look_back, n_columns, n_labels)

        model = build_model(train_X)

        start_time = time.time()
        # fit network
        history = model.fit(train_X, train_y,
                            epochs=40, batch_size=120,
                            validation_data=(test_X, test_y),
                            verbose=2, shuffle=False,
                            callbacks=[keras.callbacks.EarlyStopping(
                                monitor='val_loss', min_delta=0,
                                patience=5, verbose=2, mode='min')])
        end_time = time.time()
        print('--- %s seconds ---' % (end_time - start_time))

        # plot history
        # plt.plot(history.history['loss'], label='train')
        # plt.plot(history.history['val_loss'], label='test')
        # plt.legend()
        # plt.show()

        # make a prediction, then score SMAPE over all labels and per pollutant
        # (label indices: 0=PM2.5, 1=PM10, 2=NO2, 3=CO, 4=O3, 5=SO2)
        y_predict = model.predict(test_X)
        Smape = helper_funcs.evaluation(test_X, test_y, y_predict,
                                        look_back, n_columns, n_labels, scaler_test)
        Smape_PM25 = helper_funcs.evaluation_single(test_X, test_y, y_predict,
                                                    look_back, n_columns, n_labels, scaler_test, 0)
        Smape_PM10 = helper_funcs.evaluation_single(test_X, test_y, y_predict,
                                                    look_back, n_columns, n_labels, scaler_test, 1)
        Smape_NO2 = helper_funcs.evaluation_single(test_X, test_y, y_predict,
                                                   look_back, n_columns, n_labels, scaler_test, 2)
        Smape_CO = helper_funcs.evaluation_single(test_X, test_y, y_predict,
                                                  look_back, n_columns, n_labels, scaler_test, 3)
        Smape_O3 = helper_funcs.evaluation_single(test_X, test_y, y_predict,
                                                  look_back, n_columns, n_labels, scaler_test, 4)
        Smape_SO2 = helper_funcs.evaluation_single(test_X, test_y, y_predict,
                                                   look_back, n_columns, n_labels, scaler_test, 5)

        file.write('Current file index is: ' + str(i) + '\n')
        file.write('Smape: ' + str(Smape) + '\n')
        file.write('Smape_PM25: ' + str(Smape_PM25) + '\n')
        file.write('Smape_PM10: ' + str(Smape_PM10) + '\n')
        file.write('Smape_NO2: ' + str(Smape_NO2) + '\n')
        file.write('Smape_CO: ' + str(Smape_CO) + '\n')
        file.write('Smape_O3: ' + str(Smape_O3) + '\n')
        file.write('Smape_SO2: ' + str(Smape_SO2) + '\n')
        file.write('\n')

        sum_Smape += Smape
        sum_Smape_PM25 += Smape_PM25
        sum_Smape_PM10 += Smape_PM10
        sum_Smape_NO2 += Smape_NO2
        sum_Smape_CO += Smape_CO
        sum_Smape_O3 += Smape_O3
        sum_Smape_SO2 += Smape_SO2

    # per-pollutant SMAPE averaged over all stations, plus the last fit's time
    file.write('avg_Smape: ' + str(sum_Smape / len(file_list_test)) + '\n')
    file.write('avg_Smape_PM25: ' + str(sum_Smape_PM25 / len(file_list_test)) + '\n')
    file.write('avg_Smape_PM10: ' + str(sum_Smape_PM10 / len(file_list_test)) + '\n')
    file.write('avg_Smape_NO2: ' + str(sum_Smape_NO2 / len(file_list_test)) + '\n')
    file.write('avg_Smape_CO: ' + str(sum_Smape_CO / len(file_list_test)) + '\n')
    file.write('avg_Smape_O3: ' + str(sum_Smape_O3 / len(file_list_test)) + '\n')
    file.write('avg_Smape_SO2: ' + str(sum_Smape_SO2 / len(file_list_test)) + '\n')
    file.write('training time: ' + str(end_time - start_time))
    file.close()
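################################################################################
# Reference: helper_funcs is not shown in this section. The SMAPE values logged
# above are, by the usual definition, computed as in the sketch below. This is
# a minimal sketch of the metric only, an assumption about what
# helper_funcs.evaluation reports; the real helper additionally inverts the
# feature scaling before scoring.
def smape_sketch(y_true, y_pred):
    """Symmetric mean absolute percentage error over all elements, in [0, 2]."""
    import numpy as np
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    diff = np.abs(y_pred - y_true)
    denom = (np.abs(y_true) + np.abs(y_pred)) / 2.0
    # Where both true and predicted values are zero, count the error as zero.
    ratio = np.divide(diff, denom, out=np.zeros_like(diff), where=denom != 0)
    return np.mean(ratio)
################################################################################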
def main():
    # network parameters
    task_num = 9
    lstm_layer = 64
    drop = 0.2
    r_drop = 0.2
    l2_value = 0.001
    shared_layer = 576
    dense_num = 64

    look_back = 20     # number of previous timesteps used for training
    n_columns = 15     # total number of columns
    n_labels = 6       # number of labels (pollutants)
    split_ratio = 0.8  # train/test data split ratio

    file_list_train = glob.glob('preprocessed_data/train/*.csv')
    file_list_test = glob.glob('preprocessed_data/test/*.csv')

    # Concatenate all training CSVs into a single file, keeping only the first
    # file's header, so one global model can be trained on all stations.
    with open('train_combined.csv', 'wb') as outfile:
        for i, fname in enumerate(file_list_train):
            with open(fname, 'rb') as infile:
                if i != 0:
                    infile.readline()  # throw away header on all but first file
                # block-copy the rest of the file from input to output without parsing
                shutil.copyfileobj(infile, outfile)
            print(fname + " has been imported.")

    train_data, scaled, scaler = helper_funcs.load_dataset('train_combined.csv')
    trainX, trainy = helper_funcs.split_dataset(scaled, look_back, n_columns, n_labels)

    file = open('results/globalAtt_1.txt', 'w')
    sum_Smape = 0
    sum_Smape_PM25 = 0
    sum_Smape_PM10 = 0
    sum_Smape_NO2 = 0
    sum_Smape_CO = 0
    sum_Smape_O3 = 0
    sum_Smape_SO2 = 0

    import time

    # Evaluate one test file (station) at a time. The loop bound is the test
    # list itself; the original iterated over file_list_train, which only
    # works when the two lists happen to have the same length.
    for i in range(len(file_list_test)):
        dataset_test, scaled_test, scaler_test = helper_funcs.load_dataset(file_list_test[i])

        # split the test file into (X, y) windows
        test_X, test_y = helper_funcs.split_dataset(scaled_test, look_back, n_columns, n_labels)

        model = build_model(trainX, lstm_layer, drop, r_drop, l2_value, dense_num, n_labels)

        start_time = time.time()
        # fit network on the combined training data, validating on this station
        history = model.fit(trainX, trainy,
                            epochs=100, batch_size=120,
                            validation_data=(test_X, test_y),
                            verbose=2, shuffle=False,
                            callbacks=[keras.callbacks.EarlyStopping(
                                monitor='val_loss', min_delta=0,
                                patience=5, verbose=2, mode='min')])
        end_time = time.time()
        print('--- %s seconds ---' % (end_time - start_time))

        # plot history
        # plt.plot(history.history['loss'], label='train')
        # plt.plot(history.history['val_loss'], label='test')
        # plt.legend()
        # plt.show()

        # make a prediction, then score SMAPE over all labels and per pollutant
        # (label indices: 0=PM2.5, 1=PM10, 2=NO2, 3=CO, 4=O3, 5=SO2)
        y_predict = model.predict(test_X)
        Smape = helper_funcs.evaluation(test_X, test_y, y_predict,
                                        look_back, n_columns, n_labels, scaler_test)
        Smape_PM25 = helper_funcs.evaluation_single(test_X, test_y, y_predict,
                                                    look_back, n_columns, n_labels, scaler_test, 0)
        Smape_PM10 = helper_funcs.evaluation_single(test_X, test_y, y_predict,
                                                    look_back, n_columns, n_labels, scaler_test, 1)
        Smape_NO2 = helper_funcs.evaluation_single(test_X, test_y, y_predict,
                                                   look_back, n_columns, n_labels, scaler_test, 2)
        Smape_CO = helper_funcs.evaluation_single(test_X, test_y, y_predict,
                                                  look_back, n_columns, n_labels, scaler_test, 3)
        Smape_O3 = helper_funcs.evaluation_single(test_X, test_y, y_predict,
                                                  look_back, n_columns, n_labels, scaler_test, 4)
        Smape_SO2 = helper_funcs.evaluation_single(test_X, test_y, y_predict,
                                                   look_back, n_columns, n_labels, scaler_test, 5)

        file.write('Current file index is: ' + str(i) + '\n')
        file.write('Smape: ' + str(Smape) + '\n')
        file.write('Smape_PM25: ' + str(Smape_PM25) + '\n')
        file.write('Smape_PM10: ' + str(Smape_PM10) + '\n')
        file.write('Smape_NO2: ' + str(Smape_NO2) + '\n')
        file.write('Smape_CO: ' + str(Smape_CO) + '\n')
        file.write('Smape_O3: ' + str(Smape_O3) + '\n')
        file.write('Smape_SO2: ' + str(Smape_SO2) + '\n')
        file.write('\n')

        sum_Smape += Smape
        sum_Smape_PM25 += Smape_PM25
        sum_Smape_PM10 += Smape_PM10
        sum_Smape_NO2 += Smape_NO2
        sum_Smape_CO += Smape_CO
        sum_Smape_O3 += Smape_O3
        sum_Smape_SO2 += Smape_SO2

    # per-pollutant SMAPE averaged over all stations, plus the last fit's time
    file.write('avg_Smape: ' + str(sum_Smape / len(file_list_test)) + '\n')
    file.write('avg_Smape_PM25: ' + str(sum_Smape_PM25 / len(file_list_test)) + '\n')
    file.write('avg_Smape_PM10: ' + str(sum_Smape_PM10 / len(file_list_test)) + '\n')
    file.write('avg_Smape_NO2: ' + str(sum_Smape_NO2 / len(file_list_test)) + '\n')
    file.write('avg_Smape_CO: ' + str(sum_Smape_CO / len(file_list_test)) + '\n')
    file.write('avg_Smape_O3: ' + str(sum_Smape_O3 / len(file_list_test)) + '\n')
    file.write('avg_Smape_SO2: ' + str(sum_Smape_SO2 / len(file_list_test)) + '\n')
    file.write('training time: ' + str(end_time - start_time))
    file.close()
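################################################################################
# Reference: a plausible sketch of the sliding-window split performed by
# helper_funcs.split_dataset(scaled, look_back, n_columns, n_labels). The
# assumption (not confirmed by this section) is that each sample is look_back
# consecutive rows of the scaled matrix and the target is the label columns of
# the row that follows the window; the project's actual column layout may differ.
def split_dataset_sketch(scaled, look_back, n_columns, n_labels):
    import numpy as np
    X, y = [], []
    for t in range(len(scaled) - look_back):
        X.append(scaled[t:t + look_back, :n_columns])  # window of past rows
        y.append(scaled[t + look_back, -n_labels:])    # next row's label columns
    return np.array(X), np.array(y)                    # X: (samples, look_back, n_columns)
################################################################################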
def main():
    # network parameters
    task_num = 6
    con_layer1 = 128  # for 6 users
    # con_layer1 = 256
    con_layer1_filter = 1
    con_layer2 = 64
    con_layer2_filter = 4
    lstm_layer = 64
    drop = 0.2
    r_drop = 0.2
    l2_value = 0.001
    shared_layer = 576
    dense_num = 64

    look_back = 30     # number of previous timesteps used for training
    n_columns = 276    # total number of columns
    n_labels = 51      # number of labels
    split_ratio = 0.8  # train/test data split ratio

    trainX_list = []
    trainy_list = []
    testX_list = []
    testy_list = []
    scaler_list = []   # one scaler per user, needed to invert scaling at evaluation

    file_list = glob.glob('../data_csv/train/6users/*.csv')
    for i in range(len(file_list)):
        # Load, scale, and window each user's file. Plain lists replace the
        # original locals()['...' + str(i)] pattern, which is fragile and
        # does not survive the later per-user indexing cleanly.
        dataset, scaled, scaler = helper_funcs.load_dataset(file_list[i])
        train_X, train_y, test_X, test_y = helper_funcs.split_dataset(
            dataset, scaled, look_back, n_columns, n_labels, split_ratio)
        trainX_list.append(train_X)
        trainy_list.append(train_y)
        testX_list.append(test_X)
        testy_list.append(test_y)
        scaler_list.append(scaler)

    model = build_model(trainX_list, task_num, con_layer1, con_layer1_filter,
                        con_layer2, con_layer2_filter, lstm_layer, drop, r_drop,
                        l2_value, shared_layer, dense_num, n_labels)

    import time
    start_time = time.time()
    # fit network
    history = model.fit(trainX_list, trainy_list,
                        epochs=50, batch_size=60,
                        validation_split=0.25,
                        # validation_data=(testX_list, testy_list),
                        verbose=2, shuffle=False,
                        callbacks=[keras.callbacks.EarlyStopping(
                            monitor='val_loss', min_delta=0,
                            patience=20, verbose=2, mode='min')])
    end_time = time.time()
    print('--- %s seconds ---' % (end_time - start_time))

    # One prediction array per task/user. Indexing the returned list scales to
    # any task count; the original unpacked y_pred1..y_pred6 explicitly, with a
    # commented-out 40-output variant for the 40-user experiment.
    y_pred_list = model.predict(testX_list)

    #################### attention plot - TIME_STEP level ####################
    # Case study on the input dimension: axis=1 averages out the input_dim
    # axis (plotting attention over the TIME_STEP dimension); axis=2 would
    # plot attention over input_dim instead.
    import seaborn as sns
    import matplotlib.pylab as plt

    attention_vectors = []
    for k in range(len(file_list)):
        for j in range(10):
            attention_vector = np.mean(
                helper_funcs.get_activationsT(
                    k, model, testX_list[k][0:20, :, :],
                    print_shape_only=True,
                    layer_name='attention_vect' + str(k))[0],
                axis=1).squeeze()
            # print('attention =', attention_vector)
            # assert (np.sum(attention_vector) - 1.0) < 1e-5
            attention_vectors.append(attention_vector)
            print('.....')
            print(len(attention_vector))

        attention_vector_final = np.mean(np.array(attention_vectors), axis=0)
        print('attention final=', attention_vector_final)

        # attention_vector_final = np.delete(attention_vector_final, np.s_[225:], axis=1)
        ax = sns.heatmap(attention_vector_final, cmap="BuPu")
        plt.savefig('time_dim_heatmap2' + str(k) + '.png', dpi=150)
        plt.show()

        # Optionally export the per-user attention weights as CSV and bar plot,
        # e.g. via pd.DataFrame(attention_vector_final[1], columns=['attention (%)'])
        # saved under '../results/attention_plot/TimeAtt1/30_TIME_STEP_attention<k>'.
        ######################### attention plot ends #########################

        helper_funcs.evaluation(testX_list[k], testy_list[k], y_pred_list[k],
                                look_back, n_columns, n_labels, scaler_list[k])
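################################################################################
# Reference: helper_funcs.get_activationsT is project-specific and not shown
# here. For comparison, the standard Keras way to read an intermediate layer's
# output (e.g. the 'attention_vect<k>' layers visualised above) is a probe
# sub-model. This is a hedged sketch, not the project's implementation.
def layer_activations_sketch(model, layer_name, x):
    """Return the named layer's output for input batch x.

    For a multi-input model such as the one above, x must be the full list of
    input arrays, one per task.
    """
    probe = keras.Model(inputs=model.input,
                        outputs=model.get_layer(layer_name).output)
    return probe.predict(x)
################################################################################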
def main():
    look_back = 20     # number of previous timesteps used for training
    n_columns = 276    # total number of columns
    n_labels = 51      # number of labels
    split_ratio = 0.8  # train/test data split ratio

    file_list = glob.glob('data_csv/train/*.csv')
    file = open('results/Single_MLP_40users3.txt', 'w')

    sum_bacc = 0
    sum_TPR = 0
    Num_tp = 0
    Num_fn = 0
    Num_fp = 0
    Num_tn = 0
    sum_precision = 0
    sum_F1 = 0
    train_time = 0

    import time

    for i in range(len(file_list)):
        # load, scale, and window the data for user i (plain local variables
        # replace the fragile locals()['...' + str(i)] pattern)
        dataset, scaled, scaler = helper_funcs.load_dataset(file_list[i])

        # split into train and test sets
        train_X, train_y, test_X, test_y = helper_funcs.split_dataset(
            dataset, scaled, look_back, n_columns, n_labels, split_ratio)

        model = build_model(train_X)

        start_time = time.time()
        # fit network
        history = model.fit(train_X, train_y,
                            epochs=40, batch_size=60,
                            # validation_data=(test_X, test_y),
                            validation_split=0.25,
                            verbose=2, shuffle=False,
                            callbacks=[keras.callbacks.EarlyStopping(
                                monitor='val_loss', min_delta=0,
                                patience=10, mode='min')])
        end_time = time.time()
        print('--- %s seconds ---' % (end_time - start_time))

        y_predict = model.predict(test_X)
        # results layout (from the writes below):
        # [accuracy, TPR, TNR, Bacc, TP, FN, FP, TN, precision, F1]
        results = helper_funcs.evaluation(test_X, test_y, y_predict,
                                          look_back, n_columns, n_labels, scaler)

        sum_bacc += results[3]
        sum_TPR += results[1]
        Num_tp += results[4]
        Num_fn += results[5]
        Num_fp += results[6]
        Num_tn += results[7]
        sum_precision += results[8]
        sum_F1 += results[9]
        train_time += end_time - start_time

        file.write('Accuracy: ' + str(results[0]) + ' ')
        file.write('TPR: ' + str(results[1]) + ' ')
        file.write('TNR: ' + str(results[2]) + ' ')
        file.write('Bacc: ' + str(results[3]) + '\n')
        file.write('FP No.: ' + str(results[6]) + '\n')
        file.write('TN No.: ' + str(results[7]) + '\n')
        file.write('Precision: ' + str(results[8]) + '\n')
        file.write('F1: ' + str(results[9]) + '\n')

    file.write('avg_bacc: ' + str(sum_bacc / len(file_list)) + '\n')
    file.write('avg_TPR: ' + str(sum_TPR / len(file_list)) + '\n')
    file.write('avg_precision: ' + str(sum_precision / len(file_list)) + '\n')
    file.write('avg_F1: ' + str(sum_F1 / len(file_list)) + '\n')
    file.write('sum_Num_tp: ' + str(Num_tp) + '\n')
    file.write('sum_Num_fn: ' + str(Num_fn) + '\n')
    file.write('sum_Num_fp: ' + str(Num_fp) + '\n')
    file.write('sum_Num_tn: ' + str(Num_tn) + '\n')
    file.write('train_time: ' + str(train_time) + '\n')
    file.close()
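################################################################################
# Reference: the quantities averaged above follow the standard confusion-matrix
# definitions. A sketch from raw counts; this is an assumption about what
# helper_funcs.evaluation computes internally, not that function itself.
def classification_metrics_sketch(tp, fn, fp, tn):
    tpr = tp / (tp + fn) if (tp + fn) else 0.0   # recall / sensitivity
    tnr = tn / (tn + fp) if (tn + fp) else 0.0   # specificity
    bacc = (tpr + tnr) / 2.0                     # balanced accuracy
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    f1 = 2 * precision * tpr / (precision + tpr) if (precision + tpr) else 0.0
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    return accuracy, tpr, tnr, bacc, precision, f1
################################################################################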