def main():
    # network parameters
    task_num = 1
    lstm_layer = 64
    drop = 0.2
    r_drop = 0.2
    l2_value = 0.001
    shared_layer = 576
    dense_num = 64

    look_back = 20  # number of previous timestamps used for training
    n_columns = 15  # total columns
    n_labels = 6  # number of labels
    split_ratio = 0.8  # train & test data split ratio

    trainX_list = []
    trainy_list = []
    testX_list = []
    testy_list = []
    file_list_train = glob.glob(
        '../preprocessed_data/train/attplot_train/*.csv')
    file_list_test = glob.glob('../preprocessed_data/test/attplot_test/*.csv')
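    # NOTE: the loops below stash per-file objects via locals()['name' + str(i)].
    # Writing to locals() inside a function body is a CPython implementation
    # detail (manually added keys happen to survive), not guaranteed Python
    # behavior, and it no longer works on Python 3.13+ (PEP 667). A portable
    # sketch of the same bookkeeping with a plain dict (names here are
    # illustrative, not from the original helpers):
    #
    #   per_file = {}
    #   for i, path in enumerate(file_list_train):
    #       dataset, scaled, scaler = helper_funcs.load_dataset(path)
    #       train_X, train_y = helper_funcs.split_dataset(
    #           scaled, look_back, n_columns, n_labels)
    #       per_file[i] = {'scaler': scaler, 'train_X': train_X, 'train_y': train_y}
    #       trainX_list.append(train_X)
    #       trainy_list.append(train_y)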

    for i in range(len(file_list_train)):
        locals()['dataset' + str(i)] = file_list_train[i]
        locals()['dataset' + str(i)], locals()['scaled' + str(i)], locals()[
            'scaler' + str(i)] = helper_funcs.load_dataset(locals()['dataset' +
                                                                    str(i)])
        locals()['train_X' +
                 str(i)], locals()['train_y' +
                                   str(i)] = helper_funcs.split_dataset(
                                       locals()['scaled' + str(i)], look_back,
                                       n_columns, n_labels)

        trainX_list.append(locals()['train_X' + str(i)])
        trainy_list.append(locals()['train_y' + str(i)])

    for i in range(len(file_list_test)):
        locals()['dataset_test' + str(i)] = file_list_test[i]
        locals()['dataset_test' +
                 str(i)], locals()['scaled_test' + str(i)], locals()[
                     'scaler_test' + str(i)] = helper_funcs.load_dataset(
                         locals()['dataset_test' + str(i)])
        locals()['test_X' +
                 str(i)], locals()['test_y' +
                                   str(i)] = helper_funcs.split_dataset(
                                       locals()['scaled_test' + str(i)],
                                       look_back, n_columns, n_labels)
        testX_list.append(locals()['test_X' + str(i)])
        testy_list.append(locals()['test_y' + str(i)])

    model = build_model(trainX_list, task_num, lstm_layer, drop, r_drop,
                        l2_value, shared_layer, dense_num, n_labels)

    import time
    start_time = time.time()

    # fit network
    history = model.fit(
        trainX_list,
        trainy_list,
        epochs=60,
        batch_size=120,
        validation_split=0.25,
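        # (with validation_split, Keras takes the last 25% of the samples as
        #  validation data; shuffle=False keeps the remaining batches in
        #  temporal order)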
        # validation_data=(testX_list, testy_list),
        verbose=2,
        shuffle=False,
        callbacks=[
            keras.callbacks.EarlyStopping(monitor='val_loss',
                                          min_delta=0,
                                          patience=20,
                                          verbose=2,
                                          mode='min')
        ])
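    # (EarlyStopping defaults to restore_best_weights=False, so training ends
    #  with the final epoch's weights, not necessarily the best-val_loss ones)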
    end_time = time.time()
    print('--- %s seconds ---' % (end_time - start_time))

    ####################  attention plot - input dimension level ################################

    attention_vectors = []

    ##### Case Study, input_dimension ################
    #### axis = 1 plots the attention on input_dim; axis = 2 plots the attention on the TIME_STEP dimension
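    # the j-loop repeats the activation fetch 10 times and averages the
    # resulting vectors; the slice [0:240] restricts the plot to the first
    # 240 test windows of task 0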
    for j in range(10):
        attention_vector = np.mean(helper_funcs.get_activationsD(
            model,
            testX_list[0][0:240, :, :],
            print_shape_only=True,
            layer_name='attention_vec')[0],
                                   axis=0).squeeze()
        print('attention =', attention_vector)
        # assert (np.sum(attention_vector) - 1.0) < 1e-5
        attention_vectors.append(attention_vector)
        print('.....')
        print(len(attention_vector))

    attention_vector_final = np.mean(np.array(attention_vectors), axis=0)
    print('attention final=', attention_vector_final)
    print('attention final length=', len(attention_vector_final))

    import seaborn as sns
    import matplotlib.pylab as plt

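    # np.delete with np.s_[9:] along axis=1 drops all but the first 9 input
    # dimensions from the averaged attention map before plotting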
    attention_vector_final = np.delete(attention_vector_final,
                                       np.s_[9:],
                                       axis=1)
    ax = sns.heatmap(attention_vector_final, cmap='Blues')
    plt.savefig('input_dim_fangshan(0-240).png', dpi=150)
    plt.show()

    # plot part.
    # import matplotlib.pyplot as plt
    # import pandas as pd
    #
    #
    # df = pd.DataFrame(attention_vector_final[153], columns=['attention (%)'])
    # df.to_csv('../results/attention_plot/8153_input_dim_attention2554--.csv')
    # df.plot(kind='bar',title='Attention Mechanism as ''a function of input'' dimensions.')
    # # plt.figure(figsize=(100, 100))
    # plt.xticks(rotation=90)
    # plt.savefig('../results/attention_plot-1test.png',dpi=150)
    # plt.show()

    ################################# attention plot ends #################################################

    y_pred1 = model.predict(testX_list)
    smape = helper_funcs.evaluation(testX_list[0], testy_list[0], y_pred1,
                                    look_back, n_columns, n_labels,
                                    locals()['scaler' + str(0)])
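    # (note: the train-side scaler 'scaler0' is passed here, while the
    #  multi-task air-quality example further below passes the matching
    #  'scaler_test' object)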

    print(smape)
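    # (helper_funcs.evaluation is assumed to report SMAPE; the usual definition
    #  is SMAPE = 100/n * sum(|y_hat - y| / ((|y| + |y_hat|) / 2)), applied to
    #  the inverse-scaled predictions; the exact variant lives in helper_funcs)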
def main():

    # network parameters
    task_num = 3
    lstm_layer = 64
    drop = 0.2
    r_drop = 0.2
    l2_value = 0.001
    shared_layer = 576
    dense_num = 64

    look_back = 30  # number of previous timestamps used for training
    n_columns = 12  # total columns
    n_labels = 2  # number of labels
    split_ratio = 0.8  # train & test data split ratio

    trainX_list = []
    trainy_list = []
    testX_list = []
    testy_list = []
    file_list_train = glob.glob('../data/for_centralTimeAtt/*.csv')

    for i in range(len(file_list_train)):
        locals()['dataset' + str(i)] = file_list_train[i]
        locals()['dataset' + str(i)], locals()['scaled' + str(i)], locals()[
            'scaler' + str(i)] = helper_funcs.load_dataset(locals()['dataset' +
                                                                    str(i)])
        locals()['train_X' + str(i)], locals()['train_y' + str(i)], locals()[
            'test_X' +
            str(i)], locals()['test_y' + str(i)] = helper_funcs.split_dataset(
                locals()['dataset' + str(i)],
                locals()['scaled' + str(i)], look_back, n_columns, n_labels,
                split_ratio)

        trainX_list.append(locals()['train_X' + str(i)])
        trainy_list.append(locals()['train_y' + str(i)])
        testX_list.append(locals()['test_X' + str(i)])
        testy_list.append(locals()['test_y' + str(i)])

    model = build_model(trainX_list, task_num, lstm_layer, drop, r_drop,
                        l2_value, shared_layer, dense_num, n_labels)

    import time
    start_time = time.time()

    # fit network
    history = model.fit(
        trainX_list,
        trainy_list,
        epochs=100,
        batch_size=100,
        validation_split=0.25,
        # validation_data=(testX_list, testy_list),
        verbose=2,
        shuffle=False,
        callbacks=[
            keras.callbacks.EarlyStopping(monitor='val_loss',
                                          min_delta=0,
                                          patience=20,
                                          verbose=2,
                                          mode='min')
        ])
    end_time = time.time()
    print('--- %s seconds ---' % (end_time - start_time))

    # make prediction

    y_pred1, y_pred2, y_pred3 = model.predict(testX_list)
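    # model.predict on a multi-output model returns one array per output head,
    # so the 3-way unpack matches task_num = 3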

    #===========================================================================================#
    # write parameters & results to file
    # file = open('results/Attention_results(12)_F1.txt', 'w')
    file = open('TimeStep_30.txt', 'w')
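    # NOTE: this handle is written to below but never closed; wrapping it in
    # `with open('TimeStep_30.txt', 'w') as file:` would guarantee the results
    # are flushed to disk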

    file.write('task_num:' + str(task_num) + '\n')
    file.write('lstm_layer:' + str(lstm_layer) + '\n')
    file.write('drop:' + str(drop) + '\n')
    file.write('r_drop:' + str(r_drop) + '\n')
    file.write('l2_value:' + str(l2_value) + '\n')
    file.write('shared_layer:' + str(shared_layer) + '\n')
    file.write('dense_num:' + str(dense_num) + '\n')

    sum_Smape = 0
    sum_Smape_speed = 0
    sum_Smape_heartRate = 0
    sum_mae = 0
    sum_mae_speed = 0
    sum_mae_heartRate = 0
    # balance accuracy
    for i in range(len(file_list_train)):
        locals()['Smape' +
                 str(i)], locals()['mae' + str(i)] = helper_funcs.evaluation(
                     locals()['test_X' + str(i)],
                     locals()['test_y' + str(i)],
                     locals()['y_pred' + str(i + 1)], look_back, n_columns,
                     n_labels,
                     locals()['scaler' + str(i)])

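        # evaluation_single's final argument selects the label column:
        # 0 = speed, 1 = heart rate (the two labels, n_labels = 2)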
        locals()['Smape_speed' +
                 str(i)], locals()['mae_speed' +
                                   str(i)] = helper_funcs.evaluation_single(
                                       locals()['test_X' + str(i)],
                                       locals()['test_y' + str(i)],
                                       locals()['y_pred' + str(i + 1)],
                                       look_back, n_columns, n_labels,
                                       locals()['scaler' + str(i)], 0)
        locals()['Smape_heartRate' +
                 str(i)], locals()['mae_heartRate' +
                                   str(i)] = helper_funcs.evaluation_single(
                                       locals()['test_X' + str(i)],
                                       locals()['test_y' + str(i)],
                                       locals()['y_pred' + str(i + 1)],
                                       look_back, n_columns, n_labels,
                                       locals()['scaler' + str(i)], 1)

        file.write('Current file index is: ' + str(i) + '\n')
        file.write('Smape:' + ' ' + str(locals()['Smape' + str(i)]) + '\n')
        file.write('Smape_speed:' + ' ' +
                   str(locals()['Smape_speed' + str(i)]) + '\n')
        file.write('Smape_heartRate:' + ' ' +
                   str(locals()['Smape_heartRate' + str(i)]) + '\n')
        file.write('mae:' + ' ' + str(locals()['mae' + str(i)]) + '\n')
        file.write('mae_speed:' + ' ' + str(locals()['mae_speed' + str(i)]) +
                   '\n')
        file.write('mae_heartRate:' + ' ' +
                   str(locals()['mae_heartRate' + str(i)]) + '\n')

        file.write('\n')

        sum_Smape = sum_Smape + locals()['Smape' + str(i)]
        sum_Smape_speed = sum_Smape_speed + locals()['Smape_speed' + str(i)]
        sum_Smape_heartRate = sum_Smape_heartRate + locals()['Smape_heartRate'
                                                             + str(i)]
        sum_mae = sum_mae + locals()['mae' + str(i)]
        sum_mae_speed = sum_mae_speed + locals()['mae_speed' + str(i)]
        sum_mae_heartRate = sum_mae_heartRate + locals()['mae_heartRate' +
                                                         str(i)]

    file.write('avg_Smape: ' + str(sum_Smape / len(file_list_train)) + '\n')
    file.write('avg_sum_Smape_speed: ' +
               str(sum_Smape_speed / len(file_list_train)) + '\n')
    file.write('avg_sum_Smape_heartRate: ' +
               str(sum_Smape_heartRate / len(file_list_train)) + '\n')
    file.write('avg_mae: ' + str(sum_mae / len(file_list_train)) + '\n')
    file.write('avg_sum_mae_speed: ' +
               str(sum_mae_speed / len(file_list_train)) + '\n')
    file.write('avg_sum_mae_heartRate: ' +
               str(sum_mae_heartRate / len(file_list_train)) + '\n')

    file.write('training time:' + str(end_time - start_time))
def main():
    look_back = 10  # number of previous timestamps used for training
    n_columns = 12  # total columns
    n_labels = 6  # number of labels
    split_ratio = 0.8  # train & test data split ratio

    file_list_train = glob.glob('data/data_user/*.csv')

    file = open('results/Single_MLP_1.txt', 'w')
    sum_Smape = 0
    sum_Smape_speed = 0
    sum_Smape_heartRate = 0
    sum_mae = 0
    sum_mae_speed = 0
    sum_mae_heartRate = 0

    for i in range(len(file_list_train)):
        locals()['dataset' + str(i)] = file_list_train[i]
        locals()['dataset' + str(i)], locals()['scaled' + str(i)], locals()[
            'scaler' + str(i)] = helper_funcs.load_dataset(
            locals()['dataset' + str(i)])
        locals()['train_X' + str(i)], locals()['train_y' + str(i)], locals()['test_X' + str(i)], locals()[
            'test_y' + str(i)] = helper_funcs.split_dataset(locals()['dataset' + str(i)], locals()['scaled' + str(i)],
                                                            look_back,
                                                            n_columns, n_labels, split_ratio)

        model = build_model(locals()['train_X' + str(i)])
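        # (build_model here takes only the training tensor; the multi-task
        #  examples in this file call a build_model with a different signature,
        #  so each example presumably ships its own definition)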

        import time
        start_time = time.time()

        # fit network
        history = model.fit(locals()['train_X' + str(i)], locals()['train_y' + str(i)], epochs=200, batch_size=100,
                            validation_data=(locals()['test_X' + str(i)], locals()['test_y' + str(i)]), verbose=2,
                            shuffle=False,
                            callbacks=[
                                keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=2,
                                                              mode='min')]
                            )

        end_time = time.time()
        print('--- %s seconds ---' % (end_time - start_time))

        # plot history
        # plt.plot(history.history['loss'], label='train')
        # plt.plot(history.history['val_loss'], label='test')
        # plt.legend()
        # plt.show()

        # make a prediction
        y_predict = model.predict(locals()['test_X' + str(i)])
        # results = helper_funcs.evaluation(locals()['test_X' + str(i)], locals()['test_y' + str(i)], y_predict, look_back, n_columns, n_labels, locals()['scaler' + str(i)])

        locals()['Smape' + str(i)], locals()['mae' + str(i)] = helper_funcs.evaluation(locals()['test_X' + str(i)],
                                                                                       locals()['test_y' + str(i)],
                                                                                       y_predict,
                                                                                       look_back, n_columns, n_labels,
                                                                                       locals()['scaler' + str(i)])

        locals()['Smape_speed' + str(i)], locals()['mae_speed' + str(i)] = helper_funcs.evaluation_single(
            locals()['test_X' + str(i)], locals()['test_y' + str(i)],
            y_predict,
            look_back, n_columns, n_labels,
            locals()['scaler' + str(i)], 0)
        locals()['Smape_heartRate' + str(i)], locals()['mae_heartRate' + str(i)] = helper_funcs.evaluation_single(
            locals()['test_X' + str(i)], locals()['test_y' + str(i)],
            y_predict,
            look_back, n_columns, n_labels,
            locals()['scaler' + str(i)], 1)

        file.write('Current file index is: ' + str(i) + '\n')
        file.write('Smape:' + ' ' + str(locals()['Smape' + str(i)]) + '\n')
        file.write('Smape_speed:' + ' ' + str(locals()['Smape_speed' + str(i)]) + '\n')
        file.write('Smape_heartRate:' + ' ' + str(locals()['Smape_heartRate' + str(i)]) + '\n')
        file.write('mae:' + ' ' + str(locals()['mae' + str(i)]) + '\n')
        file.write('mae_speed:' + ' ' + str(locals()['mae_speed' + str(i)]) + '\n')
        file.write('mae_heartRate:' + ' ' + str(locals()['mae_heartRate' + str(i)]) + '\n')

        file.write('\n')

        sum_Smape = sum_Smape + locals()['Smape' + str(i)]
        sum_Smape_speed = sum_Smape_speed + locals()['Smape_speed' + str(i)]
        sum_Smape_heartRate = sum_Smape_heartRate + locals()['Smape_heartRate' + str(i)]
        sum_mae = sum_mae + locals()['mae' + str(i)]
        sum_mae_speed = sum_mae_speed + locals()['mae_speed' + str(i)]
        sum_mae_heartRate = sum_mae_heartRate + locals()['mae_heartRate' + str(i)]

    file.write('avg_Smape: ' + str(sum_Smape / len(file_list_train)) + '\n')
    file.write('avg_sum_Smape_speed: ' + str(sum_Smape_speed / len(file_list_train)) + '\n')
    file.write('avg_sum_Smape_heartRate: ' + str(sum_Smape_heartRate / len(file_list_train)) + '\n')
    file.write('avg_mae: ' + str(sum_mae / len(file_list_train)) + '\n')
    file.write('avg_sum_mae_speed: ' + str(sum_mae_speed / len(file_list_train)) + '\n')
    file.write('avg_sum_mae_heartRate: ' + str(sum_mae_heartRate / len(file_list_train)) + '\n')

    file.write('training time:' + str(end_time - start_time))
def main():

    # network parameters
    task_num = 9
    lstm_layer = 64
    drop = 0.2
    r_drop = 0.2
    l2_value = 0.001
    shared_layer = 576
    dense_num = 64

    look_back = 20  # number of previous timestamps used for training
    n_columns = 15  # total columns
    n_labels = 6  # number of labels
    split_ratio = 0.8  # train & test data split ratio

    trainX_list = []
    trainy_list = []
    testX_list = []
    testy_list = []
    file_list_train = glob.glob('preprocessed_data/train/*.csv')
    file_list_test = glob.glob('preprocessed_data/test/*.csv')

    for i in range(len(file_list_train)):
        locals()['dataset' + str(i)] = file_list_train[i]
        locals()['dataset' + str(i)], locals()['scaled' + str(i)], locals()['scaler' + str(i)] = helper_funcs.load_dataset(
            locals()['dataset' + str(i)])
        locals()['train_X' + str(i)], locals()['train_y' + str(i)] = helper_funcs.split_dataset(locals()['scaled' + str(i)],
                                                                                   look_back, n_columns, n_labels)

        trainX_list.append(locals()['train_X' + str(i)])
        trainy_list.append(locals()['train_y' + str(i)])

    for i in range(len(file_list_test)):
        locals()['dataset_test' + str(i)] = file_list_test[i]
        locals()['dataset_test' + str(i)], locals()['scaled_test' + str(i)], locals()['scaler_test' + str(i)] = helper_funcs.load_dataset(locals()['dataset_test' + str(i)])
        locals()['test_X' + str(i)], locals()['test_y' + str(i)] = helper_funcs.split_dataset(locals()['scaled_test' + str(i)],
                                                                                 look_back,
                                                                                 n_columns, n_labels)
        testX_list.append(locals()['test_X' + str(i)])
        testy_list.append(locals()['test_y' + str(i)])

    model = build_model(trainX_list, task_num, lstm_layer, drop, r_drop,
                        l2_value, shared_layer, dense_num, n_labels)


    import time
    start_time = time.time()

    # fit network
    history = model.fit(trainX_list, trainy_list,
                        epochs=300,
                        batch_size=120,
                        validation_split=0.25,
                        # validation_data=(testX_list, testy_list),
                        verbose=2,
                        shuffle=False,
                        callbacks=[
                            keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=20, verbose=2,
                                                          mode='min')]
                        )
    end_time = time.time()
    print('--- %s seconds ---' % (end_time - start_time))



    # make prediction
    pred_time = time.time()

    y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6, y_pred7, y_pred8, y_pred9 = model.predict(testX_list)
    
    end_pred_time = time.time()

    #===========================================================================================#
    # write parameters & results to file
    # file = open('results/Attention_results(12)_F1.txt', 'w')
    file = open('time_cost/FATHOMb1.txt', 'w')

    file.write('task_num:' + str(task_num) + '\n')
    file.write('lstm_layer:' + str(lstm_layer) + '\n')
    file.write('drop:' + str(drop) + '\n')
    file.write('r_drop:' + str(r_drop) + '\n')
    file.write('l2_value:' + str(l2_value) + '\n')
    file.write('shared_layer:' + str(shared_layer) + '\n')
    file.write('dense_num:' + str(dense_num) + '\n')

    sum_Smape = 0
    sum_Smape_PM25 = 0
    sum_Smape_PM10 = 0
    sum_Smape_NO2 = 0
    sum_Smape_CO = 0
    sum_Smape_O3 = 0
    sum_Smape_SO2 = 0
    sum_mae = 0
    sum_mae_PM25 = 0
    sum_mae_PM10 = 0
    sum_mae_NO2 = 0
    sum_mae_CO = 0
    sum_mae_O3 = 0
    sum_mae_SO2 = 0

    for i in range(len(file_list_test)):
        locals()['Smape' + str(i)], locals()['mae' + str(i)] = helper_funcs.evaluation(locals()['test_X' + str(i)],
                                                                                       locals()['test_y' + str(i)],
                                                                                       locals()['y_pred' + str(i + 1)],
                                                                                       look_back, n_columns, n_labels,
                                                                                       locals()['scaler_test' + str(i)])

        locals()['Smape_PM25' + str(i)], locals()['mae_PM25' + str(i)] = helper_funcs.evaluation_single(
            locals()['test_X' + str(i)], locals()['test_y' + str(i)],
            locals()['y_pred' + str(i + 1)],
            look_back, n_columns, n_labels,
            locals()['scaler_test' + str(i)], 0)
        locals()['Smape_PM10' + str(i)], locals()['mae_PM10' + str(i)] = helper_funcs.evaluation_single(
            locals()['test_X' + str(i)], locals()['test_y' + str(i)],
            locals()['y_pred' + str(i + 1)],
            look_back, n_columns, n_labels,
            locals()['scaler_test' + str(i)], 1)
        locals()['Smape_NO2' + str(i)], locals()['mae_NO2' + str(i)] = helper_funcs.evaluation_single(
            locals()['test_X' + str(i)], locals()['test_y' + str(i)],
            locals()['y_pred' + str(i + 1)],
            look_back, n_columns, n_labels,
            locals()['scaler_test' + str(i)], 2)
        locals()['Smape_CO' + str(i)], locals()['mae_CO' + str(i)] = helper_funcs.evaluation_single(
            locals()['test_X' + str(i)], locals()['test_y' + str(i)],
            locals()['y_pred' + str(i + 1)],
            look_back, n_columns, n_labels,
            locals()['scaler_test' + str(i)], 3)
        locals()['Smape_O3' + str(i)], locals()['mae_O3' + str(i)] = helper_funcs.evaluation_single(
            locals()['test_X' + str(i)], locals()['test_y' + str(i)],
            locals()['y_pred' + str(i + 1)],
            look_back, n_columns, n_labels,
            locals()['scaler_test' + str(i)], 4)
        locals()['Smape_SO2' + str(i)], locals()['mae_SO2' + str(i)] = helper_funcs.evaluation_single(
            locals()['test_X' + str(i)], locals()['test_y' + str(i)],
            locals()['y_pred' + str(i + 1)],
            look_back, n_columns, n_labels,
            locals()['scaler_test' + str(i)], 5)

        file.write('Current file index is: ' + str(i) + '\n')
        file.write('Smape:' + ' ' + str(locals()['Smape' + str(i)]) + '\n')
        file.write('Smape_PM25:' + ' ' + str(locals()['Smape_PM25' + str(i)]) + '\n')
        file.write('Smape_PM10:' + ' ' + str(locals()['Smape_PM10' + str(i)]) + '\n')
        file.write('Smape_NO2:' + ' ' + str(locals()['Smape_NO2' + str(i)]) + '\n')
        file.write('Smape_CO:' + ' ' + str(locals()['Smape_CO' + str(i)]) + '\n')
        file.write('Smape_O3:' + ' ' + str(locals()['Smape_O3' + str(i)]) + '\n')
        file.write('Smape_SO2:' + ' ' + str(locals()['Smape_SO2' + str(i)]) + '\n')
        file.write('mae:' + ' ' + str(locals()['mae' + str(i)]) + '\n')
        file.write('mae_PM25:' + ' ' + str(locals()['mae_PM25' + str(i)]) + '\n')
        file.write('mae_PM10:' + ' ' + str(locals()['mae_PM10' + str(i)]) + '\n')
        file.write('mae_NO2:' + ' ' + str(locals()['mae_NO2' + str(i)]) + '\n')
        file.write('mae_CO:' + ' ' + str(locals()['mae_CO' + str(i)]) + '\n')
        file.write('mae_O3:' + ' ' + str(locals()['mae_O3' + str(i)]) + '\n')
        file.write('mae_SO2:' + ' ' + str(locals()['mae_SO2' + str(i)]) + '\n')
        file.write('\n')

        sum_Smape = sum_Smape + locals()['Smape' + str(i)]
        sum_Smape_PM25 = sum_Smape_PM25 + locals()['Smape_PM25' + str(i)]
        sum_Smape_PM10 = sum_Smape_PM10 + locals()['Smape_PM10' + str(i)]
        sum_Smape_NO2 = sum_Smape_NO2 + locals()['Smape_NO2' + str(i)]
        sum_Smape_CO = sum_Smape_CO + locals()['Smape_CO' + str(i)]
        sum_Smape_O3 = sum_Smape_O3 + locals()['Smape_O3' + str(i)]
        sum_Smape_SO2 = sum_Smape_SO2 + locals()['Smape_SO2' + str(i)]
        sum_mae = sum_mae + locals()['mae' + str(i)]
        sum_mae_PM25 = sum_mae_PM25 + locals()['mae_PM25' + str(i)]
        sum_mae_PM10 = sum_mae_PM10 + locals()['mae_PM10' + str(i)]
        sum_mae_NO2 = sum_mae_NO2 + locals()['mae_NO2' + str(i)]
        sum_mae_CO = sum_mae_CO + locals()['mae_CO' + str(i)]
        sum_mae_O3 = sum_mae_O3 + locals()['mae_O3' + str(i)]
        sum_mae_SO2 = sum_mae_SO2 + locals()['mae_SO2' + str(i)]

    file.write('avg_Smape: ' + str(sum_Smape / len(file_list_test)) + '\n')
    file.write('avg_Smape_PM25: ' + str(sum_Smape_PM25 / len(file_list_test)) + '\n')
    file.write('avg_Smape_PM10: ' + str(sum_Smape_PM10 / len(file_list_test)) + '\n')
    file.write('avg_Smape_NO2: ' + str(sum_Smape_NO2 / len(file_list_test)) + '\n')
    file.write('avg_Smape_CO: ' + str(sum_Smape_CO / len(file_list_test)) + '\n')
    file.write('avg_Smape_O3: ' + str(sum_Smape_O3 / len(file_list_test)) + '\n')
    file.write('avg_Smape_SO2: ' + str(sum_Smape_SO2 / len(file_list_test)) + '\n')
    file.write('avg_mae: ' + str(sum_mae / len(file_list_test)) + '\n')
    file.write('avg_mae_PM25: ' + str(sum_mae_PM25 / len(file_list_test)) + '\n')
    file.write('avg_mae_PM10: ' + str(sum_mae_PM10 / len(file_list_test)) + '\n')
    file.write('avg_mae_NO2: ' + str(sum_mae_NO2 / len(file_list_test)) + '\n')
    file.write('avg_mae_CO: ' + str(sum_mae_CO / len(file_list_test)) + '\n')
    file.write('avg_mae_O3: ' + str(sum_mae_O3 / len(file_list_test)) + '\n')
    file.write('avg_mae_SO2: ' + str(sum_mae_SO2 / len(file_list_test)) + '\n')
    file.write('training time:' + str(end_time - start_time) + '\n')
    file.write('prediction time:' + str(end_pred_time - pred_time))
def main():

    # network parameters
    task_num = 40
    con_layer1 = 128  # for 6 users
    # con_layer1 = 256
    con_layer1_filter = 5
    con_layer2 = 64
    con_layer2_filter = 4
    lstm_layer = 64
    drop = 0.25
    r_drop = 0.25
    l2_value = 0.001
    shared_layer = 576
    dense_num = 64

    look_back = 20  # number of previous timestamps used for training
    n_columns = 276  # total columns
    n_labels = 51  # number of labels
    split_ratio = 0.8  # train & test data split ratio

    trainX_list = []
    trainy_list = []
    testX_list = []
    testy_list = []
    file_list = glob.glob('data_csv/train/*.csv')

    for i in range(len(file_list)):
        locals()['dataset' + str(i)] = file_list[i]
        locals()['dataset' + str(i)], locals()['scaled' + str(i)], locals()[
            'scaler' + str(i)] = helper_funcs.load_dataset(locals()['dataset' +
                                                                    str(i)])
        locals()['train_X' + str(i)], locals()['train_y' + str(i)], locals()[
            'test_X' +
            str(i)], locals()['test_y' + str(i)] = helper_funcs.split_dataset(
                locals()['dataset' + str(i)],
                locals()['scaled' + str(i)], look_back, n_columns, n_labels,
                split_ratio)
        trainX_list.append(locals()['train_X' + str(i)])
        trainy_list.append(locals()['train_y' + str(i)])
        testX_list.append(locals()['test_X' + str(i)])
        testy_list.append(locals()['test_y' + str(i)])

    model = build_model(trainX_list, task_num, con_layer1, con_layer1_filter,
                        con_layer2, con_layer2_filter, lstm_layer, drop,
                        r_drop, l2_value, shared_layer, dense_num, n_labels)

    import time
    start_time = time.time()

    # fit network
    history = model.fit(
        trainX_list,
        trainy_list,
        epochs=100,
        batch_size=60,
        validation_split=0.25,
        # validation_data=(testX_list, testy_list),
        verbose=2,
        shuffle=False,
        callbacks=[
            keras.callbacks.EarlyStopping(monitor='val_loss',
                                          min_delta=0,
                                          patience=20,
                                          verbose=2,
                                          mode='min')
        ])
    end_time = time.time()
    print('--- %s seconds ---' % (end_time - start_time))

    # make prediction
    pred_time = time.time()

    y_pred1,y_pred2,y_pred3,y_pred4,y_pred5,y_pred6,y_pred7,y_pred8,y_pred9,y_pred10,y_pred11,y_pred12,y_pred13,y_pred14,y_pred15,y_pred16,y_pred17,y_pred18,y_pred19,y_pred20,y_pred21,y_pred22,y_pred23,y_pred24,y_pred25,y_pred26,y_pred27 \
        , y_pred28,y_pred29,y_pred30,y_pred31,y_pred32,y_pred33,y_pred34,y_pred35,y_pred36,y_pred37,y_pred38,y_pred39,y_pred40 = model.predict(testX_list)
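    # (a loop-friendly alternative sketch: keep the list that model.predict
    #  returns, e.g. `y_preds = model.predict(testX_list)`, and index
    #  y_preds[i] instead of unpacking 40 names and reading them back through
    #  locals() below)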
    # print (len(y_pred1))
    # y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6 = model.predict(testX_list)
    # y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6,y_pred7,y_pred8,y_pred9,y_pred10,y_pred11,y_pred12 = model.predict(testX_list)
    pred_end_time = time.time()

    #===========================================================================================#
    # write parameters & results to file
    # file = open('results/Attention_results(12)_F1.txt', 'w')
    file = open('time_cost/FATHOMb2_40users.txt', 'w')

    file.write('task_num:' + str(task_num) + '\n')
    file.write('con_layer1:' + str(con_layer1) + '\n')
    file.write('con_layer1_filter:' + str(con_layer1_filter) + '\n')
    file.write('con_layer2:' + str(con_layer2) + '\n')
    file.write('con_layer2_filter:' + str(con_layer2_filter) + '\n')
    file.write('lstm_layer:' + str(lstm_layer) + '\n')
    file.write('drop:' + str(drop) + '\n')
    file.write('r_drop:' + str(r_drop) + '\n')
    file.write('l2_value:' + str(l2_value) + '\n')
    file.write('shared_layer:' + str(shared_layer) + '\n')
    file.write('dense_num:' + str(dense_num) + '\n')

    sum_bacc = 0
    sum_TPR = 0
    Num_tp = 0
    Num_fn = 0
    Num_fp = 0
    Num_tn = 0
    sum_precision = 0
    sum_F1 = 0

    # balance accuracy
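    # helper_funcs.evaluation returns a 10-tuple here, indexed below as:
    # (accuracy, TPR, TNR, balanced accuracy (presumably (TPR + TNR) / 2),
    #  TP count, FN count, FP count, TN count, precision, F1)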
    for i in range(len(file_list)):
        locals()['Bacc' + str(i)] = helper_funcs.evaluation(
            locals()['test_X' + str(i)],
            locals()['test_y' + str(i)],
            locals()['y_pred' + str(i + 1)], look_back, n_columns, n_labels,
            locals()['scaler' + str(i)])
        sum_bacc = sum_bacc + (locals()['Bacc' + str(i)])[3]
        sum_TPR = sum_TPR + (locals()['Bacc' + str(i)])[1]
        Num_tp = Num_tp + (locals()['Bacc' + str(i)])[4]
        Num_fn = Num_fn + (locals()['Bacc' + str(i)])[5]
        Num_fp = Num_fp + (locals()['Bacc' + str(i)])[6]
        Num_tn = Num_tn + (locals()['Bacc' + str(i)])[7]
        sum_precision = sum_precision + (locals()['Bacc' + str(i)])[8]
        sum_F1 = sum_F1 + (locals()['Bacc' + str(i)])[9]

        file.write('Accuracy:' + ' ' + str((locals()['Bacc' + str(i)])[0]) +
                   ' ')
        file.write('TPR:' + ' ' + str((locals()['Bacc' + str(i)])[1]) + ' ')
        file.write('TNR:' + ' ' + str((locals()['Bacc' + str(i)])[2]) + ' ')
        file.write('Bacc:' + ' ' + str((locals()['Bacc' + str(i)])[3]) + '\n')
        file.write('TP No.:' + ' ' + str((locals()['Bacc' + str(i)])[4]) +
                   '\n')
        file.write('FN No.:' + ' ' + str((locals()['Bacc' + str(i)])[5]) +
                   '\n')
        file.write('FP No.:' + ' ' + str((locals()['Bacc' + str(i)])[6]) +
                   '\n')
        file.write('TN No.:' + ' ' + str((locals()['Bacc' + str(i)])[7]) +
                   '\n')
        file.write('Precision:' + ' ' + str((locals()['Bacc' + str(i)])[8]) +
                   '\n')
        file.write('F1:' + ' ' + str((locals()['Bacc' + str(i)])[9]) + '\n')

    file.write('avg_bacc: ' + str(sum_bacc / len(file_list)) + '\n')
    file.write('avg_TPR: ' + str(sum_TPR / len(file_list)) + '\n')
    file.write('avg_precision: ' + str(sum_precision / len(file_list)) + '\n')
    file.write('avg_F1: ' + str(sum_F1 / len(file_list)) + '\n')
    file.write('sum_Num_tp: ' + str(Num_tp) + '\n')
    file.write('sum_Num_fn: ' + str(Num_fn) + '\n')
    file.write('sum_Num_fp: ' + str(Num_fp) + '\n')
    file.write('sum_Num_tn: ' + str(Num_tn) + '\n')
    file.write('training time:' + str(end_time - start_time) + '\n')
    file.write('prediction time:' + str(pred_end_time - pred_time))
def main():

    # network parameters
    task_num = 6
    con_layer1 = 128  # for 6 users
    # con_layer1 = 256
    con_layer1_filter = 5
    con_layer2 = 64
    con_layer2_filter = 4
    lstm_layer = 64
    drop = 0.2
    r_drop = 0.2
    l2_value = 0.001
    shared_layer = 576
    dense_num = 64

    look_back = 30  # number of previous timestamps used for training
    n_columns = 276  # total columns
    n_labels = 51  # number of labels
    split_ratio = 0.8  # train & test data split ratio

    trainX_list = []
    trainy_list = []
    testX_list = []
    testy_list = []
    file_list = glob.glob('../data_csv/train/6users/*.csv')

    for i in range(len(file_list)):
        locals()['dataset' + str(i)] = file_list[i]
        locals()['dataset' + str(i)], locals()['scaled' + str(i)], locals()[
            'scaler' + str(i)] = helper_funcs.load_dataset(locals()['dataset' +
                                                                    str(i)])
        locals()['train_X' + str(i)], locals()['train_y' + str(i)], locals()[
            'test_X' +
            str(i)], locals()['test_y' + str(i)] = helper_funcs.split_dataset(
                locals()['dataset' + str(i)],
                locals()['scaled' + str(i)], look_back, n_columns, n_labels,
                split_ratio)
        trainX_list.append(locals()['train_X' + str(i)])
        trainy_list.append(locals()['train_y' + str(i)])
        testX_list.append(locals()['test_X' + str(i)])
        testy_list.append(locals()['test_y' + str(i)])

    model = build_model(trainX_list, task_num, con_layer1, con_layer1_filter,
                        con_layer2, con_layer2_filter, lstm_layer, drop,
                        r_drop, l2_value, shared_layer, dense_num, n_labels)

    import time
    start_time = time.time()

    # fit network
    history = model.fit(
        trainX_list,
        trainy_list,
        epochs=100,
        batch_size=60,
        validation_split=0.25,
        # validation_data=(testX_list, testy_list),
        verbose=2,
        shuffle=False,
        callbacks=[
            keras.callbacks.EarlyStopping(monitor='val_loss',
                                          min_delta=0,
                                          patience=10,
                                          verbose=2,
                                          mode='min')
        ])
    end_time = time.time()
    print('--- %s seconds ---' % (end_time - start_time))

    # ####################  attention plot - input dimension ################################
    #
    # attention_vectorsd = []
    # layer_name_list = ['attention_vecd0','attention_vecd1','attention_vecd2','attention_vecd3','attention_vecd4','attention_vecd5']
    # for i in range(len(file_list)):
    #     for j in range(10):
    #         activations = helper_funcs.get_activations(model, locals()['test_X' + str(i)], \
    #                                                    print_shape_only=True, layer_name=layer_name_list[i])
    #         attention_vec = np.mean(activations[0], axis=0).squeeze()
    #         print('attention =', attention_vec)
    #         # assert (np.sum(attention_vec) - 1.0) < 1e-5
    #         attention_vectorsd.append(attention_vec)
    #
    #     attention_vector_final = np.mean(np.array(attention_vectorsd), axis=0)
    #     print(len(attention_vector_final[0]))
    #
    #     # plot part.
    #     import matplotlib.pyplot as plt
    #     import pandas as pd
    #
    #     df = pd.DataFrame(attention_vector_final[26], columns=['attention (%)']) ## attention_vector_final[i], i is the index of test datapoint
    #     df.to_csv('results/attention_plot/input_dim_attention'+str(i)+'.csv')
    #     df.plot(kind='bar',title='Attention Mechanism as ''a function of input'' dimensions.')
    #     # plt.figure(figsize=(100, 100))
    #     plt.xticks(rotation=90)
    #     # plt.savefig('results/attention_plot/input_dim_attention_plot'+ str(i)+'.png', dpi=150)
    #     plt.show()
    #
    # ###################################### attention plot-input dimension ends #########################################

    # ####################  attention plot - TIME STEP ################################
    #
    # attention_vectorst = []
    # for i in range(len(file_list)):
    #     for j in range(10):
    #         activations = helper_funcs.get_activations(model, locals()['test_X' + str(i)], \
    #                                                    print_shape_only=True, layer_name='attention_vect'+str(i))
    #         attention_vec = np.mean(activations[0], axis=2).squeeze()
    #         print('attention_vec shape:', attention_vec.shape)
    #         print('attention =', attention_vec)
    #         # assert (np.sum(attention_vec) - 1.0) < 1e-5
    #         attention_vectorst.append(attention_vec)
    #
    #     attention_vector_final = np.mean(np.array(attention_vectorst), axis=0)
    #     print('attention_vector_final shape:', attention_vector_final.shape)
    #
    #     # plot part.
    #     import matplotlib.pyplot as plt
    #     import pandas as pd
    #
    #     df = pd.DataFrame(attention_vector_final[0], columns=['attention (%)'])
    #     df.to_csv('results/attention_plot/TIME_STEP_attention' + str(i) + '.csv')
    #     df.plot(kind='bar', title='Attention Mechanism as ''a function of input'' dimensions.')
    #     # plt.figure(figsize=(100, 100))
    #     plt.xticks(rotation=90)
    #     plt.savefig('results/attention_plot/TIME_STEP_attention_plot' + str(i) + '.png', dpi=150)
    #     plt.show()
    #
    # ###################################### attention plot-TIME STEP ends #########################################

    # make prediction

    # y_pred1,y_pred2,y_pred3,y_pred4,y_pred5,y_pred6,y_pred7,y_pred8,y_pred9,y_pred10,y_pred11,y_pred12,y_pred13,y_pred14,y_pred15,y_pred16,y_pred17,y_pred18,y_pred19,y_pred20,y_pred21,y_pred22,y_pred23,y_pred24,y_pred25,y_pred26,y_pred27 \
    #     , y_pred28,y_pred29,y_pred30,y_pred31,y_pred32,y_pred33,y_pred34,y_pred35,y_pred36,y_pred37,y_pred38,y_pred39,y_pred40 = model.predict(testX_list)
    # print (len(y_pred1))
    y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6 = model.predict(
        testX_list)
    # y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6,y_pred7,y_pred8,y_pred9,y_pred10,y_pred11,y_pred12 = model.predict(testX_list)

    #===========================================================================================#
    # write parameters & results to file
    # file = open('results/Attention_results(12)_F1.txt', 'w')
    file = open('TimeStep_30.txt', 'w')

    file.write('task_num:' + str(task_num) + '\n')
    file.write('con_layer1:' + str(con_layer1) + '\n')
    file.write('con_layer1_filter:' + str(con_layer1_filter) + '\n')
    file.write('con_layer2:' + str(con_layer2) + '\n')
    file.write('con_layer2_filter:' + str(con_layer2_filter) + '\n')
    file.write('lstm_layer:' + str(lstm_layer) + '\n')
    file.write('drop:' + str(drop) + '\n')
    file.write('r_drop:' + str(r_drop) + '\n')
    file.write('l2_value:' + str(l2_value) + '\n')
    file.write('shared_layer:' + str(shared_layer) + '\n')
    file.write('dense_num:' + str(dense_num) + '\n')

    sum_bacc = 0
    sum_TPR = 0
    Num_tp = 0
    Num_fn = 0
    Num_fp = 0
    Num_tn = 0
    sum_precision = 0
    sum_F1 = 0

    # balance accuracy
    for i in range(len(file_list)):
        locals()['Bacc' + str(i)] = helper_funcs.evaluation(
            locals()['test_X' + str(i)],
            locals()['test_y' + str(i)],
            locals()['y_pred' + str(i + 1)], look_back, n_columns, n_labels,
            locals()['scaler' + str(i)])
        sum_bacc = sum_bacc + (locals()['Bacc' + str(i)])[3]
        sum_TPR = sum_TPR + (locals()['Bacc' + str(i)])[1]
        Num_tp = Num_tp + (locals()['Bacc' + str(i)])[4]
        Num_fn = Num_fn + (locals()['Bacc' + str(i)])[5]
        Num_fp = Num_fp + (locals()['Bacc' + str(i)])[6]
        Num_tn = Num_tn + (locals()['Bacc' + str(i)])[7]
        sum_precision = sum_precision + (locals()['Bacc' + str(i)])[8]
        sum_F1 = sum_F1 + (locals()['Bacc' + str(i)])[9]

        file.write('Accuracy:' + ' ' + str((locals()['Bacc' + str(i)])[0]) +
                   ' ')
        file.write('TPR:' + ' ' + str((locals()['Bacc' + str(i)])[1]) + ' ')
        file.write('TNR:' + ' ' + str((locals()['Bacc' + str(i)])[2]) + ' ')
        file.write('Bacc:' + ' ' + str((locals()['Bacc' + str(i)])[3]) + '\n')
        file.write('TP No.:' + ' ' + str((locals()['Bacc' + str(i)])[4]) +
                   '\n')
        file.write('FN No.:' + ' ' + str((locals()['Bacc' + str(i)])[5]) +
                   '\n')
        file.write('FP No.:' + ' ' + str((locals()['Bacc' + str(i)])[6]) +
                   '\n')
        file.write('TN No.:' + ' ' + str((locals()['Bacc' + str(i)])[7]) +
                   '\n')
        file.write('Precision:' + ' ' + str((locals()['Bacc' + str(i)])[8]) +
                   '\n')
        file.write('F1:' + ' ' + str((locals()['Bacc' + str(i)])[9]) + '\n')

    file.write('avg_bacc: ' + str(sum_bacc / len(file_list)) + '\n')
    file.write('avg_TPR: ' + str(sum_TPR / len(file_list)) + '\n')
    file.write('avg_precision: ' + str(sum_precision / len(file_list)) + '\n')
    file.write('avg_F1: ' + str(sum_F1 / len(file_list)) + '\n')
    file.write('sum_Num_tp: ' + str(Num_tp) + '\n')
    file.write('sum_Num_fn: ' + str(Num_fn) + '\n')
    file.write('sum_Num_fp: ' + str(Num_fp) + '\n')
    file.write('sum_Num_tn: ' + str(Num_tn) + '\n')
    file.write('training time:' + str(end_time - start_time))
def main():

    # network parameters
    task_num = 3
    con_layer1 = 128 # for 6 users
    # con_layer1 = 256
    con_layer1_filter = 1
    con_layer2 = 64
    con_layer2_filter = 4
    lstm_layer = 64
    drop = 0.2
    r_drop = 0.2
    l2_value = 0.001
    shared_layer = 576
    dense_num = 64

    look_back = 10  # number of previous timestamps used for training
    n_columns = 12  # total columns
    n_labels = 2  # number of labels
    split_ratio = 0.8  # train & test data split ratio

    trainX_list = []
    trainy_list = []
    testX_list = []
    testy_list = []
    file_list_train = glob.glob('../data/for_centralTimeAtt/*.csv')

    for i in range(len(file_list_train)):
        locals()['dataset' + str(i)] = file_list_train[i]
        locals()['dataset' + str(i)], locals()['scaled' + str(i)], locals()[
            'scaler' + str(i)] = helper_funcs.load_dataset(
            locals()['dataset' + str(i)])
        locals()['train_X' + str(i)], locals()['train_y' + str(i)], locals()['test_X' + str(i)], locals()[
            'test_y' + str(i)] = helper_funcs.split_dataset(locals()['dataset' + str(i)], locals()['scaled' + str(i)],
                                                            look_back,
                                                            n_columns, n_labels, split_ratio)

        trainX_list.append(locals()['train_X' + str(i)])
        trainy_list.append(locals()['train_y' + str(i)])
        testX_list.append(locals()['test_X' + str(i)])
        testy_list.append(locals()['test_y' + str(i)])

    model = build_model(trainX_list, task_num, lstm_layer, drop, r_drop,
                        l2_value, shared_layer, dense_num, n_labels)



    import time
    start_time = time.time()

    # fit network
    history = model.fit(trainX_list, trainy_list,
                        epochs=1,
                        batch_size=120,
                        validation_split=0.25,
                        # validation_data=(testX_list, testy_list),
                        verbose=2,
                        shuffle=False,
                        callbacks=[
                            keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=20, verbose=2,
                                                          mode='min')]
                        )
    end_time = time.time()
    print('--- %s seconds ---' % (end_time - start_time))

    y_pred1, y_pred2, y_pred3 = model.predict(testX_list)

    ####################  attention plot - TIME STEP level ################################

    attention_vectors = []

    ##### Case Study, input_dimension ################
    #### axis = 1 plots the attention on input_dim; axis = 2 plots the attention on the TIME_STEP dimension
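    # NOTE: attention_vectors is initialised once outside the k-loop, so each
    # file's "final" average also folds in vectors from earlier files; reset
    # the list inside the loop if strictly per-file averages are intended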
    for k in range(len(file_list_train)):
        for j in range(10):
            attention_vector = np.mean(helper_funcs.get_activationsT(
                k, model, testX_list[k],
                print_shape_only=True,
                layer_name='attention_vect' + str(k))[0], axis=2).squeeze()
            # print('attention =', attention_vector)
            # assert (np.sum(attention_vector) - 1.0) < 1e-5
            attention_vectors.append(attention_vector)
            print('.....')
            print(len(attention_vector))

        attention_vector_final = np.mean(np.array(attention_vectors), axis=0)
        # print('attention final=', attention_vector_final[0])
        # print('attention final length=', len(attention_vector_final))

        # plot part.
        import matplotlib.pyplot as plt
        import pandas as pd


        df = pd.DataFrame(attention_vector_final[1], columns=['attention (%)'])
        df.to_csv('TIME_STEP_attention' + str(k) + '.csv')
        df.plot(kind='bar', title='Attention Mechanism as a function of input dimensions.')
        # plt.figure(figsize=(100, 100))
        plt.xticks(rotation=90)
        plt.savefig('TIME_STEP_attention' + str(k) + '.png', dpi=150)
        plt.show()

    ################################# attention plot ends #################################################



        helper_funcs.evaluation(locals()['test_X' + str(k)], locals()['test_y' + str(k)], locals()['y_pred' + str(k + 1)],
                                look_back, n_columns, n_labels, locals()['scaler' + str(k)])
def main():
    look_back = 20  # number of previous timestamps used for training
    n_columns = 15  # total columns
    n_labels = 6  # number of labels
    split_ratio = 0.8  # train & test data split ratio

    file_list_train = glob.glob('preprocessed_data/train/*.csv')
    file_list_test = glob.glob('preprocessed_data/test/*.csv')

    file = open('results/Single_MLP_2.txt', 'w')
    sum_Smape = 0
    sum_Smape_PM25 = 0
    sum_Smape_PM10 = 0
    sum_Smape_NO2 = 0
    sum_Smape_CO = 0
    sum_Smape_O3 = 0
    sum_Smape_SO2 = 0

    for i in range(len(file_list_train)):
        locals()['dataset_train' + str(i)], locals()['scaled_train' + str(i)], locals()[
            'scaler_train' + str(i)] = helper_funcs.load_dataset(file_list_train[i])
        locals()['dataset_test' + str(i)], locals()['scaled_test' + str(i)], locals()[
            'scaler_test' + str(i)] = helper_funcs.load_dataset(file_list_test[i])

        # split into train and test sets
        locals()['train_X' + str(i)], locals()['train_y' + str(i)] = helper_funcs.split_dataset(
            locals()['scaled_train' + str(i)], look_back, n_columns, n_labels)
        locals()['test_X' + str(i)], locals()['test_y' + str(i)] = helper_funcs.split_dataset(
            locals()['scaled_test' + str(i)], look_back, n_columns, n_labels)

        model = build_model(locals()['train_X' + str(i)])

        import time
        start_time = time.time()

        # fit network
        history = model.fit(locals()['train_X' + str(i)], locals()['train_y' + str(i)], epochs=40, batch_size=120,
                            validation_data=(locals()['test_X' + str(i)], locals()['test_y' + str(i)]), verbose=2,
                            shuffle=False,
                            callbacks=[
                                keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=2,
                                                              mode='min')]
                            )

        end_time = time.time()
        print('--- %s seconds ---' % (end_time - start_time))

        # plot history
        # plt.plot(history.history['loss'], label='train')
        # plt.plot(history.history['val_loss'], label='test')
        # plt.legend()
        # plt.show()

        # make a prediction
        y_predict = model.predict(locals()['test_X' + str(i)])
        # results = helper_funcs.evaluation(locals()['test_X' + str(i)], locals()['test_y' + str(i)], y_predict,
        #                                   look_back, n_columns, n_labels, locals()['scaler' + str(i)])

        locals()['Smape' + str(i)] = helper_funcs.evaluation(locals()['test_X' + str(i)], locals()['test_y' + str(i)],
                                                             y_predict,
                                                             look_back, n_columns, n_labels,
                                                             locals()['scaler_test' + str(i)])

        locals()['Smape_PM25' + str(i)] = helper_funcs.evaluation_single(locals()['test_X' + str(i)],
                                                                         locals()['test_y' + str(i)],
                                                                         y_predict,
                                                                         look_back, n_columns, n_labels,
                                                                         locals()['scaler_test' + str(i)], 0)
        locals()['Smape_PM10' + str(i)] = helper_funcs.evaluation_single(locals()['test_X' + str(i)],
                                                                         locals()['test_y' + str(i)],
                                                                         y_predict,
                                                                         look_back, n_columns, n_labels,
                                                                         locals()['scaler_test' + str(i)], 1)
        locals()['Smape_NO2' + str(i)] = helper_funcs.evaluation_single(locals()['test_X' + str(i)],
                                                                        locals()['test_y' + str(i)],
                                                                        y_predict,
                                                                        look_back, n_columns, n_labels,
                                                                        locals()['scaler_test' + str(i)], 2)
        locals()['Smape_CO' + str(i)] = helper_funcs.evaluation_single(locals()['test_X' + str(i)],
                                                                       locals()['test_y' + str(i)],
                                                                       y_predict,
                                                                       look_back, n_columns, n_labels,
                                                                       locals()['scaler_test' + str(i)], 3)
        locals()['Smape_O3' + str(i)] = helper_funcs.evaluation_single(locals()['test_X' + str(i)],
                                                                       locals()['test_y' + str(i)],
                                                                       y_predict,
                                                                       look_back, n_columns, n_labels,
                                                                       locals()['scaler_test' + str(i)], 4)
        locals()['Smape_SO2' + str(i)] = helper_funcs.evaluation_single(locals()['test_X' + str(i)],
                                                                        locals()['test_y' + str(i)],
                                                                        y_predict,
                                                                        look_back, n_columns, n_labels,
                                                                        locals()['scaler_test' + str(i)], 5)

        file.write('Current file index is: ' + str(i) + '\n')
        file.write('Smape:' + ' ' + str(locals()['Smape' + str(i)]) + '\n')
        file.write('Smape_PM25:' + ' ' + str(locals()['Smape_PM25' + str(i)]) + '\n')
        file.write('Smape_PM10:' + ' ' + str(locals()['Smape_PM10' + str(i)]) + '\n')
        file.write('Smape_NO2:' + ' ' + str(locals()['Smape_NO2' + str(i)]) + '\n')
        file.write('Smape_CO:' + ' ' + str(locals()['Smape_CO' + str(i)]) + '\n')
        file.write('Smape_O3:' + ' ' + str(locals()['Smape_O3' + str(i)]) + '\n')
        file.write('Smape_SO2:' + ' ' + str(locals()['Smape_SO2' + str(i)]) + '\n')
        file.write('\n')

        sum_Smape = sum_Smape + locals()['Smape' + str(i)]
        sum_Smape_PM25 = sum_Smape_PM25 + locals()['Smape_PM25' + str(i)]
        sum_Smape_PM10 = sum_Smape_PM10 + locals()['Smape_PM10' + str(i)]
        sum_Smape_NO2 = sum_Smape_NO2 + locals()['Smape_NO2' + str(i)]
        sum_Smape_CO = sum_Smape_CO + locals()['Smape_CO' + str(i)]
        sum_Smape_O3 = sum_Smape_O3 + locals()['Smape_O3' + str(i)]
        sum_Smape_SO2 = sum_Smape_SO2 + locals()['Smape_SO2' + str(i)]

    file.write('avg_Smape: ' + str(sum_Smape / len(file_list_test)) + '\n')
    file.write('avg_Smape_PM25: ' + str(sum_Smape_PM25 / len(file_list_test)) + '\n')
    file.write('avg_Smape_PM10: ' + str(sum_Smape_PM10 / len(file_list_test)) + '\n')
    file.write('avg_Smape_NO2: ' + str(sum_Smape_NO2 / len(file_list_test)) + '\n')
    file.write('avg_Smape_CO: ' + str(sum_Smape_CO / len(file_list_test)) + '\n')
    file.write('avg_Smape_O3: ' + str(sum_Smape_O3 / len(file_list_test)) + '\n')
    file.write('avg_Smape_SO2: ' + str(sum_Smape_SO2 / len(file_list_test)) + '\n')
    file.write('training time:' + str(end_time - start_time))
def main():

    # network parameters
    task_num = 6
    con_layer1 = 128  # for 6 users
    # con_layer1 = 256
    con_layer1_filter = 1
    con_layer2 = 64
    con_layer2_filter = 4
    lstm_layer = 64
    drop = 0.2
    r_drop = 0.2
    l2_value = 0.001
    shared_layer = 576
    dense_num = 64

    look_back = 30  # number of previous timestamps used for training
    n_columns = 276  # total columns
    n_labels = 51  # number of labels
    split_ratio = 0.8  # train & test data split ratio

    trainX_list = []
    trainy_list = []
    testX_list = []
    testy_list = []
    file_list = glob.glob('../data_csv/train/6users/*.csv')

    for i in range(len(file_list)):
        locals()['dataset' + str(i)] = file_list[i]
        locals()['dataset' + str(i)], locals()['scaled' + str(i)], locals()[
            'scaler' + str(i)] = helper_funcs.load_dataset(locals()['dataset' +
                                                                    str(i)])
        locals()['train_X' + str(i)], locals()['train_y' + str(i)], locals()[
            'test_X' +
            str(i)], locals()['test_y' + str(i)] = helper_funcs.split_dataset(
                locals()['dataset' + str(i)],
                locals()['scaled' + str(i)], look_back, n_columns, n_labels,
                split_ratio)
        trainX_list.append(locals()['train_X' + str(i)])
        trainy_list.append(locals()['train_y' + str(i)])
        testX_list.append(locals()['test_X' + str(i)])
        testy_list.append(locals()['test_y' + str(i)])

    model = build_model(trainX_list, task_num, con_layer1, con_layer1_filter,
                        con_layer2, con_layer2_filter, lstm_layer, drop,
                        r_drop, l2_value, shared_layer, dense_num, n_labels)

    import time
    start_time = time.time()

    # fit network
    history = model.fit(
        trainX_list,
        trainy_list,
        epochs=50,
        batch_size=60,
        validation_split=0.25,
        # validation_data=(testX_list, testy_list),
        verbose=2,
        shuffle=False,
        callbacks=[
            keras.callbacks.EarlyStopping(monitor='val_loss',
                                          min_delta=0,
                                          patience=20,
                                          verbose=2,
                                          mode='min')
        ])
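
    # Note: EarlyStopping does not restore the best weights by default; passing
    # restore_best_weights=True (Keras >= 2.2.3) would keep the weights from the
    # best val_loss epoch instead of the last epoch run.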
    end_time = time.time()
    print('--- %s seconds ---' % (end_time - start_time))

    y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6 = model.predict(
        testX_list)
    # y_pred1,y_pred2,y_pred3,y_pred4,y_pred5,y_pred6,y_pred7,y_pred8,y_pred9,y_pred10,y_pred11,y_pred12,y_pred13,y_pred14,y_pred15,y_pred16,y_pred17,y_pred18,y_pred19,y_pred20,y_pred21,y_pred22,y_pred23,y_pred24,y_pred25,y_pred26,y_pred27 \
    #     , y_pred28,y_pred29,y_pred30,y_pred31,y_pred32,y_pred33,y_pred34,y_pred35,y_pred36,y_pred37,y_pred38,y_pred39,y_pred40 = model.predict(testX_list)

    ####################  attention plot - TIME STEP level ################################

    attention_vectors = []

    ##### Case Study, input_dimension ################
    #### axis = 1 is plotting the attention on input_dim, axis = 2 is plotting the attention on TIME_STEP dimension
    for k in range(len(file_list)):
        attention_vectors = []  # reset per user so the averaged heatmap reflects only user k
        for j in range(10):
            attention_vector = np.mean(helper_funcs.get_activationsT(
                k,
                model,
                testX_list[k][0:20, :, :],
                print_shape_only=True,
                layer_name='attention_vect' + str(k))[0],
                                       axis=1).squeeze()
            # print('attention =', attention_vector)
            # assert (np.sum(attention_vector) - 1.0) < 1e-5
            attention_vectors.append(attention_vector)
            print('.....')
            print(len(attention_vector))

        attention_vector_final = np.mean(np.array(attention_vectors), axis=0)
        print('attention final=', attention_vector_final)
        # print('attention final length=', len(attention_vector_final))

        import seaborn as sns
        import matplotlib.pylab as plt

        # attention_vector_final = np.delete(attention_vector_final, np.s_[225:], axis=1)
        ax = sns.heatmap(attention_vector_final, cmap="BuPu")
        plt.savefig('time_dim_heatmap2' + str(k) + '.png', dpi=150)
        plt.show()
        plt.clf()  # clear the figure so the next user's heatmap starts fresh

        # # plot part.
        # import matplotlib.pyplot as plt
        # import pandas as pd
        #
        #
        # df = pd.DataFrame(attention_vector_final[1], columns=['attention (%)'])
        # df.to_csv('../results/attention_plot/TimeAtt1/30_TIME_STEP_attention'+str(k)+'.csv')
        # df.plot(kind='bar',title='Attention Mechanism as ''a function of input'' dimensions.')
        # # plt.figure(figsize=(100, 100))
        # plt.xticks(rotation=90)
        # plt.savefig('../results/attention_plot/TimeAtt1/30_TIME_STEP_attention'+str(k)+'.png',dpi=150)
        # plt.show()

        ################################# attention plot ends #################################################

        helper_funcs.evaluation(locals()['test_X' + str(k)],
                                locals()['test_y' + str(k)],
                                locals()['y_pred' + str(k + 1)], look_back,
                                n_columns, n_labels,
                                locals()['scaler' + str(k)])
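
# Hedged alternative to the locals()['name' + str(i)] pattern used above: in
# Python 3, writes to locals() inside a function are not guaranteed to bind real
# variables, so plain lists are the safer idiom. This sketch assumes the same
# helper_funcs.load_dataset / split_dataset signatures seen in this example.
import helper_funcs  # project-local module used throughout these examples

def load_all_users(file_list, look_back, n_columns, n_labels, split_ratio):
    trainX_list, trainy_list, testX_list, testy_list, scalers = [], [], [], [], []
    for path in file_list:
        dataset, scaled, scaler = helper_funcs.load_dataset(path)
        train_X, train_y, test_X, test_y = helper_funcs.split_dataset(
            dataset, scaled, look_back, n_columns, n_labels, split_ratio)
        trainX_list.append(train_X)
        trainy_list.append(train_y)
        testX_list.append(test_X)
        testy_list.append(test_y)
        scalers.append(scaler)  # kept for inverse-transforming predictions later
    return trainX_list, trainy_list, testX_list, testy_list, scalers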
Example #10
def main():

    # network parameters
    task_num = 9
    lstm_layer = 64
    drop = 0.2
    r_drop = 0.2
    l2_value = 0.001
    shared_layer = 576
    dense_num = 64

    look_back = 20  # number of previous timesteps used for training
    n_columns = 15  # total columns
    n_labels = 6  # number of labels
    split_ratio = 0.8  # train & test data split ratio

    # trainX_list = []
    # trainy_list = []
    # testX_list = []
    # testy_list = []
    file_list_train = glob.glob('preprocessed_data/train/*.csv')
    file_list_test = glob.glob('preprocessed_data/test/*.csv')

    # path = r'data/US/market/merged_data'
    # allFiles = glob.glob(path + "/*.csv")
    with open('train_combined.csv', 'wb') as outfile:
        for i, fname in enumerate(file_list_train):
            with open(fname, 'rb') as infile:
                if i != 0:
                    infile.readline()  # Throw away header on all but first file
                # Block copy rest of file from input to output without parsing
                shutil.copyfileobj(infile, outfile)
                print(fname + " has been imported.")
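
    # Note: glob.glob returns files in arbitrary, platform-dependent order;
    # sorting file_list_train would make train_combined.csv deterministic.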

    train_data, scaled, scaler = helper_funcs.load_dataset('train_combined.csv')

    trainX, trainy = helper_funcs.split_dataset(scaled, look_back, n_columns, n_labels)

    file = open('results/globalAtt_1.txt', 'w')
    sum_Smape = 0
    sum_Smape_PM25 = 0
    sum_Smape_PM10 = 0
    sum_Smape_NO2 = 0
    sum_Smape_CO = 0
    sum_Smape_O3 = 0
    sum_Smape_SO2 = 0

    for i in range(len(file_list_test)):  # iterate over test files; the body loads file_list_test[i]
        # train_data = 'data/preprocessed_data/train/bj_huairou.csv'
        # test_data = 'data/preprocessed_data/test/bj_huairou_201805.csv'

        # locals()['dataset_train' + str(i)], locals()['scaled_train' + str(i)], locals()[
        #     'scaler_train' + str(i)] = helper_funcs.load_dataset(file_list_train[i])
        locals()['dataset_test' + str(i)], locals()['scaled_test' + str(i)], locals()[
            'scaler_test' + str(i)] = helper_funcs.load_dataset(file_list_test[i])

        # split into train and test sets
        # locals()['train_X' + str(i)], locals()['train_y' + str(i)] = helper_funcs.split_dataset(
        #     locals()['scaled_train' + str(i)], look_back, n_columns, n_labels)
        locals()['test_X' + str(i)], locals()['test_y' + str(i)] = helper_funcs.split_dataset(
            locals()['scaled_test' + str(i)], look_back, n_columns, n_labels)

        model = build_model(trainX, lstm_layer, drop, r_drop, l2_value, dense_num, n_labels)

        import time
        start_time = time.time()

        # fit network
        history = model.fit(trainX, trainy,
                            epochs=100,
                            batch_size=120,
                            validation_data=(locals()['test_X' + str(i)], locals()['test_y' + str(i)]), verbose=2,
                            shuffle=False,
                            callbacks=[
                                keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=2,
                                                              mode='min')]
                            )
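
        # Note: validation_data here is the test split, so early stopping picks
        # its stopping epoch using test data; a held-out validation split (as in
        # the other examples) would avoid that leakage.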

        end_time = time.time()
        print('--- %s seconds ---' % (end_time - start_time))

        # plot history
        # plt.plot(history.history['loss'], label='train')
        # plt.plot(history.history['val_loss'], label='test')
        # plt.legend()
        # plt.show()

        # make a prediction
        y_predict = model.predict(locals()['test_X' + str(i)])
        # results = helper_funcs.evaluation(locals()['test_X' + str(i)], locals()['test_y' + str(i)], y_predict, look_back, n_columns, n_labels, locals()['scaler' + str(i)])

        locals()['Smape' + str(i)] = helper_funcs.evaluation(locals()['test_X' + str(i)], locals()['test_y' + str(i)],
                                                             y_predict,
                                                             look_back, n_columns, n_labels,
                                                             locals()['scaler_test' + str(i)])

        locals()['Smape_PM25' + str(i)] = helper_funcs.evaluation_single(locals()['test_X' + str(i)],
                                                                         locals()['test_y' + str(i)],
                                                                         y_predict,
                                                                         look_back, n_columns, n_labels,
                                                                         locals()['scaler_test' + str(i)], 0)
        locals()['Smape_PM10' + str(i)] = helper_funcs.evaluation_single(locals()['test_X' + str(i)],
                                                                         locals()['test_y' + str(i)],
                                                                         y_predict,
                                                                         look_back, n_columns, n_labels,
                                                                         locals()['scaler_test' + str(i)], 1)
        locals()['Smape_NO2' + str(i)] = helper_funcs.evaluation_single(locals()['test_X' + str(i)],
                                                                        locals()['test_y' + str(i)],
                                                                        y_predict,
                                                                        look_back, n_columns, n_labels,
                                                                        locals()['scaler_test' + str(i)], 2)
        locals()['Smape_CO' + str(i)] = helper_funcs.evaluation_single(locals()['test_X' + str(i)],
                                                                       locals()['test_y' + str(i)],
                                                                       y_predict,
                                                                       look_back, n_columns, n_labels,
                                                                       locals()['scaler_test' + str(i)], 3)
        locals()['Smape_O3' + str(i)] = helper_funcs.evaluation_single(locals()['test_X' + str(i)],
                                                                       locals()['test_y' + str(i)],
                                                                       y_predict,
                                                                       look_back, n_columns, n_labels,
                                                                       locals()['scaler_test' + str(i)], 4)
        locals()['Smape_SO2' + str(i)] = helper_funcs.evaluation_single(locals()['test_X' + str(i)],
                                                                        locals()['test_y' + str(i)],
                                                                        y_predict,
                                                                        look_back, n_columns, n_labels,
                                                                        locals()['scaler_test' + str(i)], 5)

        file.write('Current file index is: ' + str(i) + '\n')
        file.write('Smape:' + ' ' + str(locals()['Smape' + str(i)]) + '\n')
        file.write('Smape_PM25:' + ' ' + str(locals()['Smape_PM25' + str(i)]) + '\n')
        file.write('Smape_PM10:' + ' ' + str(locals()['Smape_PM10' + str(i)]) + '\n')
        file.write('Smape_NO2:' + ' ' + str(locals()['Smape_NO2' + str(i)]) + '\n')
        file.write('Smape_CO:' + ' ' + str(locals()['Smape_CO' + str(i)]) + '\n')
        file.write('Smape_O3:' + ' ' + str(locals()['Smape_O3' + str(i)]) + '\n')
        file.write('Smape_SO2:' + ' ' + str(locals()['Smape_SO2' + str(i)]) + '\n')
        file.write('\n')

        sum_Smape = sum_Smape + locals()['Smape' + str(i)]
        sum_Smape_PM25 = sum_Smape_PM25 + locals()['Smape_PM25' + str(i)]
        sum_Smape_PM10 = sum_Smape_PM10 + locals()['Smape_PM10' + str(i)]
        sum_Smape_NO2 = sum_Smape_NO2 + locals()['Smape_NO2' + str(i)]
        sum_Smape_CO = sum_Smape_CO + locals()['Smape_CO' + str(i)]
        sum_Smape_O3 = sum_Smape_O3 + locals()['Smape_O3' + str(i)]
        sum_Smape_SO2 = sum_Smape_SO2 + locals()['Smape_SO2' + str(i)]

    file.write('avg_Smape: ' + str(sum_Smape / len(file_list_test)) + '\n')
    file.write('avg_Smape_PM25: ' + str(sum_Smape_PM25 / len(file_list_test)) + '\n')
    file.write('avg_Smape_PM10: ' + str(sum_Smape_PM10 / len(file_list_test)) + '\n')
    file.write('avg_Smape_NO2: ' + str(sum_Smape_NO2 / len(file_list_test)) + '\n')
    file.write('avg_Smape_CO: ' + str(sum_Smape_CO / len(file_list_test)) + '\n')
    file.write('avg_Smape_O3: ' + str(sum_Smape_O3 / len(file_list_test)) + '\n')
    file.write('avg_Smape_SO2: ' + str(sum_Smape_SO2 / len(file_list_test)) + '\n')
    file.write('training time: ' + str(end_time - start_time) + '\n')
    file.close()
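
# The header-skipping concatenation in the example above can be factored into a
# small reusable helper; a minimal sketch using only the standard library (the
# function name is hypothetical). It assumes each CSV has a one-line header and
# ends with a newline.
import glob
import shutil

def combine_csvs(pattern, out_path):
    """Concatenate CSVs matching `pattern`, keeping only the first header row."""
    with open(out_path, 'wb') as outfile:
        for i, fname in enumerate(sorted(glob.glob(pattern))):
            with open(fname, 'rb') as infile:
                if i != 0:
                    infile.readline()  # skip the duplicate header line
                shutil.copyfileobj(infile, outfile)  # block-copy the rest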
Example #11
def main():

    look_back = 20  # number of previous timesteps used for training
    n_columns = 276  # total columns
    n_labels = 51  # number of labels
    split_ratio = 0.8  # train & test data split ratio

    file_list = glob.glob('data_csv/train/*.csv')

    file = open('results/Single_MLP_40users3.txt', 'w')
    sum_bacc = 0
    sum_TPR = 0
    Num_tp = 0
    Num_fn = 0
    Num_fp = 0
    Num_tn = 0
    sum_precision = 0
    sum_F1 = 0
    train_time = 0

    for i in range(len(file_list)):
        locals()['dataset' + str(i)] = file_list[i]

        locals()['dataset' + str(i)], locals()['scaled' + str(i)], locals()[
            'scaler' + str(i)] = helper_funcs.load_dataset(locals()['dataset' +
                                                                    str(i)])

        # split into train and test sets
        locals()['train_X' + str(i)], locals()['train_y' + str(i)], locals()[
            'test_X' +
            str(i)], locals()['test_y' + str(i)] = helper_funcs.split_dataset(
                locals()['dataset' + str(i)],
                locals()['scaled' + str(i)], look_back, n_columns, n_labels,
                split_ratio)

        model = build_model(locals()['train_X' + str(i)])

        import time
        start_time = time.time()

        # fit network
        history = model.fit(
            locals()['train_X' + str(i)],
            locals()['train_y' + str(i)],
            epochs=40,
            batch_size=60,
            # validation_data=(test_X, test_y),
            validation_split=0.25,
            verbose=2,
            shuffle=False,
            callbacks=[
                keras.callbacks.EarlyStopping(monitor='val_loss',
                                              min_delta=0,
                                              patience=10,
                                              mode='min')
            ])

        end_time = time.time()
        print('--- %s seconds ---' % (end_time - start_time))

        y_predict = model.predict(locals()['test_X' + str(i)])

        results = helper_funcs.evaluation(locals()['test_X' + str(i)],
                                          locals()['test_y' + str(i)],
                                          y_predict, look_back, n_columns,
                                          n_labels,
                                          locals()['scaler' + str(i)])
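
        # results layout (inferred from the writes below): [0]=accuracy, [1]=TPR,
        # [2]=TNR, [3]=balanced accuracy, [4]=TP, [5]=FN, [6]=FP, [7]=TN,
        # [8]=precision, [9]=F1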

        sum_bacc = sum_bacc + results[3]
        sum_TPR = sum_TPR + results[1]
        Num_tp = Num_tp + results[4]
        Num_fn = Num_fn + results[5]
        Num_fp = Num_fp + results[6]
        Num_tn = Num_tn + results[7]
        sum_precision = sum_precision + results[8]
        sum_F1 = sum_F1 + results[9]
        train_time = train_time + (end_time - start_time)

        file.write('Accuracy:' + ' ' + str(results[0]) + ' ')
        file.write('TPR:' + ' ' + str(results[1]) + ' ')
        file.write('TNR:' + ' ' + str(results[2]) + ' ')
        file.write('Bacc:' + ' ' + str(results[3]) + '\n')
        file.write('TP No.:' + ' ' + str(results[4]) + '\n')
        file.write('FN No.:' + ' ' + str(results[5]) + '\n')
        file.write('FP No.:' + ' ' + str(results[6]) + '\n')
        file.write('TN No.:' + ' ' + str(results[7]) + '\n')
        file.write('Precision:' + ' ' + str(results[8]) + '\n')
        file.write('F1:' + ' ' + str(results[9]) + '\n')

    file.write('avg_bacc: ' + str(sum_bacc / len(file_list)) + '\n')
    file.write('avg_TPR: ' + str(sum_TPR / len(file_list)) + '\n')
    file.write('avg_precision: ' + str(sum_precision / len(file_list)) + '\n')
    file.write('avg_F1: ' + str(sum_F1 / len(file_list)) + '\n')
    file.write('sum_Num_tp: ' + str(Num_tp) + '\n')
    file.write('sum_Num_fn: ' + str(Num_fn) + '\n')
    file.write('sum_Num_fp: ' + str(Num_fp) + '\n')
    file.write('sum_Num_tn: ' + str(Num_tn) + '\n')
    file.write('train_time: ' + str(train_time) + '\n')
    file.close()
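
# The pooled TP/FN/FP/TN totals written above also support micro-averaged
# metrics alongside the per-user averages. A hedged sketch (the function name
# is hypothetical, not part of helper_funcs):
def micro_metrics(tp, fn, fp, tn):
    """Micro-averaged classification metrics from pooled confusion counts."""
    tpr = tp / (tp + fn) if (tp + fn) else 0.0        # recall / sensitivity
    tnr = tn / (tn + fp) if (tn + fp) else 0.0        # specificity
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    f1 = 2 * precision * tpr / (precision + tpr) if (precision + tpr) else 0.0
    bacc = (tpr + tnr) / 2.0                          # balanced accuracy
    return {'TPR': tpr, 'TNR': tnr, 'precision': precision, 'F1': f1, 'Bacc': bacc}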