Example #1
0
def generate_test_result(attempt_path):
    """Average the predictions of the saved LSTM models and write them to CSV.

    For each of the first 8 rows of ``../../processed_data/cluster_8.csv``
    the three models stored under
    ``<attempt_path>final_try_on_idx<i>_<j>_batch_ratio_0.002/`` (j = 1..3)
    are rebuilt, their weights are loaded, and their flattened predictions
    are averaged.  The labelled results of all clusters are concatenated and
    written to ``<attempt_path>result.csv``.

    Args:
        attempt_path: Path prefix of the attempt directory.  It is
            concatenated directly with file names, so it has to end with a
            path separator.
    """
    n_models = 3
    total_result_list = []
    cluster = pd.read_table('../../processed_data/cluster_8.csv',
                            header=None).values
    for i in range(8):
        # Each row holds a single comma-separated string of ids.  Build a
        # real list: on Python 3 ``map`` returns a one-shot iterator, which
        # has no len() and would be exhausted after its first use below.
        cluster_idx = [int(idx) for idx in cluster[i].tolist()[0].split(',')]
        path = attempt_path + 'final_try_on_idx' + str(i)
        # The flattened prediction is expected to carry 45 values per id.
        temp_result = np.zeros(len(cluster_idx) * 45)
        for j in range(1, n_models + 1):
            path1 = path + '_' + str(j) + '_batch_ratio_0.002/'
            # load the test data
            test_array, data_dim = get_test_data_array_csv(cluster_idx)
            test_data = construct_test_data_for_lstm(test_array)
            # Only the first line of the MAPE file is needed; the context
            # manager makes sure the handle is closed again.
            csv_path = path1 + 'LSTM_MAPE_list.csv'
            with open(csv_path) as mape_file:
                entry = mape_file.readline()
            entry_list = entry.strip('\n').split(',')
            # initial model and load model; fields 1 and 2 of the entry are
            # handed to the model builder as its two arguments.
            model = initial_lstm_model(entry_list[1], int(entry_list[2]))
            model.load_weights(path1 + 'model_district_1.h5')
            predicted = model.predict(test_data)
            predict = np.asarray(predicted.flatten().tolist())
            temp_result = np.add(temp_result, predict)
        mean_result = temp_result / float(n_models)
        result_list = return_predict_label_with_idx(mean_result, cluster_idx)
        total_result_list += result_list
    write_list_to_csv(total_result_list, attempt_path + 'result.csv')
Example #2
0
def save_poi_data(district_dict, path_in, path_out):
    """Load the POI records and write them to a CSV file with a header row.

    The header consists of ``district_id`` followed by the 25 columns
    ``class01`` ... ``class25``.

    Args:
        district_dict: Forwarded unchanged to ``load_poi``.
        path_in: Forwarded to ``load_poi`` as its second argument.
        path_out: Destination handed to ``write_list_to_csv``.
    """
    records = load_poi(district_dict, path_in)
    # Zero-padded, two-digit class numbers: class01, class02, ..., class25.
    class_columns = ['class' + '%02d' % class_no for class_no in range(1, 26)]
    columns = ['district_id'] + class_columns
    write_list_to_csv(records, path_out=path_out, header=columns)
Example #3
0
def generate_test_result(attempt_path):
    """Average the predictions of the saved LSTM models and write them to CSV.

    For each of the first 8 rows of ``../../processed_data/cluster_8.csv``
    the three models stored under
    ``<attempt_path>final_try_on_idx<i>_<j>_batch_ratio_0.002/`` (j = 1..3)
    are rebuilt, their weights are loaded, and their flattened predictions
    are averaged.  The labelled results of all clusters are concatenated and
    written to ``<attempt_path>result.csv``.

    Args:
        attempt_path: Path prefix of the attempt directory.  It is
            concatenated directly with file names, so it has to end with a
            path separator.
    """
    n_models = 3
    total_result_list = []
    cluster = pd.read_table('../../processed_data/cluster_8.csv',
                            header=None).values
    for i in range(8):
        # Each row holds a single comma-separated string of ids.  Build a
        # real list: on Python 3 ``map`` returns a one-shot iterator, which
        # has no len() and would be exhausted after its first use below.
        cluster_idx = [int(idx) for idx in cluster[i].tolist()[0].split(',')]
        path = attempt_path + 'final_try_on_idx' + str(i)
        # The flattened prediction is expected to carry 45 values per id.
        temp_result = np.zeros(len(cluster_idx) * 45)
        for j in range(1, n_models + 1):
            path1 = path + '_' + str(j) + '_batch_ratio_0.002/'
            # load the test data
            test_array, data_dim = get_test_data_array_csv(cluster_idx)
            test_data = construct_test_data_for_lstm(test_array)
            # Only the first line of the MAPE file is needed; the context
            # manager makes sure the handle is closed again.
            csv_path = path1 + 'LSTM_MAPE_list.csv'
            with open(csv_path) as mape_file:
                entry = mape_file.readline()
            entry_list = entry.strip('\n').split(',')
            # initial model and load model; fields 1 and 2 of the entry are
            # handed to the model builder as its two arguments.
            model = initial_lstm_model(entry_list[1], int(entry_list[2]))
            model.load_weights(path1 + 'model_district_1.h5')
            predicted = model.predict(test_data)
            predict = np.asarray(predicted.flatten().tolist())
            temp_result = np.add(temp_result, predict)
        mean_result = temp_result / float(n_models)
        result_list = return_predict_label_with_idx(mean_result, cluster_idx)
        total_result_list += result_list
    write_list_to_csv(total_result_list, attempt_path + 'result.csv')
            weather_array[idx, 0] = (idx + 1) % 144
            if (idx + 1) % 144 == 0:
                weather_array[idx, 0] = 144
            if idx % 144 == 0:
                weather_array[idx, 0] = 1
    return weather_array



if __name__ == '__main__':
    # Dump the weather data once, then the order and traffic data of every
    # district (ids 1..66), each into its own CSV file.
    # NOTE(review): the data is fetched under 'test_*' names but written
    # below processed_data/train/ -- confirm that this is intended.
    st = time.time()
    #
    weather_path = '../../processed_data/train/weather_data.csv'
    weather_array = get_weather_data_array_db('test_weather_data')
    write_list_to_csv(weather_array.tolist(), weather_path)

    for district_id in range(1, 67):
        order_array = get_order_data_array_db(district_id, 'test_order_data')
        traffic_array = get_traffic_data_array_db(district_id, 'test_traffic_data')

        # Both output files of a district share the same path prefix.
        district_prefix = '../../processed_data/train/D' + str(district_id)
        write_list_to_csv(order_array.tolist(), district_prefix + '_order_data.csv')
        write_list_to_csv(traffic_array.tolist(), district_prefix + '_traffic_data.csv')

        print('==================================================')
Example #5
0
        cluster_idx = cluster[i].tolist()[0].split(',')
        cluster_idx = map(int, cluster_idx)
        for j in range(1, 3+1,1):

            attempt = [3, 'final_try_on_idx'+str(i)+'_'+str(j)]
            MODEL_OUT_PATH = '../../result/attempt' + str(attempt[0]) + '/' \
                             + str(attempt[1]) + '_batch_ratio_' + str(batch_size_ratio) + '/'
            mape_list = []
            st_time = time.time()
            d = os.path.dirname(MODEL_OUT_PATH)
            if not os.path.exists(d):
                os.makedirs(d)
            # get data from csv or mongodb
            data_array, train_dim = get_train_data_array_csv(cluster_idx)
            # construct data
            train_array, label_array = construct_data_for_lstm(data_array)
            # split data by 7:2:1
            (train_data, train_label), (validate_data, validate_label), (test_data, test_label) \
                = train_data_split(train_array, label_array)
            # save the test_data into the models directory
            save_test_csv(MODEL_OUT_PATH, test_data, test_label)
            mape_entry = multi_model(train_data, train_label, validate_data, validate_label, 1, train_dim)
            mape_list.append(mape_entry)
            # save the validation MAPE for every model and overall MAPE
            out_path = MODEL_OUT_PATH + 'LSTM_MAPE_list.csv'
            write_list_to_csv(mape_list, out_path)
            ed_time = time.time()
            print(' Overall Time: %.2f hours' % ((ed_time - st_time) / 3600))
            print('overall mape loss: %f\n' % (mape_sum / mape_num))
            mape_list.append(['overall mape']+[mape_sum / mape_num])
            # fill the 0 time slot
            weather_array[idx, 0] = (idx + 1) % 144
            if (idx + 1) % 144 == 0:
                weather_array[idx, 0] = 144
            if idx % 144 == 0:
                weather_array[idx, 0] = 1
    return weather_array


if __name__ == '__main__':
    st = time.time()
    #
    weather_array = get_weather_data_array_db('test_weather_data')
    weather_list = weather_array.tolist()
    weather_path = '../../processed_data/train/weather_data.csv'
    write_list_to_csv(weather_list, weather_path)

    for district_id in range(1, 66 + 1, 1):
        order_array = get_order_data_array_db(district_id, 'test_order_data')
        traffic_array = get_traffic_data_array_db(district_id,
                                                  'test_traffic_data')

        order_list = order_array.tolist()
        traffic_list = traffic_array.tolist()

        order_path = '../../processed_data/train/D' + str(
            district_id) + '_order_data.csv'
        traffic_path = '../../processed_data/train/D' + str(
            district_id) + '_traffic_data.csv'

        write_list_to_csv(order_list, order_path)