import numpy as np
import pandas as pd


def generate_test_result(attempt_path):
    total_result_list = []
    path = '../../processed_data/cluster_8.csv'
    cluster = pd.read_table(path, header=None).values
    for i in range(8):
        # each row of cluster_8.csv holds a comma-separated list of district ids
        cluster_idx = cluster[i].tolist()[0].split(',')
        cluster_idx = list(map(int, cluster_idx))  # list(), so len() works on Python 3
        path = attempt_path + 'final_try_on_idx' + str(i)
        temp_result = np.zeros(len(cluster_idx) * 45)
        for j in range(1, 3 + 1):
            path1 = path + '_' + str(j) + '_batch_ratio_0.002/'
            # load the test data
            test_array, data_dim = get_test_data_array_csv(cluster_idx)
            test_data = construct_test_data_for_lstm(test_array)
            csv_path = path1 + 'LSTM_MAPE_list.csv'
            with open(csv_path) as f:
                entry = f.readlines()[0].strip('\n')
            entry_list = entry.split(',')
            # initialise the model and load the trained weights
            model = initial_lstm_model(entry_list[1], int(entry_list[2]))
            model_path = path1 + 'model_district_1.h5'
            model.load_weights(model_path)
            predicted = model.predict(test_data)
            predict = np.asarray(predicted.flatten().tolist())
            temp_result = np.add(temp_result, predict)
        # average the three models trained for this cluster
        mean_result = temp_result / 3.0
        result_list = return_predict_label_with_idx(mean_result, cluster_idx)
        total_result_list += result_list
    write_list_to_csv(total_result_list, attempt_path + 'result.csv')
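# A minimal usage sketch: attempt_path is the directory that holds the
# per-cluster model folders written during training; the attempt3 path below
# just follows the MODEL_OUT_PATH pattern used in the training loop and may
# differ in an actual run.
# generate_test_result('../../result/attempt3/')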
def save_poi_data(district_dict, path_in, path_out):
    poi_list = load_poi(district_dict, path_in)
    # header: district_id followed by the 25 first-level POI classes
    header = ['district_id']
    for i in range(1, 25 + 1):
        header.append('class%02d' % i)
    write_list_to_csv(poi_list, path_out=path_out, header=header)
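# Example call (both paths are hypothetical; district_dict is assumed to map
# the raw district hashes to the integer ids 1..66 used elsewhere in this repo):
# save_poi_data(district_dict, '../../raw_data/poi_data', '../../processed_data/poi_data.csv')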
        # fill the 0 time slot: slot numbers run 1..144 within each day
        weather_array[idx, 0] = (idx + 1) % 144
        if (idx + 1) % 144 == 0:
            weather_array[idx, 0] = 144
        if idx % 144 == 0:
            weather_array[idx, 0] = 1
    return weather_array


if __name__ == '__main__':
    st = time.time()
    # dump the weather data, then the per-district order and traffic data
    weather_array = get_weather_data_array_db('test_weather_data')
    weather_list = weather_array.tolist()
    weather_path = '../../processed_data/train/weather_data.csv'
    write_list_to_csv(weather_list, weather_path)
    for district_id in range(1, 66 + 1):
        order_array = get_order_data_array_db(district_id, 'test_order_data')
        traffic_array = get_traffic_data_array_db(district_id, 'test_traffic_data')
        order_list = order_array.tolist()
        traffic_list = traffic_array.tolist()
        order_path = '../../processed_data/train/D' + str(district_id) + '_order_data.csv'
        traffic_path = '../../processed_data/train/D' + str(district_id) + '_traffic_data.csv'
        write_list_to_csv(order_list, order_path)
        write_list_to_csv(traffic_list, traffic_path)
        print('==================================================')
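# Sanity check for the slot numbering above: idx 0 -> slot 1, idx 142 -> slot 143,
# idx 143 -> slot 144, idx 144 -> slot 1 again, i.e. the 1..144 ten-minute slots
# of each day.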
import os
import time

import pandas as pd

# the cluster loading and outer loop mirror generate_test_result; the
# batch_size_ratio of 0.002 matches the folder names read back at test time
batch_size_ratio = 0.002
path = '../../processed_data/cluster_8.csv'
cluster = pd.read_table(path, header=None).values
for i in range(8):
    cluster_idx = cluster[i].tolist()[0].split(',')
    cluster_idx = list(map(int, cluster_idx))
    # train three models per cluster and keep each one's MAPE
    for j in range(1, 3 + 1):
        attempt = [3, 'final_try_on_idx' + str(i) + '_' + str(j)]
        MODEL_OUT_PATH = ('../../result/attempt' + str(attempt[0]) + '/'
                          + str(attempt[1]) + '_batch_ratio_' + str(batch_size_ratio) + '/')
        mape_list = []
        st_time = time.time()
        d = os.path.dirname(MODEL_OUT_PATH)
        if not os.path.exists(d):
            os.makedirs(d)
        # get data from csv or mongodb
        data_array, train_dim = get_train_data_array_csv(cluster_idx)
        # construct data
        train_array, label_array = construct_data_for_lstm(data_array)
        # split data by 7:2:1
        (train_data, train_label), (validate_data, validate_label), (test_data, test_label) \
            = train_data_split(train_array, label_array)
        # save the test_data into the models directory
        save_test_csv(MODEL_OUT_PATH, test_data, test_label)
        mape_entry = multi_model(train_data, train_label,
                                 validate_data, validate_label, 1, train_dim)
        mape_list.append(mape_entry)
        # mape_sum and mape_num are accumulated elsewhere in the full script;
        # the overall MAPE is appended before the list is written out
        print('overall mape loss: %f\n' % (mape_sum / mape_num))
        mape_list.append(['overall mape'] + [mape_sum / mape_num])
        # save the validation MAPE for every model and the overall MAPE
        out_path = MODEL_OUT_PATH + 'LSTM_MAPE_list.csv'
        write_list_to_csv(mape_list, out_path)
        ed_time = time.time()
        print(' Overall Time: %.2f hours' % ((ed_time - st_time) / 3600))
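# Note on the CSV layout: generate_test_result reads the first row of
# LSTM_MAPE_list.csv back and passes entry_list[1] and int(entry_list[2]) to
# initial_lstm_model, so mape_entry is assumed to carry at least three columns,
# with columns 1 and 2 holding the model hyper-parameters (the exact layout
# depends on what multi_model returns).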