def _task_4():
    '''
    X: past one hour
    Y: next hour's min value
    '''
    x_target_path = './npy/final/hour_min/testing/X'
    y_target_path = './npy/final/hour_min/testing/Y'
    if not os.path.exists(x_target_path):
        os.makedirs(x_target_path)
    if not os.path.exists(y_target_path):
        os.makedirs(y_target_path)

    filelist = du.list_all_input_file(root_dir + '/npy/hour_min/X')
    filelist.sort()
    for i, filename in enumerate(filelist):
        if filename != 'training_raw_data.npy':
            data_array = du.load_array(root_dir + '/npy/hour_min/X/' + filename)
            # only network activity
            data_array = data_array[:, :, grid_start:grid_stop,
                                    grid_start:grid_stop, (0, 1, -1)]
            print('saving array shape:{}'.format(data_array.shape))
            du.save_array(data_array, x_target_path + '/hour_min_' + str(i))

    filelist = du.list_all_input_file(root_dir + '/npy/hour_min/Y')
    filelist.sort()
    for i, filename in enumerate(filelist):
        min_array = du.load_array(root_dir + '/npy/hour_min/Y/' + filename)
        # only network activity
        min_array = min_array[:, :, grid_start:grid_stop,
                              grid_start:grid_stop, (0, 1, -1)]
        du.save_array(min_array, y_target_path + '/hour_min_' + str(i))
def task_6():
    '''
    X: past one hour
    Y: next 10 minutes traffic level
    '''
    x_dir = './npy/final/10_minutes_level/testing/X/'
    y_dir = './npy/final/10_minutes_level/testing/Y/'
    x_data_list = du.list_all_input_file(x_dir)
    x_data_list.sort()
    y_data_list = du.list_all_input_file(y_dir)
    y_data_list.sort()

    X_array_list = []
    for filename in x_data_list:
        X_array_list.append(du.load_array(x_dir + filename))
    X_array = np.concatenate(X_array_list, axis=0)
    del X_array_list

    Y_array_list = []
    for filename in y_data_list:
        Y_array_list.append(du.load_array(y_dir + filename))
    Y_array = np.concatenate(Y_array_list, axis=0)
    del Y_array_list

    # X_array = feature_scaling(X_array)
    # Y_array = feature_scaling(Y_array)
    return X_array, Y_array
def task_3():
    '''
    X: past one hour
    Y: next hour's avg value
    '''
    x_dir = './npy/final/hour_avg/testing/X/'
    y_dir = './npy/final/hour_avg/testing/Y/'
    x_data_list = du.list_all_input_file(x_dir)
    x_data_list.sort()
    y_data_list = du.list_all_input_file(y_dir)
    y_data_list.sort()

    X_array_list = []
    for filename in x_data_list:
        X_array_list.append(du.load_array(x_dir + filename))
    X_array = np.concatenate(X_array_list, axis=0)
    # X_array = X_array[:, :, 0:21, 0:21, :]
    del X_array_list

    Y_array_list = []
    for filename in y_data_list:
        Y_array_list.append(du.load_array(y_dir + filename))
    Y_array = np.concatenate(Y_array_list, axis=0)
    del Y_array_list

    # new_X_array = feature_scaling(X_array[:, :, :, :, -1, np.newaxis])
    # new_Y_array = feature_scaling(Y_array[:, :, :, :, -1, np.newaxis])
    # X_array = _copy(X_array, new_X_array)
    # Y_array = _copy(Y_array, new_Y_array)
    X_array = X_array[0:-1]  # important!!
    Y_array = Y_array[1:]  # important!! Y should shift 10 minutes
    return X_array, Y_array
def _get_10mins_CDR_internet_traffic(self, grid_list, reload=True):
    target_path = './npy/10min_CDR_internet_traffic_temp.npy'
    source_path = os.path.join(self.config.base_dir, '10min_CDR_internet_traffic.npy')
    if reload:
        # TK = Prepare_Task_Data('./npy/final/')
        # X_array, _ = TK.Task_max(grid_limit=[(0, 100), (0, 100)], generate_data=True)  # only need max here
        X_array = du.load_array(source_path)
        X_array = np.transpose(X_array, (2, 3, 0, 1, 4))
        array_list = []
        for search_grid_id in grid_list:
            # row, column = compute_row_col(grid_id)
            for row_index in range(X_array.shape[0]):
                for col_index in range(X_array.shape[1]):
                    grid_id = X_array[row_index, col_index, 0, 0, 0]
                    if search_grid_id == grid_id:
                        new_x = X_array[row_index, col_index]
                        new_x = new_x[:, :, (0, 1, -1)]  # grid_id, timestamp, internet traffic
                        array_list.append(new_x)
        _10mins_CDR_internet_traffic = np.stack(array_list)
        logger.debug('_10mins_CDR_internet_traffic shape:{}'.format(
            _10mins_CDR_internet_traffic.shape))  # (grid_number, 1487, 6, 3)
        du.save_array(_10mins_CDR_internet_traffic, target_path)
    else:
        _10mins_CDR_internet_traffic = du.load_array(target_path)
    return _10mins_CDR_internet_traffic
def task_4():
    '''
    X: past one hour
    Y: next hour's min value
    '''
    x_dir = './npy/final/hour_min/testing/X/'
    y_dir = './npy/final/hour_min/testing/Y/'
    x_data_list = du.list_all_input_file(x_dir)
    x_data_list.sort()
    y_data_list = du.list_all_input_file(y_dir)
    y_data_list.sort()

    X_array_list = []
    for filename in x_data_list:
        X_array_list.append(du.load_array(x_dir + filename))
    X_array = np.concatenate(X_array_list, axis=0)
    del X_array_list

    Y_array_list = []
    for filename in y_data_list:
        Y_array_list.append(du.load_array(y_dir + filename))
    Y_array = np.concatenate(Y_array_list, axis=0)
    del Y_array_list

    X_array = X_array[0:-1]  # important!!
    Y_array = Y_array[1:]  # important!! Y should shift 10 minutes
    return X_array, Y_array
def task_2():
    '''
    rolling 10 minutes among timeflows
    X: past one hour
    Y: next 10 minutes value
    '''
    x_dir = './npy/final/roll_10/testing/X/'
    y_dir = './npy/final/roll_10/testing/Y/'
    X_file_list = du.list_all_input_file(x_dir)
    Y_file_list = du.list_all_input_file(y_dir)
    X_file_list.sort()
    Y_file_list.sort()
    X_array_list = []
    Y_array_list = []

    # X array
    for filename in X_file_list:
        X_array_list.append(du.load_array(x_dir + filename))
    X_array = np.concatenate(X_array_list, axis=0)
    del X_array_list

    # Y array
    for filename in Y_file_list:
        Y_array_list.append(du.load_array(y_dir + filename))
    Y_array = np.concatenate(Y_array_list, axis=0)
    del Y_array_list

    # new_X_array = feature_scaling(X_array)
    # new_Y_array = feature_scaling(Y_array)
    # X_array = _copy(X_array, new_X_array)
    # Y_array = _copy(Y_array, new_Y_array)
    return X_array, Y_array
def task_5():
    '''
    X: past one hour
    Y: next hour's min avg max network traffic, for multi-task learning
    '''
    x_dir = './npy/final/hour_min_avg_max/testing/X/'
    y_dir = './npy/final/hour_min_avg_max/testing/Y/'
    x_data_list = du.list_all_input_file(x_dir)
    x_data_list.sort()
    y_data_list = du.list_all_input_file(y_dir)
    y_data_list.sort()

    X_array_list = []
    for filename in x_data_list:
        X_array_list.append(du.load_array(x_dir + filename))
    X_array = np.concatenate(X_array_list, axis=0)
    del X_array_list

    Y_array_list = []
    for filename in y_data_list:
        Y_array_list.append(du.load_array(y_dir + filename))
    Y_array = np.concatenate(Y_array_list, axis=0)
    del Y_array_list

    # X_array = feature_scaling(X_array)
    # Y_array = feature_scaling(Y_array)
    return X_array, Y_array
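# Hypothetical usage sketch (not part of the original pipeline): each task_*()
# loader above returns an (X, Y) pair of numpy arrays concatenated from the
# per-file .npy chunks, assuming the corresponding './npy/final/.../testing'
# directories have already been generated by the _task_*() helpers. The helper
# name below is illustrative only.
def _example_check_task_loaders():
    for loader in (task_2, task_3, task_4, task_5, task_6):
        X_array, Y_array = loader()
        print('{}: X shape {} Y shape {}'.format(
            loader.__name__, X_array.shape, Y_array.shape))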
def get_data():
    CNN_RNN_all_grid_path = './result/CNN_RNN/all_real_prediction_traffic_array_0718.npy'
    CNN_RNN_without_task_all_grid_path = './result/CNN_RNN_without_task/all_real_prediction_traffic_array_split_min_avg_max.npy'
    CNN_RNN_MTL_array = du.load_array(CNN_RNN_all_grid_path)
    CNN_RNN_without_task_array = du.load_array(CNN_RNN_without_task_all_grid_path)
    CNN_RNN_MTL_array = CNN_RNN_MTL_array[:-1]
    logger.info('CNN_RNN_MTL_array shape:{} CNN_RNN_without_task_array shape:{}'.format(
        CNN_RNN_MTL_array.shape, CNN_RNN_without_task_array.shape))
    return CNN_RNN_MTL_array, CNN_RNN_without_task_array
def convert_prediction_to_non_prediction():
    source_path = os.path.join(root_dir, 'offloading/npy/real_prediction')
    target_path = os.path.join(root_dir, 'offloading/npy/real_without_prediction')
    _10_min_traffic = du.load_array(os.path.join(source_path, '10min_CDR_internet_traffic.npy'))
    hour_traffic = du.load_array(os.path.join(source_path, 'hour_traffic_array.npy'))
    print('origin 10 min shape:{} origin hour shape:{}'.format(
        _10_min_traffic.shape, hour_traffic.shape))
    _10_min_traffic = _10_min_traffic[1:]  # (1485, 6, 41, 41, 3)
    hour_traffic = hour_traffic[:-1]  # (1485, 1, 41, 41, 8)
    print('new 10 min shape:{} new hour shape:{}'.format(
        _10_min_traffic.shape, hour_traffic.shape))
    du.save_array(_10_min_traffic, os.path.join(target_path, '10min_CDR_internet_traffic'))
    du.save_array(hour_traffic, os.path.join(target_path, 'hour_traffic_array'))
def get_dataframe(file_path):
    data_array = du.load_array(file_path)
    data_array = data_array[:, -149:]
    cell_num_array = data_array[:, 0, 0]
    reward_array = np.mean(data_array[:, :, 1], axis=1)
    energy_array = np.mean(data_array[:, :, 2], axis=1)
    traffic_digested_array = np.sum(data_array[:, :, 3], axis=1)
    macro_load_array = np.mean(data_array[:, :, 4], axis=1)
    small_load_array = data_array[:, :, 5]
    small_load_array[small_load_array == 0] = np.nan
    small_load_array = np.nanmean(small_load_array, axis=1)
    power_consumption_array = np.sum(data_array[:, :, 6], axis=1)
    action_array = np.mean(data_array[:, :, 7], axis=1)
    traffic_demand_array = np.sum(data_array[:, :, 8], axis=1)  # sum of data within 149
    df = pd.DataFrame({
        'cell_num': [int(cell_num) for cell_num in cell_num_array[:]],
        'reward': reward_array,
        'energy efficiency': energy_array,
        'traffic digested': traffic_digested_array,
        'macro load': macro_load_array,
        'small load': small_load_array,
        'power consumption': power_consumption_array,
        'small cell number': action_array,
        'traffic demand': traffic_demand_array})
    df = df.set_index('cell_num')
    return df
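# Hypothetical usage sketch: get_dataframe() returns one row per cell_num with
# the per-cell metrics averaged or summed over the last 149 time steps, so two
# result files can be compared side by side. The paths and helper name below
# are placeholders, not files from the original project.
def _example_compare_dataframes():
    df_a = get_dataframe('./result/run_a/all_cell_result_array.npy')  # placeholder path
    df_b = get_dataframe('./result/run_b/all_cell_result_array.npy')  # placeholder path
    merged = pd.concat((df_a.add_suffix('_a'), df_b.add_suffix('_b')), axis=1)
    print(merged[['reward_a', 'reward_b']].describe())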
def grouping_by_macro_load(EFFI_df):
    # load_groups_list = get_groups_load_list()
    # print(EFFI_df.describe())
    without_loading_data_path = os.path.join(
        root_dir, 'offloading/result/without_offloading_without_RL',
        'all_cell_result_array_0731.npy')
    cell_result = du.load_array(without_loading_data_path)
    # cell_result = cell_result[:, :]
    macro_load_array = cell_result[:, -149:, 4]  # (144, 149)
    macro_load_array_mean = np.mean(macro_load_array, axis=1)
    macro_load_array = cell_result[:, :, 4]  # (144, 1486)
    macro_cell_num = cell_result[:, 0, 0]
    macro_load_pd = pd.DataFrame({
        'cell_num': [int(cell_num) for cell_num in macro_cell_num],
        'macro_load': macro_load_array_mean})
    macro_load_pd = macro_load_pd.set_index('cell_num')
    # print(macro_load_pd)
    EFFI_df = pd.concat((EFFI_df, macro_load_pd), axis=1)
    bins = (0, 0.3, 0.7, 1, 2, 10)
    cats_macro_load_pd = pd.cut(EFFI_df['macro_load'], bins)
    EFFI_df_group_by_macro_load = EFFI_df.groupby(cats_macro_load_pd)
    key_list = list(EFFI_df_group_by_macro_load.groups.keys())
    key_list = sorted(key_list, key=lambda x: x)
    # print(EFFI_df_group_by_macro_load.get_group(key_list[0]))
    # print(key_list)
    logger.info('\n{}'.format(EFFI_df_group_by_macro_load.count()))
    logger.info('\n{}'.format(EFFI_df_group_by_macro_load.mean()))
    # for group in EFFI_df_group_by_macro_load:
    #     print(group[1].head(3))
    return key_list, EFFI_df_group_by_macro_load
def evaluate_different_method():
    def evaluate_performance(Y_real_prediction_array, file_path, divide_threshold=None):
        def print_total_report(task_report):
            for task_name, ele in task_report.items():
                print('{}: Accuracy:{:.4f} MAE:{:.4f} RMSE:{:.4f}'.format(
                    task_name, ele['Accuracy'], ele['AE'], ele['RMSE']))

        row_center_list = list(range(40, 80, 3))
        col_center_list = list(range(30, 70, 3))
        row_range = (row_center_list[0], row_center_list[-1])
        col_range = (col_center_list[0], col_center_list[-1])
        # print((row_range[1] - row_range[0]) * (col_range[1] - col_range[0]))
        array_len = Y_real_prediction_array.shape[0]
        if not divide_threshold:
            divide_threshold = (9 * array_len) // 10
        Y_real_prediction_array = Y_real_prediction_array[
            :, :, row_range[0]:row_range[1], col_range[0]:col_range[1]]
        training_data = Y_real_prediction_array[:divide_threshold]
        testing_data = Y_real_prediction_array[divide_threshold:]
        training_info = training_data[:, :, :, :, :2]
        training_real = training_data[:, :, :, :, 2:5]
        training_prediction = training_data[:, :, :, :, 5:]
        testing_info = testing_data[:, :, :, :, :2]
        testing_real = testing_data[:, :, :, :, 2:5]
        testing_prediction = testing_data[:, :, :, :, 5:]
        report_dict = report_func.report_loss_accu(
            testing_info, testing_real, testing_prediction, file_path)
        print_total_report(report_dict['total'])
        # print(report_dict['total'])

    CNN_RNN_all_grid_path = './result/CNN_RNN/all_real_prediction_traffic_array_0718.npy'
    CNN_3D_all_grid_path = './result/CNN_3D/all_real_prediction_traffic_array_0718.npy'
    RNN_all_grid_path = './result/RNN/all_real_prediction_traffic_array_0718.npy'
    ARIMA_all_grid_path = './result/ARIMA/all_real_prediction_traffic_array.npy'
    CNN_RNN_STL_all_grid_path = './result/CNN_RNN_STL/all_real_prediction_traffic_array_0715.npy'
    CNN_RNN_without_task_all_grid_path = './result/CNN_RNN_without_task/all_real_prediction_traffic_array_split_min_avg_max.npy'
    LM_all_grid_path = './result/LM/all_real_prediction_traffic_array.npy'

    # CNN_RNN_array = du.load_array(CNN_RNN_all_grid_path)
    # CNN_3D_array = du.load_array(CNN_3D_all_grid_path)
    # RNN_array = du.load_array(RNN_all_grid_path)
    # ARIMA_array = du.load_array(ARIMA_all_grid_path)
    # CNN_RNN_STL_array = du.load_array(CNN_RNN_STL_all_grid_path)
    # CNN_RNN_without_task_array = du.load_array(CNN_RNN_without_task_all_grid_path)
    LM_array = du.load_array(LM_all_grid_path)

    # evaluate_performance(CNN_RNN_array, './result/CNN_RNN/all_grid_result_report.txt')
    # evaluate_performance(CNN_3D_array, './result/CNN_3D/all_grid_result_report.txt')
    # evaluate_performance(RNN_array, './result/RNN/all_grid_result_report.txt')
    # evaluate_performance(ARIMA_array, './result/ARIMA/all_grid_result_report.txt')
    # evaluate_performance(CNN_RNN_STL_array, './result/CNN_RNN_STL/all_grid_result_report.txt')
    # evaluate_performance(CNN_RNN_without_task_array, './result/CNN_RNN_without_task/all_grid_result_report.txt')
    evaluate_performance(LM_array, './result/LM/all_grid_result_report.txt', 0)
def get_groups_load_dict():
    '''
    group each cell's load into different levels
    '''
    without_loading_data_path = os.path.join(
        root_dir, 'offloading/result/without_offloading_without_RL',
        'all_cell_result_array_0731.npy')
    cell_result = du.load_array(without_loading_data_path)
    # (144, 1486, 8) 144: cell_num, 1486: time sequence,
    # 8: cell_num, reward, energy, traffic_demand, macro load, small load, power consumption
    logger.debug('cell result shape:{}'.format(cell_result.shape))
    macro_load_array = cell_result[:, -149:, 4]  # (144, 149)
    macro_load_array_mean = np.mean(macro_load_array, axis=1)
    macro_cell_num = cell_result[:, 0, 0]
    macro_load_pd = pd.DataFrame({
        'cell_num': macro_cell_num,
        'macro_load': macro_load_array_mean})
    # print(macro_load_pd)
    bins = (0, 0.3, 0.7, 1, 2, 10)
    cats_macro_load_pd = pd.cut(macro_load_pd['macro_load'], bins)
    # print(cats_macro_load_pd)
    macro_load_pd_group = macro_load_pd.groupby(cats_macro_load_pd)
    load_groups_dict = OrderedDict()
    for index, group in enumerate(macro_load_pd_group):
        # print(group[1]['cell_num'])
        group_list = [cell_num for cell_num in group[1]['cell_num']]
        # print(group_list)
        load_groups_dict[group[0]] = group_list
        # load_group_dict[group[0]] = group[1]['cell_num']
    # print(macro_load_pd_group.count())
    # for key, v in load_group_dict.items():
    return load_groups_dict
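# Hypothetical usage sketch: the OrderedDict returned above maps each macro-load
# interval produced by pd.cut with bins (0, 0.3, 0.7, 1, 2, 10) to the list of
# cell numbers whose mean load over the last 149 time steps falls in that
# interval. The helper name below is illustrative only.
def _example_print_load_groups():
    load_groups_dict = get_groups_load_dict()
    for load_interval, cell_num_list in load_groups_dict.items():
        print('{}: {} cells'.format(load_interval, len(cell_num_list)))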
def load_data(file_dir):
    file_list = du.list_all_input_file(file_dir)
    file_list.sort()
    array_list = []
    for filename in file_list:
        array_list.append(du.load_array(os.path.join(file_dir, filename)))
    data_array = np.concatenate(array_list, axis=0)
    return data_array
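# Hypothetical usage sketch: load_data() concatenates every .npy chunk in a
# directory along axis 0, so the per-task testing arrays saved by the _task_*()
# helpers can be reassembled in one call. The helper name below is illustrative.
def _example_reload_testing_arrays():
    X_array = load_data('./npy/final/hour_avg/testing/X/')
    Y_array = load_data('./npy/final/hour_avg/testing/Y/')
    print('X shape:{} Y shape:{}'.format(X_array.shape, Y_array.shape))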
def _task_3():
    '''
    X: past one hour
    Y: next hour's avg value
    '''
    x_target_path = './npy/final/hour_avg/testing/X'
    y_target_path = './npy/final/hour_avg/testing/Y'
    if not os.path.exists(x_target_path):
        os.makedirs(x_target_path)
    if not os.path.exists(y_target_path):
        os.makedirs(y_target_path)

    filelist = du.list_all_input_file(root_dir + '/npy/hour_avg/X')
    filelist.sort()
    for i, filename in enumerate(filelist):
        if filename != 'training_raw_data.npy':
            data_array = du.load_array(root_dir + '/npy/hour_avg/X/' + filename)
            data_array = data_array[:, :, grid_start:grid_stop,
                                    grid_start:grid_stop, (0, 1, -1)]
            print('saving array shape:', data_array.shape)
            du.save_array(data_array, x_target_path + '/hour_avg_' + str(i))

    # prepare y
    filelist = du.list_all_input_file(root_dir + '/npy/hour_avg/Y')
    filelist.sort()
    for i, filename in enumerate(filelist):
        avg_array = du.load_array(root_dir + '/npy/hour_avg/Y/' + filename)
        # only network activity
        # avg_array = avg_array[:, :, grid_start:65, grid_start:65, (0, 1, -1)]  # only network activity
        avg_array = avg_array[:, :, grid_start:grid_stop,
                              grid_start:grid_stop, (0, 1, -1)]
        du.save_array(avg_array, y_target_path + '/hour_avg_' + str(i))
def load_and_save(file_dir, target_path):
    filelist = du.list_all_input_file(file_dir)
    filelist.sort()
    for i, filename in enumerate(filelist):
        file_path = os.path.join(file_dir, filename)
        data_array = du.load_array(file_path)
        data_array = data_array[:, :, grid_limit[0][0]:grid_limit[0][1],
                                grid_limit[1][0]:grid_limit[1][1], (0, 1, -1)]
        print('saving array shape:', data_array.shape)
        du.save_array(data_array, os.path.join(target_path, task_name + '_' + str(i)))
def get_energyEFFI_pd(file_path):
    data_array = du.load_array(file_path)
    energy_array = data_array[:, -149:, 2]  # (144, 149), last 149 time steps
    cell_num_array = data_array[:, 0, 0]
    energy_array = np.mean(energy_array, axis=1)  # (144,)
    data_array = np.stack((cell_num_array, energy_array), axis=-1)
    df = pd.DataFrame({
        'EE': data_array[:, 1],
        'cell_num': [int(cell_num) for cell_num in data_array[:, 0]]})
    df = df.set_index('cell_num')
    # print(df.describe())
    return df
def _task_2():
    '''
    rolling 10 minutes among timeflows
    X: past one hour
    Y: next 10 minutes value
    '''
    # check target dir exists
    x_target_path = './npy/final/roll_10/testing/X'
    y_target_path = './npy/final/roll_10/testing/Y'
    if not os.path.exists(x_target_path):
        os.makedirs(x_target_path)
    if not os.path.exists(y_target_path):
        os.makedirs(y_target_path)

    filelist_X = du.list_all_input_file(root_dir + '/npy/npy_roll/X/')
    filelist_Y = du.list_all_input_file(root_dir + '/npy/npy_roll/Y/')
    filelist_X.sort()
    filelist_Y.sort()
    for i, filename in enumerate(filelist_X):
        data_array = du.load_array(root_dir + '/npy/npy_roll/X/' + filename)
        data_array = data_array[:, :, grid_start:grid_stop,
                                grid_start:grid_stop, (0, 1, -1)]
        print('saving array shape:{}'.format(data_array.shape))
        du.save_array(data_array, x_target_path + '/X_' + str(i))

    for i, filename in enumerate(filelist_Y):
        data_array = du.load_array(root_dir + '/npy/npy_roll/Y/' + filename)
        # only network activity
        data_array = data_array[:, :, grid_start:grid_stop,
                                grid_start:grid_stop, (0, 1, -1)]
        print(data_array[0, 0, 20, 20, 0])
        print('saving array shape:{}'.format(data_array.shape))
        du.save_array(data_array, y_target_path + '/Y_' + str(i))
def get_data():
    method_result_path = os.path.join(
        root_dir,
        'CNN_RNN/result/CNN_RNN_without_task/all_real_prediction_traffic_array.npy')
    result_array = du.load_array(method_result_path)
    row_center_list = list(range(40, 80, 3))
    col_center_list = list(range(30, 70, 3))
    row_range = range(row_center_list[0] - 1, row_center_list[-1] + 1)
    col_range = range(col_center_list[0] - 1, col_center_list[-1] + 1)
    logger.info('row_range {}:{} col_range: {}:{}'.format(
        row_range[0], row_range[-1], col_range[0], col_range[-1]))
    result_array = result_array[:-1, :, :row_range[-1] - row_range[0] + 1,
                                :col_range[-1] - col_range[0] + 1]
    logger.debug('result_array shape:{}'.format(result_array.shape))
    return result_array
def generate_new_real_prediction_traffic_array():
    # target_path = os.path.join(root_dir, 'offloading/npy/real_prediction_traffic_array.npy')
    source_path = os.path.join(
        root_dir, 'CNN_RNN/result/CNN_RNN/all_real_prediction_traffic_array_0718.npy')
    # source_path = os.path.join(root_dir, 'CNN_RNN/result/ARIMA/all_real_prediction_traffic_array.npy')
    data_array = du.load_array(source_path)
    row_center_list = list(range(40, 80, 3))
    col_center_list = list(range(30, 70, 3))
    row_range = (row_center_list[0] - 1, row_center_list[-1] + 1)
    col_range = (col_center_list[0] - 1, col_center_list[-1] + 1)
    data_array = data_array[:, :, row_range[0]:row_range[1], col_range[0]:col_range[1]]
    # for row_index in range(data_array.shape[2]):
    #     for col_index in range(data_array.shape[3]):
    #         grid_id = data_array[0, 0, row_index, col_index, 0]
    #         if grid_id != 0:
    #             print(grid_id)
    return data_array
def prepare_data():
    data_array = du.load_array(input_file)
    print('saving array shape:{}'.format(data_array.shape))
    # du.save_array(data_array, './npy/autoregrssion_raw_data')
    i_len = data_array.shape[0]
    j_len = data_array.shape[1]
    row = 39
    col = 39
    data_frame = {'date': [], 'internet': []}
    for i in range(i_len):
        for j in range(j_len):
            date_string = set_time_zone(data_array[i, j, row, col, 1])
            date_string = date_time_covert_to_str(date_string)
            data_frame['internet'].append(data_array[i, j, row, col, -1])
            data_frame['date'].append(date_string)
    data_frame = pd.DataFrame(data_frame)
    return data_frame
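# Hypothetical usage sketch: prepare_data() returns a flat DataFrame with a
# 'date' string column and the internet traffic of a single grid cell (row 39,
# col 39); re-indexing it by parsed timestamps gives a plain time series that
# a forecasting model such as ARIMA could consume. The helper name below is
# illustrative only.
def _example_traffic_series():
    data_frame = prepare_data()
    data_frame['date'] = pd.to_datetime(data_frame['date'])
    traffic_series = data_frame.set_index('date')['internet']
    print(traffic_series.head())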
def _get_hour_CDR_internt_traffic(self, grid_list):
    source_path = os.path.join(self.config.base_dir, 'hour_traffic_array.npy')
    traffic_array = du.load_array(source_path)
    traffic_array = np.transpose(traffic_array, (2, 3, 0, 1, 4))
    traffic_list = []
    # print(grid_list)
    print(source_path)
    for search_grid_id in grid_list:
        # print('grid:{}'.format(grid))
        # row, column = compute_row_col(grid)
        # print(row, column)
        for row_index in range(traffic_array.shape[0]):
            for col_index in range(traffic_array.shape[1]):
                grid_id = traffic_array[row_index, col_index, -149, 0, 0]
                if search_grid_id == grid_id:
                    grid_traffic = traffic_array[row_index, col_index, :, :]
                    traffic_list.append(grid_traffic)
    traffic_array = np.stack(traffic_list)  # (grid_num, 1487, 1, 8)
    logger.debug('hour_CDR_internet_traffic shape:{}'.format(traffic_array.shape))
    return traffic_array
def grouping_by_macro_load(df):
    without_loading_data_path = os.path.join(
        root_dir, 'offloading/result/without_offloading_without_RL',
        'all_cell_result_array_0731.npy')
    cell_result = du.load_array(without_loading_data_path)
    macro_load_array = cell_result[:, -149:, 4]  # (144, 149), last 149 time steps
    macro_load_array_mean = np.mean(macro_load_array, axis=1)
    macro_load_array = cell_result[:, :, 4]  # (144, 1486)
    macro_cell_num = cell_result[:, 0, 0]
    macro_load_pd = pd.DataFrame({
        'cell_num': [int(cell_num) for cell_num in macro_cell_num],
        'base_macro_load': macro_load_array_mean})
    macro_load_pd = macro_load_pd.set_index('cell_num')
    df = pd.concat((df, macro_load_pd), axis=1)
    bins = (0, 0.3, 0.7, 1, 2, 10)
    # bins = (0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 10)
    cats_macro_load_pd = pd.cut(df['base_macro_load'], bins)
    df_group = df.groupby(cats_macro_load_pd)
    logger.debug('macro load group count:{}'.format(df_group.count()))
    key_list = list(df_group.groups.keys())
    key_list = sorted(key_list, key=lambda x: x)
    return key_list, df_group
def get_array(file_path):
    data_array = du.load_array(file_path)
    data_array = data_array[:, -149:, (0, 4)]
    return data_array
def evaluate_CNN_RNN_without_task():
    def search_grid(data_array, grid_id):
        array = np.transpose(data_array, (2, 3, 0, 1, 4))
        for row in range(array.shape[0]):
            for col in range(array.shape[1]):
                if grid_id == array[row, col, 0, 0, 0]:
                    return row, col
        return 0, 0

    def get_data():
        method_result_path = os.path.join(
            root_dir,
            'CNN_RNN/result/CNN_RNN_without_task/all_real_prediction_traffic_array.npy')
        result_array = du.load_array(method_result_path)
        row_center_list = list(range(40, 80, 3))
        col_center_list = list(range(30, 70, 3))
        row_range = range(row_center_list[0] - 1, row_center_list[-1] + 1)
        col_range = range(col_center_list[0] - 1, col_center_list[-1] + 1)
        logger.info('row_range {}:{} col_range: {}:{}'.format(
            row_range[0], row_range[-1], col_range[0], col_range[-1]))
        result_array = result_array[:-1, :, :row_range[-1] - row_range[0] + 1,
                                    :col_range[-1] - col_range[0] + 1]
        logger.debug('result_array shape:{}'.format(result_array.shape))
        return result_array

    def evaluate_performance(real, prediction):
        # data_array_len = real.shape[0]
        # test_real = real[9 * data_array_len // 10:]
        # test_prediction = prediction[9 * data_array_len // 10:]
        MAPE_loss = utility.MAPE_loss(real, prediction)
        AE_loss = utility.AE_loss(real, prediction)
        RMSE_loss = utility.RMSE_loss(real, prediction)
        # MAPE_train = utility.MAPE_loss(train_array[:, :, :, :, 2, np.newaxis], train_array[:, :, :, :, 3, np.newaxis])
        # print('test accu:{} test AE:{} test RMSE:{}'.format(1 - MAPE_test, AE_test, RMSE_test))
        return 1 - MAPE_loss, AE_loss, RMSE_loss

    def calculate_min_avg_max(data_array):
        # hour, 1, row, col, (grid_id, timestamp, real_min, real_avg, real_max,
        # prediction_min, prediction_avg, prediction_max)
        new_data_array = np.zeros((data_array.shape[0], 1, 100, 100, 8))
        data_array = np.transpose(data_array, (0, 2, 3, 1, 4))  # hour, row, col, 10min, feature
        for i in range(data_array.shape[0]):
            for row in range(data_array.shape[1]):
                for col in range(data_array.shape[2]):
                    real_max_value = np.amax(data_array[i, row, col, :, 2])
                    prediction_max_value = np.amax(data_array[i, row, col, :, 3])
                    real_min_value = np.amin(data_array[i, row, col, :, 2])
                    prediction_min_value = np.amin(data_array[i, row, col, :, 3])
                    real_avg_value = np.mean(data_array[i, row, col, :, 2])
                    prediction_avg_value = np.mean(data_array[i, row, col, :, 3])
                    grid_id = data_array[i, row, col, 0, 0]
                    timestamp = data_array[i, row, col, 0, 1]
                    row_index, col_index = utility.compute_row_col(grid_id)
                    new_data_array[i, 0, row_index, col_index, 0] = grid_id
                    new_data_array[i, 0, row_index, col_index, 1] = timestamp
                    new_data_array[i, 0, row_index, col_index, 2] = real_min_value
                    new_data_array[i, 0, row_index, col_index, 3] = real_avg_value
                    new_data_array[i, 0, row_index, col_index, 4] = real_max_value
                    new_data_array[i, 0, row_index, col_index, 5] = prediction_min_value
                    new_data_array[i, 0, row_index, col_index, 6] = prediction_avg_value
                    new_data_array[i, 0, row_index, col_index, 7] = prediction_max_value
                    # logger.info('grid_id:{} real:{} prediction:{}'.format(int(grid_id), real_max_value, prediction_max_value))
        return new_data_array

    def plot_CNN_RNN_without_task(data_array, grid_id, interval=6):
        logger.debug('data_array :{}'.format(data_array.shape))
        # plot_row = 10
        # plot_col = 30
        plot_row, plot_col = search_grid(data_array, grid_id)
        # result_array_len = result_array.shape[0]
        logger.info('plot_row:{} plot_col:{}'.format(plot_row, plot_col))
        plot_real = data_array[:, :, plot_row, plot_col, 2].reshape(-1, 1)
        plot_prediction = data_array[:, :, plot_row, plot_col, 3].reshape(-1, 1)
        plt_info = data_array[:, :, plot_row, plot_col, :2].reshape(-1, 2)
        report_func.plot_predict_vs_real(plt_info, plot_real, plot_prediction,
                                         'CNN-RNN(*) prediction on ', interval)

    def evaluate_one_grid(origin_array, real_prediction, grid_id=4867):
        logger.info('origin_array shape:{} real_prediction shape:{}'.format(
            origin_array.shape, real_prediction.shape))
        plot_CNN_RNN_without_task(origin_array[-149:], grid_id, 24)
        row, col = search_grid(real_prediction, grid_id)
        accu_min, AE_min, RMSE_min = evaluate_performance(
            real_prediction[-149:, :, row:row + 1, col:col + 1, 2],
            real_prediction[-149:, :, row:row + 1, col:col + 1, 5])
        accu_avg, AE_avg, RMSE_avg = evaluate_performance(
            real_prediction[-149:, :, row:row + 1, col:col + 1, 3],
            real_prediction[-149:, :, row:row + 1, col:col + 1, 6])
        accu_max, AE_max, RMSE_max = evaluate_performance(
            real_prediction[-149:, :, row:row + 1, col:col + 1, 4],
            real_prediction[-149:, :, row:row + 1, col:col + 1, 7])
        logger.info('grid id:{} MIN accu:{} AE:{} RMSE:{}'.format(
            grid_id, accu_min, AE_min, RMSE_min))
        logger.info('grid id:{} AVG accu:{} AE:{} RMSE:{}'.format(
            grid_id, accu_avg, AE_avg, RMSE_avg))
        logger.info('grid id:{} MAX accu:{} AE:{} RMSE:{}'.format(
            grid_id, accu_max, AE_max, RMSE_max))
        plot_CNN_RNN_without_task(real_prediction[-149:, :, :, :, (0, 1, 4, 7)], grid_id, 2)

    reload = None
    result_array = get_data()
    accu, AE, RMSE = evaluate_performance(result_array[-149:, :, :, :, 2],
                                          result_array[-149:, :, :, :, 3])
    logger.info('total data: test accu:{} test AE:{} test RMSE:{}'.format(accu, AE, RMSE))
    if reload:
        real_prediction = calculate_min_avg_max(result_array)
        du.save_array(
            real_prediction,
            os.path.join(
                root_dir,
                'CNN_RNN/result/CNN_RNN_without_task/all_real_prediction_traffic_array_split_min_avg_max.npy'))
    else:
        real_prediction = du.load_array(
            os.path.join(
                root_dir,
                'CNN_RNN/result/CNN_RNN_without_task/all_real_prediction_traffic_array_split_min_avg_max.npy'))
    print()
    accu_min, AE_min, RMSE_min = evaluate_performance(
        real_prediction[-149:, :, :, :, 2], real_prediction[-149:, :, :, :, 5])
    accu_avg, AE_avg, RMSE_avg = evaluate_performance(
        real_prediction[-149:, :, :, :, 3], real_prediction[-149:, :, :, :, 6])
    accu_max, AE_max, RMSE_max = evaluate_performance(
        real_prediction[-149:, :, :, :, 4], real_prediction[-149:, :, :, :, 7])
    logger.info('MIN accu:{} AE:{} RMSE:{}'.format(accu_min, AE_min, RMSE_min))
    logger.info('AVG accu:{} AE:{} RMSE:{}'.format(accu_avg, AE_avg, RMSE_avg))
    logger.info('MAX accu:{} AE:{} RMSE:{}'.format(accu_max, AE_max, RMSE_max))
    evaluate_one_grid(result_array, real_prediction, 4867)
    plt.show()
def filter_cell_index():
    def get_cell_tower_grid_pair():
        cell_tower_with_grid = os.path.join(root_dir, 'cell_tower/cell_tower_with_grid.txt')
        with open(cell_tower_with_grid, 'r') as f:
            cell_grid = json.load(f)
        return cell_grid

    def evaluate_performance(Y_real_prediction_array, threshold):
        Y_real_prediction_array = np.transpose(Y_real_prediction_array, (2, 3, 0, 1, 4))
        grid_id_list = []
        for row_index in range(Y_real_prediction_array.shape[0]):
            for col_index in range(Y_real_prediction_array.shape[1]):
                info = Y_real_prediction_array[row_index, col_index, :, 0, :2]
                real = Y_real_prediction_array[row_index, col_index, :, 0, 2:5]
                prediction = Y_real_prediction_array[row_index, col_index, :, 0, 5:]
                # task_min_MAPE = utility.MAPE_loss(real[:, 0], prediction[:, 0])
                # task_avg_MAPE = utility.MAPE_loss(real[:, 1], prediction[:, 1])
                # task_max_MAPE = utility.MAPE_loss(real[:, 2], prediction[:, 2])
                MAPE = utility.MAPE_loss(real, prediction)
                Accu = 1 - MAPE if MAPE else 0
                if Accu > threshold and Accu:
                    grid_id = info[0, 0]
                    # print('grid id:{} accu:{}'.format(grid_id, Accu))
                    grid_id_list.append(int(grid_id))
        return grid_id_list

    def filter_by_range(Y_real_prediction_array):
        Y_real_prediction_array = np.transpose(Y_real_prediction_array, (2, 3, 0, 1, 4))
        grid_id_list = []
        row_range = list(range(20, 50))
        col_range = list(range(30, 50))
        for row_index in range(Y_real_prediction_array.shape[0]):
            for col_index in range(Y_real_prediction_array.shape[1]):
                info = Y_real_prediction_array[row_index, col_index, :, 0, :2]
                if row_index in row_range and col_index in col_range:
                    grid_id = info[0, 0]
                    # print('grid id:{} accu:{}'.format(grid_id, Accu))
                    grid_id_list.append(int(grid_id))
        return grid_id_list

    all_real_prediction_traffic_array_path = os.path.join(
        root_dir, 'offloading/npy/real_prediction/hour_traffic_array_0730.npy')
    CNN_RNN_MTL_array = du.load_array(all_real_prediction_traffic_array_path)
    # evaluate_threshold = 0.75
    # grid_id_list = evaluate_performance(CNN_RNN_MTL_array, evaluate_threshold)
    grid_id_list = filter_by_range(CNN_RNN_MTL_array)
    cell_grids = get_cell_tower_grid_pair()
    cell_index_list = []
    for cell_grid in cell_grids:
        cell_index = cell_grid['index']
        grids = cell_grid['grid']
        # print(cell_grid)
        if set(grids).issubset(set(grid_id_list)) and len(grids) > 0:
            cell_index_list.append(cell_index)
    print('cell_index_list length:', len(cell_index_list))
    cell_index_list = sorted(cell_index_list)
    return cell_index_list