if Delta > 1:
    max_k = max(k_idx_his[-Delta:-1]) + 1
else:
    max_k = k_idx_his[-1] + 1
K = min(max_k + 1, N)

i_idx = i
h = channel[i_idx, :]

# the action selection must be either 'OP' or 'KNN'
m_list = mem.decode(h, K, decoder_mode)

r_list = []
for m in m_list:
    # compute the weighted rate of each candidate mode
    # (h was scaled up by 1e6, so it is divided back here)
    r_list.append(bisection(h / 1000000, m, weight)[0])

# memorize the largest reward
rate_his.append(np.max(r_list))
rate_his_ratio.append(rate_his[-1] / rate[i_idx][0])
# record the index of largest reward
k_idx_his.append(np.argmax(r_list))
# record K in case of adaptive K
K_his.append(K)
# save the mode with largest reward
mode_his.append(m_list[np.argmax(r_list)])
# if i < 0.6 * n:
# encode the mode with largest reward
mem.encode(h, m_list[np.argmax(r_list)])

total_time = time.time() - start_time
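# The adaptive-K rule above can be isolated into a small helper: look back
# over the last Delta best-action indices, set K one above the largest of
# them, and cap it at N. A minimal sketch (the name update_K is
# illustrative, not from the original code):
def update_K(k_idx_his, Delta, N):
    # Largest recent best-action index; with Delta <= 1, fall back to
    # the most recent entry only.
    if Delta > 1:
        max_k = max(k_idx_his[-Delta:-1]) + 1
    else:
        max_k = k_idx_his[-1] + 1
    # Keep one spare candidate above the recent maximum, capped at N.
    return min(max_k + 1, N)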
else:  # test: draw samples from the test set
    i_idx = i - n + num_test + split_idx
    h = channel[i_idx, :]  # fetch the channel gains

    # the action selection must be either 'OP' or 'KNN'
    # input: channel gains h, K, and the decoder mode ('OP')
    # output: K binary arrays of length N whose elements are 0 or 1
    m_list = mem.decode(h, K, decoder_mode)

    ##################################################
    # note: this is the part that has not been fully understood yet
    r_list = []
    for m in m_list:
        r_list.append(bisection(h / 1000000, m)[0])
    ##################################################

    # encode the mode with largest reward, i.e., the action with the
    # largest weighted computation rate
    mem.encode(h, m_list[np.argmax(r_list)])
    # the main code for DROO training ends here

    # the following code stores some metrics of interest for illustration
    # memorize the largest reward
    rate_his.append(np.max(r_list))
    rate_his_ratio.append(rate_his[-1] / rate[i_idx][0])
    # record the index of largest reward
    k_idx_his.append(np.argmax(r_list))
    # record K in case of adaptive K
    K_his.append(K)
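# The block flagged above as unclear is just an argmax over candidate
# offloading modes: decode K candidates, score each with the rate that
# bisection() returns as its first output, and re-encode the winner.
# A self-contained sketch of that pattern, with toy_rate as a hypothetical
# stand-in for bisection (assumed, not the original scorer):
import numpy as np

def toy_rate(h, m):
    # Stand-in score: grows with the channel gains of the users
    # selected for offloading (m[j] == 1).
    return float(np.sum(h * m))

def pick_and_learn(mem, h, K, decoder_mode):
    m_list = mem.decode(h, K, decoder_mode)    # K candidate binary modes
    r_list = [toy_rate(h, m) for m in m_list]  # score every candidate
    best = int(np.argmax(r_list))
    mem.encode(h, m_list[best])                # learn from the best mode
    return m_list[best], r_list[best]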
if Delta > 1:
    max_k = max(k_idx_his[-Delta:-1]) + 1
else:
    max_k = k_idx_his[-1] + 1
K = min(max_k + 1, N)

i_idx = i
h = channel[i_idx, :]

# the action selection must be either 'OP' or 'KNN'
m_list = mem.decode(h, K, decoder_mode)

r_list = []
for m in m_list:
    # only active users are used to compute the rate
    r_list.append(bisection(h[0:N_active] / 1000000, m[0:N_active])[0])

# memorize the largest reward
rate_his.append(np.max(r_list))
rate_his_ratio.append(rate_his[-1] / rate[i_idx][0])
# record the index of largest reward
k_idx_his.append(np.argmax(r_list))
# record K in case of adaptive K
K_his.append(K)
# save the mode with largest reward
mode_his.append(m_list[np.argmax(r_list)])
# if i < 0.6 * n:
# encode the mode with largest reward
mem.encode(h, m_list[np.argmax(r_list)])

total_time = time.time() - start_time
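# In this variant only the first N_active of the N users enter the rate
# computation; the trailing entries of h and m are ignored, while the
# full-length mode is still what gets stored in memory. A sketch of that
# slicing (active_rate is illustrative; reward_fn stands in for bisection
# and is assumed to return the rate as its first output):
def active_rate(h, m, N_active, reward_fn):
    # Inactive users (index >= N_active) cannot affect the reward.
    return reward_fn(h[:N_active] / 1000000, m[:N_active])[0]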
max_k = k_idx_his[-1] + 1
K = min(max_k + 1, N)

# training
i_idx = i % split_idx  # split_idx = 24000
h = channel[i_idx, :]

# the action selection must be either 'OP' or 'KNN'
m_list = mem.decode(h, K, decoder_mode)

r_list = []
for m in m_list:
    # compute the reward; the channel gains were scaled up by 1e6 for
    # better training results, so h must be divided by 1e6 here
    r_list.append(bisection(h / 1e6, m)[0])

# encode the mode with largest reward
mem.encode(h, m_list[np.argmax(r_list)])
# the main code for DROO training ends here

# periodically evaluate on a random batch drawn from the test split
if (i + 1) % test_interval == 0:
    test_rate_ratio = []
    m_best_arr = np.empty((0, N))
    test_idx = np.random.choice(range(split_idx, len(channel)), size=batch_size)
    for tidx in test_idx:
        h_test = channel[tidx, :]
        m_list_test = mem.decode(h_test, K, decoder_mode)
        test_rlist = []
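        # (Continuation sketch: the original snippet ends above. The lines
        # below mirror the training reward computation; the final averaging
        # and print are assumptions, not the original code.)
        for m_t in m_list_test:
            test_rlist.append(bisection(h_test / 1e6, m_t)[0])
        # best normalized rate for this test sample
        test_rate_ratio.append(np.max(test_rlist) / rate[tidx][0])
        m_best_arr = np.vstack((m_best_arr, m_list_test[np.argmax(test_rlist)]))
    # mean normalized rate over the sampled test batch
    print("test at step %d: mean rate ratio %.4f" % (i + 1, np.mean(test_rate_ratio)))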