            if Delta > 1:
                max_k = max(k_idx_his[-Delta:-1]) + 1
            else:
                max_k = k_idx_his[-1] + 1
            K = min(max_k + 1, N)

        i_idx = i
        h = channel[i_idx, :]

        # the action selection must be either 'OP' or 'KNN'
        m_list = mem.decode(h, K, decoder_mode)

        r_list = []
        for m in m_list:
            # only active users are used to compute the rate
            r_list.append(bisection(h / 1000000, m, weight)[0])

        # memorize the largest reward
        rate_his.append(np.max(r_list))
        rate_his_ratio.append(rate_his[-1] / rate[i_idx][0])
        # record the index of largest reward
        k_idx_his.append(np.argmax(r_list))
        # record K in case of adaptive K
        K_his.append(K)
        # save the mode with largest reward
        mode_his.append(m_list[np.argmax(r_list)])
        #        if i <0.6*n:
        # encode the mode with largest reward
        mem.encode(h, m_list[np.argmax(r_list)])

    total_time = time.time() - start_time
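# The adaptive-K update at the top of the snippet above can be read as a
# small helper. A minimal sketch restating that logic; the name adaptive_K
# and the standalone-function form are assumptions, not the source's:
def adaptive_K(k_idx_his, Delta, N):
    # look back over the recent best-action indices
    if Delta > 1:
        max_k = max(k_idx_his[-Delta:-1]) + 1
    else:
        max_k = k_idx_his[-1] + 1
    # keep one extra candidate as headroom, capped at N actions
    return min(max_k + 1, N)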
Example #2
        else:
            # test: take the sample from the test set
            i_idx = i - n + num_test + split_idx

        h = channel[i_idx, :]  # retrieve the channel gains

        # the action selection must be either 'OP' or 'KNN'
        # input: (channel gains h, K, decoder_mode)
        # output: K arrays of length N whose elements are 0 or 1
        m_list = mem.decode(h, K, decoder_mode)

        ##################################################
        # this is the part that has not been fully worked out yet
        r_list = []
        for m in m_list:
            r_list.append(bisection(h / 1000000, m)[0])
        ##################################################

        # encode the mode with largest reward
        # select the action with the largest weighted computation rate
        mem.encode(h, m_list[np.argmax(r_list)])
        # the main code for DROO training ends here

        # the following code stores some metrics of interest for illustration
        # memorize the largest reward
        rate_his.append(np.max(r_list))
        rate_his_ratio.append(rate_his[-1] / rate[i_idx][0])
        # record the index of largest reward
        k_idx_his.append(np.argmax(r_list))
        # record K in case of adaptive K
        K_his.append(K)
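# mem.decode(h, K, decoder_mode) produces the K candidate binary offloading
# actions described in the comments above. A minimal sketch of the 'OP'
# (order-preserving) quantizer, assuming it follows the DROO reference
# implementation; m_hat stands for the DNN's relaxed output in [0, 1]^N:
import numpy as np

def op_quantize(m_hat, K):
    m_list = [(m_hat > 0.5).astype(int)]  # first action: round at 0.5
    if K > 1:
        # remaining K - 1 actions flip the entries closest to 0.5
        idx = np.argsort(np.abs(m_hat - 0.5))[:K - 1]
        for j in idx:
            if m_hat[j] > 0.5:
                m_list.append((m_hat - m_hat[j] > 0).astype(int))
            else:
                m_list.append((m_hat - m_hat[j] >= 0).astype(int))
    return m_list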
Example #3
            if Delta > 1:
                max_k = max(k_idx_his[-Delta:-1]) + 1
            else:
                max_k = k_idx_his[-1] + 1
            K = min(max_k + 1, N)

        i_idx = i
        h = channel[i_idx, :]

        # the action selection must be either 'OP' or 'KNN'
        m_list = mem.decode(h, K, decoder_mode)

        r_list = []
        for m in m_list:
            # only active users are used to compute the rate
            r_list.append(bisection(h[0:N_active] / 1000000, m[0:N_active])[0])

        # memorize the largest reward
        rate_his.append(np.max(r_list))
        rate_his_ratio.append(rate_his[-1] / rate[i_idx][0])
        # record the index of largest reward
        k_idx_his.append(np.argmax(r_list))
        # record K in case of adaptive K
        K_his.append(K)
        # save the mode with largest reward
        mode_his.append(m_list[np.argmax(r_list)])
        #        if i <0.6*n:
        # encode the mode with largest reward
        mem.encode(h, m_list[np.argmax(r_list)])

    total_time = time.time() - start_time
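# mem.encode(h, m) stores the winning (channel, action) pair for training.
# A minimal sketch of a MemoryDNN-style replay step, assuming a circular
# buffer retrained at a fixed interval; memory_size and training_interval
# are assumed names and values, not taken from the snippets above:
import numpy as np

class SketchMemory:
    def __init__(self, N, memory_size=1024, training_interval=10):
        self.memory = np.zeros((memory_size, 2 * N))  # each row holds (h, m)
        self.memory_size = memory_size
        self.training_interval = training_interval
        self.count = 0

    def encode(self, h, m):
        # overwrite the oldest entry in the circular buffer
        self.memory[self.count % self.memory_size, :] = np.hstack((h, m))
        self.count += 1
        if self.count % self.training_interval == 0:
            self.learn()

    def learn(self):
        # sample a random minibatch and take one DNN gradient step
        # (the network update itself is omitted in this sketch)
        pass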
Example #4
                max_k = k_idx_his[-1] + 1
            K = min(max_k + 1, N)

        # training
        i_idx = i % split_idx  # split_idx = 24000

        h = channel[i_idx, :]

        # the action selection must be either 'OP' or 'KNN'
        m_list = mem.decode(h, K, decoder_mode)

        r_list = []
        for m in m_list:
            # Compute the reward
            r_list.append(
                bisection(h / 1e6, m)[0]
            )  # the channel gains were scaled up by 1e6 for better training,
            # so h must be scaled back down by 1e6 here

        # encode the mode with largest reward
        mem.encode(h, m_list[np.argmax(r_list)])
        # the main code for DROO training ends here
        if (i + 1) % test_interval == 0:
            test_rate_ratio = []
            m_best_arr = np.empty((0, N))
            test_idx = np.random.choice(range(split_idx, len(channel)),
                                        size=batch_size)
            for tidx in test_idx:
                h_test = channel[tidx, :]
                m_list_test = mem.decode(h_test, K, decoder_mode)
                test_rlist = []
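                # (the snippet is cut off here; a hedged sketch of how the
                # periodic evaluation presumably continues, mirroring the
                # training-loop metrics above -- not verbatim from the source)
                for m_test in m_list_test:
                    test_rlist.append(bisection(h_test / 1e6, m_test)[0])
                m_best_arr = np.vstack(
                    (m_best_arr, m_list_test[np.argmax(test_rlist)]))
                test_rate_ratio.append(np.max(test_rlist) / rate[tidx][0])
            print('step %d: mean test rate ratio %f'
                  % (i + 1, np.mean(test_rate_ratio)))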