Example #1
def Benchmark_test_time(max_episode, Ns, fd, max_dis, maxM):
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p,
                       p_n, power_num)
    bench = Benchmark_alg(env)
    env.set_Ns(Ns)
    time_cost_1 = 0
    time_cost_2 = 0
    for k in range(1, max_episode + 1):
        s = env.reset__()
        for i in range(int(Ns) - 1):
            tc, p = bench.time_cost(s)  # per-step timing components and the benchmark's power allocation
            time_cost_1 = time_cost_1 + tc[0]
            time_cost_2 = time_cost_2 + tc[1]
            s_next, _ = env.step__(p)
            s = s_next

    return [time_cost_1, time_cost_2]
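A minimal usage sketch for the timing helper above, assuming Benchmark_test_time and the module's globals are in scope; the argument values below are illustrative, not taken from the original configuration.

# Hypothetical settings; the real values come from the module's configuration.
max_episode, Ns, fd, max_dis, maxM = 10, 50, 10, 1.0, 4
t1, t2 = Benchmark_test_time(max_episode, Ns, fd, max_dis, maxM)
steps = max_episode * (int(Ns) - 1)
print("benchmark timing 1: %.6f s/step" % (t1 / steps))
print("benchmark timing 2: %.6f s/step" % (t2 / steps))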
Example #2
def Benchmark_test(max_episode, Ns, fd, max_dis, maxM):
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p,
                       p_n, power_num)
    bench = Benchmark_alg(env)
    env.set_Ns(Ns)
    reward_hist = list()
    for k in range(1, max_episode + 1):
        reward_list = list()
        s = env.reset__()
        for i in range(int(Ns) - 1):
            p = bench.calculate(s)
            s_next, r = env.step__(p)
            s = s_next
            reward_list.append(r)
        reward_hist.append(reward_list)

    reward_hist = np.reshape(reward_hist, [max_episode, int(Ns) - 1, 4])
    reward_hist = np.nanmean(np.nanmean(reward_hist, 0), 0)  # average over episodes, then over time steps
    return reward_hist
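A sketch of sweeping Benchmark_test over different coverage radii, assuming the function above is in scope; the radii and the remaining argument values are assumed for illustration.

import numpy as np

# Hypothetical sweep; argument values are assumed, not from the original module.
for max_dis in (0.5, 1.0, 1.5, 2.0):
    rate = Benchmark_test(max_episode=10, Ns=50, fd=10, max_dis=max_dis, maxM=4)
    print("max_dis=%.1f -> mean rate per link: %s bps/Hz" % (max_dis, np.round(rate, 3)))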
Example #3
def Test_policy_time(weight_file, max_episode, Ns, fd, max_dis, maxM):
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p,
                       p_n, power_num)
    tf.reset_default_graph()
    with tf.Session() as sess:
        env.set_Ns(Ns)
        dnn = DNN(env, weight_file)
        policy = Policy(sess, dnn)

        tf.global_variables_initializer().run()
        policy.load_params()
        time_cost = 0
        for k in range(1, max_episode + 1):
            s_actor, _ = env.reset()
            for i in range(int(Ns) - 1):
                st = time.time()
                p = policy.predict_p(s_actor)
                time_cost = time_cost + time.time() - st  # accumulate wall-clock time of policy inference only
                s_actor, _, _, _ = env.step(p)
    return time_cost
Example #4
def Test_policy_all(weight_file, max_episode, Ns, fd, max_dis, maxM):
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p,
                       p_n, power_num)
    tf.reset_default_graph()
    with tf.Session() as sess:
        env.set_Ns(Ns)
        dnn = DNN(env, weight_file)
        policy = Policy(sess, dnn)

        tf.global_variables_initializer().run()
        policy.load_params()
        reward_hist = list()
        for k in range(1, max_episode + 1):
            reward_policy_list = list()
            s_actor, _ = env.reset()
            for i in range(int(Ns) - 1):
                p = policy.predict_p(s_actor)
                s_actor, _, _, r = env.step(p)
                reward_policy_list.append(r)
            reward_hist.append(np.mean(reward_policy_list))  # bps/Hz per link
    return np.mean(reward_hist)
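A sketch comparing the learned policy against the benchmark from Example #2 under identical settings, assuming both functions are in scope; the weight-file path and argument values are hypothetical.

import numpy as np

# Hypothetical weight file and settings, for illustration only.
args = dict(max_episode=10, Ns=50, fd=10, max_dis=1.0, maxM=4)
policy_rate = Test_policy_all('./saved_networks/dnn_weights.mat', **args)
bench_rate = np.mean(Benchmark_test(**args))
print("learned policy: %.3f bps/Hz per link" % policy_rate)
print("benchmark:      %.3f bps/Hz per link" % bench_rate)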
Example #5
def Test_ddpg_all(weight_file, max_episode, Ns, fd, max_dis, maxM):
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p,
                       p_n, power_num)
    tf.reset_default_graph()
    with tf.Session() as sess:
        env.set_Ns(Ns)
        dnn = DDPG(env, weight_file)
        actor = Actor(sess, dnn)

        tf.global_variables_initializer().run()
        actor.load_params()
        reward_hist = list()
        for k in range(1, max_episode + 1):
            reward_list = list()
            s_actor, _ = env.reset()
            for i in range(int(Ns) - 1):
                p = actor.predict_p(s_actor)
                s_actor_next, _, _, r = env.step(p[:, 0])
                s_actor = s_actor_next
                reward_list.append(r)
            reward_hist.append(np.mean(reward_list))
    return np.mean(reward_hist)
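The p[:, 0] indexing above suggests the DDPG actor returns one continuous power per link as a column vector; a small shape check under that assumption:

import numpy as np

# Assumed actor output shape: (maxM, 1), one transmit power per link.
p = np.random.rand(4, 1)
print(p.shape, p[:, 0].shape)   # (4, 1) -> (4,): env.step receives a flat power vector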
Example #6
def Train_policy_quan(weight_file, power_num, fd, max_dis, maxM):
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p,
                       p_n, power_num)
    tf.reset_default_graph()
    with tf.Session() as sess:
        Train(sess, env, weight_file)
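A sketch of sweeping the size of the discrete power codebook with Train_policy_quan, assuming the function above is in scope; the weight-file naming scheme and the fd/max_dis/maxM values are assumptions.

# Hypothetical sweep over power_num (number of discrete power levels).
for power_num in (4, 8, 16):
    weight_file = './saved_networks/policy_quan_%d.mat' % power_num
    Train_policy_quan(weight_file, power_num, fd=10, max_dis=1.0, maxM=4)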
Example #7
def Test_policy_time(weight_file, max_episode, Ns, fd, max_dis, maxM):
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p,
                       p_n, power_num)
    tf.reset_default_graph()
    with tf.Session() as sess:
        env.set_Ns(Ns)
        dnn = DNN(env, weight_file)
        policy = Policy(sess, dnn)

        tf.global_variables_initializer().run()
        policy.load_params()
        time_cost = 0
        for k in range(1, max_episode + 1):
            s_actor, _ = env.reset()
            for i in range(int(Ns) - 1):
                st = time.time()
                p = policy.predict_p(s_actor)
                time_cost = time_cost + time.time() - st
                s_actor, _, _, _ = env.step(p)
    return time_cost


if __name__ == "__main__":
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p,
                       p_n, power_num)
    weight_file = './saved_networks/reinforce_2.mat'
    tf.reset_default_graph()
    with tf.Session() as sess:
        train_hist = Train(sess, env, weight_file)

    tf.reset_default_graph()
    with tf.Session() as sess:
        test_hist = Test(sess, env, weight_file)
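
If Train and Test return per-episode reward sequences (not shown in the snippets above), the histories could be inspected along these lines; the return types and the output filename are assumptions.

import matplotlib.pyplot as plt

# Assumes train_hist / test_hist are 1-D per-episode reward sequences.
plt.plot(train_hist, label='train')
plt.plot(test_hist, label='test')
plt.xlabel('episode')
plt.ylabel('average rate (bps/Hz)')
plt.legend()
plt.savefig('reinforce_2_history.png')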