def Benchmark_test_time(max_episode, Ns, fd, max_dis, maxM):
    """Accumulate the two per-call timing figures returned by Benchmark_alg.time_cost
    over max_episode episodes of Ns slots each."""
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p, p_n, power_num)
    bench = Benchmark_alg(env)
    env.set_Ns(Ns)
    time_cost_1 = 0
    time_cost_2 = 0
    for k in range(1, max_episode + 1):
        s = env.reset__()
        for i in range(int(Ns) - 1):
            tc, p = bench.time_cost(s)
            time_cost_1 = time_cost_1 + tc[0]
            time_cost_2 = time_cost_2 + tc[1]
            s_next, _ = env.step__(p)
            s = s_next
    return [time_cost_1, time_cost_2]
def Benchmark_test(max_episode, Ns, fd, max_dis, maxM):
    """Run the benchmark allocators for max_episode episodes and return the average
    of each of the four per-step reward entries over all episodes and slots."""
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p, p_n, power_num)
    bench = Benchmark_alg(env)
    env.set_Ns(Ns)
    reward_hist = list()
    for k in range(1, max_episode + 1):
        reward_list = list()
        s = env.reset__()
        for i in range(int(Ns) - 1):
            p = bench.calculate(s)
            s_next, r = env.step__(p)
            s = s_next
            reward_list.append(r)
        reward_hist.append(reward_list)
    # Each step reward r carries 4 entries (presumably one per benchmark scheme),
    # hence the trailing axis of size 4.
    reward_hist = np.reshape(reward_hist, [max_episode, Ns - 1, 4])
    reward_hist = np.nanmean(np.nanmean(reward_hist, 0), 0)
    return reward_hist
def Test_policy_time(weight_file, max_episode, Ns, fd, max_dis, maxM):
    """Measure the total time spent in policy.predict_p over max_episode episodes."""
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p, p_n, power_num)
    tf.reset_default_graph()
    with tf.Session() as sess:
        env.set_Ns(Ns)
        dnn = DNN(env, weight_file)
        policy = Policy(sess, dnn)
        tf.global_variables_initializer().run()
        policy.load_params()
        time_cost = 0
        for k in range(1, max_episode + 1):
            s_actor, _ = env.reset()
            for i in range(int(Ns) - 1):
                st = time.time()
                p = policy.predict_p(s_actor)  # time only the network forward pass
                time_cost = time_cost + time.time() - st
                s_actor, _, _, _ = env.step(p)
        return time_cost
def Test_policy_all(weight_file, max_episode, Ns, fd, max_dis, maxM):
    """Evaluate the trained policy and return its average per-step reward."""
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p, p_n, power_num)
    tf.reset_default_graph()
    with tf.Session() as sess:
        env.set_Ns(Ns)
        dnn = DNN(env, weight_file)
        policy = Policy(sess, dnn)
        tf.global_variables_initializer().run()
        policy.load_params()
        reward_hist = list()
        for k in range(1, max_episode + 1):
            reward_policy_list = list()
            s_actor, _ = env.reset()
            for i in range(int(Ns) - 1):
                p = policy.predict_p(s_actor)
                s_actor, _, _, r = env.step(p)
                reward_policy_list.append(r)
            reward_hist.append(np.mean(reward_policy_list))  # bps/Hz per link
        return np.mean(reward_hist)
def Test_ddpg_all(weight_file, max_episode, Ns, fd, max_dis, maxM):
    """Evaluate the trained DDPG actor and return its average per-step reward."""
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p, p_n, power_num)
    tf.reset_default_graph()
    with tf.Session() as sess:
        env.set_Ns(Ns)
        dnn = DDPG(env, weight_file)
        actor = Actor(sess, dnn)
        tf.global_variables_initializer().run()
        actor.load_params()
        reward_hist = list()
        for k in range(1, max_episode + 1):
            reward_list = list()
            s_actor, _ = env.reset()
            for i in range(int(Ns) - 1):
                p = actor.predict_p(s_actor)
                # The actor returns a 2-D array; take its first column as the power vector.
                s_actor_next, _, _, r = env.step(p[:, 0])
                s_actor = s_actor_next
                reward_list.append(r)
            reward_hist.append(np.mean(reward_list))
        return np.mean(reward_hist)
def Train_policy_quan(weight_file, power_num, fd, max_dis, maxM):
    """Train a policy network for a given power quantization setting (power_num)."""
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p, p_n, power_num)
    tf.reset_default_graph()
    with tf.Session() as sess:
        Train(sess, env, weight_file)
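# A minimal usage sketch (not part of the original script): training one policy per
# quantization level. The helper name, the power_num values, and the weight-file
# naming pattern below are illustrative assumptions only.
def _train_all_quantization_levels():
    for p_num in [4, 6, 8, 10]:
        Train_policy_quan('./saved_networks/reinforce_quan_%d.mat' % p_num,
                          p_num, fd, max_dis, maxM)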
if __name__ == "__main__":
    env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p, p_n, power_num)
    weight_file = './saved_networks/reinforce_2.mat'
    # Train the policy in one graph/session, then evaluate it in a fresh one.
    tf.reset_default_graph()
    with tf.Session() as sess:
        train_hist = Train(sess, env, weight_file)
    tf.reset_default_graph()
    with tf.Session() as sess:
        test_hist = Test(sess, env, weight_file)
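    # A possible follow-up evaluation sweep (sketch; not in the original main block):
    # compare the learned policy against the benchmark schemes for several user
    # numbers. The max_episode, Ns, and maxM values below are illustrative assumptions.
    # for m in [2, 3, 4]:
    #     policy_rate = Test_policy_all(weight_file, 10, 50, fd, max_dis, m)
    #     bench_rates = Benchmark_test(10, 50, fd, max_dis, m)
    #     print('maxM = %d: policy %.3f, benchmarks %s' % (m, policy_rate, bench_rates))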