import numpy as np
import pandas as pd

# env, env_naive, RL, and number are assumed to be defined earlier in the script
# (their construction is not shown in this fragment).
energy = []
EE_rate_total = np.zeros(50)
EE_rate_mean = []
counter = 0
mean_min = 10
min_index = 0
count_time = 0  # global step counter (initialization assumed, not shown in the original fragment)

for i_episode in range(1000):
    print('iteration is %d' % i_episode)
    # Get the first observation of episode i_episode
    observation = env.reset()
    observation_naive = env_naive.reset(bs_list=env.bs_list)
    ep_r = 0
    while count_time <= 1000000000:
        count_time += 1
        action = RL.choose_action(observation)
        action_naive = env_naive.choose_action()
        observation_, energy_cost, energy_cost_max, temp = env.step(action, beta=-10, lambda_=0.5)  # Get the next state
        # print(observation_)
        observation_naive_, energy_cost_naive, energy_cost_max_naive = env_naive.step(action_naive, temp, beta=-10)

        # Collect every 4th feature (offset 1) of the new observation
        index_ob = np.zeros(number)
        for i in range(number):
            index_ob[i] = i * 4 + 1
        r1 = []
        for i in index_ob:
            r1.append(observation_[int(i)])
        r1 = pd.DataFrame(r1)
        # print(r1)
        # Scale to [-1, 1]; zero out every row that does not normalize to exactly -1.0
        r1 = (r1 - 500) / 500.0
        r1[r1.iloc[:, 0] != -1.0] = 0
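# --- Illustrative aside (not part of the original script) --------------------
# The index_ob / r1 block above can be written as one vectorized NumPy step.
# This standalone sketch assumes observation_ is a flat numeric array and
# `number` is the count of 4-field entries it contains; the helper name is
# hypothetical.
import numpy as np

def extract_and_normalize(observation_, number):
    """Pick every 4th feature (offset 1), scale to [-1, 1], keep only the -1.0 markers."""
    r1 = (np.asarray(observation_, dtype=float)[np.arange(number) * 4 + 1] - 500.0) / 500.0
    r1[r1 != -1.0] = 0.0  # zero out everything except the entries that normalized to -1.0
    return r1

# Example: two 4-field entries; the second slot of the second entry holds the
# raw value 0, which normalizes to -1.0 and is therefore kept.
demo = np.array([7.0, 250.0, 1.0, 3.0, 9.0, 0.0, 2.0, 4.0])
print(extract_and_normalize(demo, number=2))  # -> [ 0. -1.]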
import os

from VCM_environment import VCMEN
from RL_brain import DuelingDQN

MEMORY_SIZE = 1000
ACTION_SPACE = 8

if __name__ == "__main__":
    env = VCMEN()
    # Load the trained model from the "models" directory next to this script
    load_model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
    agent = DuelingDQN(n_actions=ACTION_SPACE,
                       n_features=144,
                       memory_size=MEMORY_SIZE,
                       environment_name=env.name,
                       dueling=True,
                       load_model_dir=load_model_dir)

    state_t, reward_t, win = env.observe()
    step = 0
    while not win:
        step += 1
        print(state_t)
        # Choose an action for the flattened state
        observation = state_t.flatten()
        action_t = agent.choose_action(observation)
        # Act and observe the resulting state
        env.execute_action(action_t)
        state_t_1, reward_t, win = env.observe()
        state_t = state_t_1
    print(step)
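# --- Illustrative aside (not part of the original script) --------------------
# DuelingDQN with dueling=True refers to the dueling network architecture
# (Wang et al., 2016), in which Q-values are assembled from a state-value
# stream V(s) and an advantage stream A(s, a). The sketch below shows only
# that aggregation step with NumPy; the function and variable names are
# assumptions, not the RL_brain implementation.
import numpy as np

def dueling_q_values(value, advantages):
    """Combine V(s) (one scalar per state) and A(s, a) (one value per action) into Q(s, a).

    Subtracting the mean advantage keeps the decomposition identifiable:
    Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)).
    """
    value = np.asarray(value, dtype=float).reshape(-1, 1)   # shape (batch, 1)
    advantages = np.asarray(advantages, dtype=float)        # shape (batch, n_actions)
    return value + (advantages - advantages.mean(axis=1, keepdims=True))

# One state, 8 actions (matching ACTION_SPACE): the greedy action is the one
# with the largest advantage, regardless of the value baseline.
q = dueling_q_values([1.5], [[0.2, -0.1, 0.4, 0.0, 0.1, -0.3, 0.2, 0.0]])
print(q.argmax(axis=1))  # -> [2]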