Example #1
import numpy as np
import pandas as pd

energy = []                    # collected energy costs
EE_rate_total = np.zeros(50)   # 50-slot buffer of energy-efficiency rates
EE_rate_mean = []              # running means of the EE rate
counter = 0
mean_min = 10                  # best (lowest) mean observed so far
min_index = 0                  # index at which mean_min occurred
count_time = 0                 # global step counter (not initialized in the original fragment)
for i_episode in range(1000):
    print('iteration is %d' % i_episode)
    # get the first observation of episode i_episode
    observation = env.reset()
    observation_naive = env_naive.reset(bs_list=env.bs_list)
    number = len(env.bs_list)  # assumed: one 4-feature observation block per base station
    ep_r = 0

    while count_time <= 1000000000:
        count_time += 1
        action = RL.choose_action(observation)
        action_naive = env_naive.choose_action()
        observation_, energy_cost, energy_cost_max, temp = env.step(action, beta=-10, lambda_=0.5)  # get the next state
        # print(observation_)
        observation_naive_, energy_cost_naive, energy_cost_max_naive = env_naive.step(action_naive, temp, beta=-10)
        # pick the 2nd feature of every 4-feature observation block
        index_ob = [i * 4 + 1 for i in range(number)]
        r1 = [observation_[i] for i in index_ob]

        r1 = pd.DataFrame(r1)
        # print(r1)
        r1 = (r1 - 500) / 500.0          # normalize around 500, so a raw 0 maps to -1.0
        r1[r1.iloc[:, 0] != -1.0] = 0    # zero every row except those that normalized to -1.0
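
The masking above keeps only the entries whose raw value was exactly 0 (which normalize to -1.0) and zeroes the rest. A minimal, self-contained sketch of that extraction on dummy data; n_bs and demo_observation are illustrative names, not from the original code, and the 4-features-per-base-station layout is an assumption:

import numpy as np
import pandas as pd

n_bs = 3  # hypothetical number of base stations
# dummy flattened observation: 4 features per base station, raw values in [0, 1000]
demo_observation = np.arange(n_bs * 4, dtype=float)
demo_observation[1] = 0.0  # a raw 0 normalizes to exactly -1.0

picked = [demo_observation[i * 4 + 1] for i in range(n_bs)]  # offset 1 of each block
r1 = pd.DataFrame(picked)
r1 = (r1 - 500) / 500.0          # same normalization as above
r1[r1.iloc[:, 0] != -1.0] = 0    # only the raw-0 entry survives as -1.0
print(r1)                        # rows: -1.0, 0.0, 0.0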
Example #2
import os

from VCM_environment import VCMEN
from RL_brain import DuelingDQN

MEMORY_SIZE = 1000
ACTION_SPACE = 8

if __name__ == "__main__":
    env = VCMEN()
    load_model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                  "models")
    agent = DuelingDQN(n_actions=ACTION_SPACE,
                       n_features=144,
                       memory_size=MEMORY_SIZE,
                       environment_name=env.name,
                       dueling=True,
                       load_model_dir=load_model_dir)
    state_t, reward_t, win = env.observe()  # initial state, reward, and terminal flag

    step = 0
    while not win:
        step += 1
        print(state_t)
        # choose an action for the current (flattened) state
        observation = state_t.flatten()
        action_t = agent.choose_action(observation)
        # act: apply the action, then observe the resulting state and reward
        env.execute_action(action_t)
        state_t_1, reward_t, win = env.observe()
        state_t = state_t_1

    print(step)
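
The loop only relies on a small contract: env.observe() returns a (state, reward, win) triple, env.execute_action(action) advances the environment, and the agent exposes choose_action(observation). A minimal sketch of that contract with hypothetical stand-ins (DummyEnv and DummyAgent are illustrative, not part of VCM_environment or RL_brain); the 12x12 dummy state flattens to the 144 features the agent above is configured with:

import numpy as np

class DummyEnv:
    """Hypothetical stand-in that mimics the VCMEN contract used above."""
    def __init__(self, size=12):
        self.name = "dummy"
        self.state = np.zeros((size, size))  # flattens to size * size features
        self.steps = 0

    def observe(self):
        # returns (state, reward, win), as env.observe() does above
        win = self.steps >= 5
        return self.state, float(win), win

    def execute_action(self, action):
        self.steps += 1  # a real environment would update self.state here

class DummyAgent:
    """Hypothetical stand-in for the acting half of DuelingDQN."""
    def __init__(self, n_actions):
        self.n_actions = n_actions

    def choose_action(self, observation):
        return np.random.randint(self.n_actions)  # random policy, illustration only

env = DummyEnv()
agent = DummyAgent(n_actions=8)
state_t, reward_t, win = env.observe()
step = 0
while not win:
    step += 1
    action_t = agent.choose_action(state_t.flatten())
    env.execute_action(action_t)
    state_t, reward_t, win = env.observe()
print(step)  # 5 with the dummy environment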