Example 1
import os

import gym
import numpy as np
from torch.utils.tensorboard import SummaryWriter

# DDPG, NormalizedActions and the constants SAVED_MODEL_PATH, SEQUENCE and
# RESULT_PATH are assumed to come from the surrounding project.


def eval(cfg, saved_model_path=SAVED_MODEL_PATH):
    print('Start to evaluate!\n')
    env = NormalizedActions(gym.make("Pendulum-v0"))
    n_states = env.observation_space.shape[0]
    n_actions = env.action_space.shape[0]
    agent = DDPG(n_states,
                 n_actions,
                 critic_lr=1e-3,
                 actor_lr=1e-4,
                 gamma=0.99,
                 soft_tau=1e-2,
                 memory_capacity=100000,
                 batch_size=128)
    agent.load_model(saved_model_path + 'checkpoint.pth')
    rewards = []
    moving_average_rewards = []
    ep_steps = []
    log_dir = os.path.split(
        os.path.abspath(__file__))[0] + "/logs/eval/" + SEQUENCE
    writer = SummaryWriter(log_dir)
    for i_episode in range(1, cfg.eval_eps + 1):
        state = env.reset()  # reset the environment state
        ep_reward = 0
        for i_step in range(1, cfg.eval_steps + 1):
            action = agent.select_action(state)  # choose an action for the current state
            next_state, reward, done, _ = env.step(action)  # step the environment
            ep_reward += reward
            state = next_state  # move to the next state
            if done:
                break
        print('Episode:', i_episode, ' Reward: %i' % int(ep_reward),
              'n_steps:', i_step, 'done: ', done)
        ep_steps.append(i_step)
        rewards.append(ep_reward)
        # exponentially smoothed (moving-average) reward
        if i_episode == 1:
            moving_average_rewards.append(ep_reward)
        else:
            moving_average_rewards.append(0.9 * moving_average_rewards[-1] +
                                          0.1 * ep_reward)
        writer.add_scalars('rewards', {
            'raw': rewards[-1],
            'moving_average': moving_average_rewards[-1]
        }, i_episode)
        writer.add_scalar('steps_of_each_episode', ep_steps[-1], i_episode)
    writer.close()
    '''Save rewards and related results'''
    if not os.path.exists(RESULT_PATH):  # create the results folder if needed
        os.mkdir(RESULT_PATH)
    np.save(RESULT_PATH + 'rewards_eval.npy', rewards)
    np.save(RESULT_PATH + 'moving_average_rewards_eval.npy',
            moving_average_rewards)
    np.save(RESULT_PATH + 'steps_eval.npy', ep_steps)
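
Example 1 wraps the environment in a NormalizedActions wrapper that is not shown in the snippet. A minimal sketch of such an action-rescaling wrapper, assuming the policy outputs actions in [-1, 1] (the mapping below is an illustration, not the project's actual code):

import gym
import numpy as np


class NormalizedActions(gym.ActionWrapper):
    """Rescale agent actions from [-1, 1] to the env's [low, high] range."""

    def action(self, action):
        # assumption: the policy emits actions in [-1, 1]
        low, high = self.action_space.low, self.action_space.high
        action = low + (action + 1.0) * 0.5 * (high - low)
        return np.clip(action, low, high)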
Example 2
import gym
from agent import DDPG

env = gym.make('Pendulum-v0')

agent = DDPG(env)
agent.load_model()  # restore the trained weights

state = env.reset()

cumulative_reward = 0
for i in range(200):  # a Pendulum-v0 episode lasts 200 steps
    action = agent.get_action(state)
    env.render()
    # scale the action to Pendulum's [-2, 2] torque range
    state, reward, _, _ = env.step(action * 2)
    cumulative_reward += reward
print('Cumulative Reward: {}'.format(cumulative_reward))
env.close()
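
The reward curves that Example 1 writes to disk can be inspected offline. A minimal sketch, assuming RESULT_PATH points at the same (hypothetical) results directory used above and that matplotlib is available:

import numpy as np
import matplotlib.pyplot as plt

RESULT_PATH = './results/'  # hypothetical; use the directory Example 1 wrote to

rewards = np.load(RESULT_PATH + 'rewards_eval.npy')
smoothed = np.load(RESULT_PATH + 'moving_average_rewards_eval.npy')

plt.plot(rewards, label='raw')
plt.plot(smoothed, label='moving average')
plt.xlabel('episode')
plt.ylabel('reward')
plt.legend()
plt.show()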