Example 1
import gym
import torch

from env_wrappers import ActionNormalizedEnv
from models import DDPG_Actor

model_name = 'ddpg_01'
env_id = "Pendulum-v0"
identity = model_name + '_' + env_id
env = ActionNormalizedEnv(gym.make(env_id))

obs_size = env.observation_space.shape[0]
act_size = env.action_space.shape[0]
act_net = DDPG_Actor(obs_size, act_size)
act_net.load_state_dict(torch.load(identity + '_act.pth'))  # restore trained actor weights


def test_policy(actor, env, vis=False, n_episodes=2, max_len=500):
    returns = []
    for i_episode in range(n_episodes):
        state = env.reset()
        if vis: env.render()
        episode_return = 0
        for t in range(max_len):
            action = actor.get_action([state])
            state, reward, done, _ = env.step(action)
            episode_return += reward
            if vis: env.render()
            if done:
                break
        # Record the return whether the episode terminated or hit max_len.
        returns.append(episode_return)
    return returns
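
A hedged usage sketch (the episode count below is arbitrary; only act_net, env and test_policy come from the example): evaluate the restored actor and report its average return.

returns = test_policy(act_net, env, vis=True, n_episodes=5)
print('average return over %d episodes: %.2f'
      % (len(returns), sum(returns) / len(returns)))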
Example 2
import gym
import numpy as np
import torch.nn as nn
import torch.optim as optim

from env_wrappers import ActionNormalizedEnv
from models import *
from ou import OUNoise
from replay_buffer import ReplayBuffer
from utils import *

GAMMA = 0.99      # discount factor
SOFT_TAU = 1e-2   # Polyak coefficient for soft target-network updates

model_name = 'ddpg_01'
env_id = "Pendulum-v0"
identity = model_name + '_' + env_id
env = ActionNormalizedEnv(gym.make(env_id))

ou_noise = OUNoise(env.action_space)  # Ornstein-Uhlenbeck exploration noise

obs_size = env.observation_space.shape[0]
act_size = env.action_space.shape[0]

act_net = DDPG_Actor(obs_size, act_size)
cri_net = DDPG_Critic(obs_size, act_size)
act_net_t = DDPG_Actor(obs_size, act_size)
cri_net_t = DDPG_Critic(obs_size, act_size)

# Initialize the target networks with the same weights as the online networks.
hard_update(act_net_t, act_net)
hard_update(cri_net_t, cri_net)

mse_criterion = nn.MSELoss()
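
The excerpt stops before the training loop, but SOFT_TAU and the *_t target networks exist for DDPG's soft (Polyak) target updates. A minimal sketch of that step follows; utils presumably ships an equivalent soft_update next to hard_update, so this local version is only illustrative and assumes the (target, source) argument order used above.

def soft_update(target_net, source_net, tau=SOFT_TAU):
    # Polyak averaging: target <- tau * source + (1 - tau) * target
    for t_param, s_param in zip(target_net.parameters(), source_net.parameters()):
        t_param.data.copy_(tau * s_param.data + (1.0 - tau) * t_param.data)


# Called once per gradient step, after the actor and critic updates:
soft_update(act_net_t, act_net, SOFT_TAU)
soft_update(cri_net_t, cri_net, SOFT_TAU)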
Example 3
def _thunk():
    env = gym.make(env_id)  # env_id is captured from the enclosing scope
    return ActionNormalizedEnv(env)
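
The closure above is the usual thunk pattern for building environments lazily (e.g. for vectorized training). A minimal self-contained sketch of the enclosing factory follows; the make_env name and the batch size are assumptions, and only _thunk, gym.make and ActionNormalizedEnv come from the example.

import gym

from env_wrappers import ActionNormalizedEnv


def make_env(env_id):
    # Return a zero-argument callable that builds a fresh wrapped environment.
    def _thunk():
        env = gym.make(env_id)
        return ActionNormalizedEnv(env)
    return _thunk


# Usage: create several independent constructors, e.g. to feed a vectorized env.
env_fns = [make_env("Pendulum-v0") for _ in range(4)]
envs = [fn() for fn in env_fns]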