import gym
import torch

from env_wrappers import ActionNormalizedEnv
from models import DDPG_Actor

model_name = 'ddpg_01'
env_id = "Pendulum-v0"
identity = model_name + '_' + env_id

env = ActionNormalizedEnv(gym.make(env_id))
obs_size = env.observation_space.shape[0]
act_size = env.action_space.shape[0]

# Rebuild the actor and load the weights saved during training.
act_net = DDPG_Actor(obs_size, act_size)
act_net.load_state_dict(torch.load(identity + '_act.pth'))


def test_policy(actor, env, vis=False, n_episodes=2, max_len=500):
    returns = []
    for i_episode in range(n_episodes):
        state = env.reset()
        if vis:
            env.render()
        episode_return = 0
        for t in range(max_len):
            action = actor.get_action([state])
            state, reward, done, _ = env.step(action)
            episode_return += reward
            if vis:
                env.render()
            if done:
                break
        # Record the return even when the episode is cut off at max_len,
        # so episodes that never signal done are not silently dropped.
        returns.append(episode_return)
    return returns
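# Usage sketch (an assumption, not part of the original script): evaluate the
# loaded actor and report its average return. Assumes act_net, env and
# test_policy are defined as above and that no gradients are needed.
act_net.eval()
with torch.no_grad():
    returns = test_policy(act_net, env, vis=False, n_episodes=5, max_len=500)
print('average return over 5 episodes:', sum(returns) / len(returns))
env.close()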
import gym
import numpy as np
import torch.nn as nn
import torch.optim as optim

from env_wrappers import ActionNormalizedEnv
from models import *
from ou import OUNoise
from replay_buffer import ReplayBuffer
from utils import *

GAMMA = 0.99      # discount factor for the critic target
SOFT_TAU = 1e-2   # interpolation factor for the soft target update

model_name = 'ddpg_01'
env_id = "Pendulum-v0"
identity = model_name + '_' + env_id

env = ActionNormalizedEnv(gym.make(env_id))
ou_noise = OUNoise(env.action_space)

obs_size = env.observation_space.shape[0]
act_size = env.action_space.shape[0]

# Online actor/critic and their target copies.
act_net = DDPG_Actor(obs_size, act_size)
cri_net = DDPG_Critic(obs_size, act_size)
act_net_t = DDPG_Actor(obs_size, act_size)
cri_net_t = DDPG_Critic(obs_size, act_size)

# Initialize the targets with the same weights as the online networks.
hard_update(act_net_t, act_net)
hard_update(cri_net_t, cri_net)

mse_criterion = nn.MSELoss()
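# The setup above relies on hard_update (and, during training, soft_update)
# from utils, together with the discounted critic target parameterized by
# GAMMA and SOFT_TAU. The sketch below shows plausible versions of those
# helpers and a single DDPG gradient step under the standard semantics; the
# exact DDPG_Critic signature, the replay-batch layout, and the optimizer
# settings are assumptions for illustration, not taken from the source.
import torch


def hard_update(target, source):
    # Copy the online weights into the target network verbatim.
    for t_param, s_param in zip(target.parameters(), source.parameters()):
        t_param.data.copy_(s_param.data)


def soft_update(target, source, tau=SOFT_TAU):
    # Polyak averaging: target <- (1 - tau) * target + tau * online.
    for t_param, s_param in zip(target.parameters(), source.parameters()):
        t_param.data.copy_((1.0 - tau) * t_param.data + tau * s_param.data)


act_optim = optim.Adam(act_net.parameters(), lr=1e-4)
cri_optim = optim.Adam(cri_net.parameters(), lr=1e-3)


def ddpg_update(batch):
    # batch is assumed to hold torch tensors: (state, action, reward, next_state, done).
    state, action, reward, next_state, done = batch

    # Critic: regress Q(s, a) toward r + GAMMA * (1 - done) * Q_t(s', mu_t(s')).
    with torch.no_grad():
        target_q = reward + GAMMA * (1.0 - done) * cri_net_t(next_state, act_net_t(next_state))
    critic_loss = mse_criterion(cri_net(state, action), target_q)
    cri_optim.zero_grad()
    critic_loss.backward()
    cri_optim.step()

    # Actor: maximize the critic's value of the actor's own actions.
    actor_loss = -cri_net(state, act_net(state)).mean()
    act_optim.zero_grad()
    actor_loss.backward()
    act_optim.step()

    # Let the targets slowly track the online networks.
    soft_update(act_net_t, act_net)
    soft_update(cri_net_t, cri_net)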
def _thunk():
    # Factory that builds a fresh, action-normalized environment on demand.
    env = gym.make(env_id)
    return ActionNormalizedEnv(env)
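# Usage sketch (an assumption about how the factory is consumed): because
# _thunk defers construction, each call yields a fresh, independently wrapped
# environment, which is the usual reason for passing factories around instead
# of ready-made env objects. The loop below is only illustrative.
env_fns = [_thunk for _ in range(4)]            # four factories, no envs built yet
envs = [make_env() for make_env in env_fns]     # now build four independent envs
states = [e.reset() for e in envs]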