def main():
    """Load a saved PPO checkpoint and run it against the Unity Reacher env.

    Launches the 20-agent Reacher build, reads the observation/action sizes
    from the default brain, rebuilds the agent with the training-time
    hyperparameters, restores the checkpoint weights, and hands everything
    to ``test_agent`` for a real-time rollout.
    """
    device = torch.device("cpu")

    # Start the pre-built Unity environment on a fixed port.
    env = UnityEnvironment(file_name='reacher20/reacher', base_port=64739)

    # The default brain describes the action space; a training-mode reset
    # yields the per-agent observations we need for the state size.
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]

    action_size = brain.vector_action_space_size
    num_agents = len(env_info.agents)
    initial_obs = env_info.vector_observations
    state_size = initial_obs.shape[1]

    # Hyperparameters mirror the ones used when the checkpoint was trained.
    agent = PPOAgent(state_size=state_size,
                     action_size=action_size,
                     hidden_size=256,
                     num_agents=num_agents,
                     random_seed=0,
                     ppo_epochs=4,
                     mini_batch_size=128,
                     normalize_advantages=True,
                     learning_rate=3e-4,
                     clip_gradients=True,
                     gamma=0.99,
                     tau=0.95,
                     device=device)
    agent.load_model('assets/ppo_checkpoint_37.10.pth')

    test_agent(env, brain_name, agent, device, real_time=True)
def evaluate(args):
    """Watch a trained PPO agent play for 100 rendered episodes.

    A throwaway gym env is created only to extract its parameters, then a
    rendering ``AtariEnvironment`` worker process is driven over a pipe:
    the agent picks an action from the (0-1 scaled) observation, the worker
    steps, and the loop repeats until the episode ends.
    """
    # Probe the environment's parameters, then discard it — the actual
    # stepping happens in the worker process below.
    env = gym.make(args.env)
    env_params = get_env_params(env, args)
    env.close()

    agent = PPOAgent(args, env_params)
    agent.load_model(load_model_remark=args.load_model_remark)

    parent_conn, child_conn = Pipe()
    worker = AtariEnvironment(args.env, 1, child_conn, is_render=True,
                              max_episode_step=args.max_episode_step)
    worker.start()

    for episode in range(100):
        obs = worker.reset()
        while True:
            # Add a batch axis and scale pixels to [0, 1] before inference.
            batched = np.expand_dims(obs, axis=0)
            action = agent.choose_action(batched / 255)
            parent_conn.send(action[0])
            next_obs, reward, done, info = parent_conn.recv()
            obs = next_obs
            if done:
                break
def main(args):
    """Build the PPO training stack and run training.

    Seeds the environment and torch, wires together the model, optimizer,
    rollout memory, and agent, optionally restores a previous snapshot,
    then trains with periodic saving under the ``"snapshot"`` prefix.
    """
    model_store_sprefix = "snapshot"

    # NormalizedEnv
    env = gym.make(args.env)

    # Seed both the env and torch for reproducible runs.
    env.seed(args.seed)
    torch.manual_seed(args.seed)

    env, generator, model, cont = get_functions(env, args)
    optimizer = optim.Adam(model.parameters(), lr=args.rllr)
    memory = Memory(args)

    agent = PPOAgent(args, model, optimizer, env, generator, memory, cont)
    if args.resume:
        # Continue from the most recent snapshot instead of starting fresh.
        agent.load_model(model_store_sprefix)

    agent.train(model_store_sprefix, args.save_interval)
env = gym.make(args.env) envs = SubprocVecEnv([make_env(args.env) for i in range(args.n_envs)]) n_inputs = envs.observation_space.shape[0] n_outs = envs.action_space.n agent = PPOAgent(lr=args.lr, n_inputs=n_inputs, n_hidden=args.n_hidden, n_outs=n_outs, td_n=args.td_n, ppo_epochs=args.ppo_epochs, mini_batch_size=args.mini_batch_size) if args.load_best_pretrained_model: agent.load_model('../models/ppo/model.pt') print('Loaded pretrained model') if args.test_env: state = env.reset() done = False score = 0 while not done: env.render() dist, value = agent.step(state) action = dist.sample() state, reward, done, _ = env.step(action.cpu().numpy()) score += reward print(score) else: