def main():
    """Replay the saved agent in ``LunarLander-v2`` while rendering it.

    Builds an ``Agent`` for the environment, loads the weights stored at
    ``./models/agent.pt``, switches the agent to evaluation mode and then
    steps the environment 10000 times, resetting it whenever an episode
    reports ``done``.

    The environment is closed on every exit path (normal completion or an
    exception raised while loading/stepping) so its resources are released.
    """
    env = gym.make("LunarLander-v2")
    try:
        agent = Agent(env)
        agent.load_state_dict(torch.load("./models/agent.pt"))
        agent.eval()

        obs = env.reset()
        # Inference only: nothing is back-propagated here, so autograd
        # bookkeeping is disabled for the whole replay loop.
        with torch.no_grad():
            for _ in range(10000):
                env.render()
                # The environment hands back a NumPy array; the agent is fed
                # a float tensor.
                obs = torch.from_numpy(obs).float()
                action, _, _ = agent.get_action(obs)
                obs, _, done, _ = env.step(action.cpu().numpy())
                sleep(0.001)  # short pause between rendered frames
                if done:
                    obs = env.reset()
    finally:
        env.close()
# Optional learning-rate annealing: `frac` shrinks from 1.0 as `update`
# grows, and `lr` (defined elsewhere) maps it to the rate to use. Only the
# first optimizer parameter group is updated.
if args.anneal_lr:
    frac = 1.0 - (update - 1.0) / num_updates
    lrnow = lr(frac)
    optimizer.param_groups[0]["lr"] = lrnow

# TRY NOT TO MODIFY: prepare the execution of the game.
for step in range(args.num_steps):
    envs.render()
    # One step advances every environment, so the counter moves by num_envs.
    global_step += args.num_envs
    obs[step] = next_obs
    dones[step] = next_done

    # ALGO LOGIC: put action logic here
    # Value estimate and action sampling are done without gradient tracking.
    with torch.no_grad():
        values[step] = agent.get_value(obs[step]).flatten()
        action, logproba, _, invalid_action_masks[step] = agent.get_action(
            obs[step],
            envs=envs,
        )
    actions[step] = action.T
    logprobs[step] = logproba

    # TRY NOT TO MODIFY: execute the game and log data.
    next_obs, rs, ds, infos = envs.step(action.T)
    rewards[step] = rs.view(-1)
    next_done = torch.Tensor(ds).to(device)
    wins[step, :] = 0

    # Report every episode that finished on this step.
    for idx, info in enumerate(infos):
        if "episode" in info:
            print(
                f"global_step={global_step}, episode_reward={info['episode']['r']}"
            )
            writer.add_scalar(
                "charts/episode_reward",
                info["episode"]["r"],
                global_step,
            )
# Annealing the rate if instructed to do so. if args.anneal_lr: frac = 1.0 - (update - 1.0) / num_updates lrnow = lr(frac) optimizer.param_groups[0]['lr'] = lrnow # TRY NOT TO MODIFY: prepare the execution of the game. for step in range(0, args.num_steps): global_step += 1 * args.num_envs obs[step] = next_obs dones[step] = next_done # ALGO LOGIC: put action logic here with torch.no_grad(): values[step] = agent.get_value(obs[step]).flatten() action, logproba, _ = agent.get_action(obs[step]) actions[step] = action logprobs[step] = logproba # TRY NOT TO MODIFY: execute the game and log data. next_obs, rs, ds, infos = envs.step(action) #envs.render() rewards[step], next_done = rs.view(-1), torch.Tensor(ds).to(device) for info in infos: if 'episode' in info.keys(): print( f"global_step={global_step}, episode_reward={info['episode']['r']}" ) writer.add_scalar("charts/episode_reward",
agent.load_state_dict(torch.load(args.agent_model_path)) agent.eval() for update in range(1, num_updates + 1): # TRY NOT TO MODIFY: prepare the execution of the game. for step in range(0, args.num_steps): envs.render() global_step += 1 * args.num_envs obs[step] = next_obs dones[step] = next_done # ALGO LOGIC: put action logic here with torch.no_grad(): values[step] = agent.get_value(obs[step]).flatten() # raise action, logproba, _, invalid_action_masks[step] = agent.get_action( obs[step], envs=envs) # print(action.T.cpu()[0].numpy(), invalid_action_masks[step].cpu()[0].numpy().sum()) actions[step] = action.T logprobs[step] = logproba # TRY NOT TO MODIFY: execute the game and log data. try: next_obs, rs, ds, infos = envs.step(action.T) except Exception as e: e.printStackTrace() raise rewards[step], next_done = rs.view(-1), torch.Tensor(ds).to(device) for idx, info in enumerate(infos): if 'episode' in info.keys():