import gym
import torch
from time import sleep

# `Agent` is defined elsewhere in the source repository (not shown in this snippet).


def main():

    env = gym.make("LunarLander-v2")
    agent = Agent(env)
    agent.load_state_dict(torch.load("./models/agent.pt"))
    agent.eval()

    obs = env.reset()
    done = False
    for i in range(10000):
        env.render()
        obs = torch.from_numpy(obs).float()
        action, _, _ = agent.get_action(obs)
        obs, rew, done, info = env.step(action.cpu().numpy())
        sleep(0.001)
        if done:
            obs = env.reset()
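
# A minimal entry point so this snippet can be run directly as a script
# (an assumption; the original repository may wire main() up differently):
if __name__ == "__main__":
    main()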
Example #2
    if args.anneal_lr:
        frac = 1.0 - (update - 1.0) / num_updates
        lrnow = lr(frac)
        optimizer.param_groups[0]['lr'] = lrnow
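    # Note: `lr` is assumed here (it is not defined in this snippet) to be a linear
    # annealing schedule over the initial learning rate, e.g. something like
    # lr = lambda f: f * args.learning_rate, so lrnow shrinks toward zero as
    # `update` approaches num_updates.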

    # TRY NOT TO MODIFY: prepare the execution of the game.
    for step in range(0, args.num_steps):
        envs.render()
        global_step += 1 * args.num_envs
        obs[step] = next_obs
        dones[step] = next_done

        # ALGO LOGIC: put action logic here
        with torch.no_grad():
            values[step] = agent.get_value(obs[step]).flatten()
            action, logproba, _, invalid_action_masks[step] = agent.get_action(
                obs[step], envs=envs)
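            # invalid_action_masks[step] stores the per-env action mask returned
            # alongside the sampled action so it can be reused when recomputing
            # log-probabilities during the update (an assumption based on
            # similarly structured masked-PPO scripts; not stated in this snippet).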

        actions[step] = action.T
        logprobs[step] = logproba

        # TRY NOT TO MODIFY: execute the game and log data.
        next_obs, rs, ds, infos = envs.step(action.T)
        rewards[step], next_done = rs.view(-1), torch.Tensor(ds).to(device)
        wins[step, :] = 0
        for idx, info in enumerate(infos):
            if 'episode' in info.keys():
                print(
                    f"global_step={global_step}, episode_reward={info['episode']['r']}"
                )
                writer.add_scalar("charts/episode_reward",
                                  info['episode']['r'], global_step)
Example #3

    # Annealing the rate if instructed to do so.
    if args.anneal_lr:
        frac = 1.0 - (update - 1.0) / num_updates
        lrnow = lr(frac)
        optimizer.param_groups[0]['lr'] = lrnow

    # TRY NOT TO MODIFY: prepare the execution of the game.
    for step in range(0, args.num_steps):
        global_step += 1 * args.num_envs
        obs[step] = next_obs
        dones[step] = next_done

        # ALGO LOGIC: put action logic here
        with torch.no_grad():
            values[step] = agent.get_value(obs[step]).flatten()
            action, logproba, _ = agent.get_action(obs[step])

        actions[step] = action
        logprobs[step] = logproba

        # TRY NOT TO MODIFY: execute the game and log data.
        next_obs, rs, ds, infos = envs.step(action)
        #envs.render()
        rewards[step], next_done = rs.view(-1), torch.Tensor(ds).to(device)

        for info in infos:
            if 'episode' in info.keys():
                print(
                    f"global_step={global_step}, episode_reward={info['episode']['r']}"
                )
                writer.add_scalar("charts/episode_reward",
                                  info['episode']['r'], global_step)
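
    # What typically follows a rollout like this in a CleanRL-style PPO loop is a
    # bootstrapped GAE pass over the buffers filled above. This is a sketch under
    # that assumption (the gamma / gae_lambda hyperparameter names are
    # illustrative and not taken from this snippet):
    with torch.no_grad():
        last_value = agent.get_value(next_obs).reshape(1, -1)
        advantages = torch.zeros_like(rewards).to(device)
        lastgaelam = 0
        for t in reversed(range(args.num_steps)):
            if t == args.num_steps - 1:
                nextnonterminal = 1.0 - next_done
                nextvalues = last_value
            else:
                nextnonterminal = 1.0 - dones[t + 1]
                nextvalues = values[t + 1]
            delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
            advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam
        returns = advantages + values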
Example #4
agent.load_state_dict(torch.load(args.agent_model_path))
agent.eval()
for update in range(1, num_updates + 1):
    # TRY NOT TO MODIFY: prepare the execution of the game.
    for step in range(0, args.num_steps):
        envs.render()
        global_step += 1 * args.num_envs
        obs[step] = next_obs
        dones[step] = next_done

        # ALGO LOGIC: put action logic here
        with torch.no_grad():
            values[step] = agent.get_value(obs[step]).flatten()
            # raise
            action, logproba, _, invalid_action_masks[step] = agent.get_action(
                obs[step], envs=envs)
            # print(action.T.cpu()[0].numpy(), invalid_action_masks[step].cpu()[0].numpy().sum())

        actions[step] = action.T
        logprobs[step] = logproba

        # TRY NOT TO MODIFY: execute the game and log data.
        try:
            next_obs, rs, ds, infos = envs.step(action.T)
        except Exception as e:
            print(f"envs.step failed: {e}")
            raise
        rewards[step], next_done = rs.view(-1), torch.Tensor(ds).to(device)

        for idx, info in enumerate(infos):
            if 'episode' in info.keys():