def main():
    """Train an agent on LunarLander and report its average evaluation return.

    This function basically does the same as the checker that evaluates your
    agent. You can use it for debugging your agent and visualizing what it
    does.

    The agent is trained once and then run for ``n_eval`` episodes of at most
    ``episode_length`` steps each. The return of every episode and the mean
    return over all episodes are printed to stdout.
    """
    from lunar_lander import LunarLander
    from gym.wrappers.monitoring.video_recorder import VideoRecorder

    env = LunarLander()

    agent = Agent(env)
    agent.train()

    # Only used by the optional recording code that is commented out below.
    rec = VideoRecorder(env, "policy.mp4")

    episode_length = 300
    n_eval = 100
    returns = []
    print("Evaluating agent...")

    for i in range(n_eval):
        print(f"Testing policy: episode {i+1}/{n_eval}")

        # Reset exactly once per episode. A second reset here would discard
        # the observation stored in `state`, so the first action would be
        # chosen from an observation that no longer matches the environment.
        state = env.reset()
        cumulative_return = 0
        # The environment will set terminal to True if an episode is done.
        terminal = False

        for _ in range(episode_length):
            # Uncomment to record the first episodes:
            # if i <= 10:
            #     rec.capture_frame()

            # Taking an action in the environment
            action = agent.get_action(
                torch.as_tensor(state, dtype=torch.float32))
            state, reward, terminal = env.transition(action)
            cumulative_return += reward
            if terminal:
                break

        returns.append(cumulative_return)
        print(f"Achieved {cumulative_return:.2f} return.")

        # Uncomment together with the capture_frame() call above:
        # if i == 10:
        #     rec.close()
        #     print("Saved video of 10 episodes to 'policy.mp4'.")

    env.close()
    print(f"Average return: {np.mean(returns):.2f}")
) steps += 1 if steps % 3000 == 0 or done: print("\naction " + str(["{:+0.2f}".format(x) for x in a])) print("\nstep {}".format(steps)) if args.collect_data and steps % 3000 == 0: print("... saving data") store_data(samples) save_results(episode_rewards) # reset the samples storage for the next 2500 steps samples = { "state": [], "state_img": [], "next_state": [], "next_state_img": [], "reward": [], "action": [], "terminal": [], } # env.render() if done: print("REWARD", episode_reward) break episode_rewards.append(episode_reward) env.close()