def video_oc():
    """Run a saved Option-Critic agent through a gym ``Monitor`` and print the result.

    Builds an ``OptionCriticAgent`` with four options, restores its weights
    from the ``"agent"`` entry of ``checkpoint_oc.pt``, wraps the module-level
    ``env`` in ``gym.wrappers.Monitor`` writing to ``videos/oc``, evaluates the
    agent for two games with ``evaluate_oc`` and prints whatever it returns.

    Returns:
        None. The evaluation result is only printed.
    """
    # Kept function-local, as in the original, so gym's wrappers are only
    # needed when a recording is actually requested.
    from gym import wrappers

    agent = OptionCriticAgent(env, num_options=4, device=device).to(device)

    # map_location lets a checkpoint saved on another device (e.g. CUDA) be
    # restored onto whatever `device` this process is using.
    checkpoint = torch.load("checkpoint_oc.pt", map_location=device)
    agent.load_state_dict(checkpoint["agent"])

    # Create the monitor only after the checkpoint loaded successfully, so a
    # missing or incompatible checkpoint does not touch the output directory.
    env_monitor = wrappers.Monitor(env, directory="videos/oc", force=True)
    try:
        rw = evaluate_oc(agent, env_monitor, n_games=2)
    finally:
        # Always close the monitor, even if evaluation raises.
        env_monitor.close()
    print(rw)
def video_a2c():
    """Run a saved A2C agent through a gym ``Monitor`` and print the result.

    Builds an ``A2CAgent``, restores its weights from the ``"agent"`` entry of
    ``checkpoint_a2c.pt``, wraps the module-level ``env`` in
    ``gym.wrappers.Monitor`` writing to ``videos/a2c``, evaluates the agent
    for two games with ``evaluate`` and prints whatever it returns.

    Returns:
        None. The evaluation result is only printed.
    """
    # Kept function-local, as in the original, so gym's wrappers are only
    # needed when a recording is actually requested.
    from gym import wrappers

    agent = A2CAgent(env, device).to(device)

    # map_location lets a checkpoint saved on another device (e.g. CUDA) be
    # restored onto whatever `device` this process is using.
    checkpoint = torch.load("checkpoint_a2c.pt", map_location=device)
    agent.load_state_dict(checkpoint["agent"])

    # Create the monitor only after the checkpoint loaded successfully, so a
    # missing or incompatible checkpoint does not touch the output directory.
    env_monitor = wrappers.Monitor(env, directory="videos/a2c", force=True)
    try:
        rw = evaluate(agent, env_monitor, n_games=2)
    finally:
        # Always close the monitor, even if evaluation raises.
        env_monitor.close()
    print(rw)