Example #1
def record_video(env_id,
                 model,
                 video_length=500,
                 prefix='',
                 video_folder='videos/'):
    """
  :param env_id: (str)
  :param model: (RL model)
  :param video_length: (int)
  :param prefix: (str)
  :param video_folder: (str)
  """
    eval_env = DummyVecEnv([lambda: gym.make(env_id)])
    # Start the video at step=0 and record video_length steps
    eval_env = VecVideoRecorder(eval_env,
                                video_folder=video_folder,
                                record_video_trigger=lambda step: step == 0,
                                video_length=video_length,
                                name_prefix=prefix)

    obs = eval_env.reset()
    for _ in range(video_length):
        action, _ = model.predict(obs)
        obs, _, _, _ = eval_env.step(action)

    # Close the video recorder
    eval_env.close()
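A minimal way to exercise the helper above (a sketch assuming the stable-baselines v2 API; the PPO2 model and CartPole-v1 are placeholders, not part of the original example):

import gym
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv, VecVideoRecorder

# Train a throwaway agent, then record it for 500 steps
model = PPO2('MlpPolicy', 'CartPole-v1').learn(10000)
record_video('CartPole-v1', model, video_length=500, prefix='ppo2-cartpole')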
Example #2
def main(config, agent):
    with open(config) as fp:
        json_data = json.load(fp)

    video_path = os.path.join("./videos", agent, "pong")
    config = GameConfig.deserialize(json_data)
    # `save_path` is assumed to end with a path separator
    config.agents_config[agent]["save_path"] += "best_model.zip"
    # config.agents_config[args.agent]["save_path"] = "my_models/pong/pong_ppo/best_model.zip"
    print(config.agents_config[agent]["save_path"])
    # env = retro.make(config.game_name)
    env = gym.make("PongNoFrameskip-v4")

    agent = AgentLoader.get_agent(agent,
                                  config.agents_config,
                                  env,
                                  load=True)
    env.close()
    env = gym.make("PongNoFrameskip-v4")
    env = DummyVecEnv([lambda: env])
    # env = retro.make(config.game_name, record=video_path)
    env = VecVideoRecorder(
        env,
        video_path,
        record_video_trigger=lambda x: x == 0,
    )

    obs = env.reset()
    done = False
    while not done:
        actions, _ = agent.agent.predict(obs)
        obs, rew, done, info = env.step(actions)

    env.close()
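In both examples above, the trigger `lambda step: step == 0` starts a single recording at the first step. The callable receives the recorder's step counter, so periodic clips only need a different predicate (a sketch; the interval is an arbitrary choice):

# Record a clip every 10000 steps instead of only at step 0
env = VecVideoRecorder(env,
                       video_path,
                       record_video_trigger=lambda step: step % 10000 == 0,
                       video_length=500)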
Example #3
def record_video(env_name, train_env, model, videoLength=500, prefix='', videoPath='videos/'):
    print('record_video function')
    # Wrap the env in a VecVideoRecorder
    # `make_env` and `log_dir` are assumed to be defined elsewhere in the module
    local_eval_env = SubprocVecEnv([make_env(env_name, i, log_dir=log_dir) for i in range(1)])
    local_eval_env = VecNormalize(local_eval_env, norm_obs=True, norm_reward=True, clip_obs=10.)
    sync_envs_normalization(train_env, local_eval_env)
    local_eval_env = VecVideoRecorder(local_eval_env,
                                      video_folder=videoPath,
                                      record_video_trigger=lambda step: step == 0,
                                      video_length=videoLength,
                                      name_prefix=prefix)
    obs = local_eval_env.reset()
    for _ in range(videoLength):
        action, _ = model.predict(obs)
        obs, _, _, _ = local_eval_env.step(action)

    # Close the video recorder
    local_eval_env.close()
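`sync_envs_normalization(train_env, local_eval_env)` copies the running VecNormalize statistics from the training env into the freshly built eval env, so the recorded rollout sees the same observation scaling the model was trained on. A sketch of the training-side setup this helper assumes (names and step counts are illustrative):

# Training setup whose normalization statistics record_video will reuse
train_env = SubprocVecEnv([make_env(env_name, i, log_dir=log_dir) for i in range(4)])
train_env = VecNormalize(train_env, norm_obs=True, norm_reward=True, clip_obs=10.)
model = PPO2('MlpPolicy', train_env).learn(100000)
record_video(env_name, train_env, model, videoLength=500, prefix='ppo2')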
Example #4
def run(env):
    drive = PPO.load("conduziadrive")

    # `log_dir` and `gym_env_id` are assumed to be defined at module level
    env = VecVideoRecorder(
        env,
        log_dir + '/videos/',
        record_video_trigger=lambda x: x == 0,
        video_length=1000,
        name_prefix="conduzia-drive-agent-{}".format(gym_env_id))

    env = VecNormalize(env,
                       gamma=0.9997,
                       norm_obs=True,
                       norm_reward=True,
                       clip_obs=10.,
                       epsilon=0.1)

    rewards = []

    # Record a handful of episodes (the episode count is an arbitrary choice)
    for _ in range(5):
        obs = env.reset()
        total_reward = 0

        for t in range(1000):
            action, _states = drive.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)
            env.render()
            total_reward += reward
            if t % 100 == 0:
                print(t)
            if done:
                break
        print("Finished after {} timesteps".format(t + 1))
        print("Reward: {}".format(total_reward))
        rewards.append(total_reward)

    env.close()
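Note that this example applies a fresh VecNormalize at evaluation time, so the running statistics start from zero and the agent sees observations scaled differently than during training. If the statistics were saved while training, loading and freezing them is usually the better choice (a sketch; the path is a placeholder):

# Load training-time normalization statistics instead of starting fresh
env = VecNormalize.load("vec_normalize_stats.pkl", env)
env.training = False     # freeze the running statistics
env.norm_reward = False  # report raw rewards during evaluation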
Example #5
def record_video(trained_model, env, video_folder, video_length, name):

    trained_model.set_env(env)
    # Record the video starting at the first step
    env = VecVideoRecorder(env,
                           video_folder,
                           record_video_trigger=lambda x: x == 0,
                           video_length=video_length,
                           name_prefix=name)

    obs = env.reset()
    for _ in range(video_length + 1):
        action, _ = trained_model.predict(obs)
        obs, _, _, _ = env.step(action)
    # Save the video
    env.close()
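The same recording loop also works without a trained model, e.g. to capture a random-policy baseline for comparison (a sketch, mirroring the commented-out `action = [env.action_space.sample()]` line in the next example):

# Random-policy rollout through the same recorder
obs = env.reset()
for _ in range(video_length + 1):
    action = [env.action_space.sample()]  # one action per vectorized env
    obs, _, _, _ = env.step(action)
env.close()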
Example #6
hyperparams, stats_path = get_saved_hyperparams(stats_path)


is_atari = 'NoFrameskip' in env_id

env = create_test_env(env_id, n_envs=n_envs,
                      stats_path=stats_path, seed=seed, log_dir=None,
                      should_render=not args.no_render, hyperparams=hyperparams)

model = ALGOS[algo].load(model_path)

obs = env.reset()

# Note: apparently it renders by default
env = VecVideoRecorder(env, video_folder,
                       record_video_trigger=lambda x: x == 0, video_length=video_length,
                       name_prefix="{}-{}".format(algo, env_id))

obs = env.reset()
for _ in range(video_length + 1):
    # action = [env.action_space.sample()]
    action, _ = model.predict(obs, deterministic=deterministic)
    if isinstance(env.action_space, gym.spaces.Box):
        action = np.clip(action, env.action_space.low, env.action_space.high)
    obs, _, _, _ = env.step(action)

# Workaround for https://github.com/openai/gym/issues/893
if n_envs == 1 and 'Bullet' not in env_id and not is_atari:
    env = env.venv
    # DummyVecEnv
    while isinstance(env, VecNormalize) or isinstance(env, VecFrameStack):
        env = env.venv
    env.envs[0].env.close()
else:
    # SubprocVecEnv
    env.close()
Example #7
def record_video(env_id: str = "CartPole-v1",
                 algo: str = "ppo",
                 folder: str = "rl-trained-agents",
                 video_folder: str = "logs/videos/",
                 video_length: int = 1000,
                 n_envs: int = 1,
                 deterministic: bool = False,
                 seed: int = 0,
                 no_render: bool = False,
                 exp_id: int = 0):

    if exp_id == 0:
        exp_id = get_latest_run_id(os.path.join(folder, algo), env_id)
        print(f"Loading latest experiment, id={exp_id}")
    # Sanity checks
    if exp_id > 0:
        log_path = os.path.join(folder, algo, f"{env_id}_{exp_id}")
    else:
        log_path = os.path.join(folder, algo)

    model_path = os.path.join(log_path, f"{env_id}.zip")

    stats_path = os.path.join(log_path, env_id)
    hyperparams, stats_path = get_saved_hyperparams(stats_path)

    is_atari = "NoFrameskip" in env_id

    env = create_test_env(
        env_id,
        n_envs=n_envs,
        stats_path=stats_path,
        seed=seed,
        log_dir=None,
        should_render=not no_render,
        hyperparams=hyperparams,
    )

    model = ALGOS[algo].load(model_path)

    obs = env.reset()

    # Note: apparently it renders by default
    env = VecVideoRecorder(
        env,
        video_folder,
        record_video_trigger=lambda x: x == 0,
        video_length=video_length,
        name_prefix=f"{algo}-{env_id}",
    )

    obs = env.reset()
    for _ in range(video_length + 1):
        action, _ = model.predict(obs, deterministic=deterministic)
        obs, _, _, _ = env.step(action)

    # Workaround for https://github.com/openai/gym/issues/893
    if n_envs == 1 and "Bullet" not in env_id and not is_atari:
        env = env.venv
        # DummyVecEnv
        while isinstance(env, VecEnvWrapper):
            env = env.venv
        if isinstance(env, DummyVecEnv):
            env.envs[0].env.close()
        else:
            env.close()
    else:
        # SubprocVecEnv
        env.close()
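A direct invocation of the function above (a sketch; the argument values are placeholders and assume a trained agent saved under rl-trained-agents/ppo):

if __name__ == "__main__":
    record_video(env_id="CartPole-v1",
                 algo="ppo",
                 video_length=500,
                 deterministic=True)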