Example #1
import os

from stable_baselines import logger
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize, VecFrameStack

# make_env, load_vae and TEST_FRAME_SKIP are project-specific helpers
# assumed to be defined elsewhere in the original repository.


def create_test_env(level=0,
                    stats_path=None,
                    seed=0,
                    log_dir='',
                    hyperparams=None):
    """
    Create environment for testing a trained agent

    :param level: (int) Level index of the environment
    :param stats_path: (str) Path to the folder containing the saved running averages
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards
    :param hyperparams: (dict) Additional hyperparams (ex: n_stack)
    :return: (gym.Env or VecEnv) The wrapped test environment
    """
    # HACK to save logs
    if log_dir is not None:
        os.environ["OPENAI_LOG_FORMAT"] = 'csv'
        os.environ["OPENAI_LOGDIR"] = os.path.abspath(log_dir)
        os.makedirs(log_dir, exist_ok=True)
        logger.configure()

    # hyperparams defaults to None, so guard before indexing into it
    hyperparams = hyperparams or {}

    vae_path = hyperparams.get('vae_path', '')
    if vae_path == '' and stats_path is not None:
        vae_path = os.path.join(stats_path, 'vae.pkl')
    vae = None
    if stats_path is not None and os.path.isfile(vae_path):
        vae = load_vae(vae_path)

    env = DummyVecEnv(
        [make_env(level, seed, log_dir, vae=vae, frame_skip=TEST_FRAME_SKIP)])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        if hyperparams.get('normalize', False):
            print("Loading running average")
            print("with params: {}".format(hyperparams['normalize_kwargs']))
            env = VecNormalize(env,
                               training=False,
                               **hyperparams['normalize_kwargs'])
            env.load_running_average(stats_path)

        n_stack = hyperparams.get('frame_stack', 0)
        if n_stack > 0:
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env
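A minimal usage sketch for create_test_env, assuming the hyperparams keys read above (vae_path, normalize, normalize_kwargs, frame_stack); the paths and values below are purely illustrative:

hyperparams = {
    'vae_path': '',  # empty string -> fall back to <stats_path>/vae.pkl
    'normalize': True,
    'normalize_kwargs': {'norm_obs': True, 'norm_reward': False},
    'frame_stack': 4,
}
env = create_test_env(level=0,
                      stats_path='logs/sac/DonkeyVae-v0_1',  # hypothetical folder
                      seed=0,
                      log_dir='logs/test',
                      hyperparams=hyperparams)
obs = env.reset()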
Example #2
if args.trained_agent.endswith('.pkl') and os.path.isfile(args.trained_agent):
    # Continue training
    print("Loading pretrained agent")
    # Policy should not be changed
    del hyperparams['policy']

    model = ALGOS[args.algo].load(args.trained_agent,
                                  env=env,
                                  tensorboard_log=tensorboard_log,
                                  verbose=1,
                                  **hyperparams)

    # The saved running averages live in a folder named after the checkpoint
    exp_folder = args.trained_agent.split('.pkl')[0]
    if normalize:
        print("Loading saved running average")
        env.load_running_average(exp_folder)
else:
    # Train an agent from scratch
    model = ALGOS[args.algo](env=env,
                             tensorboard_log=tensorboard_log,
                             verbose=1,
                             **hyperparams)

# Teleoperation mode:
# we don't wrap the environment with a monitor or in a vecenv
if args.teleop:
    assert args.algo == "sac", "Teleoperation mode is not yet implemented for {}".format(
        args.algo)
    env = TeleopEnv(env, is_training=True)
    model.set_env(env)
    env.model = model
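ALGOS above maps an algorithm name to its class. A plausible definition, assuming stable-baselines 2.x (the dict in the source project may cover more algorithms):

from stable_baselines import A2C, DDPG, PPO2, SAC, TD3

ALGOS = {
    'a2c': A2C,
    'ddpg': DDPG,
    'ppo2': PPO2,
    'sac': SAC,
    'td3': TD3,
}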
Example #3
                         name_resume + ".zip")
    elif (args.algo == "td3"):
        model = TD3.load(workDirectory + "/resultats/" + name_resume + "/" +
                         name_resume + ".zip")
env = DummyVecEnv(
    [lambda: e.AidaBulletEnv(
        commands,
        render=False,
        on_rack=False,
    )])
if normalize:
    env = VecNormalize(env,
                       clip_obs=1000.0,
                       clip_reward=1000.0,
                       training=False)
    env.load_running_average(workDirectory + "/resultats/" + name_resume +
                             "/normalizeData")

images = []
obs = env.reset()
img = env.render(mode='rgb_array')
n_frames = 15 * 2 * 10  # 300 frames in total
for i in range(n_frames):
    images.append(img)
    action, _ = model.predict(obs, deterministic=True)
    obs, _, _, _ = env.step(action)
    img = env.render(mode='rgb_array')
    print("frame " + str(i) + "/" + str(n_frames))
if args.dir is None:
    imageio.mimsave(
        workDirectory + "/resultats/" + name_resume + "/video/" + name_resume +
        ".gif", [np.array(img) for i, img in enumerate(images) if i % 2 == 0],
        fps=50)
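For context, a hypothetical argparse setup matching the flags this snippet reads (args.algo, args.dir); the parser in the original script may differ:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--algo", type=str, default="td3",
                    help="Algorithm whose checkpoint should be loaded")
parser.add_argument("--dir", type=str, default=None,
                    help="Optional output directory for the rendered frames")
args = parser.parse_args()

Note that the loop collects 300 frames and keeps every second one, so at 50 fps the resulting GIF lasts about three seconds.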