def create_test_env(level=0, stats_path=None, seed=0, log_dir='', hyperparams=None):
    """
    Create environment for testing a trained agent.

    :param level: (int) level of the environment to create
    :param stats_path: (str) path to folder containing saved running average
        (and optionally a saved VAE); may be None
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards; pass None to skip logger setup
    :param hyperparams: (dict) Additional hyperparams (ex: n_stack);
        may be None, in which case an empty dict is used
    :return: (gym.Env) the (possibly normalized / frame-stacked) vectorized env
    """
    # Guard against the mutable-free default: subscripting None would crash.
    hyperparams = {} if hyperparams is None else hyperparams

    # HACK to save logs: configure the OpenAI baselines logger via env vars.
    if log_dir is not None:
        os.environ["OPENAI_LOG_FORMAT"] = 'csv'
        os.environ["OPENAI_LOGDIR"] = os.path.abspath(log_dir)
        os.makedirs(log_dir, exist_ok=True)
        logger.configure()

    # Resolve the VAE path: explicit hyperparam wins, otherwise look next to
    # the saved stats. Only join when stats_path is set — os.path.join(None, …)
    # raises TypeError.
    vae_path = hyperparams.get('vae_path', '')
    if vae_path == '' and stats_path is not None:
        vae_path = os.path.join(stats_path, 'vae.pkl')
    vae = None
    if stats_path is not None and os.path.isfile(vae_path):
        vae = load_vae(vae_path)

    env = DummyVecEnv(
        [make_env(level, seed, log_dir, vae=vae, frame_skip=TEST_FRAME_SKIP)])

    # Load saved stats for normalizing input and rewards,
    # and optionally stack frames.
    if stats_path is not None:
        # .get() keeps this safe when 'normalize' was never recorded.
        if hyperparams.get('normalize', False):
            print("Loading running average")
            print("with params: {}".format(hyperparams['normalize_kwargs']))
            # training=False: freeze the running average at test time.
            env = VecNormalize(env, training=False, **hyperparams['normalize_kwargs'])
            env.load_running_average(stats_path)

        n_stack = hyperparams.get('frame_stack', 0)
        if n_stack > 0:
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env
# Either resume training from a saved agent or start from scratch.
if args.trained_agent.endswith('.pkl') and os.path.isfile(args.trained_agent):
    # Continue training
    print("Loading pretrained agent")
    # Policy should not be changed: the loaded model already carries its
    # policy, so passing one again via **hyperparams would conflict.
    del hyperparams['policy']
    model = ALGOS[args.algo].load(args.trained_agent, env=env,
                                  tensorboard_log=tensorboard_log,
                                  verbose=1, **hyperparams)
    # Folder next to the .pkl holding the saved normalization statistics.
    exp_folder = args.trained_agent.split('.pkl')[0]
    if normalize:
        print("Loading saved running average")
        env.load_running_average(exp_folder)
else:
    # Train an agent from scratch
    model = ALGOS[args.algo](env=env, tensorboard_log=tensorboard_log,
                             verbose=1, **hyperparams)

# Teleoperation mode:
# we don't wrap the environment with a monitor or in a vecenv
if args.teleop:
    # NOTE(review): only SAC supports teleoperation here; other algos abort.
    assert args.algo == "sac", \
        "Teleoperation mode is not yet implemented for {}".format(args.algo)
    env = TeleopEnv(env, is_training=True)
    model.set_env(env)
    # Give the teleop env a handle back to the model (used elsewhere —
    # presumably for saving/intervention; verify against TeleopEnv).
    env.model = model
name_resume + ".zip") elif (args.algo == "td3"): model = TD3.load(workDirectory + "/resultats/" + name_resume + "/" + name_resume + ".zip") env = DummyVecEnv( [lambda: e.AidaBulletEnv( commands, render=False, on_rack=False, )]) if normalize: env = VecNormalize(env, clip_obs=1000.0, clip_reward=1000.0, training=False) env.load_running_average(workDirectory + "/resultats/" + name_resume + "/normalizeData") images = [] obs = env.reset() img = env.render(mode='rgb_array') for i in range(15 * 2 * 10): images.append(img) action, _ = model.predict(obs, deterministic=True) obs, _, _, _ = env.step(action) img = env.render(mode='rgb_array') print("frame " + str(i) + "/" + str(2 * 150)) if (args.dir == None): imageio.mimsave( workDirectory + "/resultats/" + name_resume + "/video/" + name_resume + ".gif", [np.array(img) for i, img in enumerate(images) if i % 2 == 0], fps=50)