def __init__(self, verbose=0, save_log_flag=False, log_num=None,
             do_proc_simulation=True, custom_num_proc=0):
    super(ModuleSelectEnv, self).__init__()
    self.verbose = verbose
    self.save_log_flag = save_log_flag
    if self.save_log_flag and log_num is not None:
        self._init_log_to_write(log_num)
    self.do_proc_simulation = do_proc_simulation
    self.num_proc = custom_num_proc

    # Load the saved SAC hyperparameters and the VAE used to encode observations
    stats_path = "modules/logs/sac/DonkeyVae-v0-level-0_6/DonkeyVae-v0-level-0"
    hyperparams, stats_path = get_saved_hyperparams(stats_path, norm_reward=False)
    hyperparams['vae_path'] = "modules/logs/vae-level-0-dim-32.pkl"
    self.inner_env = create_test_env(stats_path=stats_path, seed=0,
                                     log_dir="modules/logs", hyperparams=hyperparams)
    model_path = "modules/logs/sac/DonkeyVae-v0-level-0_6/DonkeyVae-v0-level-0.pkl"
    self.model = ALGOS["sac"].load(model_path)

    # One VAE+SAC module per (delay weight, static term) pair.
    # NOTE: `delay_weights` and `static_terms` are assumed to be module-level
    # globals, and `self.continuous` a class attribute defined elsewhere.
    self.num_modules = 5
    self.module0 = VAESACModule(self.inner_env, self.model, delay_weights[0], static_terms[0])
    self.module1 = VAESACModule(self.inner_env, self.model, delay_weights[1], static_terms[1])
    self.module2 = VAESACModule(self.inner_env, self.model, delay_weights[2], static_terms[2])
    self.module3 = VAESACModule(self.inner_env, self.model, delay_weights[3], static_terms[3])
    self.module4 = VAESACModule(self.inner_env, self.model, delay_weights[4], static_terms[4])

    if self.continuous:
        # the probability of selecting each module (end-to-end included)
        self.action_space = spaces.Box(low=-1, high=1, shape=(self.num_modules,))
    else:
        # discrete choice among the modules (lane detection, end-to-end, ...)
        self.action_space = spaces.Discrete(self.num_modules)
    self.observation_space = spaces.Box(
        low=np.finfo(np.float32).min,
        high=np.finfo(np.float32).max,
        shape=(1,),
        dtype=np.float32)
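# --- Usage sketch (not from the original file) ---
# A minimal, hypothetical driver for ModuleSelectEnv, assuming it implements the
# standard gym.Env reset()/step() API and that the SAC/VAE artifacts referenced
# above exist under modules/logs/.
env = ModuleSelectEnv(verbose=1, do_proc_simulation=False)
obs = env.reset()
for _ in range(10):
    action = env.action_space.sample()  # random module selection
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()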
def __init__(self):
    set_global_seeds(0)
    hyperparams, stats_path = get_saved_hyperparams(STATS_PATH, norm_reward=False)
    hyperparams["vae_path"] = LEVEL_NAME.vae()
    self.env = create_test_env(
        stats_path=stats_path,
        seed=0,
        log_dir=None,
        hyperparams=hyperparams,
    )
    self.model = DDPG.load(LEVEL_NAME.model())
def __init__(self, service: AIExchangeService):
    set_global_seeds(0)
    hyperparams, stats_path = get_saved_hyperparams(STATS_PATH, norm_reward=False)
    hyperparams["vae_path"] = LEVEL_NAME.vae()
    self.service = service
    self.simulation = Simulation(service)
    self.env = create_test_env(
        stats_path=stats_path,
        seed=0,
        log_dir=None,
        hyperparams=hyperparams,
        simulation=self.simulation,
    )
    self.model = DDPG.load(LEVEL_NAME.model())
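# The two constructors above duplicate the same loading logic. Below is a hedged
# refactoring sketch (load_env_and_model is a hypothetical helper, not part of
# the original code) that both could delegate to; STATS_PATH, LEVEL_NAME, DDPG,
# set_global_seeds, get_saved_hyperparams, and create_test_env are assumed to
# come from the surrounding module.
def load_env_and_model(simulation=None):
    set_global_seeds(0)
    hyperparams, stats_path = get_saved_hyperparams(STATS_PATH, norm_reward=False)
    hyperparams["vae_path"] = LEVEL_NAME.vae()
    extra = {} if simulation is None else {"simulation": simulation}
    env = create_test_env(stats_path=stats_path, seed=0, log_dir=None,
                          hyperparams=hyperparams, **extra)
    return env, DDPG.load(LEVEL_NAME.model())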
if skip_eval:
    print("Skipping eval...")
else:
    return_code = subprocess.call(["python", "enjoy.py"] + arguments)
    if return_code != 0:
        print("Error during evaluation, skipping...")
        continue

# Load the episode returns written by enjoy.py (also available when eval was skipped)
x, y = ts2xy(load_results(reward_log), "timesteps")

if len(x) > 0:
    # Retrieve training timesteps from config
    exp_id = get_latest_run_id(os.path.join(args.log_dir, algo), env_id)
    log_path = os.path.join(args.log_dir, algo, f"{env_id}_{exp_id}", env_id)
    hyperparams, _ = get_saved_hyperparams(log_path)
    # Hack to format it properly
    if hyperparams["n_timesteps"] < 1e6:
        n_training_timesteps = f"{int(hyperparams['n_timesteps'] / 1e3)}k"
    else:
        n_training_timesteps = f"{int(hyperparams['n_timesteps'] / 1e6)}M"

    mean_reward = np.mean(y)
    std_reward = np.std(y)

    results["algo"].append(algo)
    results["env_id"].append(env_id)
    results["mean_reward"].append(mean_reward)
    results["std_reward"].append(std_reward)
    results["n_timesteps"].append(n_training_timesteps)
    results["eval_timesteps"].append(x[-1])
    results["eval_episodes"].append(len(y))
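# Once the loop above has filled `results`, the parallel lists can be rendered as
# a summary table. A minimal sketch, assuming pandas (and its optional `tabulate`
# dependency, required by to_markdown) is installed:
import pandas as pd

df = pd.DataFrame(results)
df = df.sort_values(["algo", "env_id"])
print(df.to_markdown(index=False))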
elif args.load_checkpoint is None:
    # Default: load latest model
    model_path = os.path.join(log_path, f"{env_id}.zip")
    name_prefix = f"final-model-{algo}-{env_id}"
else:
    model_path = os.path.join(log_path, f"rl_model_{args.load_checkpoint}_steps.zip")
    name_prefix = f"checkpoint-{args.load_checkpoint}-{algo}-{env_id}"

found = os.path.isfile(model_path)
if not found:
    raise ValueError(f"No model found for {algo} on {env_id}, path: {model_path}")

stats_path = os.path.join(log_path, env_id)
hyperparams, stats_path = get_saved_hyperparams(stats_path)

is_atari = ExperimentManager.is_atari(env_id)

env = create_test_env(
    env_id,
    n_envs=n_envs,
    stats_path=stats_path,
    seed=seed,
    log_dir=None,
    should_render=not args.no_render,
    hyperparams=hyperparams,
)

model = ALGOS[algo].load(model_path, env=env)
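# A hedged rollout sketch showing how the `env`/`model` pair loaded above is
# typically used in enjoy-style scripts (this loop is illustrative, not the
# original script's evaluation loop):
obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, infos = env.step(action)
env.close()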
else:
    log_path = os.path.join(folder, algo)

best_path = ''
if args.best_model:
    best_path = '_best'

model_path = os.path.join(log_path, "{}{}.pkl".format(ENV_ID, best_path))

assert os.path.isdir(log_path), "The {} folder was not found".format(log_path)
assert os.path.isfile(model_path), "No model found for {} on {}, path: {}".format(algo, ENV_ID, model_path)

set_global_seeds(args.seed)

stats_path = os.path.join(log_path, ENV_ID)
hyperparams, stats_path = get_saved_hyperparams(stats_path, norm_reward=args.norm_reward)
hyperparams['vae_path'] = args.vae_path

log_dir = args.reward_log if args.reward_log != '' else None

env = create_test_env(stats_path=stats_path, seed=args.seed, log_dir=log_dir,
                      hyperparams=hyperparams)

model = ALGOS[algo].load(model_path)

obs = env.reset()

# Force deterministic for SAC and DDPG
deterministic = args.deterministic or algo in ['ddpg', 'sac']
if args.verbose >= 1:
    print("Deterministic actions: {}".format(deterministic))
best_path = ''  # initialise so the path is valid when best_model is False, as in the variant above
if best_model:
    best_path = '_best'

model_path = os.path.join(log_path, "{}{}.pkl".format(ENV_ID, best_path))

assert os.path.isdir(log_path), "The {} folder was not found".format(log_path)
assert os.path.isfile(model_path), "No model found for {} on {}, path: {}".format(algo, ENV_ID, model_path)

set_global_seeds(0)

stats_path = os.path.join(log_path, ENV_ID)
hyperparams, stats_path = get_saved_hyperparams(stats_path, norm_reward=False)
hyperparams['vae_path'] = vae_path

reward_log = ""
log_dir = reward_log if reward_log != '' else None

# env = create_test_env(stats_path=stats_path, seed=0, log_dir=log_dir,
#                       hyperparams=hyperparams)

model = ALGOS["sac"].load(model_path)

# obs = env.reset()
# print(obs.shape)
def record_video(env_id: str = "CartPole-v1", algo: str = "ppo", folder: str = "rl-trained-agents",
                 video_folder: str = "logs/videos/", video_length: int = 1000, n_envs: int = 1,
                 deterministic: bool = False, seed: int = 0, no_render: bool = False, exp_id: int = 0):
    if exp_id == 0:
        exp_id = get_latest_run_id(os.path.join(folder, algo), env_id)
        print(f"Loading latest experiment, id={exp_id}")

    # Sanity checks
    if exp_id > 0:
        log_path = os.path.join(folder, algo, f"{env_id}_{exp_id}")
    else:
        log_path = os.path.join(folder, algo)

    model_path = os.path.join(log_path, f"{env_id}.zip")
    stats_path = os.path.join(log_path, env_id)
    hyperparams, stats_path = get_saved_hyperparams(stats_path)

    is_atari = "NoFrameskip" in env_id

    env = create_test_env(
        env_id,
        n_envs=n_envs,
        stats_path=stats_path,
        seed=seed,
        log_dir=None,
        should_render=not no_render,
        hyperparams=hyperparams,
    )

    model = ALGOS[algo].load(model_path)
    obs = env.reset()

    # Note: apparently it renders by default
    env = VecVideoRecorder(
        env,
        video_folder,
        record_video_trigger=lambda x: x == 0,
        video_length=video_length,
        name_prefix=f"{algo}-{env_id}",
    )
    env.reset()
    for _ in range(video_length + 1):
        action, _ = model.predict(obs, deterministic=deterministic)
        obs, _, _, _ = env.step(action)

    # Workaround for https://github.com/openai/gym/issues/893
    if n_envs == 1 and "Bullet" not in env_id and not is_atari:
        # DummyVecEnv: unwrap and close the underlying env directly
        env = env.venv
        while isinstance(env, VecEnvWrapper):
            env = env.venv
        if isinstance(env, DummyVecEnv):
            env.envs[0].env.close()
        else:
            env.close()
    else:
        # SubprocVecEnv
        env.close()
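# Example invocation (a sketch; the defaults assume the rl-trained-agents layout
# used above, and the recorder writes videos under logs/videos/):
if __name__ == "__main__":
    record_video(env_id="CartPole-v1", algo="ppo", video_length=500)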