Code example #1
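The __init__ of ModuleSelectEnv, a gym-style environment that selects between several driving modules. It loads saved SAC hyperparameters and a VAE, builds the inner DonkeyVae test environment with create_test_env, loads a pretrained SAC policy, instantiates five VAESACModule wrappers with different delay weights and static terms, and defines the action space (a continuous Box over module weights or a discrete module index) plus a scalar observation space. Note that self.continuous, delay_weights and static_terms are assumed to be defined elsewhere in the project.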
    def __init__(self,
                 verbose=0,
                 save_log_flag=False,
                 log_num=None,
                 do_proc_simulation=True,
                 custom_num_proc=0):
        super(ModuleSelectEnv, self).__init__()
        self.verbose = verbose
        self.save_log_flag = save_log_flag
        if self.save_log_flag and log_num is not None:
            self._init_log_to_write(log_num)
        self.do_proc_simulation = do_proc_simulation
        self.num_proc = custom_num_proc

        stats_path = "modules/logs/sac/DonkeyVae-v0-level-0_6/DonkeyVae-v0-level-0"
        hyperparams, stats_path = get_saved_hyperparams(stats_path,
                                                        norm_reward=False)
        hyperparams['vae_path'] = "modules/logs/vae-level-0-dim-32.pkl"
        self.inner_env = create_test_env(stats_path=stats_path,
                                         seed=0,
                                         log_dir="modules/logs",
                                         hyperparams=hyperparams)

        model_path = "modules/logs/sac/DonkeyVae-v0-level-0_6/DonkeyVae-v0-level-0.pkl"
        self.model = ALGOS["sac"].load(model_path)

        self.num_modules = 5
        self.module0 = VAESACModule(self.inner_env, self.model,
                                    delay_weights[0], static_terms[0])
        self.module1 = VAESACModule(self.inner_env, self.model,
                                    delay_weights[1], static_terms[1])
        self.module2 = VAESACModule(self.inner_env, self.model,
                                    delay_weights[2], static_terms[2])
        self.module3 = VAESACModule(self.inner_env, self.model,
                                    delay_weights[3], static_terms[3])
        self.module4 = VAESACModule(self.inner_env, self.model,
                                    delay_weights[4], static_terms[4])

        if self.continuous:
            # continuous case: a selection probability for each module
            self.action_space = spaces.Box(low=-1,
                                           high=1,
                                           shape=(self.num_modules, ))
        else:
            # discrete case: pick one module (lane detection, end-to-end, ...)
            self.action_space = spaces.Discrete(self.num_modules)

        self.observation_space = spaces.Box(
            low=np.finfo(np.float32).min,
            high=np.finfo(np.float32).max,
            shape=(1, ),  # a single scalar taking values 0 to 11 (12 possibilities)
            dtype=np.float32)
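A minimal usage sketch (not from the source project): driving the selector environment with random actions. It assumes ModuleSelectEnv also defines the continuous attribute and implements reset() and step() with the usual gym.Env signature.

env = ModuleSelectEnv(verbose=1, do_proc_simulation=False)
obs = env.reset()
for _ in range(100):
    action = env.action_space.sample()  # module index (discrete) or module weights (continuous)
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()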
Code example #2
File: ai.py Project: foxycom/reinforcement-learning
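Setup for an AI driver class: it seeds the run, loads the saved hyperparameters and the level's VAE, creates the test environment, and loads a pretrained DDPG policy.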
    def __init__(self):
        set_global_seeds(0)

        hyperparams, stats_path = get_saved_hyperparams(STATS_PATH,
                                                        norm_reward=False)
        hyperparams["vae_path"] = LEVEL_NAME.vae()
        self.env = create_test_env(
            stats_path=stats_path,
            seed=0,
            log_dir=None,
            hyperparams=hyperparams,
        )
        self.model = DDPG.load(LEVEL_NAME.model())
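A hedged sketch of how the loaded policy and environment are typically used afterwards. The method name drive() is hypothetical; the predict/step calls follow the standard stable-baselines VecEnv API.

    def drive(self, n_steps=1000):
        obs = self.env.reset()
        for _ in range(n_steps):
            action, _ = self.model.predict(obs, deterministic=True)
            obs, reward, done, info = self.env.step(action)  # VecEnv auto-resets when an episode ends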
Code example #3
File: ai.py Project: foxycom/reinforcement-learning
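A variant of the previous constructor that also wires an AIExchangeService-backed Simulation into the test environment via the simulation keyword argument.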
    def __init__(self, service: AIExchangeService):
        set_global_seeds(0)

        hyperparams, stats_path = get_saved_hyperparams(STATS_PATH,
                                                        norm_reward=False)
        hyperparams["vae_path"] = LEVEL_NAME.vae()
        self.service = service
        self.simulation = Simulation(service)
        self.env = create_test_env(
            stats_path=stats_path,
            seed=0,
            log_dir=None,
            hyperparams=hyperparams,
            simulation=self.simulation,
        )
        self.model = DDPG.load(LEVEL_NAME.model())
Code example #4
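Part of a benchmark loop: it evaluates a trained agent by running enjoy.py as a subprocess, loads the resulting monitor logs with load_results/ts2xy, reads the saved hyperparameters to recover the training budget (n_timesteps), and appends aggregate reward statistics to a results dictionary.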
    if skip_eval:
        print("Skipping eval...")
    else:
        return_code = subprocess.call(["python", "enjoy.py"] + arguments)
        if return_code != 0:
            print("Error during evaluation, skipping...")
            continue
        x, y = ts2xy(load_results(reward_log), "timesteps")

    if len(x) > 0:
        # Retrieve training timesteps from config
        exp_id = get_latest_run_id(os.path.join(args.log_dir, algo), env_id)
        log_path = os.path.join(args.log_dir, algo, f"{env_id}_{exp_id}",
                                env_id)
        hyperparams, _ = get_saved_hyperparams(log_path)
        # Hack to format it properly
        if hyperparams["n_timesteps"] < 1e6:
            n_training_timesteps = f"{int(hyperparams['n_timesteps'] / 1e3)}k"
        else:
            n_training_timesteps = f"{int(hyperparams['n_timesteps'] / 1e6)}M"

        mean_reward = np.mean(y)
        std_reward = np.std(y)
        results["algo"].append(algo)
        results["env_id"].append(env_id)
        results["mean_reward"].append(mean_reward)
        results["std_reward"].append(std_reward)
        results["n_timesteps"].append(n_training_timesteps)
        results["eval_timesteps"].append(x[-1])
        results["eval_episodes"].append(len(y))
Code example #5
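Model-loading logic from an enjoy/record-style script: it resolves the model path (latest model by default, or a specific checkpoint), verifies the file exists, loads the saved hyperparameters, creates the test environment, and loads the agent with ALGOS[algo].load.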
    elif args.load_checkpoint is None:
        # Default: load latest model
        model_path = os.path.join(log_path, f"{env_id}.zip")
        name_prefix = f"final-model-{algo}-{env_id}"
    else:
        model_path = os.path.join(
            log_path, f"rl_model_{args.load_checkpoint}_steps.zip")
        name_prefix = f"checkpoint-{args.load_checkpoint}-{algo}-{env_id}"

    found = os.path.isfile(model_path)
    if not found:
        raise ValueError(
            f"No model found for {algo} on {env_id}, path: {model_path}")

    stats_path = os.path.join(log_path, env_id)
    hyperparams, stats_path = get_saved_hyperparams(stats_path)

    is_atari = ExperimentManager.is_atari(env_id)

    env = create_test_env(
        env_id,
        n_envs=n_envs,
        stats_path=stats_path,
        seed=seed,
        log_dir=None,
        should_render=not args.no_render,
        hyperparams=hyperparams,
    )

    model = ALGOS[algo].load(model_path, env=env)
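A hedged continuation (using evaluate_policy here is an assumption, but the helper itself is part of stable-baselines3): evaluating the loaded model on the test environment.

from stable_baselines3.common.evaluation import evaluate_policy

mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"{algo} on {env_id}: {mean_reward:.2f} +/- {std_reward:.2f}")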
Code example #6
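Script-level setup from an enjoy-style script for the VAE/Donkey environments: it resolves the model path (optionally the best model), asserts that the log folder and model file exist, seeds the run, loads the saved hyperparameters and the VAE path, creates the test environment, loads the agent, and forces deterministic actions for SAC and DDPG.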
else:
    log_path = os.path.join(folder, algo)

best_path = ''
if args.best_model:
    best_path = '_best'

model_path = os.path.join(log_path, "{}{}.pkl".format(ENV_ID, best_path))

assert os.path.isdir(log_path), "The {} folder was not found".format(log_path)
assert os.path.isfile(model_path), "No model found for {} on {}, path: {}".format(algo, ENV_ID, model_path)

set_global_seeds(args.seed)

stats_path = os.path.join(log_path, ENV_ID)
hyperparams, stats_path = get_saved_hyperparams(stats_path, norm_reward=args.norm_reward)
hyperparams['vae_path'] = args.vae_path

log_dir = args.reward_log if args.reward_log != '' else None

env = create_test_env(stats_path=stats_path, seed=args.seed, log_dir=log_dir,
                      hyperparams=hyperparams)

model = ALGOS[algo].load(model_path)

obs = env.reset()

# Force deterministic for SAC and DDPG
deterministic = args.deterministic or algo in ['ddpg', 'sac']
if args.verbose >= 1:
    print("Deterministic actions: {}".format(deterministic))
Code example #7
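A stripped-down variant of the previous setup: the environment creation and reset are commented out, so only the pretrained SAC policy is loaded.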
if best_model:
    best_path = '_best'

model_path = os.path.join(log_path, "{}{}.pkl".format(ENV_ID, best_path))


assert os.path.isdir(log_path), "The {} folder was not found".format(log_path)
assert os.path.isfile(model_path), "No model found for {} on {}, path: {}".format(algo, ENV_ID, model_path)

set_global_seeds(0)


stats_path = os.path.join(log_path, ENV_ID)


hyperparams, stats_path = get_saved_hyperparams(stats_path, norm_reward=False)
hyperparams['vae_path'] = vae_path

reward_log = ""

log_dir = reward_log if reward_log != '' else None

#env = create_test_env(stats_path=stats_path, seed=0, log_dir=log_dir,
                      #hyperparams=hyperparams)

model = ALGOS["sac"].load(model_path)

#obs = env.reset()

#print(obs.shape)
Code example #8
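record_video loads a trained agent, recreates its test environment, wraps it in VecVideoRecorder, rolls the policy out for video_length steps to produce a video, and then closes the environment (with a workaround for gym issue #893).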
def record_video(env_id: str = "CartPole-v1",
                 algo: str = "ppo",
                 folder: str = "rl-trained-agents",
                 video_folder: str = "logs/videos/",
                 video_length: int = 1000,
                 n_envs: int = 1,
                 deterministic: bool = False,
                 seed: int = 0,
                 no_render: bool = False,
                 exp_id: int = 0):

    if exp_id == 0:
        exp_id = get_latest_run_id(os.path.join(folder, algo), env_id)
        print(f"Loading latest experiment, id={exp_id}")
    # Sanity checks
    if exp_id > 0:
        log_path = os.path.join(folder, algo, f"{env_id}_{exp_id}")
    else:
        log_path = os.path.join(folder, algo)

    model_path = os.path.join(log_path, f"{env_id}.zip")

    stats_path = os.path.join(log_path, env_id)
    hyperparams, stats_path = get_saved_hyperparams(stats_path)

    is_atari = "NoFrameskip" in env_id

    env = create_test_env(
        env_id,
        n_envs=n_envs,
        stats_path=stats_path,
        seed=seed,
        log_dir=None,
        should_render=not no_render,
        hyperparams=hyperparams,
    )

    model = ALGOS[algo].load(model_path)

    obs = env.reset()

    # Note: apparently it renders by default
    env = VecVideoRecorder(
        env,
        video_folder,
        record_video_trigger=lambda x: x == 0,
        video_length=video_length,
        name_prefix=f"{algo}-{env_id}",
    )

    env.reset()
    for _ in range(video_length + 1):
        action, _ = model.predict(obs, deterministic=deterministic)
        obs, _, _, _ = env.step(action)

    # Workaround for https://github.com/openai/gym/issues/893
    if n_envs == 1 and "Bullet" not in env_id and not is_atari:
        env = env.venv
        # DummyVecEnv
        while isinstance(env, VecEnvWrapper):
            env = env.venv
        if isinstance(env, DummyVecEnv):
            env.envs[0].env.close()
        else:
            env.close()
    else:
        # SubprocVecEnv
        env.close()
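A hedged usage example (the ids and paths are illustrative): recording 500 steps of a pretrained PPO CartPole agent to logs/videos/.

record_video(env_id="CartPole-v1",
             algo="ppo",
             folder="rl-trained-agents",
             video_folder="logs/videos/",
             video_length=500,
             deterministic=True)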