Example #1
def load_old_ppo2(root_dir, env, env_name, index, transparent_params):
    try:
        from baselines.ppo2 import ppo2 as ppo2_old
    except ImportError as e:
        msg = "{}. HINT: you need to install (OpenAI) Baselines to use old_ppo2".format(
            e)
        raise ImportError(msg)

    denv = FakeSingleSpacesVec(env, agent_id=index)
    possible_fnames = ["model.pkl", "final_model.pkl"]
    model_path = None
    for fname in possible_fnames:
        candidate_path = os.path.join(root_dir, fname)
        if os.path.exists(candidate_path):
            # No break: later filenames take precedence, so if both files
            # exist, final_model.pkl is the one loaded.
            model_path = candidate_path
    if model_path is None:
        raise FileNotFoundError(f"Could not find model at '{root_dir}' "
                                f"under any filename '{possible_fnames}'")

    # Build the policy in a fresh graph/session so it does not clobber any
    # existing default TensorFlow graph.
    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    with sess.as_default():
        with graph.as_default():
            pylog.info(f"Loading Baselines PPO2 policy from '{model_path}'")
            # learn() runs for a single timestep purely to construct the
            # network; the trained weights are then restored via load_path.
            policy = ppo2_old.learn(
                network="mlp",
                env=denv,
                total_timesteps=1,
                seed=0,
                nminibatches=4,
                log_interval=1,
                save_interval=1,
                load_path=model_path,
            )
    stable_policy = OpenAIToStablePolicy(policy,
                                         ob_space=denv.observation_space,
                                         ac_space=denv.action_space)
    model = PolicyToModel(stable_policy)

    try:
        normalize_path = os.path.join(root_dir, "normalize.pkl")
        with open(normalize_path, "rb") as f:
            old_vec_normalize = pickle.load(f)
        vec_normalize = vec_env.VecNormalize(denv, training=False)
        vec_normalize.obs_rms = old_vec_normalize.ob_rms
        vec_normalize.ret_rms = old_vec_normalize.ret_rms
        model = NormalizeModel(model, vec_normalize)
        pylog.info(f"Loaded normalization statistics from '{normalize_path}'")
    except FileNotFoundError:
        # We did not use VecNormalize during training, skip
        pass

    return model
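
A minimal usage sketch (the env construction, directory, and env name below are illustrative assumptions, not confirmed API):

# Hypothetical usage: venv is a multi-agent VecEnv, e.g. built as in Example #4;
# "runs/ppo2-old" stands in for a directory containing model.pkl or final_model.pkl.
model = load_old_ppo2(root_dir="runs/ppo2-old",
                      env=venv,
                      env_name="multicomp/SumoAnts-v0",
                      index=0,
                      transparent_params=None)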
Example #2
    def f(root_dir, env, env_name, index, transparent_params):
        denv = FakeSingleSpacesVec(env, agent_id=index)
        pylog.info(
            f"Loading Stable Baselines policy for '{cls}' from '{root_dir}'")
        model = load_backward_compatible_model(cls, root_dir, denv)

        try:
            vec_normalize = load_vec_normalize(root_dir, denv)
            model = NormalizeModel(model, vec_normalize)
        except FileNotFoundError:
            # No saved VecNormalize, so training must not have used normalization.
            pass

        return model
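
The nested f above appears to be returned by a factory that closes over the Stable Baselines algorithm class cls; a minimal sketch of such a factory (the factory name and structure are assumptions):

def load_stable_baselines(cls):
    # Hypothetical factory: returns a loader bound to the algorithm class
    # `cls` (e.g. stable_baselines.PPO2), matching the closures shown above.
    def f(root_dir, env, env_name, index, transparent_params):
        denv = FakeSingleSpacesVec(env, agent_id=index)
        return load_backward_compatible_model(cls, root_dir, denv)

    return f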
Example #3
    def f(root_dir, env, env_name, index, transparent_params):
        denv = FakeSingleSpacesVec(env, agent_id=index)
        pylog.info(
            f"Loading Stable Baselines policy for '{cls}' from '{root_dir}'")
        model = load_backward_compatible_model(cls, root_dir, denv)
        try:
            vec_normalize = VecNormalize(denv, training=False)
            vec_normalize.load_running_average(root_dir)
            model = NormalizeModel(model, vec_normalize)
            pylog.info(f"Loaded normalization statistics from '{root_dir}'")
        except FileNotFoundError:
            # We did not use VecNormalize during training, skip
            pass

        return model
Example #4
def create_multi_agent_curried_policy_wrapper(mon_dir,
                                              env_name,
                                              num_envs,
                                              embed_index,
                                              max_steps,
                                              state_shape=None,
                                              add_zoo=False,
                                              num_zoo=5):
    def episode_limit(env):
        return time_limit.TimeLimit(env, max_episode_steps=max_steps)

    def env_fn(i):
        return make_env(env_name,
                        seed=42,
                        i=i,
                        out_dir=mon_dir,
                        pre_wrappers=[episode_limit])

    # Bind i eagerly via a default argument: a bare `lambda: env_fn(i)` would
    # capture the loop variable by reference, so every env would be built with
    # the final value of i.
    vec_env = make_dummy_vec_multi_env(
        [lambda i=i: env_fn(i) for i in range(num_envs)])

    zoo = load_policy(
        policy_path="1",
        policy_type="zoo",
        env=vec_env,
        env_name=env_name,
        index=1 - embed_index,
        transparent_params=None,
    )
    half_env = FakeSingleSpacesVec(vec_env, agent_id=embed_index)
    policies = [
        _get_constant_policy(half_env,
                             constant_value=half_env.action_space.sample(),
                             state_shape=state_shape) for _ in range(10)
    ]
    if add_zoo:
        policies += [zoo] * num_zoo

    policy_wrapper = MultiPolicyWrapper(policies=policies, num_envs=num_envs)

    vec_env = CurryVecEnv(venv=vec_env,
                          policy=policy_wrapper,
                          agent_idx=embed_index,
                          deterministic=False)
    vec_env = FlattenSingletonVecEnv(vec_env)

    # Generator-style setup/teardown: yield the curried env and policies to
    # the caller; when the generator is resumed, close the policy wrapper.
    yield vec_env, policy_wrapper, zoo
    policy_wrapper.close()
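
Since the function yields once and then closes the policy wrapper, it is presumably consumed as a context manager (or pytest fixture); a sketch using contextlib, the wrapping itself being an assumption:

import contextlib

# Hypothetical: wrap the generator so teardown runs when the block exits normally.
make_wrapper = contextlib.contextmanager(create_multi_agent_curried_policy_wrapper)
with make_wrapper(mon_dir="/tmp/mon", env_name="multicomp/SumoAnts-v0",
                  num_envs=2, embed_index=0, max_steps=100) as (venv, wrapper, zoo):
    obs = venv.reset()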
Example #5
def load_random(path, env, env_name, index, transparent_params):
    denv = FakeSingleSpacesVec(env, agent_id=index)
    policy = RandomPolicy(denv)
    return PolicyToModel(policy)
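
A uniform-random baseline loader; minimal usage sketch (all argument values are placeholders):

# Hypothetical: random baseline controlling agent 1 of a two-player VecEnv.
# `path` is ignored by this loader, so None is passed.
model = load_random(path=None, env=venv, env_name="multicomp/SumoAnts-v0",
                    index=1, transparent_params=None)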