Code Example #1
from envs.WheeledRobotPybulletEnv import WheeledRobotPybulletEnv
from stable_baselines.ppo2.ppo2 import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
import matplotlib.pyplot as plt

raw_env = WheeledRobotPybulletEnv(decision_interval=1,
                                  use_GUI=True,
                                  num_episode_steps=5)
# PPO2 requires a vectorized environment; wrapping explicitly here is optional,
# since a plain env passed to the PPO2 constructor is wrapped automatically.
vec_env = DummyVecEnv([lambda: raw_env])

dir_name = "results\LearningResults\PPO_WheeledRobotPybullet"
tensorboard_dir = dir_name + "\\tensorboard"
model_dir = dir_name + "\\model"
model = PPO2.load(model_dir, vec_env)
# model.learn(total_timesteps=100, tb_log_name="test")
# model.save(model_dir)

env = vec_env.envs[0]
obs_prev = env.reset()

x_poss = [env.snake_robot.x]
y_poss = [env.snake_robot.y]
thetas = [env.snake_robot.theta]
times = [0]
a1s = [env.snake_robot.a1]
a2s = [env.snake_robot.a2]
a1dots = [env.snake_robot.a1dot]
a2dots = [env.snake_robot.a2dot]
# robot_params = []
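
The excerpt stops after initializing the logging lists. A plausible continuation (a sketch, not taken from the source) rolls the loaded policy through one episode and records the robot state after every step:

# Sketch only: attribute names follow the snippet above, but the loop
# itself is an assumption about how the lists are meant to be filled.
done = False
while not done:
    action, _states = model.predict(obs_prev)
    obs_prev, reward, done, info = env.step(action)
    x_poss.append(env.snake_robot.x)
    y_poss.append(env.snake_robot.y)
    thetas.append(env.snake_robot.theta)
    times.append(times[-1] + 1)  # step counter; scale by decision_interval for seconds
    a1s.append(env.snake_robot.a1)
    a2s.append(env.snake_robot.a2)
    a1dots.append(env.snake_robot.a1dot)
    a2dots.append(env.snake_robot.a2dot)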
Code Example #2
File: train.py Project: jingGM/vizdoom
    def load(self, path=None):
        if path is None:
            path = self.model_path
        model = PPO2.load(path, env=self.env)
        return model  # the excerpt ends here; returning makes the loaded model usable
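
For context, PPO2.load builds a fresh model instance from the saved file rather than restoring state into an existing one, which is why a wrapper like this should return or store the result. A minimal save/load round trip with stable-baselines (a sketch; the env and file names are illustrative, not from the source):

import gym
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv

env = DummyVecEnv([lambda: gym.make("CartPole-v1")])
model = PPO2("MlpPolicy", env, verbose=0)
model.save("ppo2_cartpole")                    # writes ppo2_cartpole.zip
loaded = PPO2.load("ppo2_cartpole", env=env)   # returns a new PPO2 instance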
Code Example #3
# Imports needed by this excerpt (not shown in the original listing)
import time

import numpy as np
import rospy


def run_ppo(config,
            state_collector,
            agent_name="ppo_99_8507750",
            policy="CnnPolicy",
            mode="train",
            task_mode="static",
            stack_offset=15,
            num_stacks=1,
            debug=True,
            normalize=True,
            disc_action_space=False,
            ns=""):

    path_to_models = config['PATHES']['path_to_models']

    # Loading agent
    model = PPO2.load("%s/%s/%s.pkl" %
                      (path_to_models, agent_name, agent_name))

    print("Loaded %s" % agent_name)
    print("--------------------------------------------------")
    print("Normalize: ", normalize)
    print("Policy: %s" % policy)
    print("Discrete action space: ", disc_action_space)
    print("Observation space size: ", model.observation_space.shape)
    print("Debug: ", debug)
    print("Number of stacks: %d, stack offset: %d" %
          (model.observation_space.shape[2], stack_offset))
    print("\n")

    # Loading environment
    env = load_train_env(ns, state_collector, 0.46, 19, num_stacks,
                         stack_offset, debug, task_mode, mode, policy,
                         disc_action_space, normalize)

    # Resetting environment
    if mode == "train" or mode == "eval":
        obs = env.reset()
    if mode == "exec" or mode == "exec_rw":
        if disc_action_space:
            obs, rewards, dones, info = env.step([5])
        else:
            obs, rewards, dones, info = env.step([[0.0, 0.0]])

    if debug:
        # Cumulative reward over the current episode.
        cum_reward = 0
    while True:
        # Determining action for given observation
        action, _states = model.predict(obs)

        # Clipping continuous actions to the action-space bounds
        if not disc_action_space:
            action = np.clip(action, model.action_space.low,
                             model.action_space.high)

        # Executing action in environment
        obs, rewards, dones, info = env.step(action)

        if debug:
            cum_reward += rewards

            # Episode over?
            if dones:
                print("Episode finished with reward of %f." % cum_reward)
                cum_reward = 0

        time.sleep(0.0001)
        if rospy.is_shutdown():
            print('shutdown')
            break
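
A hypothetical invocation sketch, assuming a configparser-style config with a [PATHES] section; the file name and the state_collector placeholder are illustrative, not from the source:

import configparser

config = configparser.ConfigParser()
config.read("settings.ini")      # hypothetical config file
run_ppo(config,
        state_collector=None,    # placeholder; the project supplies a real collector
        agent_name="ppo_99_8507750",
        mode="eval")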
Code Example #4
import os

from stable_baselines.ppo2.ppo2 import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
# Assumed module path, mirroring Example #1:
from envs.IdealFluidSwimmerWithSpringEnv import IdealFluidSwimmerWithSpringEnv


def timestamp_suffix(s):  # hypothetical name; the original def line is truncated
    """Append the current timestamp to a label:

        S = s_Dyymmdd_HHMMSS
    """
    import datetime
    date = str(datetime.datetime.now())
    date = date[2:4] + date[5:7] + date[8:10] + '_' + date[11:13] + date[14:16] + date[17:19]
    return s + '_D' + date
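
The suffix this helper produces matches the trial directory name loaded below; a quick illustration (hypothetical call, output depends on the current time):

print(timestamp_suffix("trial_1.22"))   # e.g. trial_1.22_D210122_163220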


raw_env = IdealFluidSwimmerWithSpringEnv()
# PPO2 requires a vectorized environment; wrapping explicitly here is optional,
# since a plain env passed to the PPO2 constructor is wrapped automatically.
vec_env = DummyVecEnv([lambda: raw_env])

results_dir = '../results/PPO_IdealFluidSwimmerWithSpringFixedA1ddot/trial_1.22_D210122_163220'
model_path = os.path.join(results_dir, "model")
model = PPO2.load(model_path, vec_env)

env = vec_env.envs[0]
obs_prev = env.reset()

x_poss = [env.snake_robot.x]
y_poss = [env.snake_robot.y]
thetas = [env.snake_robot.theta]
times = [0]
a1s = [env.snake_robot.a1]
a2s = [env.snake_robot.a2]
a1dots = [env.snake_robot.a1dot]
a2dots = [env.snake_robot.a2dot]
a1ddots = [env.snake_robot.a1ddot]
ks = [env.snake_robot.k]
cs = [env.snake_robot.c]
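
Example #4 breaks off at the same point as Example #1; once an analogous rollout loop has filled the lists, the logged trajectories could be plotted, for instance (a sketch, assuming matplotlib):

import matplotlib.pyplot as plt

# Body trajectory on the left, joint angles over time on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(x_poss, y_poss)
ax1.set_xlabel("x")
ax1.set_ylabel("y")
ax1.set_title("Swimmer trajectory")
ax2.plot(times, a1s, label="a1")
ax2.plot(times, a2s, label="a2")
ax2.set_xlabel("step")
ax2.legend()
plt.show()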