Code Example #1
from envs.WheeledRobotPybulletEnv import WheeledRobotPybulletEnv
from stable_baselines.ppo2.ppo2 import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
import matplotlib.pyplot as plt

raw_env = WheeledRobotPybulletEnv(decision_interval=1,
                                  use_GUI=True,
                                  num_episode_steps=5)
# PPO2 requires a vectorized environment; wrapping explicitly here is optional,
# since a plain env passed to the PPO2 constructor is wrapped automatically.
vec_env = DummyVecEnv([lambda: raw_env])

dir_name = "results\LearningResults\PPO_WheeledRobotPybullet"
tensorboard_dir = dir_name + "\\tensorboard"
model_dir = dir_name + "\\model"
model = PPO2.load(model_dir, vec_env)
# model.learn(total_timesteps=100, tb_log_name="test")
# model.save(model_dir)

env = vec_env.envs[0]
obs_prev = env.reset()

x_poss = [env.snake_robot.x]
y_poss = [env.snake_robot.y]
thetas = [env.snake_robot.theta]
times = [0]
a1s = [env.snake_robot.a1]
a2s = [env.snake_robot.a2]
a1dots = [env.snake_robot.a1dot]
a2dots = [env.snake_robot.a2dot]
# robot_params = []
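
The excerpt stops after initializing the logging lists. A plausible continuation (a sketch, not taken from the source) rolls the loaded policy through one episode and records the robot state after every step:

# Sketch only: attribute names follow the snippet above, but the loop
# itself is an assumption about how the lists are meant to be filled.
done = False
while not done:
    action, _states = model.predict(obs_prev)
    obs_prev, reward, done, info = env.step(action)
    x_poss.append(env.snake_robot.x)
    y_poss.append(env.snake_robot.y)
    thetas.append(env.snake_robot.theta)
    times.append(times[-1] + 1)  # step counter; scale by decision_interval for seconds
    a1s.append(env.snake_robot.a1)
    a2s.append(env.snake_robot.a2)
    a1dots.append(env.snake_robot.a1dot)
    a2dots.append(env.snake_robot.a2dot)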
Code Example #2
File: train.py Project: jingGM/vizdoom
    def load(self, path=None):
        if path is None:
            path = self.model_path
        model = PPO2.load(path, env=self.env)
        return model  # the excerpt ends here; returning makes the loaded model usable
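
For context, PPO2.load builds a fresh model instance from the saved file rather than restoring state into an existing one, which is why a wrapper like this should return or store the result. A minimal save/load round trip with stable-baselines (a sketch; the env and file names are illustrative, not from the source):

import gym
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv

env = DummyVecEnv([lambda: gym.make("CartPole-v1")])
model = PPO2("MlpPolicy", env, verbose=0)
model.save("ppo2_cartpole")                    # writes ppo2_cartpole.zip
loaded = PPO2.load("ppo2_cartpole", env=env)   # returns a new PPO2 instance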
Code Example #3
# Imports needed by this excerpt (not shown in the original listing)
import time

import numpy as np
import rospy


def run_ppo(config,
            state_collector,
            agent_name="ppo_99_8507750",
            policy="CnnPolicy",
            mode="train",
            task_mode="static",
            stack_offset=15,
            num_stacks=1,
            debug=True,
            normalize=True,
            disc_action_space=False,
            ns=""):

    path_to_models = config['PATHES']['path_to_models']

    # Loading agent
    model = PPO2.load("%s/%s/%s.pkl" %
                      (path_to_models, agent_name, agent_name))

    print("Loaded %s" % agent_name)
    print("--------------------------------------------------")
    print("Normalize: ", normalize)
    print("Policy: %s" % policy)
    print("Discrete action space: ", disc_action_space)
    print("Observation space size: ", model.observation_space.shape)
    print("Debug: ", debug)
    print("Number of stacks: %d, stack offset: %d" %
          (model.observation_space.shape[2], stack_offset))
    print("\n")

    # Loading environment
    env = load_train_env(ns, state_collector, 0.46, 19, num_stacks,
                         stack_offset, debug, task_mode, mode, policy,
                         disc_action_space, normalize)

    # Resetting environment
    if mode == "train" or mode == "eval":
        obs = env.reset()
    if mode == "exec" or mode == "exec_rw":
        if disc_action_space:
            obs, rewards, dones, info = env.step([5])
        else:
            obs, rewards, dones, info = env.step([[0.0, 0.0]])

    if debug:
        # Cumulative reward over the current episode.
        cum_reward = 0
    while True:
        # Determining action for given observation
        action, _states = model.predict(obs)

        # Clipping continuous actions to the action-space bounds
        if not disc_action_space:
            action = np.clip(action, model.action_space.low,
                             model.action_space.high)

        # Executing action in environment
        obs, rewards, dones, info = env.step(action)

        if debug:
            cum_reward += rewards

            # Episode over?
            if dones:
                print("Episode finished with reward of %f." % cum_reward)
                cum_reward = 0

        time.sleep(0.0001)
        if rospy.is_shutdown():
            print('shutdown')
            break
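
A hypothetical invocation sketch, assuming a configparser-style config with a [PATHES] section; the file name and the state_collector placeholder are illustrative, not from the source:

import configparser

config = configparser.ConfigParser()
config.read("settings.ini")      # hypothetical config file
run_ppo(config,
        state_collector=None,    # placeholder; the project supplies a real collector
        agent_name="ppo_99_8507750",
        mode="eval")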
Code Example #4
import os

from stable_baselines.ppo2.ppo2 import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
# Assumed module path, mirroring Example #1:
from envs.IdealFluidSwimmerWithSpringEnv import IdealFluidSwimmerWithSpringEnv


def timestamp_suffix(s):  # hypothetical name; the original def line is truncated
    """Append the current timestamp to a label:

        S = s_Dyymmdd_HHMMSS
    """
    import datetime
    date = str(datetime.datetime.now())
    date = date[2:4] + date[5:7] + date[8:10] + '_' + date[11:13] + date[14:16] + date[17:19]
    return s + '_D' + date
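
The suffix this helper produces matches the trial directory name loaded below; a quick illustration (hypothetical call, output depends on the current time):

print(timestamp_suffix("trial_1.22"))   # e.g. trial_1.22_D210122_163220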


raw_env = IdealFluidSwimmerWithSpringEnv()
# PPO2 requires a vectorized environment; wrapping explicitly here is optional,
# since a plain env passed to the PPO2 constructor is wrapped automatically.
vec_env = DummyVecEnv([lambda: raw_env])

results_dir = '../results/PPO_IdealFluidSwimmerWithSpringFixedA1ddot/trial_1.22_D210122_163220'
model_path = os.path.join(results_dir, "model")
model = PPO2.load(model_path, vec_env)

env = vec_env.envs[0]
obs_prev = env.reset()

x_poss = [env.snake_robot.x]
y_poss = [env.snake_robot.y]
thetas = [env.snake_robot.theta]
times = [0]
a1s = [env.snake_robot.a1]
a2s = [env.snake_robot.a2]
a1dots = [env.snake_robot.a1dot]
a2dots = [env.snake_robot.a2dot]
a1ddots = [env.snake_robot.a1ddot]
ks = [env.snake_robot.k]
cs = [env.snake_robot.c]
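
Example #4 breaks off at the same point as Example #1; once an analogous rollout loop has filled the lists, the logged trajectories could be plotted, for instance (a sketch, assuming matplotlib):

import matplotlib.pyplot as plt

# Body trajectory on the left, joint angles over time on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(x_poss, y_poss)
ax1.set_xlabel("x")
ax1.set_ylabel("y")
ax1.set_title("Swimmer trajectory")
ax2.plot(times, a1s, label="a1")
ax2.plot(times, a2s, label="a2")
ax2.set_xlabel("step")
ax2.legend()
plt.show()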