from envs.WheeledRobotPybulletEnv import WheeledRobotPybulletEnv
from stable_baselines.ppo2.ppo2 import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
import matplotlib.pyplot as plt

raw_env = WheeledRobotPybulletEnv(decision_interval=1, use_GUI=True, num_episode_steps=5)

# Optional: PPO2 requires a vectorized environment to run;
# the env is now wrapped automatically when passing it to the constructor.
vec_env = DummyVecEnv([lambda: raw_env])

# Directories for TensorBoard logs and the saved model (backslashes escaped consistently).
dir_name = "results\\LearningResults\\PPO_WheeledRobotPybullet"
tensorboard_dir = dir_name + "\\tensorboard"
model_dir = dir_name + "\\model"

# Load the trained model; uncomment the two lines below to continue training and re-save instead.
model = PPO2.load(model_dir, vec_env)
# model.learn(total_timesteps=100, tb_log_name="test")
# model.save(model_dir)

# Reset the underlying environment and initialize buffers for the rollout trajectory.
env = vec_env.envs[0]
obs_prev = env.reset()
x_poss = [env.snake_robot.x]
y_poss = [env.snake_robot.y]
thetas = [env.snake_robot.theta]
times = [0]
a1s = [env.snake_robot.a1]
a2s = [env.snake_robot.a2]
a1dots = [env.snake_robot.a1dot]
a2dots = [env.snake_robot.a2dot]
# robot_params = []
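# --- Hedged example: a minimal rollout-and-plot sketch (not part of the original script). ---
# Assumes the standard stable-baselines predict/step API and that env.snake_robot keeps
# exposing the attributes read above; the 100-step horizon and the unit time increment
# are assumptions for illustration only.
for step in range(100):
    action, _states = model.predict(obs_prev)
    obs, reward, done, info = env.step(action)
    x_poss.append(env.snake_robot.x)
    y_poss.append(env.snake_robot.y)
    thetas.append(env.snake_robot.theta)
    times.append(times[-1] + 1)
    a1s.append(env.snake_robot.a1)
    a2s.append(env.snake_robot.a2)
    a1dots.append(env.snake_robot.a1dot)
    a2dots.append(env.snake_robot.a2dot)
    obs_prev = obs
    if done:
        break

# Plot the recorded body trajectory using the matplotlib import above.
plt.plot(x_poss, y_poss)
plt.xlabel('x')
plt.ylabel('y')
plt.title('PPO rollout trajectory')
plt.show()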
def load(self, path=None):
    """Load a trained PPO2 model, defaulting to this agent's model_path."""
    if path is None:
        path = self.model_path
    # Store and return the loaded model so callers can actually use it.
    self.model = PPO2.load(path, env=self.env)
    return self.model
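# --- Hedged usage sketch (not from the original code). ---
# Shows one way a load() method like the one above is typically hosted and called;
# the wrapper class name, its constructor, and the paths are assumptions.
from stable_baselines.ppo2.ppo2 import PPO2


class AgentWrapper:
    def __init__(self, env, model_path):
        self.env = env
        self.model_path = model_path
        self.model = None

    def load(self, path=None):
        if path is None:
            path = self.model_path
        self.model = PPO2.load(path, env=self.env)
        return self.model


# agent = AgentWrapper(env=vec_env, model_path="results/model")
# model = agent.load()               # falls back to self.model_path
# model = agent.load("other/model")  # or load an explicit checkpoint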
import time

import numpy as np
import rospy
from stable_baselines.ppo2.ppo2 import PPO2

# load_train_env is assumed to be provided elsewhere in this project.


def run_ppo(config, state_collector, agent_name="ppo_99_8507750", policy="CnnPolicy",
            mode="train", task_mode="static", stack_offset=15, num_stacks=1, debug=True,
            normalize=True, disc_action_space=False, ns=""):
    path_to_models = config['PATHES']['path_to_models']

    # Loading agent
    model = PPO2.load("%s/%s/%s.pkl" % (path_to_models, agent_name, agent_name))
    print("Loaded %s" % agent_name)
    print("--------------------------------------------------")
    print("Normalize: ", normalize)
    print("Policy: %s" % policy)
    print("Discrete action space: ", disc_action_space)
    print("Observation space size: ", model.observation_space.shape)
    print("Debug: ", debug)
    print("Number of stacks: %d, stack offset: %d" % (model.observation_space.shape[2], stack_offset))
    print("\n")

    # Loading environment
    env = load_train_env(ns, state_collector, 0.46, 19, num_stacks, stack_offset, debug,
                         task_mode, mode, policy, disc_action_space, normalize)

    # Resetting environment
    if mode == "train" or mode == "eval":
        obs = env.reset()
    if mode == "exec" or mode == "exec_rw":
        if disc_action_space:
            obs, rewards, dones, info = env.step([5])
        else:
            obs, rewards, dones, info = env.step([[0.0, 0.0]])

    # Cumulative reward (accumulated only when debugging).
    cum_reward = 0

    while True:
        # Determining action for given observation
        action, _states = model.predict(obs)

        # Clipping actions
        if not disc_action_space:
            action = np.maximum(np.minimum(model.action_space.high, action), model.action_space.low)

        # Executing action in environment
        obs, rewards, dones, info = env.step(action)
        if debug:
            cum_reward += rewards

        # Episode over?
        if dones:
            print("Episode finished with reward of %f." % cum_reward)
            cum_reward = 0

        time.sleep(0.0001)
        if rospy.is_shutdown():
            print('shutdown')
            break
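# --- Hedged usage sketch (not from the original code). ---
# run_ppo() only reads config['PATHES']['path_to_models'], so any mapping with that
# shape works (e.g. a configparser section). The path below and the call arguments
# are placeholders; state_collector is whatever observation collector the
# surrounding project provides.
example_config = {"PATHES": {"path_to_models": "/path/to/example_agents"}}

# run_ppo(example_config, state_collector, agent_name="ppo_99_8507750",
#         mode="exec", disc_action_space=True, debug=True)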
import os

from stable_baselines.ppo2.ppo2 import PPO2
from stable_baselines.common.vec_env import DummyVecEnv

# Assumed to follow the same envs package layout as the wheeled-robot example above.
from envs.IdealFluidSwimmerWithSpringEnv import IdealFluidSwimmerWithSpringEnv


# Hypothetical name: the original function signature was truncated in the source.
def append_datetime_suffix(s):
    """Append a timestamp so the result is S = s_Dyymmdd_HHMMSS."""
    import datetime
    date = str(datetime.datetime.now())
    date = date[2:4] + date[5:7] + date[8:10] + '_' + date[11:13] + date[14:16] + date[17:19]
    return s + '_D' + date


raw_env = IdealFluidSwimmerWithSpringEnv()

# Optional: PPO2 requires a vectorized environment to run;
# the env is now wrapped automatically when passing it to the constructor.
vec_env = DummyVecEnv([lambda: raw_env])

# Load the trained model saved by a previous trial.
results_dir = '../results/PPO_IdealFluidSwimmerWithSpringFixedA1ddot/trial_1.22_D210122_163220'
model_path = os.path.join(results_dir, "model")
model = PPO2.load(model_path, vec_env)

# Reset the underlying environment and initialize buffers for the rollout trajectory.
env = vec_env.envs[0]
obs_prev = env.reset()
x_poss = [env.snake_robot.x]
y_poss = [env.snake_robot.y]
thetas = [env.snake_robot.theta]
times = [0]
a1s = [env.snake_robot.a1]
a2s = [env.snake_robot.a2]
a1dots = [env.snake_robot.a1dot]
a2dots = [env.snake_robot.a2dot]
a1ddots = [env.snake_robot.a1ddot]
ks = [env.snake_robot.k]
cs = [env.snake_robot.c]
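# --- Hedged example: filling the buffers above (not part of the original script). ---
# Mirrors the rollout sketch after the first snippet, adding the spring-specific
# quantities a1ddot, k, and c; the 100-step horizon and the unit time increment
# are assumptions for illustration only.
for step in range(100):
    action, _states = model.predict(obs_prev)
    obs, reward, done, info = env.step(action)
    x_poss.append(env.snake_robot.x)
    y_poss.append(env.snake_robot.y)
    thetas.append(env.snake_robot.theta)
    times.append(times[-1] + 1)
    a1s.append(env.snake_robot.a1)
    a2s.append(env.snake_robot.a2)
    a1dots.append(env.snake_robot.a1dot)
    a2dots.append(env.snake_robot.a2dot)
    a1ddots.append(env.snake_robot.a1ddot)
    ks.append(env.snake_robot.k)
    cs.append(env.snake_robot.c)
    obs_prev = obs
    if done:
        break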