import rospy
import numpy as np
from algorithm.ppo_gae import PPOGAEAgent

# Hyperparameters are read from the ROS parameter server (/ML namespace)
epochs = rospy.get_param("/ML/epochs")
hdim = rospy.get_param("/ML/hdim")
policy_lr = rospy.get_param("/ML/policy_lr")
value_lr = rospy.get_param("/ML/value_lr")
max_std = rospy.get_param("/ML/max_std")
clip_range = rospy.get_param("/ML/clip_range")
n_step = rospy.get_param("/ML/n_step")
gamma = rospy.get_param("/ML/gamma")
lam = rospy.get_param("/ML/lam")
episode_size = rospy.get_param("/ML/episode_size")
batch_size = rospy.get_param("/ML/batch_size")
nupdates = rospy.get_param("/ML/nupdates")
maxlen_num = rospy.get_param("/ML/maxlen_num")

agent = PPOGAEAgent(obs_dim, n_act, epochs, hdim, policy_lr, value_lr, max_std, clip_range, seed)
#agent = PPOGAEAgent(obs_dim, n_act, epochs=10, hdim=obs_dim, policy_lr=3e-3, value_lr=1e-3, max_std=1.0, clip_range=0.2, seed=seed)

'''
PPO Agent with Gaussian policy
'''
def run_episode(env, animate=False):
    # Run the policy and collect (state, action, reward) pairs
    obs = env.reset()
    observes, actions, rewards, infos = [], [], [], []
    done = False

    for update in range(n_step):
        print("update", update)
        obs = np.array(obs)
        obs = obs.astype(np.float32).reshape((1, -1))  # numpy.ndarray of shape (1, num_obs)
        observes.append(obs)
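# --- Hedged sketch (not part of the original script): the /ML/* values read above
# normally come from a launch/YAML file loaded onto the ROS parameter server.
# The block below seeds placeholder values so the snippet can run standalone;
# every number here is an illustrative assumption, not the repository's setting.
import rospy

_ml_defaults = {
    "epochs": 10, "hdim": 64, "policy_lr": 1e-4, "value_lr": 1e-4,
    "max_std": 1.0, "clip_range": 0.2, "n_step": 200, "gamma": 0.99,
    "lam": 0.98, "episode_size": 10, "batch_size": 64,
    "nupdates": 500, "maxlen_num": 10,
}
for _name, _value in _ml_defaults.items():
    # Only fill in a default if the launch file has not already set the parameter
    if not rospy.has_param("/ML/" + _name):
        rospy.set_param("/ML/" + _name, _value)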
# import our training environment
import gym
from env.ur_door_opening_env import URSimDoorOpening
# import our training algorithms
from algorithm.ppo_gae import PPOGAEAgent

seed = 0
obs_dim = 21  # env.observation_space.shape[0]; if this changes, hdim has to change as well
n_act = 6     # config: act_dim (env.action_space.n)

agent = PPOGAEAgent(obs_dim, n_act, epochs=10, hdim=64, policy_lr=1e-4, value_lr=1e-4, max_std=1.0, clip_range=0.2, seed=seed)
#agent = PPOGAEAgent(obs_dim, n_act, epochs=10, hdim=obs_dim, policy_lr=3e-3, value_lr=1e-3, max_std=1.0, clip_range=0.2, seed=seed)

'''
PPO Agent with Gaussian policy
'''
def run_episode(env, animate=False):
    # Run the policy and collect (state, action, reward) pairs
    obs = env.reset()
    observes, actions, rewards, infos = [], [], [], []
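# --- Hedged sketch (illustration only, separate from run_episode above): how the
# observation buffering behaves. The values are made up; only the shapes matter.
import numpy as np

raw_obs = [0.0] * 21                                               # one 21-dim observation (obs_dim above)
step_obs = np.array(raw_obs).astype(np.float32).reshape((1, -1))   # shape (1, 21), as appended to observes
buffered = [step_obs, step_obs, step_obs]                          # one entry per environment step
trajectory = np.concatenate(buffered)                              # shape (3, 21), ready for the PPO/GAE update
print(trajectory.shape)                                            # -> (3, 21)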