def test(args=get_args()):
    torch.set_num_threads(1)  # a single thread is enough for NN inference
    model_path = os.path.join(args.logdir, args.task, 'ddpg/policy.pth')
    env = gym_make()
    policy = load_policy(env, args.layer, model_path)
    print(env.action_space.low, env.action_space.high)
    obs = env.reset()
    while True:
        action = policy_forward(policy, obs, eps=0.05)
        print(action)
        obs, reward, done, info = env.step(action[0])
        print(reward)
        env.render()
        if done:
            break
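# --- Hedged sketch, not the repo's actual helpers ---
# `load_policy` and `policy_forward` are used above but not defined in this
# excerpt.  A minimal PyTorch version of the forward step might look like the
# sketch below; the actor call, the eps-scaled Gaussian exploration noise,
# and the action bounds are assumptions made purely for illustration.
import numpy as np
import torch


def policy_forward_sketch(actor, obs, eps=0.05, low=-1.0, high=1.0):
    # run the actor on a single observation and return a (1, act_dim) array
    with torch.no_grad():
        obs_t = torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0)
        action = actor(obs_t).cpu().numpy()
    # add small Gaussian exploration noise scaled by eps, then clip to bounds
    action = action + eps * np.random.randn(*action.shape)
    return np.clip(action, low, high)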
def steer(self, from_node, to_node, t_max_extend=10.0, t_tree=5.0):
    # use the RL policy to steer from from_node towards to_node
    parent_node = from_node
    new_node = self.Node(from_node.state)
    new_node.parent = parent_node
    new_node.path.append(new_node.state)
    state = new_node.state
    goal = to_node.state.copy()
    env = self.gym
    env.state = state
    env.goal = goal
    obs = env._obs()
    dt = 0.2
    n_max_extend = round(t_max_extend / dt)
    n_tree = round(t_tree / dt)
    new_node_list = []
    for n_extend in range(1, n_max_extend + 1):
        # control with the RL policy
        action = policy_forward(self.policy, obs, eps=0.05)[0]
        obs, rewards, done, info = env.step(action)
        state = env.state
        new_node.state = state
        new_node.path.append(state)
        if not env.robot_env.valid_state_check(state):  # collision
            break
        elif np.linalg.norm(state[:2] - goal[:2]) <= 1.0:  # reached the target region
            new_node_list.append(new_node)
            break
        elif n_extend % n_tree == 0:
            # periodically commit the current segment as a tree node and branch from it
            new_node_list.append(new_node)
            parent_node = new_node
            new_node = self.Node(parent_node.state)
            new_node.parent = parent_node
            new_node.path.append(new_node.state)
    self.node_list += new_node_list
    return new_node_list
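# --- Hedged usage sketch (assumed planner loop, not the repo's code) ---
# `steer` is a method of an RRT-style planner; a typical caller might sample a
# random state, find the nearest tree node, and steer towards it.  The
# `sample_free`, `nearest_node`, `reached_goal`, and `extract_path` helpers
# below are hypothetical names used only for illustration.
def plan_sketch(planner, max_iter=500):
    for _ in range(max_iter):
        rand_node = planner.Node(planner.sample_free())   # random target state
        near_node = planner.nearest_node(rand_node)       # closest existing tree node
        new_nodes = planner.steer(near_node, rand_node)   # RL-policy rollout segments
        for node in new_nodes:
            if planner.reached_goal(node):                # goal region check
                return planner.extract_path(node)
    return None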
def test_policy():
    # torch.set_num_threads(1)  # a single thread is enough for NN inference
    model_path = os.path.dirname(__file__) + '/../data/net/end2end/ddpg/policy.pth'
    # several obstacle environments are loaded here; only training_env is used below
    test_env1 = pickle.load(
        open(os.path.dirname(__file__) + '/../data/obstacles/test_env1.pkl', 'rb'))
    obs_list = pickle.load(
        open(os.path.dirname(__file__) + '/../data/obstacles/obs_list_list.pkl', 'rb'))[:1]
    test_env2 = pickle.load(
        open(os.path.dirname(__file__) + '/../data/obstacles/test_env2.pkl', 'rb'))
    training_env = pickle.load(
        open(os.path.dirname(__file__) + '/../data/obstacles/training_env.pkl', 'rb'))
    env = DifferentialDriveGym(obs_list_list=[training_env])
    env.reset()
    policy = load_policy(env, [1024, 512, 512, 512], model_path)
    obs = env.reset()
    fig, ax = plt.subplots()
    while True:
        action = policy_forward(policy, obs, eps=0.05)
        print(action)
        obs, reward, done, info = env.step(action[0])
        print(reward)
        env.render()
        if done:
            break