# --- Scratch/demo script (superseded by the `environment` class below). ---
# Launches the reacher scene headless and disables joint 1's control loop.
# NOTE(review): this top-level code runs on import and duplicates the setup
# done by `environment.__init__`; consider deleting it or guarding it with
# `if __name__ == '__main__':`.
LOOPS = 100
SCENE_FILE = join(dirname(abspath(__file__)), 'reacher_v2.ttt')
pr = PyRep()
pr.launch(SCENE_FILE, headless=True)
pr.start()

# Sampling cuboid for a (previously commented-out) random target position.
position_min, position_max = [0.8, -0.2, 1.0], [1.0, 0.2, 1.4]

joint1 = pr.get_joint('link_1')
joint1.set_control_loop_enabled(0)

for i in range(LOOPS):
    # The original loop body (reset arm, sample target, plan path) was
    # entirely commented out, which left `for ...:` with no statement —
    # a SyntaxError. `pass` keeps the module importable; the dead
    # commented-out experiment code has been removed.
    pass
class environment(object):
    """2-DoF planar reacher environment backed by a PyRep (CoppeliaSim) scene.

    Loads ``reacher_v2.ttt``, exposes a small RL-style interface
    (observations, discrete or continuous actions, reward, reset) for a
    two-joint arm ('link_1', 'link_2') that must bring its end effector
    within 0.018 m (in the x-y plane) of a movable 'target' object.

    Parameters
    ----------
    continuous_control : bool
        If True, actions are raw joint target velocities ``[v1, v2]``
        (radians/s). If False, actions are indices into ``action_all``,
        each a pair of joint-position increments of +/- 4 degrees.
    """

    # Distance (metres, x-y plane) under which the target counts as reached.
    _REACH_THRESHOLD = 0.018

    def __init__(self, continuous_control=False):
        self.pr = PyRep()
        SCENE_FILE = join(dirname(abspath(__file__)), 'reacher_v2.ttt')
        self.pr.launch(SCENE_FILE, headless=True)
        self.pr.start()
        self.reached = 0
        self.done = False
        self.continuous_control = continuous_control
        # Scene handles.
        self.target = self.pr.get_object('target')
        self.end_effector = self.pr.get_dummy('end_effector')
        self.joint1 = self.pr.get_joint('link_1')
        self.joint2 = self.pr.get_joint('link_2')
        self.reacher = self.pr.get_object('reacher')
        self.camera = self.pr.get_vision_sensor('Vision_sensor')
        # Per-step joint increment for the discrete action set: 4 degrees.
        self.increment = 4 * pi / 180  # to radians
        # 8 discrete actions: every combination of {-inc, 0, +inc} per joint
        # except (0, 0).
        self.action_all = [[self.increment, self.increment],
                           [-self.increment, -self.increment],
                           [0, self.increment],
                           [0, -self.increment],
                           [self.increment, 0],
                           [-self.increment, 0],
                           [-self.increment, self.increment],
                           [self.increment, -self.increment]]

    def _dist_to_target(self):
        """Return the x-y plane distance from the end effector to the target.

        Uses ``self.target_pos`` (cached by :meth:`reset_target_position`),
        matching the original behaviour of both callers.
        """
        ee_pos = self.end_effector_pos()
        return sqrt((ee_pos[0] - self.target_pos[0]) ** 2 +
                    (ee_pos[1] - self.target_pos[1]) ** 2)

    def threshold_check(self):
        """Debug utility: run 5 random-target episodes, stepping the sim and
        printing the reward until each target is reached.

        NOTE(review): the inner loop has no step limit — if the controller
        never reaches the target this blocks forever.
        """
        for _ in range(5):
            self.reset_target_position(random_=True)
            while True:
                self.pr.step()
                dist_ee_target = self._dist_to_target()
                if dist_ee_target < self._REACH_THRESHOLD:
                    reward = 1
                    print('TARGET REACHED')
                    self.done = True
                    break
                else:
                    reward = -dist_ee_target / 10
                    print('Reward:%s' % reward)

    def render(self):
        """Return the current camera image scaled from [0, 1] floats.

        NOTE(review): scaling by 256 can yield values slightly above 255;
        255 is the conventional factor — confirm intent before changing.
        """
        img = self.camera.capture_rgb()
        return img * 256

    def get_obs(self):
        """Return the observation vector.

        Concatenation of: cos(joint angles), sin(joint angles), joint
        angles, joint velocities, and the x-y vector from target to end
        effector (ee - target). Length 10 for this 2-joint arm.
        """
        joints_pos = self.get_joints_pos()
        cos_joints = [cos(theta) for theta in joints_pos]
        sin_joints = [sin(theta) for theta in joints_pos]
        joints_vel = self.get_joints_vel()
        target_pos = self.target_position()
        ee_pos = self.end_effector_pos()
        targ_vec = np.array(ee_pos) - np.array(target_pos)
        obs = np.concatenate(
            (cos_joints, sin_joints, joints_pos, joints_vel, targ_vec[0:2]),
            axis=0)
        return obs

    def step_(self, action):
        """Apply an action for 4 simulator steps and return (reward, done).

        Parameters
        ----------
        action : list[float] | int
            ``[v1, v2]`` joint target velocities (radians/s) when
            ``continuous_control`` is True; otherwise an index into
            ``action_all``.

        Returns
        -------
        tuple (reward, done) — reward is 1 on reaching the target (which
        also latches ``self.done``), else ``-distance / 10``.
        """
        # TODO: change directly from pos to vel without changing scene
        for action_rep in range(4):  # action repeat: hold action for 4 steps
            if self.continuous_control:
                self.joint1.set_joint_target_velocity(action[0])  # radians/s
                self.joint2.set_joint_target_velocity(action[1])
            else:
                position_all = self.action_all[action]
                joints_pos = self.get_joints_pos()
                joint1_pos = joints_pos[0]
                joint2_pos = joints_pos[1]
                self.joint1.set_joint_target_position(
                    joint1_pos + position_all[0])  # radians
                self.joint2.set_joint_target_position(
                    joint2_pos + position_all[1])
            self.pr.step()
            dist_ee_target = self._dist_to_target()
            if dist_ee_target < self._REACH_THRESHOLD:
                reward = 1
                self.done = True  # latches True even if the arm drifts away
            else:
                reward = -dist_ee_target / 10
        return reward, self.done

    def end_effector_pos(self):
        """Return the end-effector [x, y, z] world position."""
        return self.end_effector.get_position()

    def target_position(self):
        """Return the target object's [x, y, z] world position."""
        return self.target.get_position()

    def get_joints_pos(self):
        """Return [joint1, joint2] angles (radians); also caches them."""
        self.joint1_pos = self.joint1.get_joint_position()
        self.joint2_pos = self.joint2.get_joint_position()
        return [self.joint1_pos, self.joint2_pos]

    def get_joints_vel(self):
        """Return [joint1, joint2] velocities; also caches them."""
        self.joint1_vel = self.joint1.get_joint_velocity()
        self.joint2_vel = self.joint2.get_joint_velocity()
        return [self.joint1_vel, self.joint2_vel]

    def reset_target_position(self, random_=False, x=0.1, y=0.1):
        """Move the target and clear ``done``.

        When ``random_`` is True, samples a point with radius in roughly
        [0.04, 0.1775] m, mirrored into one of the four quadrants; otherwise
        places the target at the given (x, y). z is fixed at 0.0275.
        """
        if random_:
            xy_min = 0.04
            xy_max = 0.1775
            x = random.random() * (xy_max - xy_min) + xy_min
            y_max = sqrt(xy_max ** 2 - x ** 2)
            y_min = 0
            y = random.random() * (y_max - y_min) + y_min
            # Mirror the sampled (positive) point into a random quadrant.
            quadrant = random.randint(1, 4)
            if quadrant == 1:
                x, y = -x, -y
            elif quadrant == 2:
                x, y = -x, y
            elif quadrant == 3:
                x, y = x, -y
            elif quadrant == 4:
                x, y = x, y
        self.target.set_position([x, y, 0.0275])
        # self.pr.step()
        self.target_pos = self.target_position()  # cached for _dist_to_target
        self.done = False

    def reset_robot_position(self, random_=False, joint1_pos=0, joint2_pos=0):
        """Teleport the joints to the given (or random) angles and step once.

        In continuous-control mode the target velocities are zeroed first so
        the controller does not immediately drive the arm away.
        """
        if random_:
            joint1_pos = random.random() * 2 * pi
            joint2_pos = random.random() * 2 * pi
        if self.continuous_control:
            self.joint1.set_joint_target_velocity(0)  # radians/s
            self.joint2.set_joint_target_velocity(0)
        self.joint1.set_joint_position(joint1_pos,
                                       allow_force_mode=True)  # radians
        self.joint2.set_joint_position(joint2_pos, allow_force_mode=True)
        self.pr.step()

    def display(self):
        """Show the current camera image in a matplotlib window."""
        img = self.camera.capture_rgb()
        plt.imshow(img, interpolation='nearest')
        plt.axis('off')
        plt.show()
        plt.pause(0.01)

    def random_agent(self, episodes=10):
        """Run a uniform-random policy; return mean steps over *episodes*.

        Each episode is capped at 40 steps; episodes that never reach the
        target contribute nothing to the sum (original behaviour kept).
        """
        steps_all = []
        for _ in range(episodes):
            steps = 0
            while True:
                action = random.randrange(len(self.action_all))
                # BUGFIX: step_ returns (reward, done); the original compared
                # the whole tuple against 1, so success was never recorded.
                reward, done = self.step_(action)
                steps += 1
                if steps == 40:
                    break
                if reward == 1:
                    steps_all.append(steps)
                    break
        return sum(steps_all) / episodes

    def terminate(self):
        """Stop the simulation and shut down the PyRep backend."""
        # BUGFIX: the original called self.pr.start() here, which (re)starts
        # the simulation; the accompanying comment and the method name make
        # clear stop() was intended.
        self.pr.stop()  # Stop the simulation
        self.pr.shutdown()