class EnvPollos(Env): def __init__(self, ep_length=100): """ Pollos environment for testing purposes :param dim: (int) the size of the dimensions you want to learn :param ep_length: (int) the length of each episodes in timesteps """ self.vrep = vrep logging.basicConfig(level=logging.DEBUG) # they have to be simmetric. We interpret actions as angular increments #self.action_space = Box(low=np.array([-0.1, -1.7, -2.5, -1.5, 0.0, 0.0]), # high=np.array([0.1, 1.7, 2.5, 1.5, 0.0, 0.0])) inc = 0.1 self.action_space = Box(low=np.array([-inc, -inc, -inc, -inc, 0, -inc]), high=np.array([inc, inc, inc, inc, 0, inc])) self.observation_space = Box(low=np.array([-0.5, 0.0, -0.2]), high=np.array([0.5, 1.0, 1.0])) # self.pr = PyRep() self.pr.launch(SCENE_FILE, headless=False) self.pr.start() self.agent = UR10() self.agent.max_velocity = 1 self.agent.set_control_loop_enabled(True) self.agent.set_motor_locked_at_zero_velocity(True) self.joints = [ Joint('UR10_joint1'), Joint('UR10_joint2'), Joint('UR10_joint3'), Joint('UR10_joint4'), Joint('UR10_joint5'), Joint('UR10_joint6') ] #self.joints_limits = [[j.get_joint_interval()[1][0],j.get_joint_interval()[1][0]+j.get_joint_interval()[1][1]] # for j in self.joints] self.high_joints_limits = [0.1, 1.7, 2.7, 0.0, 0.02, 0.3] self.low_joints_limits = [-0.1, -0.2, 0.0, -1.5, -0.02, -0.5] #self.agent_hand = Shape('hand') self.initial_agent_tip_position = self.agent.get_tip().get_position() self.initial_agent_tip_quaternion = self.agent.get_tip( ).get_quaternion() self.initial_agent_tip_euler = self.agent.get_tip().get_orientation() self.target = Dummy('UR10_target') self.initial_target_orientation = self.target.get_orientation() self.initial_joint_positions = self.agent.get_joint_positions() self.pollo_target = Dummy('pollo_target') self.pollo = Shape('pollo') #self.table_target = Dummy('table_target') self.initial_pollo_position = self.pollo.get_position() self.initial_pollo_orientation = self.pollo.get_quaternion() #self.table_target = Dummy('table_target') #self.succion = Shape('suctionPad') self.waypoints = [Dummy('dummy_gancho%d' % i) for i in range(4)] # objects #self.scene_objects = [Shape('table0'), Shape('Plane'), Shape('Plane0'), Shape('ConcretBlock')] #self.agent_tip = self.agent.get_tip() self.initial_distance = np.linalg.norm( np.array(self.initial_pollo_position) - np.array(self.initial_agent_tip_position)) # camera self.camera = VisionSensor('kinect_depth') self.camera_matrix_extrinsics = vrep.simGetObjectMatrix( self.camera.get_handle(), -1) self.np_camera_matrix_extrinsics = np.delete( np.linalg.inv(self.vrepToNP(self.camera_matrix_extrinsics)), 3, 0) width = 640.0 height = 480.0 angle = math.radians(57.0) focalx_px = (width / 2.0) / math.tan(angle / 2.0) focaly_px = (height / 2.0) / math.tan(angle / 2.0) self.np_camera_matrix_intrinsics = np.array([[-focalx_px, 0.0, 320.0], [0.0, -focalx_px, 240.0], [0.0, 0.0, 1.0]]) self.reset() def reset(self): pos = list( np.random.uniform([-0.1, -0.1, 0.0], [0.1, 0.1, 0.1]) + self.initial_pollo_position) self.pollo.set_position(pos) self.pollo.set_quaternion(self.initial_pollo_orientation) self.agent.set_joint_positions(self.initial_joint_positions) self.initial_epoch_time = time.time() while True: # wait for arm to stop self.pr.step() # Step the physics simulation a = self.agent.get_joint_velocities() if not np.any(np.where(np.fabs(a) < 0.1, False, True)): break print("ENV RESET DONE") return self._get_state() def step(self, action): if action is None: self.pr.step() return self._get_state(), 0.0, False, {} # check for nan if np.any(np.isnan(action)): print(action) return self._get_state(), -1.0, False, {} # action[1] = np.interp(action[1], [-1,7,1.7], [-0.2,1.7]) # action[2] = np.interp(action[2], [-2.5,2.5], [0,2.5]) # action[3] = np.interp(action[3], [-1.5,1.5], [-1.5,0]) # add actions as increments to current joints value new_joints = np.array( self.agent.get_joint_positions()) + np.array(action) # check limits if np.any(np.greater(new_joints, self.high_joints_limits)) or np.any( np.less(new_joints, self.low_joints_limits)): logging.debug("New joints value out of limits r=-10") return self._get_state(), -10.0, True, {} # move the robot and wait to stop self.agent.set_joint_target_positions(new_joints) reloj = time.time() while True: # wait for arm to stop self.pr.step() # Step the physics simulation a = self.agent.get_joint_velocities() if not np.any(np.where(np.fabs(a) < 0.1, False, True)) or (time.time() - reloj) > 3: break # compute relevant magnitudes np_pollo_target = np.array(self.pollo_target.get_position()) np_robot_tip_position = np.array(self.agent.get_tip().get_position()) dist = np.linalg.norm(np_pollo_target - np_robot_tip_position) altura = np.clip((np_pollo_target[2] - self.initial_pollo_position[2]), 0, 0.5) for obj in self.scene_objects: # colisiĆ³n con la mesa if self.agent_hand.check_collision(obj): logging.debug("Collision with objects r=-10") return self._get_state(), -10.0, True, {} if altura > 0.3: # pollo en mano logging.debug("Height reached !!! r=0") return self._get_state(), 30.0, True, {} elif dist > self.initial_distance: # mano se aleja logging.debug("Hand moving away from chicken r=-10") return self._get_state(), -10.0, True, {} if time.time() - self.initial_epoch_time > 5: # too much time logging.debug("Time out. End of epoch r=-10") return self._get_state(), -10.0, True, {} # Reward #pollo_height = np.exp(-altura*20) # para 0.3m pollo_height = 0.002, para 0m pollo_height = 1 reward = altura - dist logging.debug("New joints value out of limits r=-10") return self._get_state(), reward, False, {} def close(self): self.pr.stop() self.pr.shutdown() def render(self): print("RENDER") np_pollo_en_camara = self.getPolloEnCamara() # capture depth image depth = self.camera.capture_rgb() circ = plt.Circle( (int(np_pollo_en_camara[0]), int(np_pollo_en_camara[1])), 10) plt.clf() fig = plt.gcf() ax = fig.gca() ax.add_artist(circ) ax.imshow(depth, cmap='hot') plt.pause(0.000001) # Aux # transform env.pollo_target.get_position() to camera coordinates and project pollo_en_camera a image coordinates def getPolloEnCamara(self): np_pollo_target = np.array(self.pollo_target.get_position()) np_pollo_target_cam = self.np_camera_matrix_extrinsics.dot( np.append([np_pollo_target], 1.0)) np_pollo_en_camara = self.np_camera_matrix_intrinsics.dot( np_pollo_target_cam) np_pollo_en_camara = np_pollo_en_camara / np_pollo_en_camara[2] np_pollo_en_camara = np.delete(np_pollo_en_camara, 2) return np_pollo_en_camara def getPolloEnCamaraEx(self): np_pollo_target = np.array(self.pollo_target.get_position()) np_pollo_en_camara = self.np_camera_matrix_extrinsics.dot( np.append([np_pollo_target], 1.0)) return np_pollo_en_camara def _get_state(self): # Return state containing arm joint angles/velocities & target position # return (self.agent.get_joint_positions() + # self.agent.get_joint_velocities() + # self.pollo_target.get_position()) p = self.getPolloEnCamaraEx() j = self.agent.get_joint_positions() #r = np.array([p[0],p[1],p[2],j[0],j[1],j[2],j[3],j[4],j[5]]) r = np.array([p[0], p[1], p[2]]) return r def vrepToNP(self, c): return np.array([[c[0], c[4], c[8], c[3]], [c[1], c[5], c[9], c[7]], [c[2], c[6], c[10], c[11]], [0, 0, 0, 1]])
class PioneerEnv(object): def __init__(self, escene_name='proximity_sensor.ttt', start=[100, 100], goal=[180, 500], rand_area=[100, 450], path_resolution=5.0, margin=0., margin_to_goal=0.5, action_max=[.5, 1.], action_min=[0., 0.], _load_path=True, path_name="PathNodes", type_of_planning="PID", type_replay_buffer="random", max_laser_range=1.0, headless=False): SCENE_FILE = join(dirname(abspath(__file__)), escene_name) self.pr = PyRep() self.pr.launch(SCENE_FILE, headless=headless) self.pr.start() self.agent = Pioneer("Pioneer_p3dx", type_of_planning=type_of_planning, type_replay_buffer=type_replay_buffer) self.agent.set_control_loop_enabled(False) self.initial_joint_positions = self.agent.get_joint_positions() self.initial_position = self.agent.get_position() print(f"Agent initial position: {self.initial_position}") self.vision_map = VisionSensor("VisionMap") self.vision_map.handle_explicitly() self.vision_map_handle = self.vision_map.get_handle() self.floor = Shape("Floor_respondable") self.perspective_angle = self.vision_map.get_perspective_angle # degrees self.resolution = self.vision_map.get_resolution # list [resx, resy] self.margin = margin self.margin_to_goal = margin_to_goal self.rand_area = rand_area self.start = start self.goal = goal self.type_of_planning = type_of_planning self.max_laser_range = max_laser_range self.max_angle_orientation = np.pi scene_image = self.vision_map.capture_rgb() * 255 scene_image = np.flipud(scene_image) self.rew_weights = [1, 10000, 5000] self.start_position = Dummy("Start").get_position() self.goal_position = Dummy("Goal").get_position() # [x, y, z] self.local_goal_aux = Dummy("Local_goal_aux") self.local_goal_aux_pos_list = [[-3.125, 2.175, 0], [2.9, 3.575, 0], self.goal_position, self.goal_position] self.ind_local_goal_aux = 0 self.max_distance = get_distance([-7.5, -4.1, 0.], self.goal_position) self.distance_to_goal_m1 = get_distance(self.start_position, self.goal_position) self.c_lin_vel = self.c_ang_vel = self.b_lin_vel = self.b_ang_vel = 0. self.action_max = action_max self.action_min = action_min if type_of_planning == 'PID': self.planning_info_logger = get_logger("./loggers", "Planning_Info.log") self.image_path = None if exists("./paths/" + path_name + ".npy") and _load_path: print("Load Path..") self.image_path = np.load("./paths/" + path_name + ".npy", allow_pickle=True) else: print("Planning...") self.image_path = self.Planning( scene_image, self.start, self.goal, self.rand_area, path_resolution=path_resolution, logger=self.planning_info_logger) assert self.image_path is not None, "path should not be a Nonetype" self.real_path = self.path_image2real(self.image_path, self.start_position) # project in coppelia sim sim_drawing_points = 0 point_size = 10 #[pix] duplicate_tolerance = 0 parent_obj_handle = self.floor.get_handle() max_iter_count = 999999 ambient_diffuse = (255, 0, 0) blue_ambient_diffuse = (0, 0, 255) point_container = sim.simAddDrawingObject( sim_drawing_points, point_size, duplicate_tolerance, parent_obj_handle, max_iter_count, ambient_diffuse=ambient_diffuse) local_point_container = sim.simAddDrawingObject( sim_drawing_points, point_size, duplicate_tolerance, parent_obj_handle, max_iter_count, ambient_diffuse=blue_ambient_diffuse) # debug for point in self.real_path: point_data = (point[0], point[1], 0) sim.simAddDrawingObjectItem(point_container, point_data) # You need to get the real coord in the real world assert local_point_container is not None, "point container shouldn't be empty" self.agent.load_point_container(local_point_container) self.agent.load_path(self.real_path) def reset(self): self.pr.stop() if self.type_of_planning == 'PID': self.agent.local_goal_reset() self.pr.start() self.pr.step() sensor_state, distance_to_goal, orientation_to_goal = self._get_state() self.distance_to_goal_m1 = distance_to_goal self.orientation_to_goal_m1 = orientation_to_goal sensor_state_n, distance_to_goal, orientation_to_goal = self.normalize_states( sensor_state, distance_to_goal, orientation_to_goal) # pos_x, pos_y = self.agent.get_position()[:-1] return np.hstack((sensor_state_n, distance_to_goal, orientation_to_goal)), sensor_state def step(self, action, pf_f=0): self.agent.set_joint_target_velocities(action) self.pr.step() # Step the physics simulation scene_image = self.vision_map.capture_rgb() * 255 # numpy -> [w, h, 3] sensor_state, distance_to_goal, orientation_to_goal = self._get_state() self.b_lin_vel, self.b_ang_vel = self.c_lin_vel, self.c_ang_vel self.c_lin_vel, self.c_ang_vel = self.agent.get_velocity( ) # 3d coordinates self.c_lin_vel = np.linalg.norm(self.c_lin_vel) self.c_ang_vel = self.c_ang_vel[-1] # w/r z reward, done = self._get_reward(sensor_state, distance_to_goal, orientation_to_goal, pf_f) sensor_state_n, distance_to_goal, orientation_to_goal = self.normalize_states( sensor_state, distance_to_goal, orientation_to_goal) # pos_x, pos_y = self.agent.get_position()[:-1] state = np.hstack( (sensor_state_n, distance_to_goal, orientation_to_goal)) return state, reward, scene_image, done, sensor_state def _get_state(self): sensor_state = np.array([ proxy_sensor.read() for proxy_sensor in self.agent.proximity_sensors ]) # list of distances. -1 if not detect anything goal_transformed = world_to_robot_frame( self.agent.get_position(), self.goal_position, self.agent.get_orientation()[-1]) # distance_to_goal = get_distance(goal_transformed[:-1], np.array([0,0])) # robot frame distance_to_goal = get_distance(self.agent.get_position()[:-1], self.goal_position[:-1]) orientation_to_goal = np.arctan2(goal_transformed[1], goal_transformed[0]) return sensor_state, distance_to_goal, orientation_to_goal def _get_reward(self, sensor_state, distance_to_goal, orientation_to_goal, pf_f=0): done = False r_local_goal = self.update_local_goal_aux() r_target = -(distance_to_goal - self.distance_to_goal_m1) r_vel = self.c_lin_vel / self.action_max[0] # r_orientation = - (orientation_to_goal - self.orientation_to_goal_m1) reward = self.rew_weights[0] * (r_local_goal + r_vel) # - pf_f/20) # distance_to_goal = get_distance(agent_position[:-1], goal[:-1]) self.distance_to_goal_m1 = distance_to_goal self.orientation_to_goal_m1 = orientation_to_goal # collision check if self.collision_check(sensor_state, self.margin): reward = -1 * self.rew_weights[1] done = True # goal achievement if distance_to_goal < self.margin_to_goal: reward = self.rew_weights[2] done = True return reward, done def shutdown(self): self.pr.stop() self.pr.shutdown() def model_update(self, method="DDPG", pretraining_loop=False): if method == "DDPG": self.agent.trainer.update() elif method == "IL": loss = self.agent.trainer.IL_update() return loss elif method == "CoL": loss = self.agent.trainer.CoL_update(pretraining_loop) else: raise NotImplementedError() def normalize_states(self, sensor_state, distance_to_goal, orientation_to_goal): sensor_state = self.normalize_laser(sensor_state, self.norm_func) distance_to_goal = self.norm_func(distance_to_goal, self.max_distance) orientation_to_goal = self.norm_func(orientation_to_goal, self.max_angle_orientation) return sensor_state, distance_to_goal, orientation_to_goal def normalize_laser(self, obs, norm_func): return np.array([ norm_func(o, self.max_laser_range) if o >= 0 else -1 for o in obs ]) def norm_func(self, x, max_value): return 2 * (1 - min(x, max_value) / max_value) - 1 def set_margin(self, margin): self.margin = margin # def set_start_goal_position(self, start_pos, goal_pos): def update_local_goal_aux(self): pos = self.agent.get_position()[:-1] distance = get_distance(pos, self.local_goal_aux.get_position()[:-1]) if distance < 0.5: self.local_goal_aux.set_position( self.local_goal_aux_pos_list[self.ind_local_goal_aux]) self.ind_local_goal_aux += 1 return -1 * distance**2 @staticmethod def collision_check(observations, margin): if observations.sum() == -1 * observations.shape[0]: return False elif observations[observations >= 0].min() < margin: return True else: return False @staticmethod def Planning(Map, start, goal, rand_area, path_resolution=5.0, logger=None): """ :parameter Map(ndarray): Image that planning over with """ Map = Image.fromarray(Map.astype(np.uint8)).convert('L') path, n_paths = main(Map, start, goal, rand_area, path_resolution=path_resolution, logger=logger, show_animation=False) if path is not None: np.save("./paths/PathNodes_" + str(n_paths) + ".npy", path) return path else: logger.info("Not found Path") return None @staticmethod def path_image2real(image_path, start): """ image_path: np.array[pix] points of the image path start_array: np.array """ scale = 13.0 / 512 # [m/pix] x_init = start[0] y_init = start[1] deltas = [(image_path[i + 1][0] - image_path[i][0], image_path[i + 1][1] - image_path[i][1]) for i in range(image_path.shape[0] - 1)] path = np.zeros((image_path.shape[0], 3)) path[0, :] = np.array([x_init, y_init, 0]) for i in range(1, image_path.shape[0]): path[i, :] = np.array([ path[i - 1, 0] + deltas[i - 1][0] * scale, path[i - 1, 1] - deltas[i - 1][1] * scale, 0 ]) rot_mat = np.diagflat([-1, -1, 1]) tras_mat = np.zeros_like(path) tras_mat[:, 0] = np.ones_like(path.shape[0]) * 0.3 tras_mat[:, 1] = np.ones_like(path.shape[0]) * 4.65 path = path @ rot_mat + tras_mat return np.flip(path, axis=0)
class EnvPollos(Env): def __init__(self, ep_length=100): """ Pollos environment for testing purposes :param dim: (int) the size of the dimensions you want to learn :param ep_length: (int) the length of each episodes in timesteps """ logging.basicConfig(level=logging.DEBUG) # self.pr = PyRep() self.pr.launch(SCENE_FILE, headless=False) self.pr.start() self.agent = UR10() self.agent.max_velocity = 0.8 self.agent.set_control_loop_enabled(True) #self.agent.set_motor_locked_at_zero_velocity(True) self.MAX_INC = 0.2 self.joints = [ Joint('UR10_joint1'), Joint('UR10_joint2'), Joint('UR10_joint3'), Joint('UR10_joint4'), Joint('UR10_joint5'), Joint('UR10_joint6') ] #self.joints_limits = [[j.get_joint_interval()[1][0],j.get_joint_interval()[1][0]+j.get_joint_interval()[1][1]] # for j in self.joints] self.high_joints_limits = [0.1, 1.7, 2.7, 0.0, 0.02, 0.3] self.low_joints_limits = [-0.1, -0.2, 0.0, -1.5, -0.02, -0.5] self.initial_joint_positions = self.agent.get_joint_positions() self.agent_hand = Shape('hand') self.initial_agent_tip_position = self.agent.get_tip().get_position() self.initial_agent_tip_quaternion = self.agent.get_tip( ).get_quaternion() self.target = Dummy('UR10_target') self.pollo_target = Dummy('pollo_target') self.pollo = Shape('pollo') self.initial_pollo_position = self.pollo.get_position() self.initial_pollo_orientation = self.pollo.get_quaternion() self.table_target = Dummy('table_target') self.table_target = Dummy('table_target') # objects to check collisions self.scene_objects = [ Shape('table0'), Shape('Plane'), Shape('Plane0'), Shape('ConcretBlock') ] self.initial_distance = np.linalg.norm( np.array(self.initial_pollo_position) - np.array(self.initial_agent_tip_position)) # camera self.camera = VisionSensor('kinect_depth') self.camera_matrix_extrinsics = vrep.simGetObjectMatrix( self.camera.get_handle(), -1) self.np_camera_matrix_extrinsics = np.delete( np.linalg.inv(self.vrepToNP(self.camera_matrix_extrinsics)), 3, 0) width = 640.0 height = 480.0 angle = math.radians(57.0) focalx_px = (width / 2.0) / math.tan(angle / 2.0) focaly_px = (height / 2.0) / math.tan(angle / 2.0) self.np_camera_matrix_intrinsics = np.array([[-focalx_px, 0.0, 320.0], [0.0, -focalx_px, 240.0], [0.0, 0.0, 1.0]]) self.reset() def reset(self): pos = list( np.random.uniform([-0.1, -0.1, 0.0], [0.1, 0.1, 0.1]) + self.initial_pollo_position) self.pollo.set_position(pos) self.pollo.set_quaternion(self.initial_pollo_orientation) self.agent.set_joint_positions(self.initial_joint_positions) self.initial_epoch_time = time.time() while True: # wait for arm to stop self.pr.step() # Step the physics simulation a = self.agent.get_joint_velocities() if not np.any(np.where(np.fabs(a) < 0.1, False, True)): break return self._get_state() def step(self, action): if action is None: print(self.total_reward) return self._get_state(), -10.0, True, {} # check for nan if np.any(np.isnan(action)): print("NAN values ", action) self.NANS_COMING = True return self._get_state(), -10.0, True, {} # check for strange values # if np.any(np.greater(action, self.MAX_INC)) or np.any(np.less(action, -self.MAX_INC)): # print("Strange values ", action) # self.NANS_COMING = True # return self._get_state(), -10.0, True, {} # check for NAN in VREP get_position() if np.any(np.isnan(self.agent.get_tip().get_position())): print("NAN values in get_position()", action, self.agent.get_tip().get_position()) return self._get_state(), -10.0, True, {} self.pr.step() return self._get_state(), 0, True, {} def close(self): self.pr.stop() self.pr.shutdown() def render(self): print("RENDER") np_pollo_en_camara = self.getPolloEnCamara() # capture depth image depth = self.camera.capture_rgb() circ = plt.Circle( (int(np_pollo_en_camara[0]), int(np_pollo_en_camara[1])), 10) plt.clf() fig = plt.gcf() ax = fig.gca() ax.add_artist(circ) ax.imshow(depth, cmap='hot') plt.pause(0.000001) # Aux # transform env.pollo_target.get_position() to camera coordinates and project pollo_en_camera a image coordinates def getPolloEnCamara(self): np_pollo_target = np.array(self.pollo_target.get_position()) np_pollo_target_cam = self.np_camera_matrix_extrinsics.dot( np.append([np_pollo_target], 1.0)) np_pollo_en_camara = self.np_camera_matrix_intrinsics.dot( np_pollo_target_cam) np_pollo_en_camara = np_pollo_en_camara / np_pollo_en_camara[2] np_pollo_en_camara = np.delete(np_pollo_en_camara, 2) return np_pollo_en_camara def getPolloEnCamaraEx(self): np_pollo_target = np.array(self.pollo_target.get_position()) np_pollo_en_camara = self.np_camera_matrix_extrinsics.dot( np.append([np_pollo_target], 1.0)) return np_pollo_en_camara def _get_state(self): # Return state containing arm joint angles/velocities & target position # return (self.agent.get_joint_positions() + # self.agent.get_joint_velocities() + # self.pollo_target.get_position()) p = self.getPolloEnCamaraEx() j = self.agent.get_joint_positions() #r = np.array([p[0],p[1],p[2],j[0],j[1],j[2],j[3],j[4],j[5]]) r = np.array([p[0], p[1], p[2]]) return r def vrepToNP(self, c): return np.array([[c[0], c[4], c[8], c[3]], [c[1], c[5], c[9], c[7]], [c[2], c[6], c[10], c[11]], [0, 0, 0, 1]])
class EnvPollos(Env): def __init__(self, ep_length=100): """ Pollos environment for testing purposes :param dim: (int) the size of the dimensions you want to learn :param ep_length: (int) the length of each episodes in timesteps """ # 5 points in 3D space forming the trajectory self.action_space = Box(low=-0.3, high=0.3, shape=(5,3), dtype=np.float32) self.observation_space = Box(low=np.array([-0.1, -0.2, 0, -1.5, 0.0, 0.0, -0.5, -0.2, -0.2]), high=np.array([0.1, 1.7, 2.5, 0, 0.0, 0.0, 0.5, 0.2, 1.0])) # self.pr = PyRep() self.pr.launch(SCENE_FILE, headless=False) self.pr.start() self.agent = UR10() self.agent.max_velocity = 1.2 self.joints = [Joint('UR10_joint1'), Joint('UR10_joint2'), Joint('UR10_joint3'), Joint('UR10_joint4'), Joint('UR10_joint5'), Joint('UR10_joint6')] self.joints_limits = [[j.get_joint_interval()[1][0],j.get_joint_interval()[1][0]+j.get_joint_interval()[1][1]] for j in self.joints] print(self.joints_limits) self.agent_hand = Shape('hand') self.agent.set_control_loop_enabled(True) self.agent.set_motor_locked_at_zero_velocity(True) self.initial_agent_tip_position = self.agent.get_tip().get_position() self.initial_agent_tip_quaternion = self.agent.get_tip().get_quaternion() self.target = Dummy('UR10_target') self.initial_joint_positions = self.agent.get_joint_positions() self.pollo_target = Dummy('pollo_target') self.pollo = Shape('pollo') self.table_target = Dummy('table_target') self.initial_pollo_position = self.pollo.get_position() self.initial_pollo_orientation = self.pollo.get_quaternion() self.table_target = Dummy('table_target') # objects self.scene_objects = [Shape('table0'), Shape('Plane'), Shape('Plane0'), Shape('ConcretBlock')] #self.agent_tip = self.agent.get_tip() self.initial_distance = np.linalg.norm(np.array(self.initial_pollo_position)-np.array(self.initial_agent_tip_position)) # camera self.camera = VisionSensor('kinect_depth') self.camera_matrix_extrinsics = vrep.simGetObjectMatrix(self.camera.get_handle(),-1) self.np_camera_matrix_extrinsics = np.delete(np.linalg.inv(self.vrepToNP(self.camera_matrix_extrinsics)), 3, 0) width = 640.0 height = 480.0 angle = math.radians(57.0) focalx_px = (width/2.0) / math.tan(angle/2.0) focaly_px = (height/2.0) / math.tan(angle/2.0) self.np_camera_matrix_intrinsics = np.array([[-focalx_px, 0.0, 320.0], [0.0, -focalx_px, 240.0], [0.0, 0.0, 1.0]]) self.reset() def reset(self): pos = list(np.random.uniform( [-0.1, -0.1, 0.0], [0.1, 0.1, 0.1]) + self.initial_pollo_position) self.pollo.set_position(pos) self.pollo.set_quaternion(self.initial_pollo_orientation) self.agent.set_joint_positions(self.initial_joint_positions) self.initial_epoch_time = time.time() while True: # wait for arm to stop self.pr.step() # Step the physics simulation a = self.agent.get_joint_velocities() if not np.any(np.where( np.fabs(a) < 0.1, False, True )): break return self._get_state() def step(self, action): print(action.shape, action) if action is None: self.pr.step() return self._get_state(), 0.0, False, {} if np.any(np.isnan(action)): print(action) return self._get_state(), -1.0, False, {} # create a path with tip and pollo at its endpoints and 5 adjustable points in the middle np_pollo_target = np.array(self.pollo_target.get_position()) np_robot_tip_position = np.array(self.agent.get_tip().get_position()) np_robot_tip_orientation = np.array(self.agent.get_tip().get_orientation()) c_path = CartesianPath.create() c_path.insert_control_points(action[4]) # point after pollo to secure the grasp c_path.insert_control_points([list(np_pollo_target) + list(np_robot_tip_orientation)]) # pollo c_path.insert_control_points(action[0:3]) c_path.insert_control_points([list(np_robot_tip_position) + list(np_robot_tip_orientation)]) # robot hand try: self.path = self.agent.get_path_from_cartesian_path(c_path) except IKError as e: print('Agent::grasp Could not find joint values') return self._get_state(), -1, True, {} # go through path reloj = time.time() while self.path.step(): self.pr.step() # Step the physics simulation if (time.time()-reloj) > 4: return self._get_state(), -10.0, True, {} # Too much time if any((self.agent_hand.check_collision(obj) for obj in self.scene_objects)): # colisiĆ³n con la mesa return self._get_state(), -10.0, True, {} # path ok, compute reward np_pollo_target = np.array(self.pollo_target.get_position()) np_robot_tip_position = np.array(self.agent.get_tip().get_position()) dist = np.linalg.norm(np_pollo_target-np_robot_tip_position) altura = np.clip((np_pollo_target[2] - self.initial_pollo_position[2]), 0, 0.5) if altura > 0.3: # pollo en mano return self._get_state(), altura, True, {} elif dist > self.initial_distance: # mano se aleja return self._get_state(), -10.0, True, {} if time.time() - self.initial_epoch_time > 5: # too much time return self._get_state(), -10.0, True, {} # Reward pollo_height = np.exp(-altura*10) # para 1m pollo_height = 0.001, para 0m pollo_height = 1 reward = - pollo_height - dist return self._get_state(), reward, False, {} def close(self): self.pr.stop() self.pr.shutdown() def render(self): print("RENDER") np_pollo_en_camara = self.getPolloEnCamara() # capture depth image depth = self.camera.capture_rgb() circ = plt.Circle((int(np_pollo_en_camara[0]), int(np_pollo_en_camara[1])),10) plt.clf() fig = plt.gcf() ax = fig.gca() ax.add_artist(circ) ax.imshow(depth, cmap = 'hot') plt.pause(0.000001) # Aux # transform env.pollo_target.get_position() to camera coordinates and project pollo_en_camera a image coordinates def getPolloEnCamara(self): np_pollo_target = np.array(self.pollo_target.get_position()) np_pollo_target_cam = self.np_camera_matrix_extrinsics.dot(np.append([np_pollo_target],1.0)) np_pollo_en_camara = self.np_camera_matrix_intrinsics.dot(np_pollo_target_cam) np_pollo_en_camara = np_pollo_en_camara / np_pollo_en_camara[2] np_pollo_en_camara = np.delete(np_pollo_en_camara,2) return np_pollo_en_camara def getPolloEnCamaraEx(self): np_pollo_target = np.array(self.pollo_target.get_position()) np_pollo_en_camara = self.np_camera_matrix_extrinsics.dot(np.append([np_pollo_target],1.0)) return np_pollo_en_camara def _get_state(self): # Return state containing arm joint angles/velocities & target position # return (self.agent.get_joint_positions() + # self.agent.get_joint_velocities() + # self.pollo_target.get_position()) p = self.getPolloEnCamaraEx() j = self.agent.get_joint_positions() r = np.array([j[0],j[1],j[2],j[3],j[4],j[5],p[0],p[1],p[2]]) return r def vrepToNP(self, c): return np.array([[c[0],c[4],c[8],c[3]], [c[1],c[5],c[9],c[7]], [c[2],c[6],c[10],c[11]], [0, 0, 0, 1]])