class ScoopEnv:
    """
    Scooping environment whose robot and cube poses are cached from ROS topics.

    Actions move the UR5 target along index 1 of its translation (RIGHT/LEFT)
    or close/open the RDD finger. The observation is the trace of the narrow
    finger joint recorded since the last step, resampled to a fixed length.
    """

    RIGHT = 0
    LEFT = 1
    CLOSE = 2
    OPEN = 3

    # Number of samples in the resampled finger-trace observation.
    OBS_LENGTH = 20
    # Upper bound on the finger samples buffered between two steps.
    MAX_FINGER_SAMPLES = 1000

    def __init__(self, port=19997, memory_size=60):
        """
        connect to the simulation and subscribe to the pose topics
        :param port: port passed to utils.connectToSimulation
        :param memory_size: unused; kept so existing callers keep working
        """
        rospy.init_node('env', anonymous=True)
        self.sim_client = utils.connectToSimulation('127.0.0.1', port)

        # Create UR5 and restart simulator
        self.rdd = RDD(self.sim_client)
        self.ur5 = UR5(self.sim_client, self.rdd)
        self.nA = 4

        self.cube = None
        self.cube_start_position = [-0.2, 0.85, 0.025]
        self.cube_size = [0.1, 0.2, 0.04]

        self.open_position = 0.3

        self.narrow_position = None
        self.wide_position = None
        self.narrow_p = []
        self.rdd_sub = rospy.Subscriber('sim/rdd_joints', Float32MultiArray,
                                        self.rddJointsCallback, queue_size=1)

        self.tip_position = None
        self.tip_orientation = None
        self.tip_pos_sub = rospy.Subscriber('sim/ur5_tip_pose', Float32MultiArray,
                                            self.tipPosCallback, queue_size=1)

        self.target_position = None
        self.target_orientation = None
        self.target_pos_sub = rospy.Subscriber('sim/ur5_target_pose', Float32MultiArray,
                                               self.targetPosCallback, queue_size=1)

        self.cube_position = None
        self.cube_orientation = None
        self.cube_pos_sub = rospy.Subscriber('sim/cube_pose', Float32MultiArray,
                                             self.cubePosCallback, queue_size=1)

    def rddJointsCallback(self, msg):
        """
        callback function for rdd joints state
        :param msg: Float32MultiArray, data[0] is stored as the narrow
                    position and data[1] as the wide position
        :return:
        """
        data = list(msg.data)
        # Only the trace is capped; the latest positions are always refreshed.
        if len(self.narrow_p) < self.MAX_FINGER_SAMPLES:
            self.narrow_p.append(data[0])
        self.narrow_position = data[0]
        self.wide_position = data[1]

    def tipPosCallback(self, msg):
        """
        callback function for ur5 tip
        :param msg: Float32MultiArray, first three values are stored as the
                    position and the remainder as the orientation
        :return:
        """
        data = list(msg.data)
        self.tip_position = data[:3]
        self.tip_orientation = data[3:]

    def targetPosCallback(self, msg):
        """
        callback function for ur5 target
        :param msg: Float32MultiArray, first three values are stored as the
                    position and the remainder as the orientation
        :return:
        """
        data = list(msg.data)
        self.target_position = data[:3]
        self.target_orientation = data[3:]

    def cubePosCallback(self, msg):
        """
        callback function for cube
        :param msg: Float32MultiArray, first three values are stored as the
                    position and the remainder as the orientation
        :return:
        """
        data = list(msg.data)
        self.cube_position = data[:3]
        self.cube_orientation = data[3:]

    def getObs(self):
        """
        get the observation from the buffered narrow finger positions
        :return: the observation, a list of OBS_LENGTH floats obtained by
                 linearly resampling the buffered trace (all zeros when the
                 buffer is empty)
        """
        # Work on a snapshot: the subscriber callback keeps appending.
        samples = list(self.narrow_p) or [0.]
        query = np.linspace(0, len(samples) - 1, self.OBS_LENGTH)
        return np.interp(query, np.arange(len(samples)), samples).tolist()

    def reset(self):
        """
        reset the environment
        :return: the observation, a list of OBS_LENGTH zeros
        """
        vrep.simxStopSimulation(self.sim_client, VREP_BLOCKING)
        time.sleep(1)
        vrep.simxStartSimulation(self.sim_client, VREP_BLOCKING)
        time.sleep(1)

        sim_ret, self.cube = utils.getObjectHandle(self.sim_client, 'cube')
        utils.setObjectPosition(self.sim_client, self.ur5.UR5_target, [-0.2, 0.6, 0.08])

        # Randomise the start pose along index 1 of the translation.
        dy = 0.3 * np.random.random()
        target_pose = self.ur5.getEndEffectorPose().copy()
        target_pose[1, 3] += dy

        self.rdd.setFingerPos(-0.1)
        self.ur5.moveTo(target_pose)

        self.narrow_p = []
        # Block until the target-pose subscriber delivers a fresh message.
        self.target_position = None
        while self.target_position is None:
            time.sleep(0.1)
        return [0.] * self.OBS_LENGTH

    def _waitForFinger(self):
        """Poll the narrow joint every 0.1 s until it moves by at most 0.01
        between two polls, giving up after 10 extra polls."""
        previous = self.narrow_position
        time.sleep(0.1)
        polls = 0
        while polls < 10 and abs(previous - self.narrow_position) > 0.01:
            previous = self.narrow_position
            time.sleep(0.1)
            polls += 1

    def _cubeOutOfBounds(self, cube_position):
        """Return True when indices 0/1 of cube_position are further from
        cube_start_position than the matching cube_size entries."""
        x_start, y_start = self.cube_start_position[0], self.cube_start_position[1]
        x_margin, y_margin = self.cube_size[0], self.cube_size[1]
        return (cube_position[0] < x_start - x_margin or
                cube_position[0] > x_start + x_margin or
                cube_position[1] < y_start - y_margin or
                cube_position[1] > y_start + y_margin)

    def step(self, a):
        """
        take a step
        :param a: action, int (RIGHT, LEFT, CLOSE or OPEN)
        :return: observation, reward, done, info; the observation is None on
                 terminal steps. Reward is -1 when the arm target leaves its
                 allowed range, 1 when the cube counts as lifted, 0 otherwise.
        """
        # Start a fresh finger trace for this step's observation.
        self.narrow_p = []

        if a in [self.RIGHT, self.LEFT]:
            current_position = self.target_position
            target_pose = transformations.euler_matrix(self.target_orientation[0],
                                                       self.target_orientation[1],
                                                       self.target_orientation[2])
            target_pose[:3, -1] = current_position
            if a == self.RIGHT:
                target_pose[1, 3] -= 0.03
            elif a == self.LEFT:
                target_pose[1, 3] += 0.03
            self.ur5.moveTo(target_pose)
        elif a == self.CLOSE:
            self.rdd.setFingerPos(-0.1)
            self._waitForFinger()
        elif a == self.OPEN:
            self.rdd.setFingerPos()
            self._waitForFinger()

        # Snapshot the cached poses so the checks below see one consistent set.
        cube_orientation = self.cube_orientation
        cube_position = self.cube_position
        tip_position = self.tip_position
        narrow_position = self.narrow_position
        target_position = self.target_position

        # arm is in wrong pose
        if target_position[1] < 0.42 or target_position[1] > 0.95 or \
                target_position[2] < 0 or target_position[2] > 0.2:
            # One-argument print() gives the same output as the Python 2
            # print statement and also runs under Python 3.
            print(' '.join(['Wrong arm position: ', str(target_position)]))
            return None, -1, True, None

        # cube in wrong position; re-query the simulator while the pose is NaN
        while any(np.isnan(cube_position)):
            _, cube_position = utils.getObjectPosition(self.sim_client, self.cube)
        if self._cubeOutOfBounds(cube_position):
            print(' '.join(['Wrong cube position: ', str(cube_position)]))
            return None, 0, True, None

        # cube is lifted
        cube_center = np.array(cube_position)
        margin = np.array(self.cube_size)
        if np.all(tip_position > (cube_center - margin)) and \
                np.all(tip_position < (cube_center + margin)) and \
                cube_orientation[0] < -0.01 and \
                narrow_position > -0.5:
            return None, 1, True, None

        # cube is not lifted
        return self.getObs(), 0, False, None
class ScoopEnv:
    """
    Scooping environment that talks to the simulator through the remote API.

    Actions move the UR5 target along index 1 of its translation (RIGHT/LEFT).
    The observation is the 'theta' string signal read from the simulator,
    resampled to a fixed length.
    """

    RIGHT = 0
    LEFT = 1

    # Number of samples in the resampled 'theta' observation.
    OBS_LENGTH = 20

    def __init__(self, port=19997, memory_size=60):
        """
        connect to the simulation and create the robot wrappers
        :param port: port passed to utils.connectToSimulation
        :param memory_size: unused; kept so existing callers keep working
        """
        self.sim_client = utils.connectToSimulation('127.0.0.1', port)

        # Create UR5 and restart simulator
        self.rdd = RDD(self.sim_client)
        self.ur5 = UR5(self.sim_client, self.rdd)
        self.nA = 2

        self.cube = None
        self.cube_start_position = [-0.2, 0.85, 0.025]
        self.cube_size = [0.1, 0.2, 0.04]

        self.open_position = 0.3

    def sendClearSignal(self):
        """
        set the integer signal 'clear' to 1 in the simulator
        :return:
        """
        # NOTE(review): presumably the scene script empties its 'theta'
        # buffer when it sees this signal - confirm against the scene.
        vrep.simxSetIntegerSignal(self.sim_client, 'clear', 1, utils.VREP_ONESHOT)

    def getObs(self):
        """
        get the observation from the 'theta' string signal
        :return: the observation, a list of OBS_LENGTH floats obtained by
                 linearly resampling the unpacked signal (all zeros when the
                 signal is empty)
        """
        sim_ret, data = vrep.simxGetStringSignal(self.sim_client, 'theta',
                                                 vrep.simx_opmode_blocking)
        samples = vrep.simxUnpackFloats(data)
        if len(samples) == 0:
            samples = [0.]
        query = np.linspace(0, len(samples) - 1, self.OBS_LENGTH)
        return np.interp(query, np.arange(len(samples)), samples).tolist()

    def reset(self):
        """
        reset the environment
        :return: the observation, a list of OBS_LENGTH floats (see getObs)
        """
        vrep.simxStopSimulation(self.sim_client, utils.VREP_BLOCKING)
        time.sleep(1)
        vrep.simxStartSimulation(self.sim_client, utils.VREP_BLOCKING)
        time.sleep(1)

        sim_ret, self.cube = utils.getObjectHandle(self.sim_client, 'cube')

        self.rdd.setFingerPos(-0.1)

        utils.setObjectPosition(self.sim_client, self.ur5.UR5_target, [-0.2, 0.6, 0.08])

        # Randomise the start pose along index 1 of the translation.
        dy = 0.3 * np.random.random()
        target_pose = self.ur5.getEndEffectorPose().copy()
        target_pose[1, 3] += dy

        self.sendClearSignal()
        self.ur5.moveTo(target_pose)
        self.rdd.setFingerPos()

        return self.getObs()

    def _cubeOutOfBounds(self, cube_position):
        """Return True when indices 0/1 of cube_position are further from
        cube_start_position than the matching cube_size entries."""
        x_start, y_start = self.cube_start_position[0], self.cube_start_position[1]
        x_margin, y_margin = self.cube_size[0], self.cube_size[1]
        return (cube_position[0] < x_start - x_margin or
                cube_position[0] > x_start + x_margin or
                cube_position[1] < y_start - y_margin or
                cube_position[1] > y_start + y_margin)

    def step(self, a):
        """
        take a step
        :param a: action, int (RIGHT or LEFT)
        :return: observation, reward, done, info; the observation is None on
                 terminal steps. Reward is -1 when the arm target leaves its
                 allowed range, 1 when the cube counts as lifted, 0 otherwise.
        """
        self.sendClearSignal()

        sim_ret, target_position = utils.getObjectPosition(self.sim_client,
                                                           self.ur5.UR5_target)
        sim_ret, target_orientation = utils.getObjectOrientation(self.sim_client,
                                                                 self.ur5.UR5_target)
        target_pose = transformations.euler_matrix(target_orientation[0],
                                                   target_orientation[1],
                                                   target_orientation[2])
        target_pose[:3, -1] = target_position

        if a == self.RIGHT:
            target_pose[1, 3] -= 0.05
        elif a == self.LEFT:
            target_pose[1, 3] += 0.05
        self.ur5.moveTo(target_pose)

        sim_ret, cube_orientation = utils.getObjectOrientation(self.sim_client, self.cube)
        sim_ret, cube_position = utils.getObjectPosition(self.sim_client, self.cube)
        sim_ret, target_position = utils.getObjectPosition(self.sim_client,
                                                           self.ur5.UR5_target)

        # arm is in wrong pose
        if target_position[1] < 0.42 or target_position[1] > 0.95 or \
                target_position[2] < 0 or target_position[2] > 0.2:
            # One-argument print() gives the same output as the Python 2
            # print statement and also runs under Python 3.
            print(' '.join(['Wrong arm position: ', str(target_position)]))
            return None, -1, True, None

        # cube in wrong position; re-query the simulator while the pose is NaN
        while any(np.isnan(cube_position)):
            _, cube_position = utils.getObjectPosition(self.sim_client, self.cube)
        if self._cubeOutOfBounds(cube_position):
            print(' '.join(['Wrong cube position: ', str(cube_position)]))
            return None, 0, True, None

        # cube is lifted
        if cube_orientation[0] < -0.05:
            return None, 1, True, None

        # cube is not lifted
        return self.getObs(), 0, False, None
class ScoopEnv:
    """
    Scooping environment observed through a sliding window of 'sim/state'.

    Actions nudge the UR5 target along index 1 of its position (RIGHT/LEFT)
    or close/open the RDD finger. Robot and cube poses are cached from ROS
    topics; the observation is the list kept in self.state.
    """

    RIGHT = 0
    LEFT = 1
    CLOSE = 2
    OPEN = 3

    def __init__(self, port=19997):
        """
        connect to the simulation and subscribe to the state and pose topics
        :param port: port passed to utils.connectToSimulation
        """
        rospy.init_node('env', anonymous=True)
        self.sim_client = utils.connectToSimulation('127.0.0.1', port)

        # Create UR5 and restart simulator
        self.rdd = RDD(self.sim_client)
        self.ur5 = UR5(self.sim_client, self.rdd)
        self.nA = 4

        self.cube = None
        self.cube_start_position = [-0.2, 0.9, 0.05]
        self.cube_size = [0.1, 0.2, 0.04]

        self.open_position = 0.3

        self.narrow_position = None
        self.wide_position = None
        self.state = [0] * (4 * 60)
        self.state_sub = rospy.Subscriber('sim/state', Float32MultiArray,
                                          self.stateCallback, queue_size=1)

        self.tip_position = None
        self.tip_orientation = None
        self.tip_pos_sub = rospy.Subscriber('sim/ur5_tip_pose', Float32MultiArray,
                                            self.tipPosCallback, queue_size=1)

        self.target_position = None
        self.target_orientation = None
        self.target_pos_sub = rospy.Subscriber('sim/ur5_target_pose', Float32MultiArray,
                                               self.targetPosCallback, queue_size=1)

        self.cube_position = None
        self.cube_orientation = None
        self.cube_pos_sub = rospy.Subscriber('sim/cube_pose', Float32MultiArray,
                                             self.cubePosCallback, queue_size=1)

    def stateCallback(self, msg):
        """
        callback function for the state topic
        :param msg: Float32MultiArray, data[0] is stored as the narrow
                    position and data[1] as the wide position
        :return:
        """
        data = list(msg.data)
        # Drop the four oldest values and append the new message.
        # NOTE(review): the window only keeps its length if every message
        # carries exactly four values - confirm against the publisher.
        self.state = self.state[4:] + data
        self.narrow_position = data[0]
        self.wide_position = data[1]

    def tipPosCallback(self, msg):
        """
        callback function for ur5 tip
        :param msg: Float32MultiArray, first three values are stored as the
                    position and the remainder as the orientation
        :return:
        """
        data = list(msg.data)
        self.tip_position = data[:3]
        self.tip_orientation = data[3:]

    def targetPosCallback(self, msg):
        """
        callback function for ur5 target
        :param msg: Float32MultiArray, first three values are stored as the
                    position and the remainder as the orientation
        :return:
        """
        data = list(msg.data)
        self.target_position = data[:3]
        self.target_orientation = data[3:]

    def cubePosCallback(self, msg):
        """
        callback function for cube
        :param msg: Float32MultiArray, first three values are stored as the
                    position and the remainder as the orientation
        :return:
        """
        data = list(msg.data)
        self.cube_position = data[:3]
        self.cube_orientation = data[3:]

    def reset(self):
        """
        reset the environment
        :return: the observation, the current self.state list
        """
        vrep.simxStopSimulation(self.sim_client, VREP_BLOCKING)
        time.sleep(1)
        vrep.simxStartSimulation(self.sim_client, VREP_BLOCKING)
        time.sleep(1)

        sim_ret, self.cube = utils.getObjectHandle(self.sim_client, 'cube')
        utils.setObjectPosition(self.sim_client, self.ur5.UR5_target, [-0.2, 0.6, 0.08])

        # Randomise the start pose along index 1 of the translation.
        dy = 0.3 * np.random.random()
        target_pose = self.ur5.getEndEffectorPose().copy()
        target_pose[1, 3] += dy
        self.ur5.moveTo(target_pose)

        # Block until the target-pose subscriber delivers a fresh message.
        self.target_position = None
        while self.target_position is None:
            time.sleep(0.1)
        return self.state

    def _cubeOutOfBounds(self, cube_position):
        """Return True when indices 0/1 of cube_position are further from
        cube_start_position than the matching cube_size entries."""
        x_start, y_start = self.cube_start_position[0], self.cube_start_position[1]
        x_margin, y_margin = self.cube_size[0], self.cube_size[1]
        return (cube_position[0] < x_start - x_margin or
                cube_position[0] > x_start + x_margin or
                cube_position[1] < y_start - y_margin or
                cube_position[1] > y_start + y_margin)

    def step(self, a):
        """
        take a step
        :param a: action, int (RIGHT, LEFT, CLOSE or OPEN)
        :return: observation, reward, done, info; the observation is None on
                 terminal steps. Reward is -1 when the arm target leaves its
                 allowed range, 1 when the cube counts as lifted, 0 otherwise.
        """
        if a in [self.RIGHT, self.LEFT]:
            # Only the position is sent to the simulator, so the target
            # orientation is not needed to build the command.
            new_position = np.array(self.target_position, dtype=float)
            if a == self.RIGHT:
                new_position[1] -= 0.01
            elif a == self.LEFT:
                new_position[1] += 0.01
            utils.setObjectPositionOneShot(self.sim_client, self.ur5.UR5_target,
                                           new_position)
        elif a == self.CLOSE:
            self.rdd.setFingerPos(0.)
        elif a == self.OPEN:
            self.rdd.setFingerPos()

        # Snapshot the cached poses so the checks below see one consistent set.
        cube_orientation = self.cube_orientation
        cube_position = self.cube_position
        tip_position = self.tip_position
        narrow_position = self.narrow_position
        target_position = self.target_position

        # arm is in wrong pose
        if target_position[1] < 0.42 or target_position[1] > 0.95 or \
                target_position[2] < 0 or target_position[2] > 0.2:
            # One-argument print() gives the same output as the Python 2
            # print statement and also runs under Python 3.
            print(' '.join(['Wrong arm position: ', str(target_position)]))
            return None, -1, True, None

        # cube in wrong position; re-query the simulator while the pose is NaN
        while any(np.isnan(cube_position)):
            _, cube_position = utils.getObjectPosition(self.sim_client, self.cube)
        if self._cubeOutOfBounds(cube_position):
            print(' '.join(['Wrong cube position: ', str(cube_position)]))
            return None, 0, True, None

        # cube is lifted: tip within the cube margins, cube tilted or raised,
        # and the narrow joint above -0.5
        cube_center = np.array(cube_position)
        margin = np.array(self.cube_size)
        if np.all(tip_position > (cube_center - margin)) and \
                np.all(tip_position < (cube_center + margin)) and \
                (cube_orientation[0] < -0.02 or
                 cube_position[2] > self.cube_start_position[2] + 0.02) and \
                narrow_position > -0.5:
            return None, 1, True, None

        # cube is not lifted
        return self.state, 0, False, None
class ScoopEnv:
    """
    Scooping environment with four target moves and a shaped reward.

    Actions move the UR5 target along index 1 (RIGHT/LEFT) or index 2
    (UP/DOWN) of its translation. The observation is the gripper tip position
    followed by the narrow finger joint position.
    """

    RIGHT = 0
    LEFT = 1
    UP = 2
    DOWN = 3

    def __init__(self, port=19997):
        """
        connect to the simulation and create the robot and sensor wrappers
        :param port: port passed to utils.connectToSimulation; also used to
                     seed numpy's global random generator
        """
        np.random.seed(port)

        self.sim_client = utils.connectToSimulation('127.0.0.1', port)

        # Create UR5 and restart simulator
        self.rdd = RDD(self.sim_client)
        self.ur5 = UR5(self.sim_client, self.rdd)
        self.sensor = VisionSensor(self.sim_client, 'Vision_sensor', None, None, True, False)
        self.nA = 4
        self.observation_space = np.zeros(4)

        self.cube = None
        self.cube_start_position = [-0.2, 0.85, 0.025]
        self.cube_size = [0.1, 0.2, 0.04]

        self.open_position = 0.3

    def getObs(self):
        """
        get the observation from the simulator
        :return: the observation, the tip position concatenated with the
                 narrow finger joint position
        """
        sim_ret, theta = utils.getJointPosition(self.sim_client,
                                                self.rdd.finger_joint_narrow)
        sim_ret, tip_position = utils.getObjectPosition(self.sim_client,
                                                        self.ur5.gripper_tip)
        return np.concatenate((tip_position, [theta]))

    def reset(self):
        """
        reset the environment
        :return: the observation (see getObs)
        """
        vrep.simxStopSimulation(self.sim_client, utils.VREP_BLOCKING)
        time.sleep(1)
        vrep.simxStartSimulation(self.sim_client, utils.VREP_BLOCKING)
        time.sleep(1)

        sim_ret, self.cube = utils.getObjectHandle(self.sim_client, 'cube')

        utils.setObjectPosition(self.sim_client, self.ur5.UR5_target, [-0.2, 0.6, 0.15])

        # Randomise the start pose along indices 1 and 2 of the translation.
        dy = 0.3 * np.random.random()
        dz = 0.1 * np.random.random() - 0.05
        target_pose = self.ur5.getEndEffectorPose().copy()
        target_pose[1, 3] += dy
        target_pose[2, 3] += dz

        self.rdd.setFingerPos()
        self.ur5.moveTo(target_pose)

        return self.getObs()

    def getReward(self):
        """
        compute the shaped reward from the current simulator state
        :return: minus the distance between the 'narrow_tip' and
                 'cube_bottom' objects, plus -10 times the first component of
                 the cube orientation
        """
        sim_ret, narrow_tip = utils.getObjectHandle(self.sim_client, 'narrow_tip')
        sim_ret, cube_bottom = utils.getObjectHandle(self.sim_client, 'cube_bottom')
        sim_ret, tip_position = utils.getObjectPosition(self.sim_client, narrow_tip)
        sim_ret, bottom_position = utils.getObjectPosition(self.sim_client, cube_bottom)
        sim_ret, cube_orientation = utils.getObjectOrientation(self.sim_client, self.cube)
        # Coerce to arrays so the subtraction also works if the helper hands
        # back plain lists.
        offset = np.asarray(tip_position) - np.asarray(bottom_position)
        return -np.linalg.norm(offset) + (-10 * cube_orientation[0])

    def _cubeOutOfBounds(self, cube_position):
        """Return True when indices 0/1 of cube_position are further from
        cube_start_position than the matching cube_size entries."""
        x_start, y_start = self.cube_start_position[0], self.cube_start_position[1]
        x_margin, y_margin = self.cube_size[0], self.cube_size[1]
        return (cube_position[0] < x_start - x_margin or
                cube_position[0] > x_start + x_margin or
                cube_position[1] < y_start - y_margin or
                cube_position[1] > y_start + y_margin)

    def step(self, a):
        """
        take a step
        :param a: action, int (RIGHT, LEFT, UP or DOWN)
        :return: observation, reward, done, info; the observation is None on
                 terminal steps and the reward always comes from getReward
        """
        sim_ret, target_position = utils.getObjectPosition(self.sim_client,
                                                           self.ur5.UR5_target)
        sim_ret, target_orientation = utils.getObjectOrientation(self.sim_client,
                                                                 self.ur5.UR5_target)
        target_pose = transformations.euler_matrix(target_orientation[0],
                                                   target_orientation[1],
                                                   target_orientation[2])
        target_pose[:3, -1] = target_position

        if a == self.RIGHT:
            target_pose[1, 3] -= 0.05
        elif a == self.LEFT:
            target_pose[1, 3] += 0.05
        elif a == self.UP:
            target_pose[2, 3] += 0.03
        elif a == self.DOWN:
            target_pose[2, 3] -= 0.03

        # Moves that would leave the allowed range are silently skipped.
        if 0.42 < target_pose[1, 3] < 0.95 and 0 < target_pose[2, 3] < 0.3:
            self.ur5.moveTo(target_pose)

        sim_ret, cube_orientation = utils.getObjectOrientation(self.sim_client, self.cube)
        sim_ret, cube_position = utils.getObjectPosition(self.sim_client, self.cube)

        # cube in wrong position; re-query the simulator while the pose is NaN
        while any(np.isnan(cube_position)):
            _, cube_position = utils.getObjectPosition(self.sim_client, self.cube)
        if self._cubeOutOfBounds(cube_position):
            return None, self.getReward(), True, None

        # cube is lifted
        if cube_orientation[0] < -0.02:
            return None, self.getReward(), True, None

        # cube is not lifted
        return self.getObs(), self.getReward(), False, None
class ScoopEnv:
    """
    Scooping environment observed through stacked camera frames and traces.

    Actions move the UR5 target along index 1 (RIGHT/LEFT) or index 2
    (UP/DOWN) of its translation. The observation stacks the last
    HISTORY_LENGTH camera frames and the last HISTORY_LENGTH resampled
    'theta' traces.
    """

    RIGHT = 0
    LEFT = 1
    UP = 2
    DOWN = 3

    # Number of past frames / traces stacked into one observation.
    HISTORY_LENGTH = 4
    # Number of samples in each resampled 'theta' trace.
    OBS_LENGTH = 20

    def __init__(self, port=19997):
        """
        connect to the simulation and create the robot and sensor wrappers
        :param port: port passed to utils.connectToSimulation; also used to
                     seed numpy's global random generator
        """
        np.random.seed(port)

        self.sim_client = utils.connectToSimulation('127.0.0.1', port)

        # Create UR5 and restart simulator
        self.rdd = RDD(self.sim_client)
        self.ur5 = UR5(self.sim_client, self.rdd)
        self.sensor = VisionSensor(self.sim_client, 'Vision_sensor_top', None, None, True, False)
        self.nA = 4
        self.observation_space = (np.zeros((12, 64, 64)), np.zeros((4, 20)))

        self.cube = None
        self.cube_start_position = [-0.2, 0.85, 0.025]
        self.cube_size = [0.1, 0.2, 0.04]

        self.open_position = 0.3

        self._resetHistory()

    def _resetHistory(self):
        """Fill both observation histories with zero entries."""
        self.img_his = [np.zeros((3, 64, 64)) for _ in range(self.HISTORY_LENGTH)]
        self.theta_his = [np.zeros((1, self.OBS_LENGTH)) for _ in range(self.HISTORY_LENGTH)]

    def sendClearSignal(self):
        """
        set the integer signal 'clear' to 1 in the simulator
        :return:
        """
        # NOTE(review): presumably the scene script empties its 'theta'
        # buffer when it sees this signal - confirm against the scene.
        vrep.simxSetIntegerSignal(self.sim_client, 'clear', 1, utils.VREP_ONESHOT)

    def getObs(self):
        """
        read a new frame and trace, push them into the histories
        :return: the observation, a tuple (stacked frames, stacked traces)
                 built by concatenating each history along axis 0
        """
        sim_ret, data = vrep.simxGetStringSignal(self.sim_client, 'theta',
                                                 vrep.simx_opmode_blocking)
        samples = vrep.simxUnpackFloats(data)
        if len(samples) == 0:
            samples = [0.]
        query = np.linspace(0, len(samples) - 1, self.OBS_LENGTH)
        resampled = np.interp(query, np.arange(len(samples)), samples)

        # Move the sensor image's axis 2 to the front before stacking.
        img_obs = np.rollaxis(self.sensor.getColorData(), 2, 0)
        theta_obs = np.expand_dims(resampled, 0)

        # Slide both windows: drop the oldest entry, append the newest.
        self.img_his = self.img_his[1:] + [img_obs]
        self.theta_his = self.theta_his[1:] + [theta_obs]

        return np.concatenate(self.img_his, 0), np.concatenate(self.theta_his, 0)

    def reset(self):
        """
        reset the environment
        :return: the observation (see getObs); the histories are cleared
                 first, so it holds zeros plus the newest frame and trace
        """
        vrep.simxStopSimulation(self.sim_client, utils.VREP_BLOCKING)
        time.sleep(1)
        vrep.simxStartSimulation(self.sim_client, utils.VREP_BLOCKING)
        time.sleep(1)

        sim_ret, self.cube = utils.getObjectHandle(self.sim_client, 'cube')

        utils.setObjectPosition(self.sim_client, self.ur5.UR5_target, [-0.2, 0.6, 0.15])

        # Randomise the start pose along indices 1 and 2 of the translation.
        dy = 0.3 * np.random.random()
        dz = 0.1 * np.random.random() - 0.05
        target_pose = self.ur5.getEndEffectorPose().copy()
        target_pose[1, 3] += dy
        target_pose[2, 3] += dz

        self.rdd.setFingerPos()
        self.sendClearSignal()
        self.ur5.moveTo(target_pose)

        # Without this, the first observations of an episode would still
        # contain frames and traces from the previous episode.
        self._resetHistory()
        return self.getObs()

    def getReward(self):
        """
        compute the shaped reward from the current simulator state
        :return: minus the distance between the 'narrow_tip' and
                 'cube_bottom' objects, plus -10 times the first component of
                 the cube orientation
        """
        sim_ret, narrow_tip = utils.getObjectHandle(self.sim_client, 'narrow_tip')
        sim_ret, cube_bottom = utils.getObjectHandle(self.sim_client, 'cube_bottom')
        sim_ret, tip_position = utils.getObjectPosition(self.sim_client, narrow_tip)
        sim_ret, bottom_position = utils.getObjectPosition(self.sim_client, cube_bottom)
        sim_ret, cube_orientation = utils.getObjectOrientation(self.sim_client, self.cube)
        # Coerce to arrays so the subtraction also works if the helper hands
        # back plain lists.
        offset = np.asarray(tip_position) - np.asarray(bottom_position)
        return -np.linalg.norm(offset) + (-10 * cube_orientation[0])

    def _cubeOutOfBounds(self, cube_position):
        """Return True when indices 0/1 of cube_position are further from
        cube_start_position than the matching cube_size entries."""
        x_start, y_start = self.cube_start_position[0], self.cube_start_position[1]
        x_margin, y_margin = self.cube_size[0], self.cube_size[1]
        return (cube_position[0] < x_start - x_margin or
                cube_position[0] > x_start + x_margin or
                cube_position[1] < y_start - y_margin or
                cube_position[1] > y_start + y_margin)

    def step(self, a):
        """
        take a step
        :param a: action, int (RIGHT, LEFT, UP or DOWN)
        :return: observation, reward, done, info; the observation is None on
                 terminal steps and the reward always comes from getReward
        """
        self.sendClearSignal()

        sim_ret, target_position = utils.getObjectPosition(self.sim_client,
                                                           self.ur5.UR5_target)
        sim_ret, target_orientation = utils.getObjectOrientation(self.sim_client,
                                                                 self.ur5.UR5_target)
        target_pose = transformations.euler_matrix(target_orientation[0],
                                                   target_orientation[1],
                                                   target_orientation[2])
        target_pose[:3, -1] = target_position

        if a == self.RIGHT:
            target_pose[1, 3] -= 0.05
        elif a == self.LEFT:
            target_pose[1, 3] += 0.05
        elif a == self.UP:
            target_pose[2, 3] += 0.03
        elif a == self.DOWN:
            target_pose[2, 3] -= 0.03

        # Moves that would leave the allowed range are silently skipped.
        if 0.42 < target_pose[1, 3] < 0.95 and 0 < target_pose[2, 3] < 0.3:
            self.ur5.moveTo(target_pose)

        sim_ret, cube_orientation = utils.getObjectOrientation(self.sim_client, self.cube)
        sim_ret, cube_position = utils.getObjectPosition(self.sim_client, self.cube)

        # cube in wrong position; re-query the simulator while the pose is NaN
        while any(np.isnan(cube_position)):
            _, cube_position = utils.getObjectPosition(self.sim_client, self.cube)
        if self._cubeOutOfBounds(cube_position):
            return None, self.getReward(), True, None

        # cube is lifted
        if cube_orientation[0] < -0.02:
            return None, self.getReward(), True, None

        # cube is not lifted
        return self.getObs(), self.getReward(), False, None