class Arm: gripper_pub = None joints_sub = None joints_pub = None bx = by = bz = 0.001 brx = bry = brz = 0.001 kdl_kin = None ik_solver = None def __init__(self, gripper_pub, joints_pub, urdf): self.joints_pub = joints_pub self.gripper_pub = gripper_pub self.ik_solver = TRAC_IK( "base_link", "tool", urdf, 0.1, # default seconds 1e-5, # default epsilon "Speed") # print("Number of joints:") # print(self.ik_solver.getNrOfJointsInChain()) # print("Joint names:") # print(self.ik_solver.getJointNamesInChain(urdf)) # print("Link names:") # print(self.ik_solver.getLinkNamesInChain()) def gripperClose(self, bool): msg = Bool() msg.data = bool self.gripper_pub.publish(msg) def solve(self, pos, quat, trans, rotat, q0): rotation = euler_from_quaternion(quat) # print(trans, dist_3d(pos, trans), dist_3d(rotation, rotat)) factor = 5 if trans is not None and ( abs(pos[0] - trans[0]) > factor * self.bx or abs(pos[1] - trans[1]) > factor * self.by or abs(pos[2] - trans[2]) > factor * self.bz or abs(rotation[0] - rotat[0]) > factor * self.brx or abs(rotation[1] - rotat[1]) > factor * self.bry or abs(rotation[2] - rotat[2]) > factor * self.brz): sol = self.ik_solver.CartToJnt(q0, pos[0], pos[1], pos[2], quat[0], quat[1], quat[2], quat[3], self.bx, self.by, self.bz, self.brx, self.bry, self.brz) msg_cmd = Float64MultiArray() msg_cmd.data = sol if len(sol) != 0: self.joints_pub.publish(msg_cmd) else: # print('fail') return True, None # print('pub', sol) return True, sol else: return False, None
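A minimal usage sketch for the Arm class above. The node name, topic names, seed, and poses are placeholders rather than part of the original code; solve() only runs IK when the requested pose differs from the current one by more than 5x the bounds, so a current pose is passed in explicitly here.

import rospy
from std_msgs.msg import Bool, Float64MultiArray

rospy.init_node("arm_ik_demo")                      # hypothetical node/topic names
gripper_pub = rospy.Publisher("gripper_close", Bool, queue_size=1)
joints_pub = rospy.Publisher("joint_command", Float64MultiArray, queue_size=1)
urdf = rospy.get_param("/robot_description")

arm = Arm(gripper_pub, joints_pub, urdf)
q0 = [0.0] * arm.ik_solver.getNrOfJointsInChain()   # seed joint angles
curr_pos = (0.30, 0.00, 0.20)                       # current end-effector position (placeholder)
curr_rot = (0.0, 0.0, 0.0)                          # current end-effector Euler angles (placeholder)
target_pos = (0.40, 0.05, 0.30)                     # desired position in base_link
target_quat = (0.0, 0.0, 0.0, 1.0)                  # desired orientation (x, y, z, w)

attempted, sol = arm.solve(target_pos, target_quat, curr_pos, curr_rot, q0)
if attempted and sol is None:
    print("IK failed for the requested pose")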
class BaxterEnv(mujoco_env.MujocoEnv, utils.EzPickle): """cts env, 6dim state space: relative state space position of gripper, (block-gripper) and (target-block) random restarts for block and target on the table reward function: - 1(not reaching) actions: (delta_x, delta_y) 5cm push starting state: (0.63, 0.2, 0.59, 0.27, 0.55, 0.3) max_num_steps = 50 """ def __init__(self, max_len=50, near_init=False, fixed_obj_init=False): dirname = os.path.dirname(os.path.abspath(__file__)) print(dirname) mujoco_env.MujocoEnv.__init__( self, os.path.join(dirname, "mjc/baxter_orient_left_cts_rotate.xml"), 1) self.near_init = near_init self.fixed_obj_init = fixed_obj_init utils.EzPickle.__init__(self) ## mujoco things # task space action space low = np.array([-1., -1.]) high = np.array([1., 1.]) self.action_space = spaces.Box(low, high) self.tuck_pose = { 'left': np.array([-0.08, -1.0, -1.19, 1.94, 0.67, 1.03, -0.50]) } self.start_pose = { 'left': np.array([-0.21, -0.75, -1.4, 1.61, 0.60, 0.81, -0.52]) } ## starting pose self.init_qpos = self.data.qpos.copy().flatten() self.init_qpos[1:8] = np.array(self.start_pose["left"]).T ## ik setup urdf_filename = osp.join(dirname, "urdf", "baxter.urdf") with open(urdf_filename) as f: urdf = f.read() # mode; Speed, Distance, Manipulation1, Manipulation2 self.ik_solver = TRAC_IK( "base", "left_gripper", urdf, 0.005, # default seconds 1e-5, # default epsilon "Speed") self.old_state = np.zeros((6, )) self.max_num_steps = max_len print("INIT DONE!") ## gym methods def reset_model(self): target_pos = np.array([0.62, 0.3, 0.1]) if self.near_init: # print("last state:",self.old_state) # print("New Episode!") box_size = 0.1 half_size = box_size / 2.0 min_box_gripper_dist = 0.03 #the box is 3 cm x 3 cm so this seems reasonable while 1: qpos = self.init_qpos + self.np_random.uniform( low=-.005, high=.005, size=self.model.model.nq) qvel = self.init_qvel + self.np_random.uniform( low=-.005, high=.005, size=self.model.model.nv) ## random target location qpos[-2:] = qpos[-2:] + self.np_random.uniform( low=-box_size, high=box_size, size=2) ## random box location # qpos[8:10] = qpos[8:10] + self.np_random.uniform(low=-box_size, high=box_size, size=2) qpos[8:10] = np.array([0.5, 0.35]) box_gripper_dist = np.linalg.norm(target_pos[:2] - qpos[8:10]) if box_gripper_dist > min_box_gripper_dist: break else: # print("last state:",self.old_state) # print("New Episode!") qpos = self.init_qpos + self.np_random.uniform( low=-.005, high=.005, size=self.model.model.nq) qvel = self.init_qvel + self.np_random.uniform( low=-.005, high=.005, size=self.model.model.nv) ## random target location qpos[-2:] = qpos[-2:] + self.np_random.uniform( low=-0.15, high=0.15, size=2) ## random box location qpos[8:10] = qpos[ 8:10] #+ self.np_random.uniform(low=-0.15, high=0.15, size=2) if self.fixed_obj_init: qpos[8:10] = self.init_qpos[8:10] self.set_state(qpos, qvel) target_quat = np.array([1.0, 0.0, 0.0, 0]) target = np.concatenate((target_pos, target_quat)) action_jt_space = self.do_ik(ee_target=target, jt_pos=self.data.qpos[1:8].flat) if action_jt_space is not None: self.apply_action(action_jt_space) ## for calculating velocities # self.old_state = np.zeros((6,)) self.contacted = False self.out_of_bound = 0 self.num_step = 0 ob = self._get_obs() gripper_pose = ob[:2] box_pose = ob[2:4] target_pose = ob[4:6] relative_ob = np.concatenate( [gripper_pose, box_pose - gripper_pose, target_pose - box_pose]) return relative_ob def viewer_setup(self): # cam_pos = np.array([0.1, 0.0, 0.7, 0.01, -45., 0.]) cam_pos = 
np.array([1.0, 0.0, 0.7, 0.5, -45, 180]) self.set_cam_position(cam_pos) def _get_obs(self): ee_x, ee_y = self.data.site_xpos[0][:2] box_x, box_y = self.data.site_xpos[1][:2] target_x, target_y = self.data.site_xpos[2][:2] state = np.array([ee_x, ee_y, box_x, box_y, target_x, target_y]) vel = (state - self.old_state) / self.dt self.old_state = state.copy() return state def set_state(self, qpos, qvel): assert qpos.shape == (self.model.model.nq, ) and qvel.shape == ( self.model.model.nv, ) self.model.data.qpos[:] = qpos self.model.data.qvel[:] = qvel # self.model._compute_subtree() #pylint: disable=W0212 self.model.forward() ## my methods def apply_action(self, action): ctrl = self.data.ctrl.copy() # print(ctrl.shape) ctrl[:7] = np.array(action) self.data.ctrl[:] = ctrl self.do_simulation(ctrl, 1000) def do_ik(self, ee_target, jt_pos): # print("starting to do ik") # print(ee_target[:3]) # Populate seed with current angles if not provided init_pose_trac = tracik.DoubleVector() for i in range(7): init_pose_trac.push_back(jt_pos[i]) x, y, z, qx, qy, qz, qw = ee_target qout = list( self.ik_solver.CartToJnt(init_pose_trac, x, y, z, qx, qy, qz, qw)) if (len(qout) > 0): # print("ik sol:",qout) return qout else: #print("!no result found") return None def close_gripper(self, left_gap=0): pass def _step(self, action): ## hack for the init of mujoco.env if (action.shape[0] > 2): return np.zeros((6, 1)), 0, False, {} self.num_step += 1 old_action_jt_space = self.data.qpos[1:8].T.copy() ## parsing of primitive actions delta_x, delta_y = action # print("delta x:%.4f, y:%.4f"%(delta_x, delta_y)) x, y = self.old_state[:2].copy() # print("old x:%.4f, y:%.4f"%(x,y)) x += delta_x * 0.05 y += delta_y * 0.05 # print("x:%.4f, y:%.4f"%(x,y)) # print("x:%.4f,y:%.4f"%(0.2*x + 0.6 , 0.3*y + 0.3)) out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6) if np.abs(delta_x * 0.05) > 0.0001 or np.abs(delta_y * 0.05) > 0.0001: target_pos = np.array([x, y, 0.15]) target_quat = np.array([1.0, 0.0, 0.0, 0]) target = np.concatenate((target_pos, target_quat)) action_jt_space = self.do_ik(ee_target=target, jt_pos=self.data.qpos[1:8].flat) if (action_jt_space is not None) and (not out_of_bound): #print('running ik') # print("ik:", action_jt_space) self.apply_action(action_jt_space) else: #print("action is out of bounds") action_jt_space = old_action_jt_space.copy() else: #print("action too small to run ik") action_jt_space = old_action_jt_space.copy() # print("controller:",self.data.qpos[1:8].T) ## getting state ob = self._get_obs() gripper_pose = ob[:2] box_pose = ob[2:4] target_pose = ob[4:6] #print("new gripper_pose", gripper_pose, "block pose:", box_pose) ## reward function definition reward_reaching_goal = np.linalg.norm( box_pose - target_pose) < 0.05 #assume: my robot has 5cm error total_reward = -1 * (not reward_reaching_goal) box_x, box_y, box_z = self.data.site_xpos[1] info = {} if reward_reaching_goal == 1: done = True info["done"] = "goal reached" elif box_z < -0.02 or box_x < 0.4 or box_x > 0.8 or box_y < 0.0 or box_y > 0.6: done = True info["done"] = "box out of bounds" total_reward -= (self.max_num_steps - self.num_step) + 5 elif (self.num_step > self.max_num_steps): done = True info["done"] = "max_steps_reached" else: done = False info['absolute_ob'] = ob.copy() relative_ob = np.concatenate( [gripper_pose, box_pose - gripper_pose, target_pose - box_pose]) return relative_ob, total_reward, done, info def apply_hindsight(self, states, actions, goal_state): '''generates hindsight rollout based on the goal ''' 
#goal = "box (absolute)" + "gripper" (at last time-step) goal = states[-1][2:4] + states[ -1][:2] ## this is the absolute goal location her_states, her_rewards = [], [] for i in range(len(actions)): state = states[i] gripper_pose = state[:2] box_pose = state[2:4] + gripper_pose target_pose = state[4:6] + box_pose state[-2:] = goal.copy() - box_pose reward = self.calc_reward(state, goal, actions[i]) her_states.append(state) her_rewards.append(reward) goal_state[-2:] = np.array([0., 0.]) her_states.append(goal_state) return her_states, her_rewards def calc_reward(self, state, goal, action): ## parsing of primitive actions delta_x, delta_y = action x, y = self.old_state[:2].copy() x += delta_x * 0.05 y += delta_y * 0.05 out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6) gripper_pose = state[:2] box_pose = state[2:4] + gripper_pose target_pose = state[-2:] + box_pose ## reward function definition reward_reaching_goal = np.linalg.norm(box_pose - target_pose) < 0.05 total_reward = -1 * (not reward_reaching_goal) return total_reward
class BlueIK: def _setup(self): # set up chebychev points self.resolution = 100 self.duration = 0.8 # load in ros parameters self.baselink = rospy.get_param("blue_hardware/baselink") self.endlink = rospy.get_param("blue_hardware/endlink") urdf = rospy.get_param("/robot_description") flag, self.tree = kdl_parser.treeFromString(urdf) # build kinematic chain and fk and jacobian solvers chain_ee = self.tree.getChain(self.baselink, self.endlink) self.fk_ee = kdl.ChainFkSolverPos_recursive(chain_ee) self.jac_ee = kdl.ChainJntToJacSolver(chain_ee) # building robot joint state self.num_joints = chain_ee.getNrOfJoints() self.joint_names = rospy.get_param("blue_hardware/joint_names") self.joint_names = self.joint_names[:self.num_joints] if self.debug: rospy.loginfo(self.joint_names) self.joints = kdl.JntArray(self.num_joints) # todo make seperate in abstract class self.ik = TRAC_IK(self.baselink, self.endlink, urdf, 0.005, 5e-5, "Distance") # "Manipulation2") # "Manipulation1") def ik_sol(self, target, joints): # target is a target pose object # joints is the starting joint seed seed_state = [] for j in joints: seed_state.append(j) rospy.logerr("TARGET") rospy.logerr(target) result = self.ik.CartToJnt(seed_state, target.position.x, target.position.y, target.position.z, target.orientation.x, target.orientation.y, target.orientation.z, target.orientation.w) if not len(result) == self.num_joints: return self.command_to_joint_state(result) def command_to_joint_state(self, joints): start_pos = [] joint_msg = rospy.wait_for_message("/joint_states", JointState) for i, n in enumerate(self.joint_names): index = joint_msg.name.index(n) start_pos.append(joint_msg.position[index]) rospy.logerr(start_pos) start_pos = np.array(start_pos) end_pos = [] for j in joints: end_pos.append(j) end_pos = np.array(end_pos) joint_diff = end_pos - start_pos max_joint_diff = np.linalg.norm(joint_diff, np.inf) for j in range(self.resolution + 1): step = start_pos + (end_pos - start_pos) * cheb_points(self.resolution, j) self._pub_js_msg(np.array(step)) rospy.sleep( np.abs(max_joint_diff) * self.duration * 1.0 / self.resolution) def _pub_js_msg(self, joints): # joints is an array of 7 joint angles msg = Float64MultiArray() msg.data = joints if self.debug: pass # rospy.logerr("ik result") rospy.logerr(joints) self.command_pub.publish(msg) self.command_pub_ctc.publish(msg) # def update_joints(self, joint_msg): # if self.first: # TODO: brent added this and doesn't understand what's going on with first # first = False # /brent # return # # temp_joints = kdl.JntArray(self.num_joints) # for i, n in enumerate(self.joint_names): # index = joint_msg.name.index(n) # temp_joints[i] = joint_msg.position[index] # self.joints = temp_joints def __init__(self, debug=False): self.debug = debug if self.debug: self.debug_count = 0 self._setup() self.target_pos = Pose() # self.first = True self.command_pub = rospy.Publisher("blue_controllers/joint_position_controller/command", Float64MultiArray, queue_size=1) self.command_pub_ctc = rospy.Publisher("blue_controllers/joint_ctc/command", Float64MultiArray, queue_size=1)
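The trajectory interpolation in command_to_joint_state() relies on a cheb_points() helper that is not shown in this snippet. One plausible, purely hypothetical definition, assuming it maps step j of n onto a Chebyshev-spaced fraction in [0, 1] so the commanded motion accelerates and decelerates smoothly at the endpoints:

import numpy as np

def cheb_points(n, j):
    # Hypothetical helper: fraction of the way from start to end at step j of n.
    # Spacing follows cos(pi * j / n), so steps bunch up near both endpoints.
    return 0.5 * (1.0 - np.cos(np.pi * float(j) / n))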
class BaxterEnv(mujoco_env.MujocoEnv, utils.EzPickle): """cts env, 9dim state space: relative state space position of gripper, (block-gripper) and (target-block) 4 restarts for the gripper with pen in hand and 1 without grasped pen reward function: - 1(not reaching) actions: (delta_x, delta_y, delta_z, gap) 5cm push max_num_steps = 20 """ def __init__(self, max_len=50): dirname = os.path.dirname(os.path.abspath(__file__)) mujoco_env.MujocoEnv.__init__( self, os.path.join(dirname, "mjc/baxter_orient_left_cts_with_grippers_pen.xml"), 1) utils.EzPickle.__init__(self) ## mujoco things # task space action space low = np.array([-1., -1., -1., -1.]) high = np.array([1., 1., 1., 1.]) self.action_space = spaces.Box(low, high) self.tuck_pose = { 'left': np.array([-0.08, -1.0, -1.19, 1.94, 0.67, 1.03, -0.50]) } self.start_pose = { 'left': np.array([-0.21, -0.75, -1.4, 1.61, 0.60, 0.81, -0.52]) } ## starting pose self.init_qpos = self.data.qpos.copy().flatten() self.init_qpos[1:8] = np.array(self.start_pose["left"]).T ## ik setup urdf_filename = osp.join(dirname, "urdf", "baxter.urdf") with open(urdf_filename) as f: urdf = f.read() # mode; Speed, Distance, Manipulation1, Manipulation2 self.ik_solver = TRAC_IK( "base", "left_gripper", urdf, 0.005, # default seconds 1e-5, # default epsilon "Speed") self.old_state = np.zeros((9, )) self.max_num_steps = max_len print("INIT DONE!") ## gym methods def reset_model(self): # print("last state:",self.old_state) # print("New Episode!") reset_state = 1 #self.np_random.choice(2) if reset_state: grasped_qpos = np.array([ -1.1937e-03, 2.3933e-01, -2.0445e-02, -1.5192e+00, 1.5246e+00, 1.5489e+00, 1.5099e+00, 2.6392e+00, 2.0560e-02, -2.0560e-02, 6.5746e-01, 4.1344e-01, 4.9806e-03, 9.9898e-01, -1.3736e-02, 4.3098e-02, 1.6292e-03, 1.0000e-01, -1.0000e-01 ]) reset_state = self.np_random.choice(4) if reset_state == 0: pass elif reset_state == 1: grasped_qpos[-2:] = np.array([0.0, -0.1]) elif reset_state == 2: grasped_qpos[-2:] = np.array([-0.1, 0.2]) elif reset_state == 3: grasped_qpos[-2:] = np.array([0.1, 0.3]) elif reset_state == 4: grasped_qpos[10:13] = np.array([0.55, 0.35, 0.0]) qvel = self.init_qvel self.set_state(grasped_qpos, qvel) else: qpos = self.init_qpos.copy() ## random target location qpos[-2:] = qpos[-2:] + self.np_random.uniform( low=-0.15, high=0.15, size=2) ## random object location ## qpos[-2:] is the displacement from 0.55 0.3 ## random box location r = 0.2 #np.random.uniform(low=0.1, high=0.2, size=1) theta = np.random.uniform(low=-np.pi, high=np.pi, size=1) qpos[10] = 0.55 + qpos[-2] + r * np.cos(theta) qpos[11] = 0.3 + qpos[-1] + r * np.sin(theta) qvel = self.init_qvel self.set_state(qpos, qvel) self.num_step = 0 ob = self._get_obs() gripper_pose = ob[:3] box_pose = ob[3:6] target_pose = ob[6:9] relative_ob = np.concatenate( [gripper_pose, box_pose - gripper_pose, target_pose - box_pose]) return relative_ob def viewer_setup(self): # cam_pos = np.array([0.1, 0.0, 0.7, 0.01, -45., 0.]) cam_pos = np.array([1.0, 0.0, 0.7, 1.5, -45, 180]) self.set_cam_position(self.viewer, cam_pos) def _get_obs(self): ee_x, ee_y, ee_z = self.data.site_xpos[0][:3] box_x, box_y, box_z = self.data.site_xpos[3][:3] target_x, target_y, target_z = self.data.site_xpos[4][:3] state = np.array([ ee_x, ee_y, ee_z, box_x, box_y, box_z, target_x, target_y, target_z ]) vel = (state - self.old_state) / self.dt self.old_state = state.copy() return state def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq, ) and qvel.shape == ( self.model.nv, ) self.model.data.qpos = 
qpos self.model.data.qvel = qvel # self.model._compute_subtree() #pylint: disable=W0212 self.model.forward() ## my methods def apply_action(self, action): # print("cmd",action) # print("curr jt pos", self.data.qpos[1:8].T) ctrl = self.data.ctrl.copy() # print(ctrl.shape) ctrl[:7, 0] = np.array(action) self.data.ctrl = ctrl self.do_simulation(ctrl, 1000) # print("next jt pos", self.data.qpos[1:8].T) def do_ik(self, ee_target, jt_pos): # print("starting to do ik") # print(ee_target[:3]) # Populate seed with current angles if not provided init_pose_trac = tracik.DoubleVector() for i in range(7): init_pose_trac.push_back(jt_pos[i]) x, y, z, qx, qy, qz, qw = ee_target qout = list( self.ik_solver.CartToJnt(init_pose_trac, x, y, z, qx, qy, qz, qw)) if (len(qout) > 0): # print("ik sol:",qout) return qout else: print("!no result found") return None def close_gripper(self, gap=0): # print("before grip location", self.data.site_xpos[0]) # print("gap requested:%.4f"%gap) # print("after grip location", self.data.site_xpos[0]) # print("before grip", self.data.ctrl) ctrl = self.data.ctrl.copy() ctrl[:7, 0] = self.data.qpos[1:8].flatten() ctrl[7, 0] = (gap + 1) * 0.020833 ctrl[8, 0] = -(gap + 1) * 0.020833 self.data.ctrl = ctrl.copy() # print("qpos gripper", self.data.qpos[8:10].T) # print("before grip", self.data.ctrl) def set_cam_position(self, viewer, cam_pos): for i in range(3): viewer.cam.lookat[i] = cam_pos[i] viewer.cam.distance = cam_pos[3] viewer.cam.elevation = cam_pos[4] viewer.cam.azimuth = cam_pos[5] viewer.cam.trackbodyid = -1 def _step(self, action): ## hack for the init of mujoco.env if (action.shape[0] > 4): return np.zeros((9, 1)), 0, False, {} self.num_step += 1 old_action_jt_space = self.data.qpos[1:8].T.copy() ## parsing of primitive actions delta_x, delta_y, delta_z, gap = action # print("gap:%.4f"%gap) # print("grip prev controller:",self.data.qpos[1:8].T) self.close_gripper(gap) # print("delta x:%.4f, y:%.4f"%(delta_x, delta_y)) x, y, z = self.old_state[:3].copy() # print("old x:%.4f, y:%.4f"%(x,y)) x += delta_x * 0.05 y += delta_y * 0.05 z += delta_z * 0.05 # print("x:%.4f, y:%.4f, z:%.4f"%(x,y,z)) # print("prev controller:",self.data.qpos[1:8].T) # print("x:%.4f,y:%.4f"%(0.2*x + 0.6 , 0.3*y + 0.3)) out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6) or (z < 0.1 or z > 0.5) if np.abs(delta_x * 0.05) > 0.0001 or np.abs( delta_y * 0.05) > 0.0001 or np.abs(delta_z * 0.05) > 0.0001: target_pos = np.array([x, y, z]) target_quat = np.array([1.0, 0.0, 0.0, 0]) target = np.concatenate((target_pos, target_quat)) action_jt_space = self.do_ik(ee_target=target, jt_pos=self.data.qpos[1:8].flat) if (action_jt_space is not None) and (not out_of_bound): # print("ik:", action_jt_space) self.apply_action(action_jt_space) else: action_jt_space = old_action_jt_space.copy() else: action_jt_space = old_action_jt_space.copy() # print("controller:",self.data.qpos[1:8].T) ## getting state ob = self._get_obs() gripper_pose = ob[:3] box_pose = ob[3:6] target_pose = ob[6:9] #print("new gripper_pose", gripper_pose, "block pose:", box_pose) ## reward function definition # print(np.abs(target_pos[0]-box_pose[0]), np.abs(target_pos[1]-box_pose[1]), np.abs(target_pose[2]-box_pose[2])) reward_reaching_goal = ( np.abs(target_pose[0] - box_pose[0]) < 0.07) and (np.abs(target_pose[1] - box_pose[1]) < 0.07) and (np.abs(target_pose[2] - box_pose[2]) < 0.07) total_reward = -1 * (not reward_reaching_goal) box_x, box_y, box_z = self.data.site_xpos[3] info = {} if reward_reaching_goal == 1: done = True info["done"] 
= "goal reached" elif box_z < -0.02 or box_x < 0.4 or box_x > 0.8 or box_y < 0.0 or box_y > 0.6: done = True info["done"] = "box out of bounds" total_reward -= (self.max_num_steps - self.num_step) + 5 elif (self.num_step > self.max_num_steps): done = True info["done"] = "max_steps_reached" else: done = False info['absolute_ob'] = ob.copy() relative_ob = np.concatenate( [gripper_pose, box_pose - gripper_pose, target_pose - box_pose]) return relative_ob, total_reward, done, info def apply_hindsight(self, states, actions, goal_state): '''generates hindsight rollout based on the goal ''' goal = states[-1][3:6] + states[ -1][:3] ## this is the absolute goal location her_states, her_rewards = [], [] for i in range(len(actions)): state = states[i] gripper_pose = state[:3] box_pose = state[3:6] + gripper_pose target_pose = state[6:9] + box_pose state[-3:] = goal.copy() - box_pose reward = self.calc_reward(state, goal, actions[i]) her_states.append(state) her_rewards.append(reward) goal_state[-3:] = np.array([0., 0., 0.]) her_states.append(goal_state) return her_states, her_rewards def calc_reward(self, state, goal, action): ## parsing of primitive actions delta_x, delta_y, delta_z, gap = action x, y, z = self.old_state[:3].copy() x += delta_x * 0.05 y += delta_y * 0.05 z += delta_z * 0.05 out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6) or (z < 0.1 or z > 0.5) gripper_pose = state[:3] box_pose = state[3:6] + gripper_pose target_pose = goal + box_pose ## reward function definition reward_reaching_goal = np.linalg.norm(box_pose - target_pose) < 0.05 total_reward = -1 * (not reward_reaching_goal) return total_reward
class BaxterEnv(mujoco_env.MujocoEnv, utils.EzPickle): """cts env, 6dim state space: relative state space position of gripper, (block-gripper), euler angle and (target-block) random restarts for block and target on the table reward function: - 1(not reaching) actions: (delta_x, delta_y) 5cm push starting state: (0.63, 0.2, 0.59, 0.27, 0, 0, 0, 0.55, 0.3) max_num_steps = 50 """ def __init__(self): dirname = os.path.dirname(os.path.abspath(__file__)) mujoco_env.MujocoEnv.__init__( self, os.path.join(dirname, "mjc/baxter_orient_left_cts.xml"), 1) utils.EzPickle.__init__(self) ## mujoco things # task space action space low = np.array([-1., -1.]) high = np.array([1., 1.]) self.action_space = spaces.Box(low, high) self.tuck_pose = { 'left': np.array([-0.08, -1.0, -1.19, 1.94, 0.67, 1.03, -0.50]) } self.start_pose = { 'left': np.array([-0.21, -0.75, -1.4, 1.61, 0.60, 0.81, -0.52]) } ## starting pose self.init_qpos = self.data.qpos.copy().flatten() self.init_qpos[1:8] = np.array(self.start_pose["left"]).T ## ik setup urdf_filename = osp.join(dirname, "urdf", "baxter.urdf") with open(urdf_filename) as f: urdf = f.read() # mode; Speed, Distance, Manipulation1, Manipulation2 self.ik_solver = TRAC_IK( "base", "left_gripper", urdf, 0.005, # default seconds 1e-5, # default epsilon "Speed") self.old_state = np.zeros((9, )) self.max_num_steps = 50 print("INIT DONE!") ## gym methods def reset_model(self): print("last state:", self.old_state) print("New Episode!") qpos = self.init_qpos + self.np_random.uniform( low=-.005, high=.005, size=self.model.nq) qvel = self.init_qvel + self.np_random.uniform( low=-.005, high=.005, size=self.model.nv) ## random target location qpos[-2:] = qpos[-2:] + self.np_random.uniform( low=-0.15, high=0.15, size=2) ## random box location qpos[8:10] = qpos[8:10] + self.np_random.uniform( low=-0.15, high=0.15, size=2) self.set_state(qpos, qvel) target_pos = np.array([0.6, 0.3, 0.15]) target_quat = np.array([1.0, 0.0, 0.0, 0]) target = np.concatenate((target_pos, target_quat)) action_jt_space = self.do_ik(ee_target=target, jt_pos=self.data.qpos[1:8].flat) if action_jt_space is not None: self.apply_action(action_jt_space) ## for calculating velocities # self.old_state = np.zeros((6,)) self.contacted = False self.out_of_bound = 0 self.num_step = 0 ob = self._get_obs() gripper_pose = ob[:2] box_pose = ob[2:4] target_pose = ob[4:6] relative_ob = np.concatenate( [gripper_pose, box_pose - gripper_pose, target_pose - box_pose]) return relative_ob def viewer_setup(self): # cam_pos = np.array([0.1, 0.0, 0.7, 0.01, -45., 0.]) cam_pos = np.array([1.0, 0.0, 0.7, 0.5, -45, 180]) self.set_cam_position(self.viewer, cam_pos) def _get_obs(self): ee_x, ee_y = self.data.site_xpos[0][:2] box_x, box_y = self.data.site_xpos[1][:2] box_orient_x, box_orient_y, box_orient_z = euler_from_matrix( self.data.site_xmat[1].reshape((3, 3)), 'rxyz') target_x, target_y = self.data.site_xpos[2][:2] state = np.array([ ee_x, ee_y, box_x, box_y, box_orient_x, box_orient_y, box_orient_z, target_x, target_y ]) vel = (state - self.old_state) / self.dt self.old_state = state.copy() return state def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq, ) and qvel.shape == ( self.model.nv, ) self.model.data.qpos = qpos self.model.data.qvel = qvel self.model._compute_subtree() #pylint: disable=W0212 self.model.forward() ## my methods def apply_action(self, action): ctrl = self.data.ctrl.copy() # print(ctrl.shape) ctrl[:7, 0] = np.array(action) self.data.ctrl = ctrl self.do_simulation(ctrl, 1000) def do_ik(self, 
ee_target, jt_pos): # print("starting to do ik") # print(ee_target[:3]) # Populate seed with current angles if not provided init_pose_trac = tracik.DoubleVector() for i in range(7): init_pose_trac.push_back(jt_pos[i]) x, y, z, qx, qy, qz, qw = ee_target qout = list( self.ik_solver.CartToJnt(init_pose_trac, x, y, z, qx, qy, qz, qw)) if (len(qout) > 0): # print("ik sol:",qout) return qout else: print("!no result found") return None def close_gripper(self, left_gap=0): pass def set_cam_position(self, viewer, cam_pos): for i in range(3): viewer.cam.lookat[i] = cam_pos[i] viewer.cam.distance = cam_pos[3] viewer.cam.elevation = cam_pos[4] viewer.cam.azimuth = cam_pos[5] viewer.cam.trackbodyid = -1 def _step(self, action): ## hack for the init of mujoco.env if (action.shape[0] > 2): return np.zeros((9, 1)), 0, False, {} self.num_step += 1 old_action_jt_space = self.data.qpos[1:8].T.copy() ## parsing of primitive actions delta_x, delta_y = action # print("delta x:%.4f, y:%.4f"%(delta_x, delta_y)) x, y = self.old_state[:2].copy() # print("old x:%.4f, y:%.4f"%(x,y)) x += delta_x * 0.05 y += delta_y * 0.05 # print("x:%.4f, y:%.4f"%(x,y)) # print("x:%.4f,y:%.4f"%(0.2*x + 0.6 , 0.3*y + 0.3)) out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6) if np.abs(delta_x * 0.05) > 0.0001 or np.abs(delta_y * 0.05) > 0.0001: target_pos = np.array([x, y, 0.15]) target_quat = np.array([1.0, 0.0, 0.0, 0]) target = np.concatenate((target_pos, target_quat)) action_jt_space = self.do_ik(ee_target=target, jt_pos=self.data.qpos[1:8].flat) if (action_jt_space is not None) and (not out_of_bound): # print("ik:", action_jt_space) self.apply_action(action_jt_space) else: action_jt_space = old_action_jt_space.copy() else: action_jt_space = old_action_jt_space.copy() # print("controller:",self.data.qpos[1:8].T) ## getting state ob = self._get_obs() gripper_pose = ob[:2] box_pose = ob[2:4] target_pose = ob[-2:] #print("new gripper_pose", gripper_pose, "block pose:", box_pose) ## reward function definition w = [0.1, 1., 0.01, 1., -1e-1, -1e-3, -1] # reward_grip_box = np.linalg.norm(old_box_pose- old_gripper_pose) - np.linalg.norm(box_pose- gripper_pose) # reward_box_target = np.linalg.norm(old_box_pose- old_target_pose) - np.linalg.norm(box_pose- target_pose) reward_grip_box = -np.linalg.norm(box_pose - gripper_pose) reward_box_target = -np.linalg.norm(box_pose - target_pose) reward_first_contact = (np.linalg.norm(box_pose - gripper_pose) < 0.05) and (not self.contacted) penalty = 0. 
if out_of_bound: self.out_of_bound += 1 if (x < 0.4): penalty += (x - 0.4)**2 elif (x > 0.8): penalty += (x - 0.8)**2 if (y < 0.): penalty += (y - 0.)**2 elif (y > 0.6): penalty += (y - 0.6)**2 if (reward_first_contact == 1): self.contacted = True reward_reaching_goal = np.linalg.norm( box_pose - target_pose) < 0.02 #assume: my robot has 2cm error # total_reward = w[1]*reward_box_target \ # + w[0]*reward_grip_box \ # + w[6] * out_of_bound \ # + w[5] * np.square(action).sum() \ # + w[2]*reward_first_contact \ # + w[3]*reward_reaching_goal # + w[4]*np.square(action_jt_space-old_action_jt_space).sum() \ total_reward = -1 * (not reward_reaching_goal) box_x, box_y, box_z = self.data.site_xpos[1] info = { "r_grip_box": reward_grip_box, \ "r_box_target" : reward_box_target, \ "r_contact" : reward_first_contact, # "r_reached" : reward_reaching_goal \ } if reward_reaching_goal == 1: done = True info["done"] = "goal reached" elif box_z < -0.02 or box_x < 0.4 or box_x > 0.8 or box_y < 0.0 or box_y > 0.6: done = True info["done"] = "box out of bounds" elif (self.num_step > self.max_num_steps): done = True info["done"] = "max_steps_reached" else: done = False info['absolute_ob'] = ob.copy() relative_ob = ob relative_ob[-2:] -= relative_ob[2:4] relative_ob[2:4] -= relative_ob[:2] return relative_ob, total_reward, done, info def apply_hindsight(self, states, actions, goal_state): '''generates hindsight rollout based on the goal ''' goal = np.array([0., 0.]) her_states, her_rewards = [], [] for i in range(len(actions)): state = states[i] state[-2:] = goal.copy() reward = self.calc_reward(state, goal, actions[i]) her_states.append(state) her_rewards.append(reward) goal_state[-2:] = goal.copy() her_states.append(goal_state) return her_states, her_rewards def calc_reward(self, state, goal, action): ## parsing of primitive actions delta_x, delta_y = action x, y = self.old_state[:2].copy() x += delta_x * 0.05 y += delta_y * 0.05 out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6) gripper_pose = state[:2] box_pose = state[2:4] + gripper_pose target_pose = goal + box_pose ## reward function definition w = [0.1, 1., 0.01, 1., -1e-1, -1e-3, -1] reward_grip_box = -np.linalg.norm(box_pose - gripper_pose) reward_box_target = -np.linalg.norm(box_pose - target_pose) reward_first_contact = (np.linalg.norm(box_pose - gripper_pose) < 0.05) and (not self.contacted) reward_reaching_goal = np.linalg.norm( box_pose - target_pose) < 0.02 #assume: my robot has 2cm error # total_reward = w[1]*reward_box_target \ # + w[0]*reward_grip_box \ # + w[6] * out_of_bound \ # + w[5] * np.square(action).sum() \ # + w[3]*reward_reaching_goal # + w[2]*reward_first_contact \ total_reward = -1 * (not reward_reaching_goal) return total_reward
class BaxterEnv(mujoco_env.MujocoEnv, utils.EzPickle): """cts env, 6dim state space: relative state space position of gripper and (target-gripper) random restarts for target on the table reward function: - 1(not reaching) actions: (delta_x, delta_y, delta_z) 5cm push starting state: (0.63, 0.2, 0.55, 0.3) """ def __init__(self, max_len=10): dirname = os.path.dirname(os.path.abspath(__file__)) mujoco_env.MujocoEnv.__init__( self, os.path.join(dirname, "mjc/baxter_orient_left_3dreacher.xml"), 1) utils.EzPickle.__init__(self) ## mujoco things # task space action space low = np.array([-1., -1., -1]) high = np.array([1., 1., 1]) self.action_space = spaces.Box(low, high) self.tuck_pose = { 'left': np.array([-0.08, -1.0, -1.19, 1.94, 0.67, 1.03, -0.50]) } self.start_pose = { 'left': np.array([-0.21, -0.75, -1.4, 1.61, 0.60, 0.81, -0.52]) } ## starting pose self.init_qpos = self.data.qpos.copy().flatten() self.init_qpos[1:8] = np.array(self.start_pose["left"]).T ## ik setup urdf_filename = osp.join(dirname, "urdf", "baxter.urdf") with open(urdf_filename) as f: urdf = f.read() # mode; Speed, Distance, Manipulation1, Manipulation2 self.ik_solver = TRAC_IK( "base", "left_gripper", urdf, 0.005, # default seconds 1e-5, # default epsilon "Speed") self.old_state = np.zeros((6, )) self.max_num_steps = max_len print("INIT DONE!") ## gym methods def reset_model(self): print("last state:", self.old_state) print("New Episode!") qpos = self.init_qpos + self.np_random.uniform( low=-.005, high=.005, size=self.model.nq) qvel = self.init_qvel + self.np_random.uniform( low=-.005, high=.005, size=self.model.nv) ## random target location qpos[-3:-1] = qpos[-3:-1] + self.np_random.uniform( low=-0.15, high=0.15, size=2) qpos[-1] = qpos[-1] + self.np_random.uniform( low=-0.1, high=0.1, size=1) self.set_state(qpos, qvel) target_pos = np.array([0.6, 0.3, 0.2]) target_quat = np.array([1.0, 0.0, 0.0, 0]) target = np.concatenate((target_pos, target_quat)) action_jt_space = self.do_ik(ee_target=target, jt_pos=self.data.qpos[1:8].flat) if action_jt_space is not None: self.apply_action(action_jt_space) ## for calculating velocities # self.old_state = np.zeros((6,)) self.contacted = False self.out_of_bound = 0 self.num_step = 0 ob = self._get_obs() gripper_pose = ob[:3] target_pose = ob[-3:] relative_ob = np.concatenate( [gripper_pose, target_pose - gripper_pose]) return relative_ob def viewer_setup(self): # cam_pos = np.array([0.1, 0.0, 0.7, 0.01, -45., 0.]) cam_pos = np.array([1.0, 0.0, 0.7, 0.5, -45, 180]) self.set_cam_position(self.viewer, cam_pos) def _get_obs(self): ee_x, ee_y, ee_z = self.data.site_xpos[0][:3] target_x, target_y, target_z = self.data.site_xpos[1][:3] state = np.array([ee_x, ee_y, ee_z, target_x, target_y, target_z]) vel = (state - self.old_state) / self.dt self.old_state = state.copy() return state def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq, ) and qvel.shape == ( self.model.nv, ) self.model.data.qpos = qpos self.model.data.qvel = qvel # self.model._compute_subtree() #pylint: disable=W0212 self.model.forward() ## my methods def apply_action(self, action): ctrl = self.data.ctrl.copy() # print(ctrl.shape) ctrl[:7, 0] = np.array(action) self.data.ctrl = ctrl self.do_simulation(ctrl, 1000) def do_ik(self, ee_target, jt_pos): # print("starting to do ik") # print(ee_target[:3]) # Populate seed with current angles if not provided init_pose_trac = tracik.DoubleVector() for i in range(7): init_pose_trac.push_back(jt_pos[i]) x, y, z, qx, qy, qz, qw = ee_target qout = list( 
self.ik_solver.CartToJnt(init_pose_trac, x, y, z, qx, qy, qz, qw)) if (len(qout) > 0): # print("ik sol:",qout) return qout else: print("!no result found") return None def set_cam_position(self, viewer, cam_pos): for i in range(3): viewer.cam.lookat[i] = cam_pos[i] viewer.cam.distance = cam_pos[3] viewer.cam.elevation = cam_pos[4] viewer.cam.azimuth = cam_pos[5] viewer.cam.trackbodyid = -1 def _step(self, action): ## hack for the init of mujoco.env if (action.shape[0] > 3): return np.zeros((6, 1)), 0, False, {} self.num_step += 1 old_action_jt_space = self.data.qpos[1:8].T.copy() ## parsing of primitive actions delta_x, delta_y, delta_z = action # print("delta x:%.4f, y:%.4f"%(delta_x, delta_y)) x, y, z = self.old_state[:3].copy() # print("old x:%.4f, y:%.4f, z:%.4f"%(x,y,z)) # print("delta x:%.4f, y:%.4f, z:%.4f"%(delta_x, delta_y,delta_z)) x += delta_x * 0.05 y += delta_y * 0.05 z += delta_z * 0.05 # print("x:%.4f, y:%.4f, z:%.4f"%(x,y,z)) out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6) or (z < 0.08 or z > 0.5) if np.abs(delta_x * 0.05) > 0.0001 or np.abs( delta_y * 0.05) > 0.0001 or np.abs(delta_z * 0.05) > 0.0001: target_pos = np.array([x, y, z]) target_quat = np.array([1.0, 0.0, 0.0, 0]) target = np.concatenate((target_pos, target_quat)) action_jt_space = self.do_ik(ee_target=target, jt_pos=self.data.qpos[1:8].flat) if (action_jt_space is not None) and (not out_of_bound): # print("ik:", action_jt_space) self.apply_action(action_jt_space) else: action_jt_space = old_action_jt_space.copy() else: action_jt_space = old_action_jt_space.copy() # print("controller:",self.data.qpos[1:8].T) ## getting state ob = self._get_obs() gripper_pose = ob[:3] target_pose = ob[3:6] ## reward function definition reward_reaching_goal = np.linalg.norm(gripper_pose - target_pose) < 0.05 total_reward = -1 * (not reward_reaching_goal) info = {} if reward_reaching_goal == 1: done = True info["done"] = "goal reached" elif (self.num_step > self.max_num_steps): done = True info["done"] = "max_steps_reached" else: done = False info['absolute_ob'] = ob.copy() relative_ob = np.concatenate( [gripper_pose, target_pose - gripper_pose]) return relative_ob, total_reward, done, info def apply_hindsight(self, states, actions, goal_state): '''generates hindsight rollout based on the goal ''' goal = states[-1][:3] ## this is the absolute goal location her_states, her_rewards = [], [] for i in range(len(actions)): state = states[i] state[-3:] = goal.copy() - state[:3] reward = self.calc_reward(state, goal, actions[i]) her_states.append(state) her_rewards.append(reward) goal_state[-3:] = np.array([0., 0., 0.]) her_states.append(goal_state) return her_states, her_rewards def calc_reward(self, state, goal, action): gripper_pose = state[:3] target_pose = state[-3:] + gripper_pose ## reward function definition reward_reaching_goal = np.linalg.norm( gripper_pose - target_pose) < 0.03 #assume: my robot has 2cm error total_reward = -1 * (not reward_reaching_goal) return total_reward
class BlueIK: def __init__(self): # load in robot kinematic information self.baselink = rospy.get_param("blue_hardware/baselink") self.endlink = rospy.get_param("blue_hardware/endlink") self.posture_target = rospy.get_param("blue_hardware/posture_target") urdf = rospy.get_param("/robot_description") # set up tf2 for transformation lookups self.tf_buffer = tf2_ros.Buffer( rospy.Duration(1.0)) # tf buffer length tf_listener = tf2_ros.TransformListener(self.tf_buffer) # build trac-ik solver self.ik = TRAC_IK(self.baselink, self.endlink, urdf, 0.01, 5e-4, "Distance") # "Manipulation2") # "Manipulation1") rospy.Service('inverse_kinematics', InverseKinematics, self.handle_ik_request) def ik_solution(self, target_pose, seed_joint_positions, solver="trac-ik"): solver = solver.lower() # use the requested solver, default to trac-ik otherwise # currently only trac-ik is supported if solver == "trac-ik": ik_joints = self.ik.CartToJnt( seed_joint_positions, target_pose.position.x, target_pose.position.y, target_pose.position.z, target_pose.orientation.x, target_pose.orientation.y, target_pose.orientation.z, target_pose.orientation.w) else: ik_joints = self.ik.CartToJnt( seed_joint_positions, target_pose.position.x, target_pose.position.y, target_pose.position.z, target_pose.orientation.x, target_pose.orientation.y, target_pose.orientation.z, target_pose.orientation.w) if not len(ik_joints) == len(seed_joint_positions): return False return ik_joints def handle_ik_request(self, request): if len(request.seed_joint_positions) == len(self.posture_target): seed_joint_positions = request.seed_joint_positions else: seed_joint_positions = self.posture_target try: # apply lookup transformation desired pose and apply the transformation transform = self.tf_buffer.lookup_transform( self.baselink, request.end_effector_pose.header.frame_id, rospy.Time(0), # get the tf at first available time rospy.Duration(0.1)) # wait for 0.1 second pose_in_baselink_frame = tf2_geometry_msgs.do_transform_pose( request.end_effector_pose, transform) # get the ik solution ik_joints = self.ik_solution(pose_in_baselink_frame.pose, seed_joint_positions) if ik_joints == False: return InverseKinematicsResponse(False, []) else: return InverseKinematicsResponse(True, ik_joints) except: return InverseKinematicsResponse(False, [])
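A hedged client-side sketch for the inverse_kinematics service above. The InverseKinematics .srv definition is not shown here, so the import path is a guess and only the request fields used by the handler (end_effector_pose, seed_joint_positions) are assumed; an empty seed makes the server fall back to its posture target.

import rospy
from geometry_msgs.msg import PoseStamped
from blue_msgs.srv import InverseKinematics   # assumed package; use the actual .srv location

rospy.init_node("ik_client_demo")
rospy.wait_for_service("inverse_kinematics")
ik_srv = rospy.ServiceProxy("inverse_kinematics", InverseKinematics)

target = PoseStamped()
target.header.stamp = rospy.Time.now()
target.header.frame_id = "base_link"          # any frame the server's TF buffer can resolve
target.pose.position.x = 0.4
target.pose.position.z = 0.3
target.pose.orientation.w = 1.0

# The response carries a success flag and the solution joints; field names depend on the .srv file.
resp = ik_srv(end_effector_pose=target, seed_joint_positions=[])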
class BlueIK: def _setup(self): # build tf listener self.tfBuffer = tf2_ros.Buffer() self.listener = tf2_ros.TransformListener(self.tfBuffer) # load in ros parameters self.baselink = rospy.get_param("blue_hardware/baselink") self.endlink = rospy.get_param("blue_hardware/endlink") urdf = rospy.get_param("/robot_description") flag, self.tree = kdl_parser.treeFromString(urdf) # build kinematic chain and fk and jacobian solvers chain_ee = self.tree.getChain(self.baselink, self.endlink) self.fk_ee = kdl.ChainFkSolverPos_recursive(chain_ee) self.jac_ee = kdl.ChainJntToJacSolver(chain_ee) # building robot joint state self.num_joints = chain_ee.getNrOfJoints() self.joint_names = rospy.get_param("blue_hardware/joint_names") self.joint_names = self.joint_names[:self.num_joints] if self.debug: rospy.loginfo(self.joint_names) self.joints = kdl.JntArray(self.num_joints) # todo make seperate in abstract class self.ik = TRAC_IK(self.baselink, self.endlink, urdf, 0.01, 1e-4, "Distance") # "Manipulation2") # "Manipulation1") def publish_ik_sol(self, target, joints): # target is a target pose object # joints is the starting joint seed seed_state = [] for j in joints: seed_state.append(j) result = self.ik.CartToJnt(seed_state, target.position.x, target.position.y, target.position.z, target.orientation.x, target.orientation.y, target.orientation.z, target.orientation.w, ) if not len(result) == self.num_joints: return msg = Float64MultiArray() msg.data = result if self.debug: pass rospy.logerr("ik result") rospy.logerr(result) self.command_pub.publish(msg) self.command_pub_ctc.publish(msg) def update_joints(self, joint_msg): if self.first: return temp_joints = kdl.JntArray(self.num_joints) for i, n in enumerate(self.joint_names): index = joint_msg.name.index(n) temp_joints[i] = joint_msg.position[index] self.joints = temp_joints ## TODO make publishing the result seperately self.publish_ik_sol(self.target_pose, self.joints) def command_callback(self, msg): trans = self.tfBuffer.lookup_transform(self.baselink, msg.header.frame_id, rospy.Time()) msg = tf2_geometry_msgs.do_transform_pose(msg, trans); temp_target = Pose() temp_target.position.x = msg.pose.position.x # x temp_target.position.y = msg.pose.position.y # y temp_target.position.z = msg.pose.position.z # z temp_target.orientation.x = msg.pose.orientation.x # qx temp_target.orientation.y = msg.pose.orientation.y # qy temp_target.orientation.z = msg.pose.orientation.z # qz temp_target.orientation.w = msg.pose.orientation.w # qw self.target_pose = temp_target if self.first: self.first = False def __init__(self, debug=False): rospy.init_node("blue_ik") self.debug = debug if self.debug: self.debug_count = 0 self._setup() self.target_pos = Pose() self.first = True rospy.Subscriber("pose_target/command", PoseStamped, self.command_callback) self.command_pub = rospy.Publisher("blue_controllers/joint_position_controller/command", Float64MultiArray, queue_size=1) self.command_pub_ctc = rospy.Publisher("blue_controllers/joint_ctc/command", Float64MultiArray, queue_size=1) rospy.Subscriber("/joint_states", JointState, self.update_joints)
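The node above subscribes to pose_target/command and republishes IK solutions as joint states arrive; a minimal sketch of a publisher that drives it, with placeholder pose values and assuming the chosen frame can be resolved by TF:

import rospy
from geometry_msgs.msg import PoseStamped

rospy.init_node("pose_target_demo")
pub = rospy.Publisher("pose_target/command", PoseStamped, queue_size=1)
rospy.sleep(0.5)                              # give the connection time to come up

msg = PoseStamped()
msg.header.stamp = rospy.Time.now()
msg.header.frame_id = "base_link"             # transformed into the baselink frame by the node
msg.pose.position.x = 0.3
msg.pose.position.z = 0.4
msg.pose.orientation.w = 1.0
pub.publish(msg)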
def main(): urdf = rospy.get_param('/robot_description') # print("urdf: ", urdf) rospy.loginfo('Constructing IK solver...') ik_solver = TRAC_IK("link0", "link7", urdf, 0.005, # default seconds 1e-5, # default epsilon "Speed") # print("Number of joints:") # print(ik_solver.getNrOfJointsInChain()) # print("Joint names:") # print(ik_solver.getJointNamesInChain(urdf)) # print("Link names:") # print(ik_solver.getLinkNamesInChain()) qinit = [0.] * 7 #Initial status of the joints as seed. x = y = z = 0.0 rx = ry = rz = 0.0 rw = 1.0 bx = by = bz = 0.001 brx = bry = brz = 0.1 # Generate a set of random coords in the arm workarea approx NUM_COORDS = 200 rand_coords = [] for _ in range(NUM_COORDS): x = random() * 0.5 y = random() * 0.6 + -0.3 z = random() * 0.7 + -0.35 rand_coords.append((x, y, z)) # Check some random coords with fixed orientation avg_time = 0.0 num_solutions_found = 0 for x, y, z in rand_coords: ini_t = time.time() sol = ik_solver.CartToJnt(qinit, x, y, z, rx, ry, rz, rw, bx, by, bz, brx, bry, brz) fin_t = time.time() call_time = fin_t - ini_t # print "IK call took: " + str(call_time) avg_time += call_time if sol: # print "X, Y, Z: " + str( (x, y, z) ) print "SOL: " + str(sol) num_solutions_found += 1 avg_time = avg_time / NUM_COORDS print print "Found " + str(num_solutions_found) + " of 200 random coords" print "Average IK call time: " + str(avg_time) print # Check if orientation bounds work avg_time = 0.0 num_solutions_found = 0 brx = bry = brz = 9999.0 # We don't care about orientation for x, y, z in rand_coords: ini_t = time.time() sol = ik_solver.CartToJnt(qinit, x, y, z, rx, ry, rz, rw, bx, by, bz, brx, bry, brz) fin_t = time.time() call_time = fin_t - ini_t # print "IK call took: " + str(call_time) avg_time += call_time if sol: # print "X, Y, Z: " + str( (x, y, z) ) # print "SOL: " + str(sol) num_solutions_found += 1 avg_time = avg_time / NUM_COORDS print print "Found " + str(num_solutions_found) + " of 200 random coords ignoring orientation" print "Average IK call time: " + str(avg_time) print # Check if big position and orientation bounds work avg_time = 0.0 num_solutions_found = 0 bx = by = bz = 9999.0 brx = bry = brz = 9999.0 for x, y, z in rand_coords: ini_t = time.time() sol = ik_solver.CartToJnt(qinit, x, y, z, rx, ry, rz, rw, bx, by, bz, brx, bry, brz) fin_t = time.time() call_time = fin_t - ini_t # print "IK call took: " + str(call_time) avg_time += call_time if sol: # print "X, Y, Z: " + str( (x, y, z) ) # print "SOL: " + str(sol) num_solutions_found += 1 avg_time = avg_time / NUM_COORDS print print "Found " + str(num_solutions_found) + " of 200 random coords ignoring everything" print "Average IK call time: " + str(avg_time) print
class IK(object): def __init__(self, base_link, tip_link, timeout=0.005, epsilon=1e-5, solve_type="Speed", urdf_string=None): """ Create a TRAC_IK instance and keep track of it. :param str base_link: Starting link of the chain. :param str tip_link: Last link of the chain. :param float timeout: Timeout in seconds for the IK calls. :param float epsilon: Error epsilon. :param solve_type str: Type of solver, can be: Speed (default), Distance, Manipulation1, Manipulation2 :param urdf_string str: Optional arg, if not given URDF is taken from the param server at /robot_description. """ if urdf_string is None: raise NotImplementedError("Can't load urdf_string") #urdf_string = rospy.get_param('/robot_description') self._urdf_string = urdf_string self._timeout = timeout self._epsilon = epsilon self._solve_type = solve_type self.base_link = base_link self.tip_link = tip_link self._ik_solver = TRAC_IK(self.base_link, self.tip_link, self._urdf_string, self._timeout, self._epsilon, self._solve_type) self.number_of_joints = self._ik_solver.getNrOfJointsInChain() self.joint_names = self._ik_solver.getJointNamesInChain( self._urdf_string) self.link_names = self._ik_solver.getLinkNamesInChain() def get_ik(self, qinit, x, y, z, rx, ry, rz, rw, bx=1e-5, by=1e-5, bz=1e-5, brx=1e-3, bry=1e-3, brz=1e-3): """ Do the IK call. :param list of float qinit: Initial status of the joints as seed. :param float x: X coordinates in base_frame. :param float y: Y coordinates in base_frame. :param float z: Z coordinates in base_frame. :param float rx: X quaternion coordinate. :param float ry: Y quaternion coordinate. :param float rz: Z quaternion coordinate. :param float rw: W quaternion coordinate. :param float bx: X allowed bound. :param float by: Y allowed bound. :param float bz: Z allowed bound. :param float brx: rotation over X allowed bound. :param float bry: rotation over Y allowed bound. :param float brz: rotation over Z allowed bound. :return: joint values or None if no solution found. :rtype: tuple of float. """ if len(qinit) != self.number_of_joints: raise Exception( "qinit has length %i and it should have length %i" % (len(qinit), self.number_of_joints)) solution = self._ik_solver.CartToJnt(qinit, x, y, z, rx, ry, rz, rw, bx, by, bz, brx, bry, brz) if solution: return solution else: return None def get_joint_limits(self): """ Return lower bound limits and upper bound limits for all the joints in the order of the joint names. """ lb = self._ik_solver.getLowerBoundLimits() ub = self._ik_solver.getUpperBoundLimits() return lb, ub def set_joint_limits(self, lower_bounds, upper_bounds): """ Set joint limits for all the joints. :arg list lower_bounds: List of float of the lower bound limits for all joints. :arg list upper_bounds: List of float of the upper bound limits for all joints. """ if len(lower_bounds) != self.number_of_joints: raise Exception( "lower_bounds array size mismatch, it's size %i, should be %i" % (len(lower_bounds), self.number_of_joints)) if len(upper_bounds) != self.number_of_joints: raise Exception( "upper_bounds array size mismatch, it's size %i, should be %i" % (len(upper_bounds), self.number_of_joints)) self._ik_solver.setKDLLimits(lower_bounds, upper_bounds)
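A short usage sketch for the IK wrapper above, following the same call pattern as the PR2 example below; the chain links come from that example and the target pose is arbitrary.

import rospy

urdf = rospy.get_param("/robot_description")     # the wrapper requires an explicit URDF string
ik = IK("torso_lift_link", "r_wrist_roll_link", urdf_string=urdf)

qinit = [0.0] * ik.number_of_joints              # neutral seed posture
sol = ik.get_ik(qinit,
                0.45, 0.1, 0.3,                  # target position (m) in the base frame
                0.0, 0.0, 0.0, 1.0)              # target quaternion (x, y, z, w)
if sol is None:
    print("no IK solution found within the timeout")
else:
    print(dict(zip(ik.joint_names, sol)))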
from trac_ik_python.trac_ik_wrap import TRAC_IK
import rospy
from numpy.random import random
import time

if __name__ == '__main__':
    # roslaunch pr2_description upload_pr2.launch
    # Needed beforehand
    urdf = rospy.get_param('/robot_description')
    # params of constructor:
    # base_link, tip_link, urdf_string, timeout, epsilon, solve_type="Speed"
    # solve_type can be: Distance, Speed, Manipulation1, Manipulation2
    ik_solver = TRAC_IK("torso_lift_link",
                        "r_wrist_roll_link",
                        urdf,
                        0.005,  # default seconds
                        1e-5,   # default epsilon
                        "Speed")
    print("Number of joints:")
    print(ik_solver.getNrOfJointsInChain())
    print("Joint names:")
    print(ik_solver.getJointNamesInChain(urdf))
    print("Link names:")
    print(ik_solver.getLinkNamesInChain())

    qinit = [0.] * 7
    x = y = z = 0.0
    rx = ry = rz = 0.0
    rw = 1.0
    bx = by = bz = 0.001
class BaxterEnv(mujoco_env.MujocoEnv, utils.EzPickle): """cts env, 6dim state space: relative state space position of gripper, (block-gripper) and (target-block) random restarts for block and target on the table reward function: - 1(not reaching) actions: (delta_x, delta_y, delta_z, gap) 5cm push starting state: (0.63, 0.2, 0.59, 0.27, 0.55, 0.3) max_num_steps = 50 """ def __init__(self, max_len=10): dirname = os.path.dirname(os.path.abspath(__file__)) mujoco_env.MujocoEnv.__init__( self, os.path.join( dirname, "mjc/baxter_orient_left_cts_with_grippers_modified.xml"), 1) utils.EzPickle.__init__(self) ## mujoco things # task space action space low = np.array([-1., -1., -1., -1.]) high = np.array([1., 1., 1., 1.]) self.action_space = spaces.Box(low, high) self.tuck_pose = { 'left': np.array([-0.08, -1.0, -1.19, 1.94, 0.67, 1.03, -0.50]) } self.start_pose = { 'left': np.array([-0.21, -0.75, -1.4, 1.61, 0.60, 0.81, -0.52]) } ## starting pose self.init_qpos = self.data.qpos.copy().flatten() self.init_qpos[1:8] = np.array(self.start_pose["left"]).T ## ik setup urdf_filename = osp.join(dirname, "urdf", "baxter_modified.urdf") with open(urdf_filename) as f: urdf = f.read() # mode; Speed, Distance, Manipulation1, Manipulation2 self.ik_solver = TRAC_IK( "base", "left_gripper", urdf, 0.005, # default seconds 1e-5, # default epsilon "Speed") self.old_state = np.zeros((9, )) self.max_num_steps = max_len print("INIT DONE!") ## gym methods def reset_model(self): # print("last state:",self.old_state) # print("New Episode!") reset_state = self.np_random.uniform() > 0.5 if reset_state: grasped_qpos = np.array([ 0., 1.85833336e-01, 1.13869066e-01, -1.57743078e+00, 1.87249089e+00, 1.67964818e+00, 1.57880024e+00, 2.23699321e+00, 2.68245581e-02, -2.46668516e-02, 6.09046750e-01, 2.73356216e-01, 2.50803949e-03, 9.99880925e-01, -1.19302114e-02, 9.78048682e-03, 3.85171977e-04, 6.09046750e-01 - 0.55, 2.73356216e-01 - 0.3 ]) # 3.85171977e-04 ,-1.21567676e-01 , 2.00143005e-01]) ## random target location # grasped_qpos[-3:-1] = grasped_qpos[-3:-1] + self.np_random.uniform(low=-0.15, high=0.15, size=2) # grasped_qpos[-1] = grasped_qpos[-1] + self.np_random.uniform(low=0.1, high=0.3, size=1) qvel = self.init_qvel self.set_state(grasped_qpos, qvel) else: qpos = self.init_qpos + self.np_random.uniform( low=-.002, high=.002, size=self.model.nq) qvel = self.init_qvel + self.np_random.uniform( low=-.002, high=.002, size=self.model.nv) ## random box location qpos[10:12] = qpos[10:12] + self.np_random.uniform( low=-0.15, high=0.15, size=2) ## random target location qpos[-2:] = qpos[10:12] - np.array([0.55, 0.3]) # qpos[-3:-1] = qpos[-3:-1] + self.np_random.uniform(low=-0.15, high=0.15, size=2) # qpos[-1] = qpos[-1] + self.np_random.uniform(low=0.1, high=0.3, size=1) self.set_state(qpos, qvel) target_pos = np.array( list(qpos[10:12] + self.np_random.uniform(low=-0.05, high=0.05, size=2)) + [0.1]) target_quat = np.array([1.0, 0.0, 0.0, 0]) target = np.concatenate((target_pos, target_quat)) action_jt_space = self.do_ik(ee_target=target, jt_pos=self.data.qpos[1:8].flat) if action_jt_space is not None: self.apply_action(action_jt_space) self.close_gripper(gap=1) ## for calculating velocities # self.old_state = np.zeros((6,)) self.num_step = 0 ob = self._get_obs() gripper_pose = ob[:3] box_pose = ob[3:6] target_pose = ob[6:9] relative_ob = np.concatenate( [gripper_pose, box_pose - gripper_pose, target_pose - box_pose]) return relative_ob def viewer_setup(self): # cam_pos = np.array([0.1, 0.0, 0.7, 0.01, -45., 0.]) cam_pos = 
np.array([1.0, 0.0, 0.7, 0.5, -45, 180]) self.set_cam_position(self.viewer, cam_pos) def _get_obs(self): ee_x, ee_y, ee_z = self.data.site_xpos[0][:3] box_x, box_y, box_z = self.data.site_xpos[3][:3] target_x, target_y, target_z = self.data.site_xpos[4][:3] state = np.array([ ee_x, ee_y, ee_z, box_x, box_y, box_z, target_x, target_y, target_z ]) vel = (state - self.old_state) / self.dt self.old_state = state.copy() return state def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq, ) and qvel.shape == ( self.model.nv, ) self.model.data.qpos = qpos self.model.data.qvel = qvel self.model._compute_subtree() #pylint: disable=W0212 self.model.forward() ## my methods def apply_action(self, action): # print("cmd",action) # print("curr jt pos", self.data.qpos[1:8].T) ctrl = self.data.ctrl.copy() # print(ctrl.shape) ctrl[:7, 0] = np.array(action) self.data.ctrl = ctrl self.do_simulation(ctrl, 1000) # print("next jt pos", self.data.qpos[1:8].T) def do_ik(self, ee_target, jt_pos): # print("starting to do ik") # print(ee_target[:3]) # Populate seed with current angles if not provided init_pose_trac = tracik.DoubleVector() for i in range(7): init_pose_trac.push_back(jt_pos[i]) x, y, z, qx, qy, qz, qw = ee_target qout = list( self.ik_solver.CartToJnt(init_pose_trac, x, y, z, qx, qy, qz, qw)) if (len(qout) > 0): # print("ik sol:",qout) return qout else: print("!no result found") return None def close_gripper(self, gap=0): # print("before grip location", self.data.site_xpos[0]) # qpos = self.data.qpos.copy().flatten() # qpos[8] = (gap+1)*0.020833 # qpos[9] = -(gap+1)*0.020833 # qvel = self.data.qvel.copy().flatten() # print(qpos.shape, qvel.shape) # self.set_state(qpos, qvel) # print("after grip location", self.data.site_xpos[0]) # print("before grip", self.data.ctrl) ctrl = self.data.ctrl.copy() ctrl[:7, 0] = self.data.qpos[1:8].flatten() ctrl[7, 0] = (gap + 1) * 0.020833 ctrl[8, 0] = -(gap + 1) * 0.020833 self.data.ctrl = ctrl self.do_simulation(ctrl, 1000) # print("qpos", self.data.qpos[1:8].T) # print("before grip", self.data.ctrl) def set_cam_position(self, viewer, cam_pos): for i in range(3): viewer.cam.lookat[i] = cam_pos[i] viewer.cam.distance = cam_pos[3] viewer.cam.elevation = cam_pos[4] viewer.cam.azimuth = cam_pos[5] viewer.cam.trackbodyid = -1 def _step(self, action): ## hack for the init of mujoco.env if (action.shape[0] > 4): return np.zeros((9, 1)), 0, False, {} self.num_step += 1 old_action_jt_space = self.data.qpos[1:8].T.copy() ## parsing of primitive actions delta_x, delta_y, delta_z, gap = action # print("grip prev controller:",self.data.qpos[1:8].T) self.close_gripper(gap) # print("delta x:%.4f, y:%.4f"%(delta_x, delta_y)) x, y, z = self.old_state[:3].copy() # print("old x:%.4f, y:%.4f, z:%.4f"%(x,y,z)) curr_out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6) or ( z < -0.05 or z > 0.5) x += delta_x * 0.05 y += delta_y * 0.05 z += delta_z * 0.05 # print("x:%.4f, y:%.4f, z:%.4f"%(x,y,z)) # print("prev controller:",self.data.qpos[1:8].T) # print("x:%.4f,y:%.4f"%(0.2*x + 0.6 , 0.3*y + 0.3)) out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6) or (z < -0.05 or z > 0.5) if np.abs(delta_x * 0.05) > 0.0001 or np.abs( delta_y * 0.05) > 0.0001 or np.abs(delta_z * 0.05) > 0.0001: target_pos = np.array([x, y, z]) target_quat = np.array([1.0, 0.0, 0.0, 0]) target = np.concatenate((target_pos, target_quat)) action_jt_space = self.do_ik(ee_target=target, jt_pos=self.data.qpos[1:8].flat) if (action_jt_space is not None) and (not out_of_bound): # print("ik:", 
action_jt_space) self.apply_action(action_jt_space) else: action_jt_space = old_action_jt_space.copy() else: action_jt_space = old_action_jt_space.copy() # print("controller:",self.data.qpos[1:8].T) ## getting state ob = self._get_obs() gripper_pose = ob[:3] box_pose = ob[3:6] target_pose = ob[6:9] #print("new gripper_pose", gripper_pose, "block pose:", box_pose) ## reward function definition reward_reaching_goal = np.linalg.norm( box_pose - target_pose) < 0.05 #assume: my robot has 5cm error total_reward = -1 * (not reward_reaching_goal) box_x, box_y, box_z = self.data.site_xpos[3] info = {} if reward_reaching_goal == 1: done = True info["done"] = "goal reached" elif box_z < -0.02 or box_x < 0.4 or box_x > 0.8 or box_y < 0.0 or box_y > 0.6: done = True info["done"] = "box out of bounds" total_reward -= (self.max_num_steps - self.num_step) + 5 elif curr_out_of_bound: ## simulation got weird done = True info["done"] = "simulation unstable" total_reward -= (self.max_num_steps - self.num_step) + 5 elif (self.num_step > self.max_num_steps): done = True info["done"] = "max_steps_reached" else: done = False info['absolute_ob'] = ob.copy() relative_ob = np.concatenate( [gripper_pose, box_pose - gripper_pose, target_pose - box_pose]) return relative_ob, total_reward, done, info def apply_hindsight(self, states, actions, goal_state): '''generates hindsight rollout based on the goal ''' goal = states[-1][3:6] + states[ -1][:3] ## this is the absolute goal location her_states, her_rewards = [], [] for i in range(len(actions)): state = states[i] gripper_pose = state[:3] box_pose = state[3:6] + gripper_pose target_pose = state[6:9] + box_pose state[-3:] = goal.copy() - box_pose reward = self.calc_reward(state, goal, actions[i]) her_states.append(state) her_rewards.append(reward) goal_state[-3:] = np.array([0., 0., 0.]) her_states.append(goal_state) return her_states, her_rewards def calc_reward(self, state, goal, action): gripper_pose = state[:3] box_pose = state[3:6] + gripper_pose target_pose = goal + box_pose ## reward function definition reward_reaching_goal = np.linalg.norm(box_pose - target_pose) < 0.05 total_reward = -1 * (not reward_reaching_goal) return total_reward
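The apply_hindsight() methods in these environments relabel a finished rollout with the goal that was actually reached; a minimal sketch of how a HER-style replay buffer might use it, assuming states and actions were collected as lists during the episode (apply_hindsight returns one more state than rewards, so transitions pair her_states[i] with her_states[i + 1]):

def relabel_episode(env, states, actions, goal_state):
    # states[i] is the relative observation seen before actions[i]; goal_state is the final observation.
    her_states, her_rewards = env.apply_hindsight(list(states), list(actions), goal_state.copy())
    transitions = []
    for i in range(len(actions)):
        transitions.append((her_states[i], actions[i], her_rewards[i], her_states[i + 1]))
    return transitions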