Example #1
    def __init__(self,
                 base_link,
                 tip_link,
                 timeout=0.005,
                 epsilon=1e-5,
                 solve_type="Speed",
                 urdf_string=None):
        """
        Create a TRAC_IK instance and keep track of it.

        :param str base_link: Starting link of the chain.
        :param str tip_link: Last link of the chain.
        :param float timeout: Timeout in seconds for the IK calls.
        :param float epsilon: Error epsilon.
        :param str solve_type: Type of solver, can be:
            Speed (default), Distance, Manipulation1, Manipulation2
        :param str urdf_string: Optional; if not given, the URDF is taken
            from the param server at /robot_description.
        """
        if urdf_string is None:
            raise NotImplementedError("Can't load urdf_string")
            #urdf_string = rospy.get_param('/robot_description')
        self._urdf_string = urdf_string
        self._timeout = timeout
        self._epsilon = epsilon
        self._solve_type = solve_type
        self.base_link = base_link
        self.tip_link = tip_link
        self._ik_solver = TRAC_IK(self.base_link, self.tip_link,
                                  self._urdf_string, self._timeout,
                                  self._epsilon, self._solve_type)
        self.number_of_joints = self._ik_solver.getNrOfJointsInChain()
        self.joint_names = self._ik_solver.getJointNamesInChain(
            self._urdf_string)
        self.link_names = self._ik_solver.getLinkNamesInChain()
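For reference, a minimal end-to-end sketch of the TRAC_IK pattern that this wrapper and the examples below all follow. The import path, link names, URDF file and target pose are placeholders/assumptions, not taken from any snippet on this page:

# minimal sketch, assuming the SWIG bindings used by the examples below
from trac_ik_python.trac_ik_wrap import TRAC_IK  # import path is an assumption

with open("robot.urdf") as f:        # placeholder URDF file
    urdf = f.read()

ik_solver = TRAC_IK("base_link",     # placeholder chain start
                    "ee_link",       # placeholder chain tip
                    urdf,
                    0.005,           # timeout in seconds
                    1e-5,            # error epsilon
                    "Speed")         # Speed, Distance, Manipulation1 or Manipulation2

seed = [0.0] * ik_solver.getNrOfJointsInChain()   # joint seed for the solver
# CartToJnt(seed, x, y, z, qx, qy, qz, qw) returns an empty sequence on failure
sol = ik_solver.CartToJnt(seed, 0.3, 0.0, 0.4, 0.0, 0.0, 0.0, 1.0)
if len(sol) > 0:
    print("IK solution:", list(sol))
else:
    print("no IK solution found")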
Example #2
    def _setup(self):
        # set up Chebyshev points
        self.resolution = 100
        self.duration = 0.8

        # load in ros parameters
        self.baselink = rospy.get_param("blue_hardware/baselink")
        self.endlink = rospy.get_param("blue_hardware/endlink")
        urdf = rospy.get_param("/robot_description")
        flag, self.tree = kdl_parser.treeFromString(urdf)

        # build kinematic chain and fk and jacobian solvers
        chain_ee = self.tree.getChain(self.baselink, self.endlink)
        self.fk_ee = kdl.ChainFkSolverPos_recursive(chain_ee)
        self.jac_ee = kdl.ChainJntToJacSolver(chain_ee)

        # building robot joint state
        self.num_joints = chain_ee.getNrOfJoints()

        self.joint_names = rospy.get_param("blue_hardware/joint_names")
        self.joint_names = self.joint_names[:self.num_joints]
        if self.debug:
            rospy.loginfo(self.joint_names)

        self.joints = kdl.JntArray(self.num_joints)

        # TODO: make separate in abstract class
        self.ik = TRAC_IK(self.baselink,
                          self.endlink,
                          urdf,
                          0.005,
                          5e-5,
                          "Distance")
Example #3
File: arm.py Project: gustavollps/youbot
    def __init__(self, gripper_pub, joints_pub, urdf):
        self.joints_pub = joints_pub
        self.gripper_pub = gripper_pub
        self.ik_solver = TRAC_IK(
            "base_link",
            "tool",
            urdf,
            0.1,  # default seconds
            1e-5,  # default epsilon
            "Speed")
Example #4
    def __init__(self, max_len=50, near_init=False, fixed_obj_init=False):
        dirname = os.path.dirname(os.path.abspath(__file__))
        print(dirname)
        mujoco_env.MujocoEnv.__init__(
            self, os.path.join(dirname,
                               "mjc/baxter_orient_left_cts_rotate.xml"), 1)
        self.near_init = near_init
        self.fixed_obj_init = fixed_obj_init

        utils.EzPickle.__init__(self)

        ## mujoco things
        # task space action space

        low = np.array([-1., -1.])
        high = np.array([1., 1.])

        self.action_space = spaces.Box(low, high)

        self.tuck_pose = {
            'left': np.array([-0.08, -1.0, -1.19, 1.94, 0.67, 1.03, -0.50])
        }

        self.start_pose = {
            'left': np.array([-0.21, -0.75, -1.4, 1.61, 0.60, 0.81, -0.52])
        }

        ## starting pose
        self.init_qpos = self.data.qpos.copy().flatten()
        self.init_qpos[1:8] = np.array(self.start_pose["left"]).T

        ## ik setup
        urdf_filename = osp.join(dirname, "urdf", "baxter.urdf")

        with open(urdf_filename) as f:
            urdf = f.read()

        # mode: Speed, Distance, Manipulation1, Manipulation2
        self.ik_solver = TRAC_IK(
            "base",
            "left_gripper",
            urdf,
            0.005,  # default seconds
            1e-5,  # default epsilon
            "Speed")

        self.old_state = np.zeros((6, ))
        self.max_num_steps = max_len
        print("INIT DONE!")
Example #5
File: arm.py Project: gustavollps/youbot
class Arm:
    gripper_pub = None
    joints_sub = None
    joints_pub = None
    bx = by = bz = 0.001
    brx = bry = brz = 0.001
    kdl_kin = None

    ik_solver = None

    def __init__(self, gripper_pub, joints_pub, urdf):
        self.joints_pub = joints_pub
        self.gripper_pub = gripper_pub
        self.ik_solver = TRAC_IK(
            "base_link",
            "tool",
            urdf,
            0.1,  # default seconds
            1e-5,  # default epsilon
            "Speed")
        # print("Number of joints:")
        # print(self.ik_solver.getNrOfJointsInChain())
        # print("Joint names:")
        # print(self.ik_solver.getJointNamesInChain(urdf))
        # print("Link names:")
        # print(self.ik_solver.getLinkNamesInChain())

    def gripperClose(self, bool):
        msg = Bool()
        msg.data = bool
        self.gripper_pub.publish(msg)

    def solve(self, pos, quat, trans, rotat, q0):
        rotation = euler_from_quaternion(quat)
        # print(trans, dist_3d(pos, trans), dist_3d(rotation, rotat))
        factor = 5
        if trans is not None and (
                abs(pos[0] - trans[0]) > factor * self.bx
                or abs(pos[1] - trans[1]) > factor * self.by
                or abs(pos[2] - trans[2]) > factor * self.bz
                or abs(rotation[0] - rotat[0]) > factor * self.brx
                or abs(rotation[1] - rotat[1]) > factor * self.bry
                or abs(rotation[2] - rotat[2]) > factor * self.brz):
            sol = self.ik_solver.CartToJnt(q0, pos[0], pos[1], pos[2], quat[0],
                                           quat[1], quat[2], quat[3], self.bx,
                                           self.by, self.bz, self.brx,
                                           self.bry, self.brz)
            msg_cmd = Float64MultiArray()
            msg_cmd.data = sol
            if len(sol) != 0:
                self.joints_pub.publish(msg_cmd)
            else:
                # print('fail')
                return True, None
            # print('pub', sol)
            return True, sol
        else:
            return False, None
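A hypothetical wiring of the Arm class above, showing one solve call. The node name, topic names, target pose and joint seed are assumptions, not taken from the project:

# hypothetical usage of the Arm class above (topic names and poses are placeholders)
import rospy
from std_msgs.msg import Bool, Float64MultiArray

rospy.init_node("youbot_arm_demo")
gripper_pub = rospy.Publisher("gripper_command", Bool, queue_size=1)                  # assumed topic
joints_pub = rospy.Publisher("arm_joint_command", Float64MultiArray, queue_size=1)    # assumed topic
urdf = rospy.get_param("/robot_description")

arm = Arm(gripper_pub, joints_pub, urdf)

target_pos = [0.3, 0.0, 0.2]        # desired tool position (m)
target_quat = [0.0, 0.0, 0.0, 1.0]  # desired tool orientation (x, y, z, w)
current_trans = [0.25, 0.0, 0.25]   # current tool translation, e.g. from tf
current_rot = [0.0, 0.0, 0.0]       # current tool orientation as Euler angles
seed = [0.0] * 5                    # joint seed; the youBot arm has 5 joints

# solve() only runs IK (and publishes) if the target differs from the current pose
# by more than factor * tolerance in some axis; otherwise it returns (False, None)
attempted, solution = arm.solve(target_pos, target_quat, current_trans, current_rot, seed)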
Example #6
    def __init__(self):
        # load in robot kinematic information
        self.baselink = rospy.get_param("blue_hardware/baselink")
        self.endlink = rospy.get_param("blue_hardware/endlink")
        self.posture_target = rospy.get_param("blue_hardware/posture_target")
        urdf = rospy.get_param("/robot_description")

        # set up tf2 for transformation lookups
        self.tf_buffer = tf2_ros.Buffer(
            rospy.Duration(1.0))  # tf buffer length
        tf_listener = tf2_ros.TransformListener(self.tf_buffer)

        # build trac-ik solver
        self.ik = TRAC_IK(self.baselink, self.endlink, urdf, 0.01, 5e-4,
                          "Distance")
        # "Manipulation2")
        # "Manipulation1")

        rospy.Service('inverse_kinematics', InverseKinematics,
                      self.handle_ik_request)
Example #7
class BaxterEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """cts env, 6dim
    state space: relative state space position of gripper, (block-gripper) and (target-block)
    random restarts for block and target on the table
    reward function: - 1(not reaching)
    actions: (delta_x, delta_y) 5cm push
    starting state: (0.63, 0.2, 0.59, 0.27, 0.55, 0.3)
    max_num_steps = 50
    """
    def __init__(self, max_len=50, near_init=False, fixed_obj_init=False):
        dirname = os.path.dirname(os.path.abspath(__file__))
        print(dirname)
        mujoco_env.MujocoEnv.__init__(
            self, os.path.join(dirname,
                               "mjc/baxter_orient_left_cts_rotate.xml"), 1)
        self.near_init = near_init
        self.fixed_obj_init = fixed_obj_init

        utils.EzPickle.__init__(self)

        ## mujoco things
        # task space action space

        low = np.array([-1., -1.])
        high = np.array([1., 1.])

        self.action_space = spaces.Box(low, high)

        self.tuck_pose = {
            'left': np.array([-0.08, -1.0, -1.19, 1.94, 0.67, 1.03, -0.50])
        }

        self.start_pose = {
            'left': np.array([-0.21, -0.75, -1.4, 1.61, 0.60, 0.81, -0.52])
        }

        ## starting pose
        self.init_qpos = self.data.qpos.copy().flatten()
        self.init_qpos[1:8] = np.array(self.start_pose["left"]).T

        ## ik setup
        urdf_filename = osp.join(dirname, "urdf", "baxter.urdf")

        with open(urdf_filename) as f:
            urdf = f.read()

        # mode: Speed, Distance, Manipulation1, Manipulation2
        self.ik_solver = TRAC_IK(
            "base",
            "left_gripper",
            urdf,
            0.005,  # default seconds
            1e-5,  # default epsilon
            "Speed")

        self.old_state = np.zeros((6, ))
        self.max_num_steps = max_len
        print("INIT DONE!")

    ## gym methods

    def reset_model(self):

        target_pos = np.array([0.62, 0.3, 0.1])

        if self.near_init:

            # print("last state:",self.old_state)
            # print("New Episode!")

            box_size = 0.1
            half_size = box_size / 2.0
            min_box_gripper_dist = 0.03  #the box is 3 cm x 3 cm so this seems reasonable

            while 1:
                qpos = self.init_qpos + self.np_random.uniform(
                    low=-.005, high=.005, size=self.model.model.nq)
                qvel = self.init_qvel + self.np_random.uniform(
                    low=-.005, high=.005, size=self.model.model.nv)

                ## random target location
                qpos[-2:] = qpos[-2:] + self.np_random.uniform(
                    low=-box_size, high=box_size, size=2)
                ## random box location
                # qpos[8:10] = qpos[8:10] + self.np_random.uniform(low=-box_size, high=box_size, size=2)
                qpos[8:10] = np.array([0.5, 0.35])

                box_gripper_dist = np.linalg.norm(target_pos[:2] - qpos[8:10])

                if box_gripper_dist > min_box_gripper_dist:
                    break

        else:
            # print("last state:",self.old_state)
            # print("New Episode!")
            qpos = self.init_qpos + self.np_random.uniform(
                low=-.005, high=.005, size=self.model.model.nq)
            qvel = self.init_qvel + self.np_random.uniform(
                low=-.005, high=.005, size=self.model.model.nv)
            ## random target location
            qpos[-2:] = qpos[-2:] + self.np_random.uniform(
                low=-0.15, high=0.15, size=2)
            ## random box location
            qpos[8:10] = qpos[8:10]  # + self.np_random.uniform(low=-0.15, high=0.15, size=2)

        if self.fixed_obj_init:
            qpos[8:10] = self.init_qpos[8:10]

        self.set_state(qpos, qvel)

        target_quat = np.array([1.0, 0.0, 0.0, 0])
        target = np.concatenate((target_pos, target_quat))
        action_jt_space = self.do_ik(ee_target=target,
                                     jt_pos=self.data.qpos[1:8].flat)
        if action_jt_space is not None:
            self.apply_action(action_jt_space)
        ## for calculating velocities
        # self.old_state = np.zeros((6,))
        self.contacted = False
        self.out_of_bound = 0
        self.num_step = 0

        ob = self._get_obs()
        gripper_pose = ob[:2]
        box_pose = ob[2:4]
        target_pose = ob[4:6]

        relative_ob = np.concatenate(
            [gripper_pose, box_pose - gripper_pose, target_pose - box_pose])
        return relative_ob

    def viewer_setup(self):
        # cam_pos = np.array([0.1, 0.0, 0.7, 0.01, -45., 0.])
        cam_pos = np.array([1.0, 0.0, 0.7, 0.5, -45, 180])
        self.set_cam_position(cam_pos)

    def _get_obs(self):
        ee_x, ee_y = self.data.site_xpos[0][:2]
        box_x, box_y = self.data.site_xpos[1][:2]
        target_x, target_y = self.data.site_xpos[2][:2]

        state = np.array([ee_x, ee_y, box_x, box_y, target_x, target_y])
        vel = (state - self.old_state) / self.dt

        self.old_state = state.copy()
        return state

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.model.nq, ) and qvel.shape == (
            self.model.model.nv, )
        self.model.data.qpos[:] = qpos
        self.model.data.qvel[:] = qvel
        # self.model._compute_subtree() #pylint: disable=W0212
        self.model.forward()

    ## my methods
    def apply_action(self, action):
        ctrl = self.data.ctrl.copy()
        # print(ctrl.shape)
        ctrl[:7] = np.array(action)
        self.data.ctrl[:] = ctrl
        self.do_simulation(ctrl, 1000)

    def do_ik(self, ee_target, jt_pos):

        # print("starting to do ik")
        # print(ee_target[:3])

        # Populate seed with current angles if not provided
        init_pose_trac = tracik.DoubleVector()
        for i in range(7):
            init_pose_trac.push_back(jt_pos[i])

        x, y, z, qx, qy, qz, qw = ee_target
        qout = list(
            self.ik_solver.CartToJnt(init_pose_trac, x, y, z, qx, qy, qz, qw))

        if (len(qout) > 0):
            # print("ik sol:",qout)
            return qout
        else:
            #print("!no result found")
            return None

    def close_gripper(self, left_gap=0):
        pass

    def _step(self, action):

        ## hack for the init of mujoco.env
        if (action.shape[0] > 2):
            return np.zeros((6, 1)), 0, False, {}

        self.num_step += 1
        old_action_jt_space = self.data.qpos[1:8].T.copy()

        ## parsing of primitive actions
        delta_x, delta_y = action
        # print("delta x:%.4f, y:%.4f"%(delta_x, delta_y))
        x, y = self.old_state[:2].copy()
        # print("old x:%.4f, y:%.4f"%(x,y))
        x += delta_x * 0.05
        y += delta_y * 0.05
        # print("x:%.4f, y:%.4f"%(x,y))
        # print("x:%.4f,y:%.4f"%(0.2*x + 0.6 , 0.3*y + 0.3))

        out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6)

        if np.abs(delta_x * 0.05) > 0.0001 or np.abs(delta_y * 0.05) > 0.0001:
            target_pos = np.array([x, y, 0.15])
            target_quat = np.array([1.0, 0.0, 0.0, 0])
            target = np.concatenate((target_pos, target_quat))
            action_jt_space = self.do_ik(ee_target=target,
                                         jt_pos=self.data.qpos[1:8].flat)
            if (action_jt_space is not None) and (not out_of_bound):
                #print('running ik')
                # print("ik:", action_jt_space)
                self.apply_action(action_jt_space)
            else:
                #print("action is out of bounds")
                action_jt_space = old_action_jt_space.copy()

        else:
            #print("action too small to run ik")
            action_jt_space = old_action_jt_space.copy()

        # print("controller:",self.data.qpos[1:8].T)
        ## getting state
        ob = self._get_obs()
        gripper_pose = ob[:2]
        box_pose = ob[2:4]
        target_pose = ob[4:6]
        #print("new gripper_pose", gripper_pose, "block pose:", box_pose)

        ## reward function definition
        reward_reaching_goal = np.linalg.norm(
            box_pose - target_pose) < 0.05  #assume: my robot has 5cm error
        total_reward = -1 * (not reward_reaching_goal)

        box_x, box_y, box_z = self.data.site_xpos[1]

        info = {}
        if reward_reaching_goal == 1:
            done = True
            info["done"] = "goal reached"
        elif box_z < -0.02 or box_x < 0.4 or box_x > 0.8 or box_y < 0.0 or box_y > 0.6:
            done = True
            info["done"] = "box out of bounds"
            total_reward -= (self.max_num_steps - self.num_step) + 5
        elif (self.num_step > self.max_num_steps):
            done = True
            info["done"] = "max_steps_reached"
        else:
            done = False

        info['absolute_ob'] = ob.copy()

        relative_ob = np.concatenate(
            [gripper_pose, box_pose - gripper_pose, target_pose - box_pose])
        return relative_ob, total_reward, done, info

    def apply_hindsight(self, states, actions, goal_state):
        '''generates hindsight rollout based on the goal
        '''
        #goal = "box (absolute)" + "gripper" (at last time-step)
        goal = states[-1][2:4] + states[-1][:2]  ## this is the absolute goal location
        her_states, her_rewards = [], []
        for i in range(len(actions)):
            state = states[i]

            gripper_pose = state[:2]
            box_pose = state[2:4] + gripper_pose
            target_pose = state[4:6] + box_pose

            state[-2:] = goal.copy() - box_pose
            reward = self.calc_reward(state, goal, actions[i])
            her_states.append(state)
            her_rewards.append(reward)

        goal_state[-2:] = np.array([0., 0.])
        her_states.append(goal_state)

        return her_states, her_rewards

    def calc_reward(self, state, goal, action):

        ## parsing of primitive actions
        delta_x, delta_y = action

        x, y = self.old_state[:2].copy()
        x += delta_x * 0.05
        y += delta_y * 0.05

        out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6)

        gripper_pose = state[:2]
        box_pose = state[2:4] + gripper_pose
        target_pose = state[-2:] + box_pose

        ## reward function definition
        reward_reaching_goal = np.linalg.norm(box_pose - target_pose) < 0.05
        total_reward = -1 * (not reward_reaching_goal)
        return total_reward
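The docstring above describes a 6-dim relative observation, a 2-dim push action in [-1, 1] and a sparse -1/0 reward. A hypothetical rollout loop, assuming the pre-0.10 gym API that this code targets (where Env.step and Env.reset forward to _step and reset_model):

# hypothetical rollout of the environment above
env = BaxterEnv(max_len=50)
ob = env.reset()                                # 6-dim relative observation
done = False
while not done:
    action = env.action_space.sample()          # (delta_x, delta_y) in [-1, 1], i.e. up to a 5 cm push
    ob, reward, done, info = env.step(action)   # reward stays -1 until the block reaches the target
print(info.get("done"))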
Example #8
class BlueIK:
    def _setup(self):
        # set up Chebyshev points
        self.resolution = 100
        self.duration = 0.8

        # load in ros parameters
        self.baselink = rospy.get_param("blue_hardware/baselink")
        self.endlink = rospy.get_param("blue_hardware/endlink")
        urdf = rospy.get_param("/robot_description")
        flag, self.tree = kdl_parser.treeFromString(urdf)

        # build kinematic chain and fk and jacobian solvers
        chain_ee = self.tree.getChain(self.baselink, self.endlink)
        self.fk_ee = kdl.ChainFkSolverPos_recursive(chain_ee)
        self.jac_ee = kdl.ChainJntToJacSolver(chain_ee)

        # building robot joint state
        self.num_joints = chain_ee.getNrOfJoints()

        self.joint_names = rospy.get_param("blue_hardware/joint_names")
        self.joint_names = self.joint_names[:self.num_joints]
        if self.debug:
            rospy.loginfo(self.joint_names)

        self.joints = kdl.JntArray(self.num_joints)

        # TODO: make separate in abstract class
        self.ik = TRAC_IK(self.baselink,
                          self.endlink,
                          urdf,
                          0.005,
                          5e-5,
                          "Distance")
                          # "Manipulation2")
                          # "Manipulation1")

    def ik_sol(self, target, joints):
        # target is a target pose object
        # joints is the starting joint seed
        seed_state = []
        for j in joints:
            seed_state.append(j)

        rospy.logerr("TARGET")
        rospy.logerr(target)
        result = self.ik.CartToJnt(seed_state,
                                   target.position.x, target.position.y, target.position.z,
                                   target.orientation.x, target.orientation.y,
                                   target.orientation.z, target.orientation.w)
        if len(result) != self.num_joints:
            return
        self.command_to_joint_state(result)

    def command_to_joint_state(self, joints):
        start_pos = []
        joint_msg = rospy.wait_for_message("/joint_states", JointState)
        for i, n in enumerate(self.joint_names):
            index = joint_msg.name.index(n)
            start_pos.append(joint_msg.position[index])
        rospy.logerr(start_pos)
        start_pos = np.array(start_pos)
        end_pos = []
        for j in joints:
            end_pos.append(j)
        end_pos = np.array(end_pos)
        joint_diff = end_pos - start_pos
        max_joint_diff = np.linalg.norm(joint_diff, np.inf)
        for j in range(self.resolution + 1):
            step = start_pos + (end_pos - start_pos) * cheb_points(self.resolution, j)
            self._pub_js_msg(np.array(step))
            rospy.sleep( np.abs(max_joint_diff) * self.duration * 1.0 / self.resolution)

    def _pub_js_msg(self, joints):
        # joints is an array of 7 joint angles
        msg = Float64MultiArray()
        msg.data = joints
        if self.debug:
            rospy.logerr("ik result")
            rospy.logerr(joints)
        self.command_pub.publish(msg)
        self.command_pub_ctc.publish(msg)


    #  def update_joints(self, joint_msg):
        #  if self.first:
            # TODO: brent added this and doesn't understand what's going on with first
            #  first = False
            # /brent
            #  return
#
        #  temp_joints = kdl.JntArray(self.num_joints)
        #  for i, n in enumerate(self.joint_names):
            #  index = joint_msg.name.index(n)
            #  temp_joints[i] = joint_msg.position[index]
        #  self.joints = temp_joints

    def __init__(self, debug=False):
        self.debug = debug
        if self.debug:
            self.debug_count = 0

        self._setup()

        self.target_pos = Pose()
        #  self.first = True

        self.command_pub = rospy.Publisher("blue_controllers/joint_position_controller/command", Float64MultiArray, queue_size=1)
        self.command_pub_ctc = rospy.Publisher("blue_controllers/joint_ctc/command", Float64MultiArray, queue_size=1)
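command_to_joint_state above interpolates from the current joint positions to the IK result using a helper cheb_points(resolution, j) that is not shown in this snippet. One plausible definition (an assumption, not the project's actual code) maps j in [0, resolution] to a cosine-spaced fraction in [0, 1], so the waypoints cluster near the start and end of the motion:

import numpy as np

def cheb_points(n, j):
    # hypothetical helper: Chebyshev-style spacing of interpolation fractions,
    # returning 0.0 at j == 0 and 1.0 at j == n
    return 0.5 * (1.0 - np.cos(np.pi * j / n))

With this definition, start_pos + (end_pos - start_pos) * cheb_points(resolution, j) sweeps from start_pos to end_pos as j runs from 0 to resolution.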
Example #9
class BaxterEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """cts env, 9dim
    state space: relative state space position of gripper, (block-gripper) and (target-block)
    4 restarts for the gripper with pen in hand and 1 without grasped pen
    reward function: - 1(not reaching)
    actions: (delta_x, delta_y, delta_z, gap) 5cm push
    max_num_steps = 20
    """
    def __init__(self, max_len=50):
        dirname = os.path.dirname(os.path.abspath(__file__))
        mujoco_env.MujocoEnv.__init__(
            self,
            os.path.join(dirname,
                         "mjc/baxter_orient_left_cts_with_grippers_pen.xml"),
            1)
        utils.EzPickle.__init__(self)

        ## mujoco things
        # task space action space

        low = np.array([-1., -1., -1., -1.])
        high = np.array([1., 1., 1., 1.])

        self.action_space = spaces.Box(low, high)

        self.tuck_pose = {
            'left': np.array([-0.08, -1.0, -1.19, 1.94, 0.67, 1.03, -0.50])
        }

        self.start_pose = {
            'left': np.array([-0.21, -0.75, -1.4, 1.61, 0.60, 0.81, -0.52])
        }

        ## starting pose
        self.init_qpos = self.data.qpos.copy().flatten()
        self.init_qpos[1:8] = np.array(self.start_pose["left"]).T

        ## ik setup
        urdf_filename = osp.join(dirname, "urdf", "baxter.urdf")

        with open(urdf_filename) as f:
            urdf = f.read()

        # mode: Speed, Distance, Manipulation1, Manipulation2
        self.ik_solver = TRAC_IK(
            "base",
            "left_gripper",
            urdf,
            0.005,  # default seconds
            1e-5,  # default epsilon
            "Speed")

        self.old_state = np.zeros((9, ))
        self.max_num_steps = max_len
        print("INIT DONE!")

    ## gym methods

    def reset_model(self):
        # print("last state:",self.old_state)
        # print("New Episode!")
        reset_state = 1  #self.np_random.choice(2)

        if reset_state:
            grasped_qpos = np.array([
                -1.1937e-03, 2.3933e-01, -2.0445e-02, -1.5192e+00, 1.5246e+00,
                1.5489e+00, 1.5099e+00, 2.6392e+00, 2.0560e-02, -2.0560e-02,
                6.5746e-01, 4.1344e-01, 4.9806e-03, 9.9898e-01, -1.3736e-02,
                4.3098e-02, 1.6292e-03, 1.0000e-01, -1.0000e-01
            ])

            reset_state = self.np_random.choice(4)
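            # note: np_random.choice(4) yields 0-3, so the reset_state == 4 branch below is never taken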
            if reset_state == 0:
                pass
            elif reset_state == 1:
                grasped_qpos[-2:] = np.array([0.0, -0.1])
            elif reset_state == 2:
                grasped_qpos[-2:] = np.array([-0.1, 0.2])
            elif reset_state == 3:
                grasped_qpos[-2:] = np.array([0.1, 0.3])
            elif reset_state == 4:
                grasped_qpos[10:13] = np.array([0.55, 0.35, 0.0])

            qvel = self.init_qvel
            self.set_state(grasped_qpos, qvel)

        else:
            qpos = self.init_qpos.copy()

            ## random target location
            qpos[-2:] = qpos[-2:] + self.np_random.uniform(
                low=-0.15, high=0.15, size=2)

            ## random object location
            ## qpos[-2:] is the displacement from 0.55 0.3
            ## random box location
            r = 0.2  #np.random.uniform(low=0.1, high=0.2, size=1)
            theta = np.random.uniform(low=-np.pi, high=np.pi, size=1)
            qpos[10] = 0.55 + qpos[-2] + r * np.cos(theta)
            qpos[11] = 0.3 + qpos[-1] + r * np.sin(theta)

            qvel = self.init_qvel
            self.set_state(qpos, qvel)

        self.num_step = 0
        ob = self._get_obs()
        gripper_pose = ob[:3]
        box_pose = ob[3:6]
        target_pose = ob[6:9]

        relative_ob = np.concatenate(
            [gripper_pose, box_pose - gripper_pose, target_pose - box_pose])
        return relative_ob

    def viewer_setup(self):
        # cam_pos = np.array([0.1, 0.0, 0.7, 0.01, -45., 0.])
        cam_pos = np.array([1.0, 0.0, 0.7, 1.5, -45, 180])
        self.set_cam_position(self.viewer, cam_pos)

    def _get_obs(self):
        ee_x, ee_y, ee_z = self.data.site_xpos[0][:3]
        box_x, box_y, box_z = self.data.site_xpos[3][:3]
        target_x, target_y, target_z = self.data.site_xpos[4][:3]

        state = np.array([
            ee_x, ee_y, ee_z, box_x, box_y, box_z, target_x, target_y, target_z
        ])
        vel = (state - self.old_state) / self.dt

        self.old_state = state.copy()
        return state

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        self.model.data.qpos = qpos
        self.model.data.qvel = qvel
        # self.model._compute_subtree() #pylint: disable=W0212
        self.model.forward()

    ## my methods
    def apply_action(self, action):
        # print("cmd",action)
        # print("curr jt pos", self.data.qpos[1:8].T)
        ctrl = self.data.ctrl.copy()
        # print(ctrl.shape)
        ctrl[:7, 0] = np.array(action)
        self.data.ctrl = ctrl
        self.do_simulation(ctrl, 1000)
        # print("next jt pos", self.data.qpos[1:8].T)

    def do_ik(self, ee_target, jt_pos):

        # print("starting to do ik")
        # print(ee_target[:3])

        # Populate seed with current angles if not provided
        init_pose_trac = tracik.DoubleVector()
        for i in range(7):
            init_pose_trac.push_back(jt_pos[i])

        x, y, z, qx, qy, qz, qw = ee_target
        qout = list(
            self.ik_solver.CartToJnt(init_pose_trac, x, y, z, qx, qy, qz, qw))

        if (len(qout) > 0):
            # print("ik sol:",qout)
            return qout
        else:
            print("!no result found")
            return None

    def close_gripper(self, gap=0):
        # print("before grip location", self.data.site_xpos[0])
        # print("gap requested:%.4f"%gap)

        # print("after grip location", self.data.site_xpos[0])
        # print("before grip", self.data.ctrl)
        ctrl = self.data.ctrl.copy()
        ctrl[:7, 0] = self.data.qpos[1:8].flatten()
        ctrl[7, 0] = (gap + 1) * 0.020833
        ctrl[8, 0] = -(gap + 1) * 0.020833
        self.data.ctrl = ctrl.copy()

        # print("qpos gripper", self.data.qpos[8:10].T)
        # print("before grip", self.data.ctrl)

    def set_cam_position(self, viewer, cam_pos):
        for i in range(3):
            viewer.cam.lookat[i] = cam_pos[i]
        viewer.cam.distance = cam_pos[3]
        viewer.cam.elevation = cam_pos[4]
        viewer.cam.azimuth = cam_pos[5]
        viewer.cam.trackbodyid = -1

    def _step(self, action):

        ## hack for the init of mujoco.env
        if (action.shape[0] > 4):
            return np.zeros((9, 1)), 0, False, {}

        self.num_step += 1
        old_action_jt_space = self.data.qpos[1:8].T.copy()

        ## parsing of primitive actions
        delta_x, delta_y, delta_z, gap = action

        # print("gap:%.4f"%gap)
        # print("grip prev controller:",self.data.qpos[1:8].T)
        self.close_gripper(gap)
        # print("delta x:%.4f, y:%.4f"%(delta_x, delta_y))
        x, y, z = self.old_state[:3].copy()
        # print("old x:%.4f, y:%.4f"%(x,y))
        x += delta_x * 0.05
        y += delta_y * 0.05
        z += delta_z * 0.05
        # print("x:%.4f, y:%.4f, z:%.4f"%(x,y,z))
        # print("prev controller:",self.data.qpos[1:8].T)
        # print("x:%.4f,y:%.4f"%(0.2*x + 0.6 , 0.3*y + 0.3))

        out_of_bound = ((x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6)
                        or (z < 0.1 or z > 0.5))

        if (np.abs(delta_x * 0.05) > 0.0001 or np.abs(delta_y * 0.05) > 0.0001
                or np.abs(delta_z * 0.05) > 0.0001):
            target_pos = np.array([x, y, z])
            target_quat = np.array([1.0, 0.0, 0.0, 0])
            target = np.concatenate((target_pos, target_quat))
            action_jt_space = self.do_ik(ee_target=target,
                                         jt_pos=self.data.qpos[1:8].flat)
            if (action_jt_space is not None) and (not out_of_bound):
                # print("ik:", action_jt_space)
                self.apply_action(action_jt_space)
            else:
                action_jt_space = old_action_jt_space.copy()

        else:
            action_jt_space = old_action_jt_space.copy()

        # print("controller:",self.data.qpos[1:8].T)
        ## getting state
        ob = self._get_obs()
        gripper_pose = ob[:3]
        box_pose = ob[3:6]
        target_pose = ob[6:9]
        #print("new gripper_pose", gripper_pose, "block pose:", box_pose)

        ## reward function definition
        # print(np.abs(target_pos[0]-box_pose[0]), np.abs(target_pos[1]-box_pose[1]), np.abs(target_pose[2]-box_pose[2]))
        reward_reaching_goal = (np.abs(target_pose[0] - box_pose[0]) < 0.07
                                and np.abs(target_pose[1] - box_pose[1]) < 0.07
                                and np.abs(target_pose[2] - box_pose[2]) < 0.07)
        total_reward = -1 * (not reward_reaching_goal)

        box_x, box_y, box_z = self.data.site_xpos[3]

        info = {}
        if reward_reaching_goal == 1:
            done = True
            info["done"] = "goal reached"
        elif box_z < -0.02 or box_x < 0.4 or box_x > 0.8 or box_y < 0.0 or box_y > 0.6:
            done = True
            info["done"] = "box out of bounds"
            total_reward -= (self.max_num_steps - self.num_step) + 5
        elif (self.num_step > self.max_num_steps):
            done = True
            info["done"] = "max_steps_reached"
        else:
            done = False

        info['absolute_ob'] = ob.copy()
        relative_ob = np.concatenate(
            [gripper_pose, box_pose - gripper_pose, target_pose - box_pose])
        return relative_ob, total_reward, done, info

    def apply_hindsight(self, states, actions, goal_state):
        '''generates hindsight rollout based on the goal
        '''
        goal = states[-1][3:6] + states[-1][:3]  ## this is the absolute goal location
        her_states, her_rewards = [], []
        for i in range(len(actions)):
            state = states[i]

            gripper_pose = state[:3]
            box_pose = state[3:6] + gripper_pose
            target_pose = state[6:9] + box_pose

            state[-3:] = goal.copy() - box_pose
            reward = self.calc_reward(state, goal, actions[i])
            her_states.append(state)
            her_rewards.append(reward)

        goal_state[-3:] = np.array([0., 0., 0.])
        her_states.append(goal_state)

        return her_states, her_rewards

    def calc_reward(self, state, goal, action):

        ## parsing of primitive actions
        delta_x, delta_y, delta_z, gap = action

        x, y, z = self.old_state[:3].copy()
        x += delta_x * 0.05
        y += delta_y * 0.05
        z += delta_z * 0.05

        out_of_bound = ((x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6)
                        or (z < 0.1 or z > 0.5))

        gripper_pose = state[:3]
        box_pose = state[3:6] + gripper_pose
        target_pose = goal + box_pose

        ## reward function definition
        reward_reaching_goal = np.linalg.norm(box_pose - target_pose) < 0.05
        total_reward = -1 * (not reward_reaching_goal)
        return total_reward
Example #10
class BaxterEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """cts env, 6dim
    state space: relative state space position of gripper, (block-gripper), euler angle and (target-block)
    random restarts for block and target on the table
    reward function: - 1(not reaching)
    actions: (delta_x, delta_y) 5cm push
    starting state: (0.63, 0.2, 0.59, 0.27, 0, 0, 0, 0.55, 0.3)
    max_num_steps = 50
    """
    def __init__(self):
        dirname = os.path.dirname(os.path.abspath(__file__))
        mujoco_env.MujocoEnv.__init__(
            self, os.path.join(dirname, "mjc/baxter_orient_left_cts.xml"), 1)
        utils.EzPickle.__init__(self)

        ## mujoco things
        # task space action space

        low = np.array([-1., -1.])
        high = np.array([1., 1.])

        self.action_space = spaces.Box(low, high)

        self.tuck_pose = {
            'left': np.array([-0.08, -1.0, -1.19, 1.94, 0.67, 1.03, -0.50])
        }

        self.start_pose = {
            'left': np.array([-0.21, -0.75, -1.4, 1.61, 0.60, 0.81, -0.52])
        }

        ## starting pose
        self.init_qpos = self.data.qpos.copy().flatten()
        self.init_qpos[1:8] = np.array(self.start_pose["left"]).T

        ## ik setup
        urdf_filename = osp.join(dirname, "urdf", "baxter.urdf")

        with open(urdf_filename) as f:
            urdf = f.read()

        # mode: Speed, Distance, Manipulation1, Manipulation2
        self.ik_solver = TRAC_IK(
            "base",
            "left_gripper",
            urdf,
            0.005,  # default seconds
            1e-5,  # default epsilon
            "Speed")

        self.old_state = np.zeros((9, ))
        self.max_num_steps = 50
        print("INIT DONE!")

    ## gym methods

    def reset_model(self):
        print("last state:", self.old_state)
        print("New Episode!")
        qpos = self.init_qpos + self.np_random.uniform(
            low=-.005, high=.005, size=self.model.nq)
        qvel = self.init_qvel + self.np_random.uniform(
            low=-.005, high=.005, size=self.model.nv)
        ## random target location
        qpos[-2:] = qpos[-2:] + self.np_random.uniform(
            low=-0.15, high=0.15, size=2)
        ## random box location
        qpos[8:10] = qpos[8:10] + self.np_random.uniform(
            low=-0.15, high=0.15, size=2)

        self.set_state(qpos, qvel)

        target_pos = np.array([0.6, 0.3, 0.15])
        target_quat = np.array([1.0, 0.0, 0.0, 0])
        target = np.concatenate((target_pos, target_quat))
        action_jt_space = self.do_ik(ee_target=target,
                                     jt_pos=self.data.qpos[1:8].flat)
        if action_jt_space is not None:
            self.apply_action(action_jt_space)
        ## for calculating velocities
        # self.old_state = np.zeros((6,))
        self.contacted = False
        self.out_of_bound = 0
        self.num_step = 0
        ob = self._get_obs()
        gripper_pose = ob[:2]
        box_pose = ob[2:4]
        target_pose = ob[4:6]

        relative_ob = np.concatenate(
            [gripper_pose, box_pose - gripper_pose, target_pose - box_pose])
        return relative_ob

    def viewer_setup(self):
        # cam_pos = np.array([0.1, 0.0, 0.7, 0.01, -45., 0.])
        cam_pos = np.array([1.0, 0.0, 0.7, 0.5, -45, 180])
        self.set_cam_position(self.viewer, cam_pos)

    def _get_obs(self):
        ee_x, ee_y = self.data.site_xpos[0][:2]

        box_x, box_y = self.data.site_xpos[1][:2]
        box_orient_x, box_orient_y, box_orient_z = euler_from_matrix(
            self.data.site_xmat[1].reshape((3, 3)), 'rxyz')

        target_x, target_y = self.data.site_xpos[2][:2]

        state = np.array([
            ee_x, ee_y, box_x, box_y, box_orient_x, box_orient_y, box_orient_z,
            target_x, target_y
        ])
        vel = (state - self.old_state) / self.dt

        self.old_state = state.copy()
        return state

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        self.model.data.qpos = qpos
        self.model.data.qvel = qvel
        self.model._compute_subtree()  #pylint: disable=W0212
        self.model.forward()

    ## my methods
    def apply_action(self, action):
        ctrl = self.data.ctrl.copy()
        # print(ctrl.shape)
        ctrl[:7, 0] = np.array(action)
        self.data.ctrl = ctrl
        self.do_simulation(ctrl, 1000)

    def do_ik(self, ee_target, jt_pos):

        # print("starting to do ik")
        # print(ee_target[:3])

        # Populate seed with current angles if not provided
        init_pose_trac = tracik.DoubleVector()
        for i in range(7):
            init_pose_trac.push_back(jt_pos[i])

        x, y, z, qx, qy, qz, qw = ee_target
        qout = list(
            self.ik_solver.CartToJnt(init_pose_trac, x, y, z, qx, qy, qz, qw))

        if (len(qout) > 0):
            # print("ik sol:",qout)
            return qout
        else:
            print("!no result found")
            return None

    def close_gripper(self, left_gap=0):
        pass

    def set_cam_position(self, viewer, cam_pos):
        for i in range(3):
            viewer.cam.lookat[i] = cam_pos[i]
        viewer.cam.distance = cam_pos[3]
        viewer.cam.elevation = cam_pos[4]
        viewer.cam.azimuth = cam_pos[5]
        viewer.cam.trackbodyid = -1

    def _step(self, action):

        ## hack for the init of mujoco.env
        if (action.shape[0] > 2):
            return np.zeros((9, 1)), 0, False, {}

        self.num_step += 1
        old_action_jt_space = self.data.qpos[1:8].T.copy()

        ## parsing of primitive actions
        delta_x, delta_y = action
        # print("delta x:%.4f, y:%.4f"%(delta_x, delta_y))
        x, y = self.old_state[:2].copy()
        # print("old x:%.4f, y:%.4f"%(x,y))
        x += delta_x * 0.05
        y += delta_y * 0.05
        # print("x:%.4f, y:%.4f"%(x,y))
        # print("x:%.4f,y:%.4f"%(0.2*x + 0.6 , 0.3*y + 0.3))

        out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6)

        if np.abs(delta_x * 0.05) > 0.0001 or np.abs(delta_y * 0.05) > 0.0001:
            target_pos = np.array([x, y, 0.15])
            target_quat = np.array([1.0, 0.0, 0.0, 0])
            target = np.concatenate((target_pos, target_quat))
            action_jt_space = self.do_ik(ee_target=target,
                                         jt_pos=self.data.qpos[1:8].flat)
            if (action_jt_space is not None) and (not out_of_bound):
                # print("ik:", action_jt_space)
                self.apply_action(action_jt_space)
            else:
                action_jt_space = old_action_jt_space.copy()

        else:
            action_jt_space = old_action_jt_space.copy()

        # print("controller:",self.data.qpos[1:8].T)
        ## getting state
        ob = self._get_obs()
        gripper_pose = ob[:2]
        box_pose = ob[2:4]
        target_pose = ob[-2:]
        #print("new gripper_pose", gripper_pose, "block pose:", box_pose)

        ## reward function definition
        w = [0.1, 1., 0.01, 1., -1e-1, -1e-3, -1]
        # reward_grip_box = np.linalg.norm(old_box_pose- old_gripper_pose) - np.linalg.norm(box_pose- gripper_pose)
        # reward_box_target = np.linalg.norm(old_box_pose- old_target_pose) - np.linalg.norm(box_pose- target_pose)
        reward_grip_box = -np.linalg.norm(box_pose - gripper_pose)
        reward_box_target = -np.linalg.norm(box_pose - target_pose)
        reward_first_contact = (np.linalg.norm(box_pose - gripper_pose) <
                                0.05) and (not self.contacted)

        penalty = 0.
        if out_of_bound:
            self.out_of_bound += 1
            if (x < 0.4): penalty += (x - 0.4)**2
            elif (x > 0.8): penalty += (x - 0.8)**2

            if (y < 0.): penalty += (y - 0.)**2
            elif (y > 0.6): penalty += (y - 0.6)**2

        if (reward_first_contact == 1):
            self.contacted = True
        reward_reaching_goal = np.linalg.norm(
            box_pose - target_pose) < 0.02  #assume: my robot has 2cm error

        # total_reward = w[1]*reward_box_target \
        #                 + w[0]*reward_grip_box   \
        #                 + w[6] * out_of_bound \
        #                 + w[5] * np.square(action).sum()  \
        #                     + w[2]*reward_first_contact \
        #                     + w[3]*reward_reaching_goal
        # + w[4]*np.square(action_jt_space-old_action_jt_space).sum() \

        total_reward = -1 * (not reward_reaching_goal)

        box_x, box_y, box_z = self.data.site_xpos[1]
        info = {
            "r_grip_box": reward_grip_box,
            "r_box_target": reward_box_target,
            "r_contact": reward_first_contact,
            # "r_reached": reward_reaching_goal
        }
        if reward_reaching_goal == 1:
            done = True
            info["done"] = "goal reached"
        elif box_z < -0.02 or box_x < 0.4 or box_x > 0.8 or box_y < 0.0 or box_y > 0.6:
            done = True
            info["done"] = "box out of bounds"
        elif (self.num_step > self.max_num_steps):
            done = True
            info["done"] = "max_steps_reached"
        else:
            done = False

        info['absolute_ob'] = ob.copy()
        relative_ob = ob
        relative_ob[-2:] -= relative_ob[2:4]
        relative_ob[2:4] -= relative_ob[:2]

        return relative_ob, total_reward, done, info

    def apply_hindsight(self, states, actions, goal_state):
        '''generates hindsight rollout based on the goal
        '''
        goal = np.array([0., 0.])
        her_states, her_rewards = [], []
        for i in range(len(actions)):
            state = states[i]
            state[-2:] = goal.copy()
            reward = self.calc_reward(state, goal, actions[i])
            her_states.append(state)
            her_rewards.append(reward)

        goal_state[-2:] = goal.copy()
        her_states.append(goal_state)

        return her_states, her_rewards

    def calc_reward(self, state, goal, action):

        ## parsing of primitive actions
        delta_x, delta_y = action

        x, y = self.old_state[:2].copy()
        x += delta_x * 0.05
        y += delta_y * 0.05

        out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6)

        gripper_pose = state[:2]
        box_pose = state[2:4] + gripper_pose
        target_pose = goal + box_pose

        ## reward function definition
        w = [0.1, 1., 0.01, 1., -1e-1, -1e-3, -1]
        reward_grip_box = -np.linalg.norm(box_pose - gripper_pose)
        reward_box_target = -np.linalg.norm(box_pose - target_pose)
        reward_first_contact = (np.linalg.norm(box_pose - gripper_pose) <
                                0.05) and (not self.contacted)
        reward_reaching_goal = np.linalg.norm(
            box_pose - target_pose) < 0.02  #assume: my robot has 2cm error

        # total_reward = w[1]*reward_box_target \
        #                 + w[0]*reward_grip_box   \
        #                 + w[6] * out_of_bound \
        #                 + w[5] * np.square(action).sum()  \
        #                     + w[3]*reward_reaching_goal
        # + w[2]*reward_first_contact \

        total_reward = -1 * (not reward_reaching_goal)

        return total_reward
Example #11
class BaxterEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """cts env, 6dim
    state space: relative state space position of gripper and (target-gripper)
    random restarts for target on the table
    reward function: - 1(not reaching)
    actions: (delta_x, delta_y, delta_z) 5cm push
    starting state: (0.63, 0.2, 0.55, 0.3)
    """
    def __init__(self, max_len=10):
        dirname = os.path.dirname(os.path.abspath(__file__))
        mujoco_env.MujocoEnv.__init__(
            self, os.path.join(dirname,
                               "mjc/baxter_orient_left_3dreacher.xml"), 1)
        utils.EzPickle.__init__(self)

        ## mujoco things
        # task space action space

        low = np.array([-1., -1., -1])
        high = np.array([1., 1., 1])

        self.action_space = spaces.Box(low, high)

        self.tuck_pose = {
            'left': np.array([-0.08, -1.0, -1.19, 1.94, 0.67, 1.03, -0.50])
        }

        self.start_pose = {
            'left': np.array([-0.21, -0.75, -1.4, 1.61, 0.60, 0.81, -0.52])
        }

        ## starting pose
        self.init_qpos = self.data.qpos.copy().flatten()
        self.init_qpos[1:8] = np.array(self.start_pose["left"]).T

        ## ik setup
        urdf_filename = osp.join(dirname, "urdf", "baxter.urdf")

        with open(urdf_filename) as f:
            urdf = f.read()

        # mode: Speed, Distance, Manipulation1, Manipulation2
        self.ik_solver = TRAC_IK(
            "base",
            "left_gripper",
            urdf,
            0.005,  # default seconds
            1e-5,  # default epsilon
            "Speed")

        self.old_state = np.zeros((6, ))
        self.max_num_steps = max_len
        print("INIT DONE!")

    ## gym methods

    def reset_model(self):
        print("last state:", self.old_state)
        print("New Episode!")
        qpos = self.init_qpos + self.np_random.uniform(
            low=-.005, high=.005, size=self.model.nq)
        qvel = self.init_qvel + self.np_random.uniform(
            low=-.005, high=.005, size=self.model.nv)
        ## random target location
        qpos[-3:-1] = qpos[-3:-1] + self.np_random.uniform(
            low=-0.15, high=0.15, size=2)
        qpos[-1] = qpos[-1] + self.np_random.uniform(
            low=-0.1, high=0.1, size=1)

        self.set_state(qpos, qvel)

        target_pos = np.array([0.6, 0.3, 0.2])
        target_quat = np.array([1.0, 0.0, 0.0, 0])
        target = np.concatenate((target_pos, target_quat))
        action_jt_space = self.do_ik(ee_target=target,
                                     jt_pos=self.data.qpos[1:8].flat)
        if action_jt_space is not None:
            self.apply_action(action_jt_space)
        ## for calculating velocities
        # self.old_state = np.zeros((6,))
        self.contacted = False
        self.out_of_bound = 0
        self.num_step = 0
        ob = self._get_obs()
        gripper_pose = ob[:3]
        target_pose = ob[-3:]

        relative_ob = np.concatenate(
            [gripper_pose, target_pose - gripper_pose])
        return relative_ob

    def viewer_setup(self):
        # cam_pos = np.array([0.1, 0.0, 0.7, 0.01, -45., 0.])
        cam_pos = np.array([1.0, 0.0, 0.7, 0.5, -45, 180])
        self.set_cam_position(self.viewer, cam_pos)

    def _get_obs(self):
        ee_x, ee_y, ee_z = self.data.site_xpos[0][:3]
        target_x, target_y, target_z = self.data.site_xpos[1][:3]

        state = np.array([ee_x, ee_y, ee_z, target_x, target_y, target_z])
        vel = (state - self.old_state) / self.dt

        self.old_state = state.copy()
        return state

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        self.model.data.qpos = qpos
        self.model.data.qvel = qvel
        # self.model._compute_subtree() #pylint: disable=W0212
        self.model.forward()

    ## my methods
    def apply_action(self, action):
        ctrl = self.data.ctrl.copy()
        # print(ctrl.shape)
        ctrl[:7, 0] = np.array(action)
        self.data.ctrl = ctrl
        self.do_simulation(ctrl, 1000)

    def do_ik(self, ee_target, jt_pos):

        # print("starting to do ik")
        # print(ee_target[:3])

        # Populate seed with current angles if not provided
        init_pose_trac = tracik.DoubleVector()
        for i in range(7):
            init_pose_trac.push_back(jt_pos[i])

        x, y, z, qx, qy, qz, qw = ee_target
        qout = list(
            self.ik_solver.CartToJnt(init_pose_trac, x, y, z, qx, qy, qz, qw))

        if (len(qout) > 0):
            # print("ik sol:",qout)
            return qout
        else:
            print("!no result found")
            return None

    def set_cam_position(self, viewer, cam_pos):
        for i in range(3):
            viewer.cam.lookat[i] = cam_pos[i]
        viewer.cam.distance = cam_pos[3]
        viewer.cam.elevation = cam_pos[4]
        viewer.cam.azimuth = cam_pos[5]
        viewer.cam.trackbodyid = -1

    def _step(self, action):

        ## hack for the init of mujoco.env
        if (action.shape[0] > 3):
            return np.zeros((6, 1)), 0, False, {}

        self.num_step += 1
        old_action_jt_space = self.data.qpos[1:8].T.copy()

        ## parsing of primitive actions
        delta_x, delta_y, delta_z = action
        # print("delta x:%.4f, y:%.4f"%(delta_x, delta_y))
        x, y, z = self.old_state[:3].copy()
        # print("old x:%.4f, y:%.4f, z:%.4f"%(x,y,z))
        # print("delta x:%.4f, y:%.4f, z:%.4f"%(delta_x, delta_y,delta_z))
        x += delta_x * 0.05
        y += delta_y * 0.05
        z += delta_z * 0.05
        # print("x:%.4f, y:%.4f, z:%.4f"%(x,y,z))

        out_of_bound = ((x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6)
                        or (z < 0.08 or z > 0.5))

        if (np.abs(delta_x * 0.05) > 0.0001 or np.abs(delta_y * 0.05) > 0.0001
                or np.abs(delta_z * 0.05) > 0.0001):
            target_pos = np.array([x, y, z])
            target_quat = np.array([1.0, 0.0, 0.0, 0])
            target = np.concatenate((target_pos, target_quat))
            action_jt_space = self.do_ik(ee_target=target,
                                         jt_pos=self.data.qpos[1:8].flat)
            if (action_jt_space is not None) and (not out_of_bound):
                # print("ik:", action_jt_space)
                self.apply_action(action_jt_space)
            else:
                action_jt_space = old_action_jt_space.copy()

        else:
            action_jt_space = old_action_jt_space.copy()

        # print("controller:",self.data.qpos[1:8].T)
        ## getting state
        ob = self._get_obs()
        gripper_pose = ob[:3]
        target_pose = ob[3:6]

        ## reward function definition
        reward_reaching_goal = np.linalg.norm(gripper_pose -
                                              target_pose) < 0.05
        total_reward = -1 * (not reward_reaching_goal)

        info = {}

        if reward_reaching_goal == 1:
            done = True
            info["done"] = "goal reached"
        elif (self.num_step > self.max_num_steps):
            done = True
            info["done"] = "max_steps_reached"
        else:
            done = False

        info['absolute_ob'] = ob.copy()

        relative_ob = np.concatenate(
            [gripper_pose, target_pose - gripper_pose])
        return relative_ob, total_reward, done, info

    def apply_hindsight(self, states, actions, goal_state):
        '''generates hindsight rollout based on the goal
        '''
        goal = states[-1][:3]  ## this is the absolute goal location
        her_states, her_rewards = [], []
        for i in range(len(actions)):
            state = states[i]
            state[-3:] = goal.copy() - state[:3]
            reward = self.calc_reward(state, goal, actions[i])
            her_states.append(state)
            her_rewards.append(reward)

        goal_state[-3:] = np.array([0., 0., 0.])
        her_states.append(goal_state)

        return her_states, her_rewards

    def calc_reward(self, state, goal, action):

        gripper_pose = state[:3]
        target_pose = state[-3:] + gripper_pose

        ## reward function definition
        reward_reaching_goal = np.linalg.norm(
            gripper_pose - target_pose) < 0.03  #assume: my robot has 2cm error
        total_reward = -1 * (not reward_reaching_goal)
        return total_reward
Example #12
class BlueIK:
    def __init__(self):
        # load in robot kinematic information
        self.baselink = rospy.get_param("blue_hardware/baselink")
        self.endlink = rospy.get_param("blue_hardware/endlink")
        self.posture_target = rospy.get_param("blue_hardware/posture_target")
        urdf = rospy.get_param("/robot_description")

        # set up tf2 for transformation lookups
        self.tf_buffer = tf2_ros.Buffer(
            rospy.Duration(1.0))  # tf buffer length
        tf_listener = tf2_ros.TransformListener(self.tf_buffer)

        # build trac-ik solver
        self.ik = TRAC_IK(self.baselink, self.endlink, urdf, 0.01, 5e-4,
                          "Distance")
        # "Manipulation2")
        # "Manipulation1")

        rospy.Service('inverse_kinematics', InverseKinematics,
                      self.handle_ik_request)

    def ik_solution(self, target_pose, seed_joint_positions, solver="trac-ik"):
        solver = solver.lower()

        # use the requested solver, default to trac-ik otherwise
        # currently only trac-ik is supported
        if solver == "trac-ik":
            ik_joints = self.ik.CartToJnt(
                seed_joint_positions, target_pose.position.x,
                target_pose.position.y, target_pose.position.z,
                target_pose.orientation.x, target_pose.orientation.y,
                target_pose.orientation.z, target_pose.orientation.w)
        else:
            ik_joints = self.ik.CartToJnt(
                seed_joint_positions, target_pose.position.x,
                target_pose.position.y, target_pose.position.z,
                target_pose.orientation.x, target_pose.orientation.y,
                target_pose.orientation.z, target_pose.orientation.w)

        if not len(ik_joints) == len(seed_joint_positions):
            return False
        return ik_joints

    def handle_ik_request(self, request):
        if len(request.seed_joint_positions) == len(self.posture_target):
            seed_joint_positions = request.seed_joint_positions
        else:
            seed_joint_positions = self.posture_target

        try:
            # apply lookup transformation desired pose and apply the transformation
            transform = self.tf_buffer.lookup_transform(
                self.baselink,
                request.end_effector_pose.header.frame_id,
                rospy.Time(0),  # get the tf at first available time
                rospy.Duration(0.1))  # wait for 0.1 second
            pose_in_baselink_frame = tf2_geometry_msgs.do_transform_pose(
                request.end_effector_pose, transform)

            # get the ik solution
            ik_joints = self.ik_solution(pose_in_baselink_frame.pose,
                                         seed_joint_positions)

            if ik_joints == False:
                return InverseKinematicsResponse(False, [])
            else:
                return InverseKinematicsResponse(True, ik_joints)
        except:
            return InverseKinematicsResponse(False, [])
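A hypothetical client for the 'inverse_kinematics' service registered above. The request field names end_effector_pose and seed_joint_positions come from the handler; the srv import location and the shape of the response are assumptions:

# hypothetical service client; the srv import is an assumption
import rospy
from geometry_msgs.msg import PoseStamped
from blue_msgs.srv import InverseKinematics  # assumed location of the srv definition

rospy.init_node("ik_client_demo")
rospy.wait_for_service("inverse_kinematics")
ik_client = rospy.ServiceProxy("inverse_kinematics", InverseKinematics)

target = PoseStamped()
target.header.frame_id = "base_link"   # placeholder frame; the handler transforms it into baselink
target.pose.position.x = 0.4
target.pose.position.z = 0.3
target.pose.orientation.w = 1.0

# keyword names match the request fields read by handle_ik_request above
response = ik_client(end_effector_pose=target, seed_joint_positions=[])
print(response)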
Example #13
class BlueIK:
    def _setup(self):
        # build tf listener
        self.tfBuffer = tf2_ros.Buffer()
        self.listener = tf2_ros.TransformListener(self.tfBuffer)

        # load in ros parameters
        self.baselink = rospy.get_param("blue_hardware/baselink")
        self.endlink = rospy.get_param("blue_hardware/endlink")
        urdf = rospy.get_param("/robot_description")
        flag, self.tree = kdl_parser.treeFromString(urdf)

        # build kinematic chain and fk and jacobian solvers
        chain_ee = self.tree.getChain(self.baselink, self.endlink)
        self.fk_ee = kdl.ChainFkSolverPos_recursive(chain_ee)
        self.jac_ee = kdl.ChainJntToJacSolver(chain_ee)

        # building robot joint state
        self.num_joints = chain_ee.getNrOfJoints()

        self.joint_names = rospy.get_param("blue_hardware/joint_names")
        self.joint_names = self.joint_names[:self.num_joints]
        if self.debug:
          rospy.loginfo(self.joint_names)

        self.joints = kdl.JntArray(self.num_joints)

        # TODO: move this into a separate abstract class
        self.ik = TRAC_IK(self.baselink,
                     self.endlink,
                     urdf,
                     0.01,
                     1e-4,
                    "Distance")
                    # "Manipulation2")
                    # "Manipulation1")

    def publish_ik_sol(self, target, joints):
        # target is a target pose object
        # joints is the starting joint seed
        seed_state = []
        for j in joints:
            seed_state.append(j)

        result = self.ik.CartToJnt(seed_state,
                                   target.position.x, target.position.y, target.position.z,
                                   target.orientation.x, target.orientation.y, target.orientation.z, target.orientation.w)
        if len(result) != self.num_joints:
            return
        msg = Float64MultiArray()
        msg.data = result
        if self.debug:
            rospy.logerr("ik result")
            rospy.logerr(result)
        self.command_pub.publish(msg)
        self.command_pub_ctc.publish(msg)

    def update_joints(self, joint_msg):
        if self.first:
            return

        temp_joints = kdl.JntArray(self.num_joints)
        for i, n in enumerate(self.joint_names):
            index = joint_msg.name.index(n)
            temp_joints[i] = joint_msg.position[index]
        self.joints = temp_joints

        ## TODO: publish the result separately
        self.publish_ik_sol(self.target_pose, self.joints)


    def command_callback(self, msg):
        trans = self.tfBuffer.lookup_transform(self.baselink, msg.header.frame_id, rospy.Time())
        msg = tf2_geometry_msgs.do_transform_pose(msg, trans)

        temp_target = Pose()

        temp_target.position.x = msg.pose.position.x # x
        temp_target.position.y = msg.pose.position.y # y
        temp_target.position.z = msg.pose.position.z # z

        temp_target.orientation.x = msg.pose.orientation.x # qx
        temp_target.orientation.y = msg.pose.orientation.y # qy
        temp_target.orientation.z = msg.pose.orientation.z # qz
        temp_target.orientation.w = msg.pose.orientation.w # qw

        self.target_pose = temp_target

        if self.first:
            self.first = False

    def __init__(self, debug=False):
        rospy.init_node("blue_ik")
        self.debug = debug
        if self.debug:
            self.debug_count = 0

        self._setup()

        self.target_pose = Pose()
        self.first = True

        rospy.Subscriber("pose_target/command", PoseStamped, self.command_callback)
        self.command_pub = rospy.Publisher("blue_controllers/joint_position_controller/command", Float64MultiArray, queue_size=1)
        self.command_pub_ctc = rospy.Publisher("blue_controllers/joint_ctc/command", Float64MultiArray, queue_size=1)
        rospy.Subscriber("/joint_states", JointState, self.update_joints)
Example #14
0
# imports assumed by this snippet (not shown in the original)
import time
from random import random

import rospy
from trac_ik_python.trac_ik_wrap import TRAC_IK


def main():
	urdf = rospy.get_param('/robot_description')
	# print("urdf: ", urdf)
	rospy.loginfo('Constructing IK solver...')
	ik_solver = TRAC_IK("link0", "link7", urdf,
						0.005,  # default seconds
						1e-5,  # default epsilon
						"Speed")
	# print("Number of joints:")
	# print(ik_solver.getNrOfJointsInChain())
	# print("Joint names:")
	# print(ik_solver.getJointNamesInChain(urdf))
	# print("Link names:")
	# print(ik_solver.getLinkNamesInChain())

	qinit = [0.] * 7  #Initial status of the joints as seed.
	x = y = z = 0.0
	rx = ry = rz = 0.0
	rw = 1.0
	bx = by = bz = 0.001
	brx = bry = brz = 0.1

	# Generate a set of random coords in the arm workarea approx
	NUM_COORDS = 200
	rand_coords = []
	for _ in range(NUM_COORDS):
		x = random() * 0.5
		y = random() * 0.6 - 0.3
		z = random() * 0.7 - 0.35
		rand_coords.append((x, y, z))

	# Check some random coords with fixed orientation
	avg_time = 0.0
	num_solutions_found = 0
	for x, y, z in rand_coords:
		ini_t = time.time()
		sol = ik_solver.CartToJnt(qinit,
								  x, y, z,
								  rx, ry, rz, rw,
								  bx, by, bz,
								  brx, bry, brz)
		fin_t = time.time()
		call_time = fin_t - ini_t
		# print "IK call took: " + str(call_time)
		avg_time += call_time
		if sol:
			# print "X, Y, Z: " + str( (x, y, z) )
			print "SOL: " + str(sol)
			num_solutions_found += 1
	avg_time = avg_time / NUM_COORDS

	print()
	print("Found " + str(num_solutions_found) + " of 200 random coords")
	print("Average IK call time: " + str(avg_time))
	print()

	# Check if orientation bounds work
	avg_time = 0.0
	num_solutions_found = 0
	brx = bry = brz = 9999.0  # We don't care about orientation
	for x, y, z in rand_coords:
		ini_t = time.time()
		sol = ik_solver.CartToJnt(qinit,
								  x, y, z,
								  rx, ry, rz, rw,
								  bx, by, bz,
								  brx, bry, brz)
		fin_t = time.time()
		call_time = fin_t - ini_t
		# print "IK call took: " + str(call_time)
		avg_time += call_time
		if sol:
			# print "X, Y, Z: " + str( (x, y, z) )
			# print "SOL: " + str(sol)
			num_solutions_found += 1

	avg_time = avg_time / NUM_COORDS
	print()
	print("Found " + str(num_solutions_found) + " of 200 random coords ignoring orientation")
	print("Average IK call time: " + str(avg_time))
	print()

	# Check if big position and orientation bounds work
	avg_time = 0.0
	num_solutions_found = 0
	bx = by = bz = 9999.0
	brx = bry = brz = 9999.0
	for x, y, z in rand_coords:
		ini_t = time.time()
		sol = ik_solver.CartToJnt(qinit,
								  x, y, z,
								  rx, ry, rz, rw,
								  bx, by, bz,
								  brx, bry, brz)
		fin_t = time.time()
		call_time = fin_t - ini_t
		# print "IK call took: " + str(call_time)
		avg_time += call_time
		if sol:
			# print "X, Y, Z: " + str( (x, y, z) )
			# print "SOL: " + str(sol)
			num_solutions_found += 1

	avg_time = avg_time / NUM_COORDS

	print()
	print("Found " + str(num_solutions_found) + " of 200 random coords ignoring everything")
	print("Average IK call time: " + str(avg_time))
	print()


# entry point (not in the original snippet)
if __name__ == '__main__':
	main()
Example #15
0
class IK(object):
    def __init__(self,
                 base_link,
                 tip_link,
                 timeout=0.005,
                 epsilon=1e-5,
                 solve_type="Speed",
                 urdf_string=None):
        """
        Create a TRAC_IK instance and keep track of it.

        :param str base_link: Starting link of the chain.
        :param str tip_link: Last link of the chain.
        :param float timeout: Timeout in seconds for the IK calls.
        :param float epsilon: Error epsilon.
        :param str solve_type: Type of solver, can be:
            Speed (default), Distance, Manipulation1, Manipulation2
        :param str urdf_string: Optional arg, if not given URDF is taken from
            the param server at /robot_description.
        """
        if urdf_string is None:
            raise NotImplementedError("Can't load urdf_string")
            #urdf_string = rospy.get_param('/robot_description')
        self._urdf_string = urdf_string
        self._timeout = timeout
        self._epsilon = epsilon
        self._solve_type = solve_type
        self.base_link = base_link
        self.tip_link = tip_link
        self._ik_solver = TRAC_IK(self.base_link, self.tip_link,
                                  self._urdf_string, self._timeout,
                                  self._epsilon, self._solve_type)
        self.number_of_joints = self._ik_solver.getNrOfJointsInChain()
        self.joint_names = self._ik_solver.getJointNamesInChain(
            self._urdf_string)
        self.link_names = self._ik_solver.getLinkNamesInChain()

    def get_ik(self,
               qinit,
               x,
               y,
               z,
               rx,
               ry,
               rz,
               rw,
               bx=1e-5,
               by=1e-5,
               bz=1e-5,
               brx=1e-3,
               bry=1e-3,
               brz=1e-3):
        """
        Do the IK call.

        :param list of float qinit: Initial status of the joints as seed.
        :param float x: X coordinates in base_frame.
        :param float y: Y coordinates in base_frame.
        :param float z: Z coordinates in base_frame.
        :param float rx: X quaternion coordinate.
        :param float ry: Y quaternion coordinate.
        :param float rz: Z quaternion coordinate.
        :param float rw: W quaternion coordinate.
        :param float bx: X allowed bound.
        :param float by: Y allowed bound.
        :param float bz: Z allowed bound.
        :param float brx: rotation over X allowed bound.
        :param float bry: rotation over Y allowed bound.
        :param float brz: rotation over Z allowed bound.

        :return: joint values or None if no solution found.
        :rtype: tuple of float.
        """
        if len(qinit) != self.number_of_joints:
            raise Exception(
                "qinit has length %i and it should have length %i" %
                (len(qinit), self.number_of_joints))
        solution = self._ik_solver.CartToJnt(qinit, x, y, z, rx, ry, rz, rw,
                                             bx, by, bz, brx, bry, brz)
        if solution:
            return solution
        else:
            return None

    def get_joint_limits(self):
        """
        Return lower bound limits and upper bound limits for all the joints
        in the order of the joint names.
        """
        lb = self._ik_solver.getLowerBoundLimits()
        ub = self._ik_solver.getUpperBoundLimits()
        return lb, ub

    def set_joint_limits(self, lower_bounds, upper_bounds):
        """
        Set joint limits for all the joints.

        :arg list lower_bounds: List of float of the lower bound limits for
            all joints.
        :arg list upper_bounds: List of float of the upper bound limits for
            all joints.
        """
        if len(lower_bounds) != self.number_of_joints:
            raise Exception(
                "lower_bounds array size mismatch, it's size %i, should be %i"
                % (len(lower_bounds), self.number_of_joints))

        if len(upper_bounds) != self.number_of_joints:
            raise Exception(
                "upper_bounds array size mismatch, it's size %i, should be %i"
                % (len(upper_bounds), self.number_of_joints))
        self._ik_solver.setKDLLimits(lower_bounds, upper_bounds)
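
# Not part of the original source: a minimal usage sketch for the IK wrapper
# above. The URDF path and link names are placeholders; in practice the URDF
# string would come from a file or the /robot_description parameter.
if __name__ == '__main__':
    with open('/path/to/robot.urdf') as f:  # placeholder path
        urdf = f.read()

    ik = IK('base_link', 'tool0', urdf_string=urdf)  # example link names
    seed = [0.0] * ik.number_of_joints

    # solve for a pose at (0.4, 0.0, 0.3) in the base frame, identity orientation
    solution = ik.get_ik(seed, 0.4, 0.0, 0.3, 0.0, 0.0, 0.0, 1.0)
    if solution is not None:
        print(dict(zip(ik.joint_names, solution)))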
Example #16
0
from trac_ik_python.trac_ik_wrap import TRAC_IK
import rospy
from numpy.random import random
import time

if __name__ == '__main__':
    # Needed beforehand: roslaunch pr2_description upload_pr2.launch
    urdf = rospy.get_param('/robot_description')
    # params of constructor:
    # base_link, tip_link, urdf_string, timeout, epsilon, solve_type="Speed"
    # solve_type can be: Distance, Speed, Manipulation1, Manipulation2
    ik_solver = TRAC_IK(
        "torso_lift_link",
        "r_wrist_roll_link",
        urdf,
        0.005,  # default seconds
        1e-5,  # default epsilon
        "Speed")
    print("Number of joints:")
    print(ik_solver.getNrOfJointsInChain())
    print("Joint names:")
    print(ik_solver.getJointNamesInChain(urdf))
    print("Link names:")
    print(ik_solver.getLinkNamesInChain())

    qinit = [0.] * 7
    x = y = z = 0.0
    rx = ry = rz = 0.0
    rw = 1.0
    bx = by = bz = 0.001
Example #17
0
class BaxterEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """Continuous control env, 9-dim observation.
    state space: gripper position, (block - gripper) and (target - block)
    random restarts for block and target on the table
    reward function: -1 while the goal is not reached, 0 once it is
    actions: (delta_x, delta_y, delta_z, gap), each delta scaled to a 5cm push
    starting state: (0.63, 0.2, 0.59, 0.27, 0.55, 0.3)
    max_num_steps = max_len (default 10)
    """
    def __init__(self, max_len=10):
        dirname = os.path.dirname(os.path.abspath(__file__))
        mujoco_env.MujocoEnv.__init__(
            self,
            os.path.join(
                dirname,
                "mjc/baxter_orient_left_cts_with_grippers_modified.xml"), 1)
        utils.EzPickle.__init__(self)

        ## mujoco things
        # task space action space

        low = np.array([-1., -1., -1., -1.])
        high = np.array([1., 1., 1., 1.])

        self.action_space = spaces.Box(low, high)

        self.tuck_pose = {
            'left': np.array([-0.08, -1.0, -1.19, 1.94, 0.67, 1.03, -0.50])
        }

        self.start_pose = {
            'left': np.array([-0.21, -0.75, -1.4, 1.61, 0.60, 0.81, -0.52])
        }

        ## starting pose
        self.init_qpos = self.data.qpos.copy().flatten()
        self.init_qpos[1:8] = np.array(self.start_pose["left"]).T

        ## ik setup
        urdf_filename = osp.join(dirname, "urdf", "baxter_modified.urdf")

        with open(urdf_filename) as f:
            urdf = f.read()

        # solve type: Speed, Distance, Manipulation1, Manipulation2
        self.ik_solver = TRAC_IK(
            "base",
            "left_gripper",
            urdf,
            0.005,  # default seconds
            1e-5,  # default epsilon
            "Speed")

        self.old_state = np.zeros((9, ))
        self.max_num_steps = max_len
        print("INIT DONE!")

    ## gym methods

    def reset_model(self):
        # print("last state:",self.old_state)
        # print("New Episode!")

        reset_state = self.np_random.uniform() > 0.5

        if reset_state:
            grasped_qpos = np.array([
                0., 1.85833336e-01, 1.13869066e-01, -1.57743078e+00,
                1.87249089e+00, 1.67964818e+00, 1.57880024e+00, 2.23699321e+00,
                2.68245581e-02, -2.46668516e-02, 6.09046750e-01,
                2.73356216e-01, 2.50803949e-03, 9.99880925e-01,
                -1.19302114e-02, 9.78048682e-03, 3.85171977e-04,
                6.09046750e-01 - 0.55, 2.73356216e-01 - 0.3
            ])
            # 3.85171977e-04  ,-1.21567676e-01  , 2.00143005e-01])

            ## random target location
            # grasped_qpos[-3:-1] = grasped_qpos[-3:-1] + self.np_random.uniform(low=-0.15, high=0.15, size=2)
            # grasped_qpos[-1] = grasped_qpos[-1] + self.np_random.uniform(low=0.1, high=0.3, size=1)

            qvel = self.init_qvel
            self.set_state(grasped_qpos, qvel)

        else:
            qpos = self.init_qpos + self.np_random.uniform(
                low=-.002, high=.002, size=self.model.nq)
            qvel = self.init_qvel + self.np_random.uniform(
                low=-.002, high=.002, size=self.model.nv)

            ## random box location
            qpos[10:12] = qpos[10:12] + self.np_random.uniform(
                low=-0.15, high=0.15, size=2)

            ## random target location
            qpos[-2:] = qpos[10:12] - np.array([0.55, 0.3])
            # qpos[-3:-1] = qpos[-3:-1] + self.np_random.uniform(low=-0.15, high=0.15, size=2)
            # qpos[-1] = qpos[-1] + self.np_random.uniform(low=0.1, high=0.3, size=1)

            self.set_state(qpos, qvel)

            target_pos = np.array(
                list(qpos[10:12] +
                     self.np_random.uniform(low=-0.05, high=0.05, size=2)) +
                [0.1])
            target_quat = np.array([1.0, 0.0, 0.0, 0])
            target = np.concatenate((target_pos, target_quat))
            action_jt_space = self.do_ik(ee_target=target,
                                         jt_pos=self.data.qpos[1:8].flat)
            if action_jt_space is not None:
                self.apply_action(action_jt_space)

            self.close_gripper(gap=1)
        ## for calculating velocities
        # self.old_state = np.zeros((6,))

        self.num_step = 0
        ob = self._get_obs()
        gripper_pose = ob[:3]
        box_pose = ob[3:6]
        target_pose = ob[6:9]

        relative_ob = np.concatenate(
            [gripper_pose, box_pose - gripper_pose, target_pose - box_pose])
        return relative_ob

    def viewer_setup(self):
        # cam_pos = np.array([0.1, 0.0, 0.7, 0.01, -45., 0.])
        cam_pos = np.array([1.0, 0.0, 0.7, 0.5, -45, 180])
        self.set_cam_position(self.viewer, cam_pos)

    def _get_obs(self):
        ee_x, ee_y, ee_z = self.data.site_xpos[0][:3]
        box_x, box_y, box_z = self.data.site_xpos[3][:3]
        target_x, target_y, target_z = self.data.site_xpos[4][:3]

        state = np.array([
            ee_x, ee_y, ee_z, box_x, box_y, box_z, target_x, target_y, target_z
        ])
        vel = (state - self.old_state) / self.dt  # computed but not included in the observation

        self.old_state = state.copy()
        return state

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        self.model.data.qpos = qpos
        self.model.data.qvel = qvel
        self.model._compute_subtree()  #pylint: disable=W0212
        self.model.forward()

    ## my methods
    def apply_action(self, action):
        # print("cmd",action)
        # print("curr jt pos", self.data.qpos[1:8].T)
        ctrl = self.data.ctrl.copy()
        # print(ctrl.shape)
        ctrl[:7, 0] = np.array(action)
        self.data.ctrl = ctrl
        self.do_simulation(ctrl, 1000)
        # print("next jt pos", self.data.qpos[1:8].T)

    def do_ik(self, ee_target, jt_pos):

        # print("starting to do ik")
        # print(ee_target[:3])

        # Populate seed with current angles if not provided
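        # assumption (import not shown in this snippet): `tracik` refers to the
        # SWIG wrapper module, e.g. `from trac_ik_python import trac_ik_wrap as tracik`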
        init_pose_trac = tracik.DoubleVector()
        for i in range(7):
            init_pose_trac.push_back(jt_pos[i])

        x, y, z, qx, qy, qz, qw = ee_target
        qout = list(
            self.ik_solver.CartToJnt(init_pose_trac, x, y, z, qx, qy, qz, qw))

        if (len(qout) > 0):
            # print("ik sol:",qout)
            return qout
        else:
            print("!no result found")
            return None

    def close_gripper(self, gap=0):
        # print("before grip location", self.data.site_xpos[0])
        # qpos = self.data.qpos.copy().flatten()
        # qpos[8] = (gap+1)*0.020833
        # qpos[9] = -(gap+1)*0.020833
        # qvel = self.data.qvel.copy().flatten()
        # print(qpos.shape, qvel.shape)
        # self.set_state(qpos, qvel)
        # print("after grip location", self.data.site_xpos[0])
        # print("before grip", self.data.ctrl)
        ctrl = self.data.ctrl.copy()
        ctrl[:7, 0] = self.data.qpos[1:8].flatten()
        ctrl[7, 0] = (gap + 1) * 0.020833
        ctrl[8, 0] = -(gap + 1) * 0.020833
        self.data.ctrl = ctrl

        self.do_simulation(ctrl, 1000)
        # print("qpos", self.data.qpos[1:8].T)
        # print("before grip", self.data.ctrl)

    def set_cam_position(self, viewer, cam_pos):
        for i in range(3):
            viewer.cam.lookat[i] = cam_pos[i]
        viewer.cam.distance = cam_pos[3]
        viewer.cam.elevation = cam_pos[4]
        viewer.cam.azimuth = cam_pos[5]
        viewer.cam.trackbodyid = -1

    def _step(self, action):

        ## hack for the init of mujoco.env
        if (action.shape[0] > 4):
            return np.zeros((9, 1)), 0, False, {}

        self.num_step += 1
        old_action_jt_space = self.data.qpos[1:8].T.copy()

        ## parsing of primitive actions
        delta_x, delta_y, delta_z, gap = action

        # print("grip prev controller:",self.data.qpos[1:8].T)
        self.close_gripper(gap)
        # print("delta x:%.4f, y:%.4f"%(delta_x, delta_y))
        x, y, z = self.old_state[:3].copy()
        # print("old x:%.4f, y:%.4f, z:%.4f"%(x,y,z))

        curr_out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6) or (
            z < -0.05 or z > 0.5)

        x += delta_x * 0.05
        y += delta_y * 0.05
        z += delta_z * 0.05
        # print("x:%.4f, y:%.4f, z:%.4f"%(x,y,z))
        # print("prev controller:",self.data.qpos[1:8].T)
        # print("x:%.4f,y:%.4f"%(0.2*x + 0.6 , 0.3*y + 0.3))

        out_of_bound = (x < 0.4 or x > 0.8) or (y < 0.0 or y > 0.6) or (z < -0.05 or z > 0.5)

        if np.abs(delta_x * 0.05) > 0.0001 or np.abs(
                delta_y * 0.05) > 0.0001 or np.abs(delta_z * 0.05) > 0.0001:
            target_pos = np.array([x, y, z])
            target_quat = np.array([1.0, 0.0, 0.0, 0])
            target = np.concatenate((target_pos, target_quat))
            action_jt_space = self.do_ik(ee_target=target,
                                         jt_pos=self.data.qpos[1:8].flat)
            if (action_jt_space is not None) and (not out_of_bound):
                # print("ik:", action_jt_space)
                self.apply_action(action_jt_space)
            else:
                action_jt_space = old_action_jt_space.copy()

        else:
            action_jt_space = old_action_jt_space.copy()

        # print("controller:",self.data.qpos[1:8].T)
        ## getting state
        ob = self._get_obs()
        gripper_pose = ob[:3]
        box_pose = ob[3:6]
        target_pose = ob[6:9]
        #print("new gripper_pose", gripper_pose, "block pose:", box_pose)

        ## reward function definition
        reward_reaching_goal = np.linalg.norm(
            box_pose - target_pose) < 0.05  #assume: my robot has 5cm error
        total_reward = -1 * (not reward_reaching_goal)

        box_x, box_y, box_z = self.data.site_xpos[3]

        info = {}
        if reward_reaching_goal == 1:
            done = True
            info["done"] = "goal reached"
        elif box_z < -0.02 or box_x < 0.4 or box_x > 0.8 or box_y < 0.0 or box_y > 0.6:
            done = True
            info["done"] = "box out of bounds"
            total_reward -= (self.max_num_steps - self.num_step) + 5
        elif curr_out_of_bound:
            ## simulation got weird
            done = True
            info["done"] = "simulation unstable"
            total_reward -= (self.max_num_steps - self.num_step) + 5
        elif (self.num_step > self.max_num_steps):
            done = True
            info["done"] = "max_steps_reached"
        else:
            done = False

        info['absolute_ob'] = ob.copy()
        relative_ob = np.concatenate(
            [gripper_pose, box_pose - gripper_pose, target_pose - box_pose])
        return relative_ob, total_reward, done, info

    def apply_hindsight(self, states, actions, goal_state):
        '''generates hindsight rollout based on the goal
        '''
        goal = states[-1][3:6] + states[-1][:3]  ## this is the absolute goal location
        her_states, her_rewards = [], []
        for i in range(len(actions)):
            state = states[i]

            gripper_pose = state[:3]
            box_pose = state[3:6] + gripper_pose
            target_pose = state[6:9] + box_pose

            state[-3:] = goal.copy() - box_pose
            reward = self.calc_reward(state, goal, actions[i])
            her_states.append(state)
            her_rewards.append(reward)

        goal_state[-3:] = np.array([0., 0., 0.])
        her_states.append(goal_state)

        return her_states, her_rewards

    def calc_reward(self, state, goal, action):

        gripper_pose = state[:3]
        box_pose = state[3:6] + gripper_pose
        target_pose = goal + box_pose

        ## reward function definition
        reward_reaching_goal = np.linalg.norm(box_pose - target_pose) < 0.05
        total_reward = -1 * (not reward_reaching_goal)
        return total_reward
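
# Not part of the original source: a rough usage sketch, assuming the
# old-style gym MujocoEnv API in which reset() and step() dispatch to the
# reset_model() and _step() methods defined above.
if __name__ == '__main__':
    env = BaxterEnv(max_len=10)
    ob = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()  # (delta_x, delta_y, delta_z, gap)
        ob, reward, done, info = env.step(action)
    print(info.get("done", "episode finished"))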