def _set_action(self, action): assert action.shape == (3,) self.counter += 1 action = action.copy() # ensure that we don't change the action outside of this scope pos_ctrl = action[:3] rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([-1, -1]) assert gripper_ctrl.shape == (2,) if self.counter <= 1: action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) action[2] = 0.56402952 for _ in range(20): utils.mocap_set_action_abs(self.sim, action) self.sim.step() else: pos_ctrl *= 0.05 # limit maximum change in position action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.mocap_set_action(self.sim, action) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) if self.counter >= 3: # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025: action = np.array([0,0,-0.05,1,0,1,0,1,1]) utils.mocap_set_action(self.sim, action) for _ in range(5): utils.ctrl_set_action(self.sim, action) self.sim.step()
def _set_action(self, action): assert action.shape == (3, ) self.counter += 1 action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl = action[:3] rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([1, 1]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) if self.counter <= 1: action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) if self.use_close_loop: action[2] = 0.46470766 for _ in range(20): utils.mocap_set_action_abs(self.sim, action) self.sim.step() else: pos_ctrl *= 0.05 # limit maximum change in position action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.mocap_set_action(self.sim, action) # Apply action to simulation. # utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) if self.counter >= 2: # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025: self.sim.step() pos_ctrl = np.array([0.0, 0.0, 0.0]) gripper_ctrl = np.array([-1, -1]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) # logging # grip_pos = self.sim.data.get_site_xpos('robot0:grip') # obs_pos = self.sim.data.get_site_xpos('object0') # offset = obs_pos - grip_pos # print(offset, np.linalg.norm(offset, axis = -1)) for _ in range(20): utils.ctrl_set_action(self.sim, action) self.sim.step() for _ in range(10): pos_ctrl = np.array([0.0, 0.02, 0.0]) gripper_ctrl = np.array([-1, -1]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) self.sim.step()
def _set_action(self, action): assert action.shape == (3, ) self.counter += 1 action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl = action[:3] rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([0, 0]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) # Apply action to simulation. # utils.ctrl_set_action(self.sim, action) if self.counter <= 1: action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) if self.use_close_loop: action[2] = 0.41644691 for _ in range(20): utils.mocap_set_action_abs(self.sim, action) self.sim.step() else: pos_ctrl *= 0.05 # limit maximum change in position action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.mocap_set_action(self.sim, action) if self.counter >= 3: for _ in range(10): self.sim.step() pos_ctrl = np.array([0.0, 0.02, 0.0]) gripper_ctrl = np.array([0, 0]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) self.sim.step()
def _reset_sim(self): self.sim.set_state(self.initial_state) if self.random_obj: min_color_diff = 0 while min_color_diff < 0.1: rgba = np.random.uniform(0, 0.5, size=3) if self.train_random: rgba[2] = 1 elif self.test_random: rgba[1] = 1 else: rgba *= 2 color_diff = np.abs(self.sim.model.geom_rgba.copy()[:-1, :3] - rgba) min_color_diff = min(np.sum(color_diff, axis=1)) self.sim.model.geom_rgba[-1][:3] = rgba # Randomize start position of hole. if self.goal_type == "fixed": offset = np.array([0.02, 0.02]) else: if self.limit_dir: if self.train_random: offset = np.array([ self.np_random.uniform(0, self.obj_range), self.np_random.uniform(-self.obj_range, self.obj_range) ]) elif self.test_random: offset = np.array([ self.np_random.uniform(-self.obj_range, 0), self.np_random.uniform(-self.obj_range, self.obj_range) ]) else: offset = self.np_random.uniform(-self.obj_range, self.obj_range, size=2) norm = np.linalg.norm(offset, axis=-1) if norm < 0.05: offset = offset / norm * 0.05 hole_qpos = self.sim.data.get_joint_qpos('table_top:joint') assert hole_qpos.shape == (7, ) hole_qpos[0] = hole_qpos[0] + offset[0] hole_qpos[1] = hole_qpos[1] + offset[1] self.sim.data.set_joint_qpos('table_top:joint', hole_qpos) # Randomize start position of object. offset = np.array([0.1, 0.1]) object_xpos = self.initial_gripper_xpos[:2] + offset object_qpos = self.sim.data.get_joint_qpos('object0:joint') assert object_qpos.shape == (7, ) object_qpos[:2] = object_xpos self.sim.data.set_joint_qpos('object0:joint', object_qpos) self.sim.forward() # move gripper to grasp box rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([1, 1]) pos_ctrl = np.array([0, 0, 0]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) for _ in range(10): utils.ctrl_set_action(self.sim, action) self.sim.step() box_pose = self.sim.data.get_site_xpos('handle0').copy() pos_ctrl = box_pose.copy() pos_ctrl[2] = box_pose[2] + 0.2 action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.mocap_set_action_abs(self.sim, action) box_pose = self.sim.data.get_site_xpos('handle0').copy() pos_ctrl = box_pose.copy() pos_ctrl[2] = box_pose[2] action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.mocap_set_action_abs(self.sim, action) action = np.array([0, 0, 0, 1, 0, 1, 0, -1, -1]) for _ in range(20): utils.ctrl_set_action(self.sim, action) self.sim.step() pos_ctrl = self.initial_gripper_xpos.copy() action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.mocap_set_action_abs(self.sim, action) return True