def _reset_sim(self): self.sim.set_state(self.initial_state) # Randomize start position of object. if self.has_object: object_xpos = self.initial_gripper_xpos[:2] while np.linalg.norm(object_xpos - self.initial_gripper_xpos[:2]) < 0.1: object_xpos = self.initial_gripper_xpos[:2] + self.np_random.uniform(-self.obj_range, self.obj_range, size=2) object_xpos = np.array([0.7, -0.5]) # + self.np_random.uniform(-0.02, 0.07, size=2) object_xpos[0] += self.np_random.uniform(-0.07, 0.4) object_xpos[1] += self.np_random.uniform(-0.25, 0.2) object_qpos = self.sim.data.get_joint_qpos('object0:joint') # object_qpos1 = self.sim.data.get_joint_qpos('object1:joint') assert object_qpos.shape == (7,) if self.debug_print: print("object_xpos0: ", object_xpos) # print("object1 pos: ", object_qpos1) object_qpos[:2] = object_xpos object_qpos[2] = 0.5 # object_qpos[0] += 0.3 # object_qpos[1] -= 0.1 if self.debug_print: print("set_joint_qpos object_qpos: ", object_qpos) self.sim.data.set_joint_qpos('object0:joint', object_qpos) # print("get_body_xquat: ", self.sim.data.get_body_xquat('r_gripper_palm_link')) # set random gripper position # for i in range(3): # gripper_target[i] += self.np_random.uniform(-0.2, 0.2) # print("gripper target random: ", gripper_target) # set random husky initial position base_ctrl = [0.0, 0.0] base_ctrl[0] += self.np_random.uniform(-1.0, -0.6) # position base_ctrl[1] += self.np_random.uniform(-0.2, 0.2) # rotation gripper_control = self.np_random.uniform(-1.0, 1.0) gripper_control = self.gripper_format_action(gripper_control) gripper_target = np.array([0.5, -0.3, 0.6]) gripper_target[0] += base_ctrl[0] gripper_rotation = np.array([0, 0.707, 0.707, 0]) #(0, 0, -90) # for i in range(3): gripper_target[0] += self.np_random.uniform(-0.0, 0.1) # x gripper_target[1] += self.np_random.uniform(-0.1, 0.1) # y gripper_target[2] += self.np_random.uniform(-0.1, 0.1) # z self.sim.data.set_mocap_pos('gripper_r:mocap', gripper_target) self.sim.data.set_mocap_quat('gripper_r:mocap', gripper_rotation) action = np.concatenate([gripper_target, gripper_rotation, base_ctrl, gripper_control]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) # base control + gripper control # utils.mocap_set_action(self.sim, action) # arm control in cartesion (x, y, z) for _ in range(15): self.sim.step() self.sim.forward() return True
def _set_action(self, action): assert action.shape == (6,) action = action.copy() # ensure that we don't change the action outside of this scope deviation = sum(abs(self.sim.data.qpos - self.sim_ctrl_q)) # print(deviation ) if deviation > 0.35: # reset control to current position if deviation too high self.set_force_for_q(self.sim.data.qpos) #print('deviation compensated') if self.ctrl_type == "joint": action *= 0.05 # limit maximum change in position # Apply action #scalarsto simulation. utils.ctrl_set_action(self.sim, action) elif self.ctrl_type == "cartesian": dx = action.reshape(6, ) max_limit = self.dx_max # limitation of operation space, we only allow small rotations adjustments in x and z directions, moving in y direction x_now = numpy.concatenate((self.sim.data.get_body_xpos("gripper_dummy_heg"), self.sim.data.get_body_xquat("gripper_dummy_heg"))) x_then = x_now[:3] + dx[:3]*max_limit #diff_now = numpy.array(x_now - self.init_x).reshape(7,) diff_then = numpy.array(x_then[:3] - self.init_x[:3]) barriers_min = numpy.array([-0.4, -0.8, -0.4]) barriers_max = numpy.array([0.4, 0.8, 0.4]) #for i in range(3): # if (barriers_min[i] < diff_then[i] < barriers_max[i]): # dx[i] = dx[i] * max_limit # elif barriers_min[i] > diff_then[i]: # dx[i] = + max_limit # elif barriers_max[i] < diff_then[i]: # dx[i] = - max_limit for i in range(6): dx[i] = dx[i] * max_limit if self.corrective: # bias in direction of assembly bias_dir = -self.last_obs[:6] # print(bias_dir) for i in range(3,6): if bias_dir[i] > 0.5: print(i, bias_dir[i]) bias_dir[i] = bias_dir[i] # slower rotations bias_dir /= numpy.linalg.norm(bias_dir) # print(bias_dir) dx += bias_dir * max_limit * 0.5 dx.reshape(6, 1) rot_mat = self.sim.data.get_body_xmat('gripper_dummy_heg') dx_ = numpy.concatenate([rot_mat.dot(dx[:3]), rot_mat.dot(dx[3:])]) ## transform to right coordinate system #dx_[2]+= 1 dq = self.get_dq(dx_) dq += numpy.random.normal(self.dq_mean_si, self.dq_std_si, 6) q = self.sim_ctrl_q + dq self.set_force_for_q(q)
def _set_action(self, action): assert action.shape == (self.n_actions, ) # 6 mobile base action = action.copy( ) # ensure that we don't change the action outside of this scope print("_set_action:", action) pos_ctrl, base_ctrl, gripper_ctrl = action[:3], action[3:-1], action[ -1] # pos_ctrl, gripper_ctrl = action[:3], action[3:] pos_ctrl *= 0.03 # limit maximum change in position base_ctrl *= 0.01 # rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion rot_ctrl = [0, 0.707, 0.707, 0] #(0 0 0) # rot_ctrl = [0.707, 0.0, 0.0, -0.707] # (0 0 -90) # rot_ctrl = np.array([0.5, -0.5, 0.5, -0.5]) #(-90, 90, 0) # rot_ctrl = np.array([0.5, 0.5, 0.5, -0.5]) #(90, 0, 90) gripper down # rot_ctrl = np.array([0.707, 0.0, 0.0, -0.707]) #(0, 0, -90) # gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) if self.gripper_close: gripper_ctrl = -1.0 else: gripper_ctrl = 1.0 gripper_ctrl = self.gripper_format_action(gripper_ctrl) # assert gripper_ctrl.shape == (2,) assert gripper_ctrl.shape == (self.gripper_actual_dof, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, base_ctrl, gripper_ctrl]) # action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) # base control + gripper control utils.mocap_set_action(self.sim, action) # arm control in cartesion (x, y, z)
def _set_action(self, ac): current_grid_cell = self._get_obs()['observation'] pos_ctrl, gripper_ctrl = np.zeros(3), np.zeros(1) if ac == 0: if current_grid_cell[0] >= 1: pos_ctrl[0] = -self.cell_size elif ac == 1: if current_grid_cell[0] <= self.grid_size - 1: pos_ctrl[0] = self.cell_size elif ac == 2: if current_grid_cell[1] >= 1: pos_ctrl[1] = -self.cell_size elif ac == 3: if current_grid_cell[1] <= self.grid_size - 1: pos_ctrl[1] = self.cell_size else: raise Exception('Invalid action') rot_ctrl = [1.0, 0.0, 1.0, 0.0] # Force the gripper to be at table height gripper_pos = self.sim.data.get_site_xpos('robot0:grip') pos_ctrl[2] = self.height_offset - gripper_pos[2] action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action in the simulator using IK utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (6, ) action = action.copy( ) # ensure that we don't change the action outside of this scope deviation = sum(abs(self.sim.data.qpos - self.sim.data.ctrl)) # print(deviation ) if deviation > 0.35: # reset control to current position if deviation too high self.sim.data.ctrl[:] = self.sim.data.qpos + self.get_dq( [0, 0, 0.005, 0, 0, 0]) print('deviation compensated') if self.ctrl_type == "joint": action *= 0.05 # limit maximum change in position # Apply action #scalarsto simulation. utils.ctrl_set_action(self.sim, action) elif self.ctrl_type == "cartesian": dx = action.reshape(6, ) max_limit = 0.0001 * 10 # limitation of operation space, we only allow small rotations adjustments in x and z directions, moving in y direction x_now = numpy.concatenate( (self.sim.data.get_body_xpos("gripper_dummy_heg"), self.sim.data.get_body_xquat("gripper_dummy_heg"))) x_then = x_now[:3] + dx[:3] * max_limit #diff_now = numpy.array(x_now - self.init_x).reshape(7,) diff_then = numpy.array(x_then[:3] - self.init_x[:3]) barriers_min = numpy.array([-0.1, -0.05, -0.1]) barriers_max = numpy.array([0.1, 0.2, 0.1]) for i in range(3): if (barriers_min[i] < diff_then[i] < barriers_max[i]): dx[i] = dx[i] * max_limit elif barriers_min[i] > diff_then[i]: dx[i] = +max_limit elif barriers_max[i] < diff_then[i]: dx[i] = -max_limit for i in range(3, 6): dx[i] = dx[i] * max_limit if self.corrective: # bias in direction of assembly bias_dir = -self.last_obs[:6] # print(bias_dir) for i in range(3, 6): if bias_dir[i] > 0.5: print(i, bias_dir[i]) bias_dir[i] = bias_dir[i] # slower rotations bias_dir /= np.linalg.norm(bias_dir) # print(bias_dir) dx += bias_dir * max_limit * 0.5 dx.reshape(6, 1) dq = self.get_dq(dx) # print(sum(abs(sim.data.qpos-sim.data.ctrl))) for i in range(6): self.sim.data.ctrl[i] += dq[i]
def _set_action(self, action): assert action.shape == (4, ) action = action.copy() pos_discrete_ctrl, gripper_discrete_ctrl = action[:3], action[3] assert np.all(pos_discrete_ctrl >= 0) scale = 0.04 # Convert discrete to continuous control # 0 - nothing # 1 - (n_bins - 1) / 2 - +scale # (n_bins+1) / 2 - (n_bins - 1) - -scale pos_ctrl = np.zeros_like(pos_discrete_ctrl, dtype=np.float32) for ac in range(3): if pos_discrete_ctrl[ac] == 0: pos_ctrl[ac] = 0. elif pos_discrete_ctrl[ac] > 0 and pos_discrete_ctrl[ac] <= ( self.n_bins - 1) / 2: pos_ctrl[ac] = scale * pos_discrete_ctrl[ac] else: pos_ctrl[ac] = -scale * \ (pos_discrete_ctrl[ac] - (self.n_bins - 1)/2) # Fixed rotation of the end effector, expressed as a quaternion rot_ctrl = [1., 0., 1., 0.] # 0 means close the gripper = -1, 1 means open the gripper = +1 gripper_ctrl = 2 * gripper_discrete_ctrl - 1 gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def step(self, action: np.ndarray): action = np.clip(action, self.action_space.low, self.action_space.high) if self._control_method == 'torque_control': self.forward_dynamics(action) elif self._control_method == 'position_control': assert action.shape == (4, ) action = action.copy() pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.1 # limit the action rot_ctrl = np.array([0., 1., 1., 0.]) gripper_ctrl = -50 if gripper_ctrl < 0 else 50 gripper_ctrl = np.array([gripper_ctrl, -gripper_ctrl]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) ctrl_set_action(self.sim, action) # For gripper mocap_set_action(self.sim, action) # For pos control of the end effector self.sim.step() obs = self.get_current_obs() next_obs = obs['observation'] achieved_goal = obs['achieved_goal'] goal = obs['desired_goal'] gripper_pos = obs['gripper_pos'] reward = self._compute_reward(achieved_goal, goal, gripper_pos) collided = self._is_collided() if collided: reward -= 200 done = (self._goal_distance(achieved_goal, goal) < self._distance_threshold) or collided return Step(next_obs, reward, done)
def _reset_sim(self): self.sim.set_state(self.initial_state) action = np.array([0, 0, 0, 1, 0, 1, 0, 1, 1]) for _ in range(10): utils.ctrl_set_action(self.sim, action) self.sim.step() if self.random_obj: self.sim.model.geom_type[-1] = np.random.choice(2) + 5 min_color_diff = 0 while min_color_diff < 0.1: rgba = np.random.uniform(0, 0.5, size=3) if self.train_random: rgba[1] = 1 size = np.random.uniform(0.02, 0.025, size=3) elif self.test_random: rgba[0] = 1 size = np.random.uniform(0.025, 0.03, size=3) else: rgba *= 2 size = np.random.uniform(0.02, 0.03, size=3) color_diff = np.abs(self.sim.model.geom_rgba.copy()[:-1, :3] - rgba) min_color_diff = min(np.sum(color_diff, axis=1)) self.sim.model.geom_rgba[-1][:3] = rgba self.sim.model.geom_size[-1] = size # Randomize start position of object. if self.goal_type == "fixed": offset = np.array([0.04387432, -0.02397519]) else: if self.limit_dir: if self.train_random: offset = np.array([ self.np_random.uniform(-self.obj_range, 0), self.np_random.uniform(-self.obj_range, self.obj_range) ]) elif self.test_random: offset = np.array([ self.np_random.uniform(0, self.obj_range), self.np_random.uniform(-self.obj_range, self.obj_range) ]) else: offset = self.np_random.uniform(-self.obj_range, self.obj_range, size=2) norm = np.linalg.norm(offset, axis=-1) if norm < 0.1: offset = offset / norm * 0.1 object_xpos = self.initial_gripper_xpos[:2] + offset object_qpos = self.sim.data.get_joint_qpos('object0:joint') assert object_qpos.shape == (7, ) object_qpos[:2] = object_xpos self.sim.data.set_joint_qpos('object0:joint', object_qpos) self.sim.forward() return True
def _set_action(self, action): assert action.shape == (3,) self.counter += 1 action = action.copy() # ensure that we don't change the action outside of this scope pos_ctrl = action[:3] rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([-1, -1]) assert gripper_ctrl.shape == (2,) if self.counter <= 1: action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) action[2] = 0.56402952 for _ in range(20): utils.mocap_set_action_abs(self.sim, action) self.sim.step() else: pos_ctrl *= 0.05 # limit maximum change in position action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.mocap_set_action(self.sim, action) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) if self.counter >= 3: # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025: action = np.array([0,0,-0.05,1,0,1,0,1,1]) utils.mocap_set_action(self.sim, action) for _ in range(5): utils.ctrl_set_action(self.sim, action) self.sim.step()
def _set_action(self, action): assert action.shape == (4, ) if self.mode == "controller": self.viewer.joystick_callback(0) action = self.viewer.action.copy() # get action from user action = action.copy( ) # ensure that we don't change the action outside of this scope if self.mode == "controller": self.actions.append(action.reshape(1, 4).copy()) # hardcoded reshape pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): action = action.copy( ) # ensure that we don't change the action outside of this scope if not self.rotational_control_z: assert action.shape == (4, ) pos_ctrl, gripper_ctrl = action[:3], action[3] rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion else: assert action.shape == (5, ) pos_ctrl, rot_z, gripper_ctrl = action[:3], action[3], action[4] rot_ctrl = Rotation.from_euler('xyz', [180, -90, rot_z * 90], degrees=True).as_quat() pos_ctrl *= 0.05 # limit maximum change in position gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. # gripper finger control (symmetric) utils.ctrl_set_action(self.sim, action) # gripper xyz position control utils.mocap_set_action(self.sim, action)
def _reset_sim(self): self.sim.set_state(self.initial_state) # Randomize start position of object. if self.has_object: for i in range(self.num_objs): obj_id = self.sim.model.geom_name2id('object%i' % i) object_size = self.sim.model.geom_size[obj_id][0] object_xpos = self.initial_agent_position[:2] / 2 if self.deterministic: n = self.np_random.uniform(self.gRange - 0.05, 0, size=1)[0] while n > self.goal.flatten()[0]: n = self.np_random.uniform(self.gRange - 0.05, 0, size=1)[0] object_xpos = np.array([np.array([n, 0])]) elif self.fixed_obj: object_xpos = np.array([np.array([-0.1, 0])]) else: while (np.linalg.norm(object_xpos - self.initial_agent_position[:2]) < min(0.2, self.gRange / 4)): object_xpos = self.np_random.uniform(-self.target_range, self.target_range, size=2) object_qpos = self.sim.data.get_joint_qpos('object%d:joint' % i) assert object_qpos.shape == (7,) object_qpos[:2] = object_xpos self.sim.data.set_joint_qpos('object%d:joint' % i, object_qpos) action = np.array([0, 0]) utils.ctrl_set_action(self.sim, action) self.sim.forward() return True
def _set_action(self, action): action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] stiffness_ctrl = action[4] if action.shape == (5, ) else 0.0 pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion stiffness_ctrl *= 50.0 if action.shape == (5, ) else 0.0 # self.sim.model.actuator_gainprm[self.sim.model.actuator_name2id('robot0:l_gripper_finger_joint'), 0] += stiffness_ctrl # self.sim.model.actuator_gainprm[self.sim.model.actuator_name2id('robot0:r_gripper_finger_joint'), 0] += stiffness_ctrl # self.sim.model.actuator_biasprm[self.sim.model.actuator_name2id('robot0:l_gripper_finger_joint'), 1] += -stiffness_ctrl # self.sim.model.actuator_biasprm[self.sim.model.actuator_name2id('robot0:r_gripper_finger_joint'), 1] += -stiffness_ctrl if action.shape == (5, ): stiffness_ctrl += self.prev_stiffness stiffness_ctrl = np.max( [np.min([stiffness_ctrl, self.psv_prev_stiffness]), 0.0]) self.prev_stiffness = stiffness_ctrl gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate( [pos_ctrl, rot_ctrl, gripper_ctrl, [self.prev_stiffness]]) # print("Gripper pose:{}, stiffness:{}".format(gripper_ctrl, self.prev_stiffness)) # self.prev_stiffness = self.sim.model.actuator_gainprm[self.sim.model.actuator_name2id('robot0:l_gripper_finger_joint'), 0] # Apply action to simulation. utils.ctrl_set_action(self.sim, action, self.stiffness_on) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (4,) action = action.copy() # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [0., 1., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion {Vertical} #rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion {Horizontal} gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2,) # Apply action to simulation # Determine the closest cloth node to the gripper closest, dist_closest = self.find_closest_indice(self.grip_pos) # Only allow gripping if in proximity if dist_closest<=0.001: utils.grasp(self.sim, gripper_ctrl, closest) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (3, ) self.counter += 1 action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl = action[:3] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([0, 0]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. # utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) if self.counter >= 2: for _ in range(10): self.sim.step() pos_ctrl = np.array([0.0, 0.02, 0.0]) gripper_ctrl = np.array([0, 0]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) self.sim.step()
def _set_action(self, action): if action.shape != (4, ): try: action = np.reshape(action, (4, )) except: print('Action with shape ', action.shape, ' could not be reshaped to (4,)') assert action.shape == (4, ) action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _sample_goal(self): self.current_state = copy.deepcopy(self.sim.get_state()) action = np.random.uniform(-1,1,4) pos_ctrl, gripper_ctrl = action[:3], action[3] # pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2,) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. for i in range(1000): utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) self.sample_goal_state = copy.deepcopy(self.sim.get_state()) self.sim.set_state(self.current_state) goal = self.render(mode='rgb_array') # if self.has_object: # goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3) # goal += self.target_offset # goal[2] = self.height_offset # if self.target_in_the_air and self.np_random.uniform() < 0.5: # goal[2] += self.np_random.uniform(0, 0.45) # else: # goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-0.15, 0.15, size=3) return goal.copy()
def _set_action(self, action): if self._use_real_robot: assert action.shape == (self.n_actions,) # 6 mobile base action = action.copy() # ensure that we don't change the action outside of this scope if self.debug_print: print("_set_action:", action) pos_ctrl, gripper_ctrl = action[:3], action[3:] pos_ctrl *= 0.03 # limit maximum change in position rot_ctrl = [0.5, 0.5, -0.5, -0.5] #(0 0 0) if self.gripper_close: gripper_ctrl = 1.0 else: gripper_ctrl = -1.0 gripper_ctrl = self.gripper_format_action(gripper_ctrl) assert gripper_ctrl.shape == (self.gripper_actual_dof,) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) # action = np.concatenate([pos_ctrl, rot_ctrl, base_ctrl, gripper_ctrl]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) ee_pose = self.sia_7f_arm_robot.arm_get_ee_pose() arm_action = pos_ctrl print("arm_action: ", arm_action) # Applay action to real robot self.sia_7f_arm_robot.arm_set_ee_pose_relative(arm_action) if self.gripper_close: self.sia_7f_arm_robot.gripper_close() else: self.sia_7f_arm_robot.gripper_open() else: assert action.shape == (self.n_actions,) # 6 mobile base action = action.copy() if self.debug_print: print("_set_action:", action) pos_ctrl, gripper_ctrl = action[:3], action[3:] pos_ctrl *= 0.03 # limit maximum change in position rot_ctrl = [0.5, 0.5, -0.5, -0.5] #(0 0 0) if self.gripper_close: gripper_ctrl = 1.0 else: gripper_ctrl = -1.0 gripper_ctrl = self.gripper_format_action(gripper_ctrl) # assert gripper_ctrl.shape == (2,) assert gripper_ctrl.shape == (self.gripper_actual_dof,) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) # base control + gripper control utils.mocap_set_action(self.sim, action) # arm control in cartesion (x, y, z)
def _set_action(self, action): if not self.discrete: # Continuous assert action.shape == (4,) action = action.copy() # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.05 # limit maximum change in position # fixed rotation of the end effector, expressed as a quaternion rot_ctrl = [1., 0., 1., 0.] gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2,) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) else: # Discrete assert action.shape == (4,) action = action.copy() pos_discrete_ctrl, gripper_discrete_ctrl = action[:3], action[3] assert np.all(pos_discrete_ctrl >= 0) scale = 0.04 # Convert discrete to continuous control # 0 - nothing # 1 - (n_bins - 1) / 2 - +scale # (n_bins+1) / 2 - (n_bins - 1) - -scale pos_ctrl = np.zeros_like(pos_discrete_ctrl, dtype=np.float32) for ac in range(3): if pos_discrete_ctrl[ac] == 0: pos_ctrl[ac] = 0. elif pos_discrete_ctrl[ac] > 0 and pos_discrete_ctrl[ac] <= (self.n_bins - 1) / 2: pos_ctrl[ac] = scale * pos_discrete_ctrl[ac] else: pos_ctrl[ac] = -scale * \ (pos_discrete_ctrl[ac] - (self.n_bins - 1)/2) # Fixed rotation of the end effector, expressed as a quaternion rot_ctrl = [1., 0., 1., 0.] # -1 means close the gripper, 0 means do nothing, 1 means open the gripper gripper_ctrl = np.array( [gripper_discrete_ctrl, gripper_discrete_ctrl]) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Force gripper to be at table height obs = self._get_obs() gripper_pos = obs['observation'][0:3] pos_ctrl[2] = self.height_offset - gripper_pos[2] action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (4, ) action = action.copy() pos_ctrl, gripper_ctrl = action[:3], action[3] rot_ctrl = [1., 0., 1., 0.] gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) r_utils.ctrl_set_action(self.sim, action) r_utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (self.n_actions,) self.action = action # energy correction h = self.sim.data.get_site_xpos('tar')[2] if h < 0.8: self.energy_corrected = False # each time the target crosses the z == 0.8 line its energy gets corrected if not self.energy_corrected: if h > 0.8: self.sim.data.set_joint_qvel('tar:z', self.sqrt_2_g * np.sqrt(self.initial_h - h)) self.give_reflection_reward = True self.energy_corrected = True if not self.no_movement: a = np.zeros([8], dtype=np.float32) a[0] = 0.015 * action[0] a[1] = 0.015 * action[1] euler = rotations.mat2euler(self.sim.data.get_site_xmat('robot0:grip')) a_4 = euler[0] a_5 = euler[1] # adjusting z position to a plane (otherwise the arm has a downwards drift) if self.sim.data.get_site_xpos('robot0:grip')[2] < 0.705: a[2] = 0.665 - self.sim.data.get_site_xpos('robot0:grip')[2] else: a[2] = 0.665 - self.sim.data.get_site_xpos('robot0:grip')[2] a[4] = 0.015 * action[2] a[5] = 0.015 * action[3] # Restricting rotation action: if a_4 > 0.3 and action[2] > 0.: a[4] = 0. if a_4 < -0.3 and action[2] < 0.: a[4] = 0. if a_5 > 0.3 and action[3] > 0.: a[5] = 0. if a_5 < -0.3 and action[3] < 0.: a[5] = 0. # Apply action to simulation. utils.ctrl_set_action(self.sim, a) utils.mocap_set_action(self.sim, a) else: pass
def _set_action(self, action): assert action.shape == (6, ) action = action.copy( ) # ensure that we don't change the action outside of this scope deviation = sum(abs(self.sim.data.qpos - self.sim.data.ctrl)) # print(deviation ) if deviation > 0.35: # reset control to current position if deviation too high self.sim.data.ctrl[:] = self.sim.data.qpos +self.get_dq([0, 0, 0.005, 0, 0, 0]) #print('deviation compensated') if self.ctrl_type == "joint": action *= 0.05 # limit maximum change in position # Apply action #scalarsto simulation. utils.ctrl_set_action(self.sim, action) elif self.ctrl_type == "cartesian": dx = action.reshape(6, ) max_limit = self.dx_max # limitation of operation space, we only allow small rotations adjustments in x and z directions, moving in y direction x_now = numpy.concatenate( (self.sim.data.get_body_xpos("gripper_dummy_heg"), self.sim.data.get_body_xquat("gripper_dummy_heg"))) x_then = x_now[:3] + dx[:3] * max_limit #diff_now = numpy.array(x_now - self.init_x).reshape(7,) diff_then = numpy.array(x_then[:3] - self.init_x[:3]) barriers_min = numpy.array([-0.4, -0.8, -0.4]) barriers_max = numpy.array([0.4, 0.8, 0.4]) #for i in range(3): # if (barriers_min[i] < diff_then[i] < barriers_max[i]): # dx[i] = dx[i] * max_limit # elif barriers_min[i] > diff_then[i]: # dx[i] = + max_limit # elif barriers_max[i] < diff_then[i]: # dx[i] = - max_limit for i in range(6): dx[i] = dx[i] * max_limit #rot_mat = self.sim.data.get_body_xmat('gripper_dummy_heg') #dx_ = np.concatenate([rot_mat.dot(dx[:3]), rot_mat.dot(dx[3:])]) #dx_[2]+= 1 dq = self.get_dq(dx) for i in range(6): self.sim.data.ctrl[i] += dq[i]
def _set_action(self, action): action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (4,) action = action.copy() # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2,) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (self.n_actions, ) self.action = action if not self.no_movement: while action.shape < (4, ): action = np.append(action, [0.]) if self.sim.data.get_site_xpos( 'robot0:grip')[0] < 1.3 and action[0] < 0.: action[0] = 0. if self.sim.data.get_site_xpos( 'robot0:grip')[0] > 1.8 and action[0] > 0.: action[0] = 0. if self.sim.data.get_site_xpos( 'robot0:grip')[1] < 0.4 and action[1] < 0.: action[1] = 0. if self.sim.data.get_site_xpos( 'robot0:grip')[1] > 1.1 and action[1] > 0.: action[1] = 0. if self.sim.data.get_site_xpos( 'robot0:grip')[2] < 0.47 and action[2] < 0.: action[2] = 0. if self.sim.data.get_site_xpos( 'robot0:grip')[2] > 0.87 and action[2] > 0.: action[2] = 0. action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.05 # limit maximum change in position # rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion rot_ctrl = [1., 0., 0., 0.] gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) # add matrix to one action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) else: pass
def _set_action(self, action): assert action.shape == (6, ) action = action.copy( ) # ensure that we don't change the action outside of this scope if self.ctrl_type == "joint": action *= 0.05 # limit maximum change in position # Apply action to simulation. utils.ctrl_set_action(self.sim, action) elif self.ctrl_type == "cartesian": dx = action.reshape(6, ) max_limit = 0.00005 # limitation of operation space, we only allow small rotations adjustments in x and z directions, moving in y direction x_now = numpy.concatenate( (self.sim.data.get_body_xpos("gripper_dummy_heg"), self.sim.data.get_body_xquat("gripper_dummy_heg"))) x_then = x_now[:3] + dx[:3] * max_limit #diff_now = numpy.array(x_now - self.init_x).reshape(7,) diff_then = numpy.array(x_then[:3] - self.init_x[:3]) barriers_min = numpy.array([-0.1, -0.05, -0.1]) barriers_max = numpy.array([0.1, 0.2, 0.1]) for i in range(3): if (barriers_min[i] < diff_then[i] < barriers_max[i]): dx[i] = dx[i] * max_limit elif barriers_min[i] > diff_then[i]: dx[i] = +max_limit elif barriers_max[i] < diff_then[i]: dx[i] = -max_limit for i in range(3, 6): dx[i] = dx[i] * max_limit * 0.01 #slower rotations # dx[1] += 0.5*max_limit #bias in direction of assembly dx.reshape(6, 1) jacp = self.sim.data.get_body_jacp( name="gripper_dummy_heg").reshape(3, 6) jacr = self.sim.data.get_body_jacr( name="gripper_dummy_heg").reshape(3, 6) jac = numpy.vstack((jacp, jacr)) dq = numpy.linalg.lstsq(jac, dx)[0].reshape(6, ) # print(sum(abs(sim.data.qpos-sim.data.ctrl))) for i in range(6): self.sim.data.ctrl[i] += dq[i]
def _set_action(self, action): assert action.shape == (4,) action = action.copy() # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2,) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): # TODO: set action, n_action (= number of actuators) and action control assert action.shape == (4,) action = action.copy() # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [0., 0., 0., 0.] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2,) gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (self.n_actions, ) # 7 self.action = action if not self.no_movement: while action.shape < (4, ): action = np.append(action, [0.]) if self.sim.data.get_site_xpos( 'gripperpalm')[0] < 1.3 and action[0] < 0.: action[0] = 0. if self.sim.data.get_site_xpos( 'gripperpalm')[0] > 1.8 and action[0] > 0.: action[0] = 0. if self.sim.data.get_site_xpos( 'gripperpalm')[1] < 0.4 and action[1] < 0.: action[1] = 0. if self.sim.data.get_site_xpos( 'gripperpalm')[1] > 1.1 and action[1] > 0.: action[1] = 0. if self.sim.data.get_site_xpos( 'gripperpalm')[2] < 0.47 and action[2] < 0.: action[2] = 0. if self.sim.data.get_site_xpos( 'gripperpalm')[2] > 0.87 and action[2] > 0.: action[2] = 0. action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [1., 0., 0., 0.] gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) print("action:", action) # print("action size:", action.size()) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) else: pass
def _set_action(self, action): assert action.shape == (4,) action = action.copy() # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] # print("------>",action) #self.sim.data.set_joint_qpos('robot0:wrist_roll_joint', 0.) pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2,) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (3, ) action = ( action.copy() ) # ensure that we don't change the action outside of this scope action *= 0.05 # limit maximum change in position rot_ctrl = [ 1.0, 0.0, 1.0, 0.0, ] # fixed rotation of the end effector, expressed as a quaternion action = np.concatenate([action, rot_ctrl, np.zeros(2)]) # Apply action to simulation. robot_utils.ctrl_set_action(self.sim, action) robot_utils.mocap_set_action(self.sim, action)