def render_pose(pose: np.ndarray, viewer, label="", size=0.2, unique_id=None, unique_label=False): if viewer is None: return rgba = np.r_[np.random.uniform(0.2, 1.0, 3), 1.] extra_kwargs = dict() if unique_label: unique_id = hash(label) % np.iinfo(np.int32).max if unique_id is not None: extra_kwargs['dataid'] = unique_id with viewer._gui_lock: existing = [ i for i, m in enumerate(viewer._markers) if m.get('dataid') == unique_id ] if len(existing) == 1: rgba = viewer._markers[existing[0]]['rgba'] for i in existing[::-1]: del viewer._markers[i] pos = pose[:3] if pose.size == 6: quat = rotations.euler2quat(pose[3:]) elif pose.size == 7: quat = pose[3:] else: viewer.add_marker(pos=pos, label=label, type=mj_const.GEOM_SPHERE, size=np.ones(3) * 0.01, rgba=rgba, specular=0., **extra_kwargs) return for i in range(3): rgba = np.zeros(4) rgba[[i, 3]] = 1. tf = [ np.r_[0., np.pi / 2, 0.], np.r_[np.pi / 2, np.pi / 1, 0.], np.r_[0., 0., 0.] ][i] rot = rotations.quat_mul(quat, rotations.euler2quat(tf)) viewer.add_marker(pos=pos, mat=rotations.quat2mat(rot).flatten(), label=(label if i == 0 else ""), type=mj_const.GEOM_ARROW, size=np.r_[0.01, 0.01, size], rgba=rgba, specular=0., **extra_kwargs)
def set_initial_pose(self): self.data.set_joint_qpos('yumi_joint_1_l', 1.) self.data.set_joint_qpos('yumi_joint_1_r', -1.) self.data.set_joint_qpos('yumi_joint_2_l', 0.1) self.data.set_joint_qpos('yumi_joint_2_r', 0.1) ''' randomize goal goal_id = self.model.body_name2id('goal') pos = self.model.body_pos[goal_id] pos[0:2] = [0.5, 0] pos[0:2] += np.random.uniform(-0.05, 0.05, size=(2,)) self.model.body_pos[goal_id] = pos ''' self.sim.forward() target_start, target_end = self.model.get_joint_qpos_addr('target') target_qpos = self.data.qpos[target_start:target_end] target_quat = target_qpos[3:] if self.task == 0: # rotate y=-90 deg quat = rotations.euler2quat(np.array([0, -np.pi / 2, 0])) z_idx = 0 elif self.task == 1: # rotate x=90 deg quat = rotations.euler2quat(np.array([np.pi / 2, 0, 0])) z_idx = 1 elif self.task == 2: # do nothing quat = rotations.euler2quat(np.array([0, 0, 0])) z_idx = 2 elif self.task == 3: # rotate z=-90 deg quat = rotations.euler2quat(np.array([0, 0, -np.pi / 2])) z_idx = 2 else: raise Exception('Additional tasks not implemented.') target_id = self.model.geom_name2id('target') target_qpos[0] = 0.5 + np.random.uniform(-0.01, 0.01) target_qpos[1] = np.random.uniform(0.14, 0.15) height = self.model.geom_size[target_id][z_idx] target_qpos[2] = 0.051 + height goal_id = self.model.body_name2id('goal') body_pos = self.model.body_pos[goal_id] body_quat = self.model.body_quat[goal_id] body_pos[2] = target_qpos[2] body_quat[:] = quat perturbation = np.zeros(3) perturbation[z_idx] = np.random.uniform(-0.2, 0.2) euler = rotations.quat2euler(quat) euler = rotations.subtract_euler(euler, perturbation) target_qpos[3:] = rotations.euler2quat(euler)
def _goal_distance(self, goal_a, goal_b): assert goal_a.shape == goal_b.shape assert goal_a.shape[-1] == 7 d_pos = np.zeros_like(goal_a[..., 0]) d_rot = np.zeros_like(goal_b[..., 0]) if self.target_position != 'ignore': delta_pos = goal_a[..., :3] - goal_b[..., :3] d_pos = np.linalg.norm(delta_pos, axis=-1) if self.target_rotation != 'ignore': quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:] if self.ignore_z_target_rotation: # Special case: We want to ignore the Z component of the rotation. # This code here assumes Euler angles with xyz convention. We first transform # to euler, then set the Z component to be equal between the two, and finally # transform back into quaternions. euler_a = rotations.quat2euler(quat_a) euler_b = rotations.quat2euler(quat_b) euler_a[2] = euler_b[2] quat_a = rotations.euler2quat(euler_a) # Subtract quaternions and extract angle between them. quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b)) angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.)) d_rot = angle_diff assert d_pos.shape == d_rot.shape return d_pos, d_rot
def __init__(self, target_position, target_rotation, skip_frame=20, model_path='rubik_cube_shadow_wo_target.xml', initial_robot_pos_dict=DEFAULT_INITIAL_QPOS, reward_type='sparse', randomize_initial_position=True, randomize_initial_rotation=True, distance_threshold=0.01, rotation_threshold=0.1, target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)])): """Initializes a new ManipulateEnv environment. Args: model_path (string): path to the environment XML file initial_robot_pos_dict (dict): a dictionary of joint names and values that define the initial configuration target_position (string): the type of target porisiton: - ignore: target position is fully ignored, i.e. the object can be positioned arbitrarily - fixed: target position is set to the initial position of the target - random: target position is fully randomized according to target_postion_range target_rotation (string): the type of target rotation: - ignore: target rotation is fully ignored, i.e. the object can be rotated arbitrarily - fixed: target rotation is set to the initial rotation of the object - xyz: fully randomized target rotation around the X, Y and Z axis - z: fully randomized target rotation around the Z axis - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y target_position_range (np.array of shape (3,2)): range of the target_position randomization reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense randomize_initial_position (boolean): whether or not to randomize the initial position of the object randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved """ self.target_position = target_position self.target_rotation = target_rotation self.target_position_range = target_position_range self.reward_type = reward_type self.randomize_initial_position = randomize_initial_position self.randomize_initial_rotation = randomize_initial_rotation self.distance_threshold = distance_threshold self.rotation_threshold = rotation_threshold self.parallel_quats = [ rotations.euler2quat(r) for r in rotations.get_parallel_rotations() ] assert self.reward_type in ['dense', 'sparse'] assert self.target_position in ['ignore', 'fixed', 'random'] assert self.target_rotation in [ 'ignore', 'fixed', 'xyz', 'z', 'parallel', 'rotFace' ] ShadowGoalBaseEnv.__init__(self, model_path, initial_robot_pos_dict, skip_frame)
def __init__( self, model_path, target_position, target_rotation, target_position_range, reward_type, initial_qpos={}, randomize_initial_position=True, randomize_initial_rotation=True, distance_threshold=0.01, rotation_threshold=0.1, n_substeps=20, relative_control=False, ignore_z_target_rotation=False, ): """Initializes a new Hand manipulation environment. Args: model_path (string): path to the environments XML file target_position (string): the type of target position: - ignore: target position is fully ignored, i.e. the object can be positioned arbitrarily - fixed: target position is set to the initial position of the object - random: target position is fully randomized according to target_position_range target_rotation (string): the type of target rotation: - ignore: target rotation is fully ignored, i.e. the object can be rotated arbitrarily - fixed: target rotation is set to the initial rotation of the object - xyz: fully randomized target rotation around the X, Y and Z axis - z: fully randomized target rotation around the Z axis - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y ignore_z_target_rotation (boolean): whether or not the Z axis of the target rotation is ignored target_position_range (np.array of shape (3, 2)): range of the target_position randomization reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense initial_qpos (dict): a dictionary of joint names and values that define the initial configuration randomize_initial_position (boolean): whether or not to randomize the initial position of the object randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved n_substeps (int): number of substeps the simulation runs on every call to step relative_control (boolean): whether or not the hand is actuated in absolute joint positions or relative to the current state """ self.target_position = target_position self.target_rotation = target_rotation self.target_position_range = target_position_range self.parallel_quats = [rotations.euler2quat(r) for r in rotations.get_parallel_rotations()] self.randomize_initial_rotation = randomize_initial_rotation self.randomize_initial_position = randomize_initial_position self.distance_threshold = distance_threshold self.rotation_threshold = rotation_threshold self.reward_type = reward_type self.ignore_z_target_rotation = ignore_z_target_rotation assert self.target_position in ['ignore', 'fixed', 'random'] assert self.target_rotation in ['ignore', 'fixed', 'xyz', 'z', 'parallel'] hand_env.HandEnv.__init__( self, model_path, n_substeps=n_substeps, initial_qpos=initial_qpos, relative_control=relative_control) utils.EzPickle.__init__(self)
def __init__( self, model_path, target_position, target_rotation, target_position_range, reward_type, initial_qpos=None, randomize_initial_position=True, randomize_initial_rotation=True, distance_threshold=0.01, rotation_threshold=0.1, n_substeps=20, relative_control=False, ignore_z_target_rotation=False, ): """Initializes a new Hand manipulation environment. Args: model_path (string): path to the environments XML file target_position (string): the type of target position: - ignore: target position is fully ignored, i.e. the object can be positioned arbitrarily - fixed: target position is set to the initial position of the object - random: target position is fully randomized according to target_position_range target_rotation (string): the type of target rotation: - ignore: target rotation is fully ignored, i.e. the object can be rotated arbitrarily - fixed: target rotation is set to the initial rotation of the object - xyz: fully randomized target rotation around the X, Y and Z axis - z: fully randomized target rotation around the Z axis - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y ignore_z_target_rotation (boolean): whether or not the Z axis of the target rotation is ignored target_position_range (np.array of shape (3, 2)): range of the target_position randomization reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense initial_qpos (dict): a dictionary of joint names and values that define the initial configuration randomize_initial_position (boolean): whether or not to randomize the initial position of the object randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved n_substeps (int): number of substeps the simulation runs on every call to step relative_control (boolean): whether or not the hand is actuated in absolute joint positions or relative to the current state """ self.target_position = target_position self.target_rotation = target_rotation self.target_position_range = target_position_range self.parallel_quats = [rotations.euler2quat(r) for r in rotations.get_parallel_rotations()] self.randomize_initial_rotation = randomize_initial_rotation self.randomize_initial_position = randomize_initial_position self.distance_threshold = distance_threshold self.rotation_threshold = rotation_threshold self.reward_type = reward_type self.ignore_z_target_rotation = ignore_z_target_rotation assert self.target_position in ['ignore', 'fixed', 'random'] assert self.target_rotation in ['ignore', 'fixed', 'xyz', 'z', 'parallel'] initial_qpos = initial_qpos or {} hand_env.HandEnv.__init__( self, model_path, n_substeps=n_substeps, initial_qpos=initial_qpos, relative_control=relative_control)
def _sample_goal(self): # Select a goal for the object position. if self.target_position == 'random': assert self.target_position_range.shape == (3, 2) offset = self.np_random.uniform(self.target_position_range[:, 0], self.target_position_range[:, 1]) assert offset.shape == (3,) target_pos = self.sim.data.get_site_xpos('rubik:cube_center') + offset elif self.target_position in ['ignore', 'fixed']: # target_pos = self.sim.data.get_site_xpos('rubik:cube_center') target_pos = np.array([1, 0.87, 0.2]) else: raise error.Error('Unknown target_position option "{}".'.format(self.target_position)) assert target_pos.shape == (3,) # Select a goal for the object rotation if self.target_rotation == 'z': angle = self.np_random.uniform(-np.pi, np.pi) axis = np.array([0., 0., 1.]) target_quat = quat_from_angle_and_axis(angle, axis) elif self.target_rotation == 'parallel': angle = self.np_random.uniform(-np.pi, np.pi) axis = np.array([0., 0., 1.]) target_quat = quat_from_angle_and_axis(angle, axis) parallel_quat = self.parallel_quats[self.np_random.randint(len(self.parallel_quats))] target_quat = rotations.quat_mul(target_quat, parallel_quat) elif self.target_rotation == 'xyz': angle = self.np_random.uniform(-np.pi, np.pi) axis = self.np_random.uniform(-1., 1., size=3) target_quat = quat_from_angle_and_axis(angle, axis) elif self.target_rotation in ['ignore', 'fixed']: target_quat = self.sim.data.get_joint_qpos('rubik:free_joint_0_0_0')[-4:] elif self.target_rotation in ['rotFace']: target_quats = [rotations.euler2quat(r) for r in np.array([[0, 0, np.pi/2.], [0, 0, -np.pi/2.], [np.pi / 2., 0, 0], [-np.pi / 2., 0, 0], [0, np.pi/2., 0], [0, -np.pi/2., 0]])] parallel_quat = target_quats[self.np_random.randint(len(target_quats))] target_quat = self.sim.data.get_joint_qpos('rubik:free_joint_0_0_0')[-4:] target_quat = rotations.quat_mul(target_quat, parallel_quat) else: raise error.Error('Unknown target_rotation option "{}".'.format(self.target_rotation)) assert target_quat.shape == (4,) target_quat /= np.linalg.norm(target_quat) goal = np.concatenate([target_pos, target_quat]) return goal
def camera_randomization(self): cam_pos = [[0.99, 0.5, 1.0], [0.5, 0.0, 1.99]] #1m x 1m cam_ori = [[0.0, 1.57, 1.57], [0, 0, 0]] cam_fovy = [60, 60] for i in range(len(cam_pos)): for j in range(len(cam_pos[0])): cam_pos[i][j] += randrange(-15, 15) / 1000 ori_dim = len(cam_ori[0]) for i in range(len(cam_ori)): for j in range(ori_dim): cam_ori[i][j] += randrange(-17, 17) / 1000 for i in range(len(cam_fovy)): cam_fovy[i] += randrange(-100, 100) / 100 self.model.cam_pos[:, :] = np.array(cam_pos) self.model.cam_quat[:, :] = np.array(euler2quat(cam_ori)) self.model.cam_fovy[:] = np.array(cam_fovy)
def _goal_distance_single(self, goal_a, goal_b): assert goal_a.shape == goal_b.shape assert goal_a.shape[-1] == 7 d_rot = np.zeros_like(goal_b[..., 0]) d_pos = np.linalg.norm(goal_a[..., :3] - goal_b[..., :3], axis=-1) if self.target_rotation != 'ignore': quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:] if self.ignore_z_target_rotation: euler_a = rotations.quat2euler(quat_a) euler_b = rotations.quat2euler(quat_b) euler_a[2] = euler_b[2] quat_a = rotations.euler2quat(euler_a) quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b)) angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.)) d_rot = angle_diff return d_pos, d_rot
def step(cls, viewer): q1 = rotations.euler2quat(np.r_[0.01, 0.01, 0.05]) q0 = cls.ob1_pose[3:].copy() q_common = rotations.quat_mul(q0, q1) cls.ob1_pose[3:] = q_common.copy() render_pose(cls.ob1_pose, viewer, unique_id=cls.unique_ids[0].item()) cls.ob2_pose[3:] = q_common.copy() render_pose(cls.ob2_pose, viewer, unique_id=cls.unique_ids[1].item()) if cls.i < 37: cls.tf = get_tf(cls.ob1_pose, cls.ob2_pose) # 2_to_1 cls.i += 1 else: ob1_pose_back = apply_tf(cls.tf, cls.ob2_pose) ob1_pose_back[2] -= 0.0 render_pose(apply_tf(cls.tf, cls.ob2_pose), viewer, unique_id=cls.unique_ids[2].item())
def rotation_goal_distance(goal_a, goal_b, ignore_z_target_rotation = False): assert goal_a.shape == goal_b.shape assert goal_a.shape[-1] == 7 d_rot = np.zeros_like(goal_b[..., 0]) quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:] if ignore_z_target_rotation: # Special case: We want to ignore the Z component of the rotation. # This code here assumes Euler angles with xyz convention. We first transform # to euler, then set the Z component to be equal between the two, and finally # transform back into quaternions. euler_a = rotations.quat2euler(quat_a) euler_b = rotations.quat2euler(quat_b) euler_a[2] = euler_b[2] quat_a = rotations.euler2quat(euler_a) # Subtract quaternions and extract angle between them. quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b)) angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.)) d_rot = angle_diff return d_rot
def predict(self, obs, **kwargs): u = np.zeros(self._env.action_space.shape) new_goal = obs['desired_goal'] if self._goal is None or np.any(self._goal != new_goal): self._reset(new_goal) # TFDebugger.reset() # TFDebugger.step(self._raw_env.viewer) curr_grp_poses = { a: np.r_[self._raw_env.sim.data.get_site_xpos(f'gripper_{a}_center'), rotations.mat2quat( self._raw_env.sim.data. get_site_xmat(f'gripper_{a}_center')), ] for a in ('l', 'r') } pos_errors = [] for arm in ('l', 'r'): if arm == 'l': solver = self._ikp_solver_l jac_solver = self._jac_solver_l joint_lims = self._raw_env.arm_l_joint_lims achieved_pos = obs['observation'][32:35] obj_l_rel_pos = obs['observation'][44:47] obj_pos = obj_l_rel_pos + achieved_pos target_pose = np.r_[obj_pos, np.pi - 0.9, 0.01, 0.01] target_pose = np.r_[target_pose[:3], rotations.euler2quat(target_pose[3:])] if self._phase == 3: target_pose[:3] = self._raw_env.sim.data.get_site_xpos( 'target0:left').copy() q = rotations.mat2quat( self._raw_env.sim.data.get_site_xmat('target0:left')) target_pose[3:] = rotations.quat_mul(target_pose[3:], q) prev_err = self._prev_err_l curr_q = obs['observation'][:7] u_masked = u[:7] elif arm == 'r': solver = self._ikp_solver_r jac_solver = self._jac_solver_r joint_lims = self._raw_env.arm_r_joint_lims achieved_pos = obs['observation'][35:38] obj_r_rel_pos = obs['observation'][47:50] obj_pos = obj_r_rel_pos + achieved_pos target_pose = np.r_[obj_pos, -np.pi + 0.9, 0.01, np.pi] target_pose = np.r_[target_pose[:3], rotations.euler2quat(target_pose[3:])] if self._phase == 3: target_pose[:3] = self._raw_env.sim.data.get_site_xpos( 'target0:right').copy() q = rotations.mat2quat( self._raw_env.sim.data.get_site_xmat('target0:right')) target_pose[3:] = rotations.quat_mul(q, target_pose[3:]) prev_err = self._prev_err_r curr_q = obs['observation'][16:23] u_masked = u[8:15] else: continue if self._phase == 0: u[7] = -1.0 u[15] = -1.0 target_pose[2] += 0.1 elif self._phase == 1: u[7] = -1.0 u[15] = -1.0 target_pose[2] -= 0.002 elif self._phase == 2: u[7] = -1 + self._phase_steps / 3. u[15] = -1 + self._phase_steps / 3. elif self._phase == 3: u[7] = 1.0 u[15] = 1.0 if self._raw_env.viewer is not None: tf.render_pose(target_pose.copy(), self._raw_env.viewer) curr_pose = curr_grp_poses[arm].copy() err_pose = curr_pose - target_pose err_pos = np.linalg.norm(err_pose[:3]) pos_errors.append(err_pos) if self.use_mocap_ctrl: if self._phase < 1: controller_k = 3.0 else: controller_k = 2.5 err_rot = tf.quat_angle_diff(curr_pose[3:], target_pose[3:]) target_q = self._raw_env.mocap_ik(-err_pose, arm) else: controller_k = 0.1 err_rot = 0.0 target_pose[:3] -= self._base_offset if self.use_qp_solver: if not self.check_joint_limits: joint_lims = None curr_pose[:3] -= self._base_offset target_q = self._position_ik_qp(curr_pose, target_pose, curr_q, jac_solver, joint_lims) else: target_q = self._position_ik(target_pose, curr_q, solver) if target_q is not None: err_q = curr_q - target_q u_masked[:] = self._controller(err_q, prev_err, controller_k) self._phase_steps += 1 if self._phase == 0 and np.all( np.array(pos_errors) < 0.03) and err_rot < 0.05: self._phase = 1 self._phase_steps = 0 elif self._phase == 1 and np.all( np.array(pos_errors) < 0.007) and err_rot < 0.05: self._phase = 2 self._phase_steps = 0 elif self._phase == 2: if self._phase_steps > 6: self._phase = 3 self._phase_steps = 0 self._locked_l_to_r_tf = tf.get_tf(curr_grp_poses['r'], curr_grp_poses['l']) u = np.clip(u, self._env.action_space.low, self._env.action_space.high) return u
def __init__(self, model_path, reward_type, initial_qpos=None, relative_control=False, has_object=False, randomize_initial_arm_pos=False, randomize_initial_object_pos=True, ignore_rotation_ctrl=False, distance_threshold=0.05, rotation_threshold=0.1, n_substeps=20, ignore_target_rotation=False, success_on_grasp_only=False, grasp_state=None, grasp_state_reset_p=0.0, target_in_the_air_p=0.5, object_id='original', object_cage=False, cage_opacity=0.1, weld_fingers=False): self.target_in_the_air_p = target_in_the_air_p self.has_object = has_object self.ignore_target_rotation = ignore_target_rotation self.randomize_initial_arm_pos = randomize_initial_arm_pos self.randomize_initial_object_pos = randomize_initial_object_pos self.ignore_rotation_ctrl = ignore_rotation_ctrl self.distance_threshold = distance_threshold self.rotation_threshold = rotation_threshold self.reward_type = reward_type self.success_on_grasp_only = success_on_grasp_only self.object_id = object_id self.forearm_bounds = (np.r_[0.65, 0.3, 0.42], np.r_[1.75, 1.2, 1.0]) self.table_safe_bounds = (np.r_[1.10, 0.43], np.r_[1.49, 1.05]) self._initial_arm_mocap_pose = np.r_[1.05, 0.75, 0.65, rotations.euler2quat(np.r_[0., 1.59, 1.57])] if isinstance(grasp_state, bool) and grasp_state: suffix = "" if object_id == 'original' else f'_{object_id}' p = os.path.join(os.path.dirname(__file__), f'../assets/states/grasp_state{suffix}.pkl') if not os.path.exists(p): raise IOError('File {} does not exist'.format(p)) grasp_state = pickle.load(open(p, 'rb')) if grasp_state is not None and grasp_state_reset_p <= 0.0: raise ValueError( 'grasp_state_reset_p must be greater than zero if grasp_state is specified!' ) self.grasp_state = grasp_state self.grasp_state_reset_p = grasp_state_reset_p if ignore_rotation_ctrl and not ignore_target_rotation: raise ValueError( 'Target rotation must be ignored if arm cannot rotate! Set ignore_target_rotation=True' ) if success_on_grasp_only: if reward_type != 'sparse': raise ValueError( 'Parameter success_on_grasp_only requires sparse rewards!') if not has_object: raise ValueError( 'Parameter success_on_grasp_only requires object to be grasped!' ) default_qpos = dict() xml_format = None if self.has_object: default_qpos['object:joint'] = [1.25, 0.53, 0.4, 1., 0., 0., 0.] xml_format = dict( object_geom= '<geom name="{name}" {props} material="material:object" condim="4"' 'friction="1 0.95 0.01" solimp="0.99 0.99 0.01" solref="0.01 1"/>', target_geom= '<geom name="{name}" {props} material="material:target" condim="4" group="2"' 'contype="0" conaffinity="0"/>', ) obj = dict(OBJECTS[object_id]) if 'mass' not in obj.keys(): obj['mass'] = 0.2 mesh_parts = obj.get('mesh_parts') if mesh_parts is not None and isinstance(mesh_parts, int): del obj['mesh_parts'] object_geom, target_geom = '', '' if isinstance(obj['mass'], Sequence): masses = list(obj['mass']) assert len(masses) == mesh_parts del obj['mass'] else: masses = [obj['mass']] * mesh_parts for i in range(mesh_parts): obj_part = dict(obj) obj_part['mesh'] += f'_part{i}' obj_part['mass'] = masses[i] props = " ".join( [f'{k}="{v}"' for k, v in obj_part.items()]) object_geom += xml_format['object_geom'].format( name=f'object_part{i}', props=props) target_geom += xml_format['target_geom'].format( name=f'target_part{i}', props=props) xml_format = dict(object_geom=object_geom, target_geom=target_geom) else: props = " ".join([f'{k}="{v}"' for k, v in obj.items()]) xml_format = dict( object_geom=xml_format['object_geom'].format(name='object', props=props), target_geom=xml_format['target_geom'].format(name='target', props=props), ) if object_cage: rgba = f"1 0 0 {cage_opacity}" xml_format['cage'] = ''' <geom pos="0 0.55 0.4" quat="0.924 0.3826 0 0" size="1 0.75 0.01" type="box" mass="200" rgba="{}" solimp="0.99 0.99 0.01" solref="0.01 1"/> <geom pos="0 -0.55 0.4" quat="0.924 -0.3826 0 0" size="1 0.75 0.01" type="box" mass="200" rgba="{}" solimp="0.99 0.99 0.01" solref="0.01 1"/> <geom pos="-0.45 0 0.4" quat="0.924 0 0.3826 0" size="0.75 1 0.01" type="box" mass="200" rgba="{}" solimp="0.99 0.99 0.01" solref="0.01 1"/> <geom pos="0.45 0 0.4" quat="0.924 0 -0.3826 0" size="0.75 1 0.01" type="box" mass="200" rgba="{}" solimp="0.99 0.99 0.01" solref="0.01 1"/> '''.format(*[rgba] * 4) else: xml_format['cage'] = '' if weld_fingers: mocap_tp = ''' <body mocap="true" name="{}:mocap" pos="0 0 0"> <geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.7" size="0.005 0.005 0.005" type="box"/> <geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="1 0.005 0.005" type="box"/> <geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="0.005 1 0.001" type="box"/> <geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="0.005 0.005 1" type="box"/> </body> ''' weld_tp = '<weld body1="{}:mocap" body2="{}" solimp="0.9 0.95 0.001" solref="0.02 1"/>' fingertip_names = [ x.replace('robot0:', '') for x in FINGERTIP_BODY_NAMES ] xml_format['finger_mocaps'] = '\n'.join( [mocap_tp.format(n) for n in fingertip_names]) xml_format['finger_welds'] = '\n'.join([ weld_tp.format(m, f) for m, f in zip(fingertip_names, FINGERTIP_BODY_NAMES) ]) else: xml_format['finger_welds'] = '' xml_format['finger_mocaps'] = '' initial_qpos = initial_qpos or default_qpos hand_env.HandEnv.__init__(self, model_path, n_substeps=n_substeps, initial_qpos=initial_qpos, relative_control=relative_control, arm_control=True, xml_format=xml_format) utils.EzPickle.__init__(self)
def goToGoal(env, lastObs): #goal = self.sampleGoal() goal = lastObs['desired_goal'] #objectPosition objectPos = lastObs['observation'][3:6] gripperPos = lastObs['observation'][:3] gripperState = lastObs['observation'][9:11] objectRot = lastObs['observation'][12:15] # Euler angles object_rel_pos = lastObs['observation'][6:9] #print("relative position ", object_rel_pos) #print("Goal position ", goal) #print("gripper Position ", gripperPos) #print("Object Position ", objectPos) #print("Object rotation ", objectRot) #print("Gripper state ", gripperState) episodeAcs = [] episodeObs = [] episodeInfo = [] object_oriented_goal = object_rel_pos.copy() object_oriented_goal[2] += 0.03 print("Max episode steps ", env._max_episode_steps) timeStep = 0 episodeObs.append(lastObs) while np.linalg.norm(object_oriented_goal ) >= 0.0025 and timeStep <= env._max_episode_steps: env.render() action = [0, 0, 0, 0] + [1., 0., 1., 0.] object_oriented_goal = object_rel_pos.copy() object_oriented_goal[2] += 0.03 object_rot_goal = rotations.euler2quat([0, 0, objectRot[1]]) for i in range(len(object_oriented_goal)): action[i] = object_oriented_goal[i] * 6 action[4:] = object_rot_goal action[3] = 0.05 obsDataNew, reward, done, info = env.step(action) timeStep += 1 episodeAcs.append(action) episodeInfo.append(info) episodeObs.append(obsDataNew) # note: don't need to reobserve object orientation, no online correction objectPos = obsDataNew['observation'][3:6] gripperPos = obsDataNew['observation'][:3] gripperState = obsDataNew['observation'][9:11] object_rel_pos = obsDataNew['observation'][6:9] while np.linalg.norm( object_rel_pos) >= 0.0025 and timeStep <= env._max_episode_steps: env.render() action = [0, 0, 0, 0] + [1., 0., 1., 0.] object_rot_goal = rotations.euler2quat([0, 0, objectRot[1]]) for i in range(len(object_rel_pos)): action[i] = object_rel_pos[i] * 6 action[4:] = object_rot_goal action[3] = -0.005 obsDataNew, reward, done, info = env.step(action) timeStep += 1 episodeAcs.append(action) episodeInfo.append(info) episodeObs.append(obsDataNew) objectPos = obsDataNew['observation'][3:6] gripperPos = obsDataNew['observation'][:3] gripperState = obsDataNew['observation'][9:11] object_rel_pos = obsDataNew['observation'][6:9] while np.linalg.norm( goal - objectPos) >= 0.001 and timeStep <= env._max_episode_steps: env.render() action = [0, 0, 0, 0] + [1., 0., 1., 0.] object_rot_goal = rotations.euler2quat([0, 0, objectRot[1]]) for i in range(len(goal - objectPos)): action[i] = (goal - objectPos)[i] * 6 action[4:] = object_rot_goal action[3] = -0.005 obsDataNew, reward, done, info = env.step(action) timeStep += 1 episodeAcs.append(action) episodeInfo.append(info) episodeObs.append(obsDataNew) objectPos = obsDataNew['observation'][3:6] gripperPos = obsDataNew['observation'][:3] gripperState = obsDataNew['observation'][9:11] object_rel_pos = obsDataNew['observation'][6:9] while True: env.render() action = [0, 0, 0, 0] + [1., 0., 1., 0.] action[3] = -0.005 obsDataNew, reward, done, info = env.step(action) timeStep += 1 episodeAcs.append(action) episodeInfo.append(info) episodeObs.append(obsDataNew) objectPos = obsDataNew['observation'][3:6] gripperPos = obsDataNew['observation'][:3] gripperState = obsDataNew['observation'][9:11] object_rel_pos = obsDataNew['observation'][6:9] if timeStep >= env._max_episode_steps: break #print("Toatal timesteps taken ", timeStep) actions.append(episodeAcs) observations.append(episodeObs) infos.append(episodeInfo)
def randomize_ur10_xml(var_mass=0.0, var_damp=0.0, var_fr=0.0, var_grav_x_y=0.0, var_grav_z=0.0, var_body_pos=0.00, var_body_rot=0.0, worker_id=1): # parameters: # # var_mass : mass variance relative to actual mass # # var_damp : damping variance relative to the standard value # var_fr : friction variance relative to the standard value # # var_grav_x_y : gravity variance in x- and y-direction (absolute) # var_grav_z : gravity variance in z-direction (absolute) # # var_body_pos : variance of body position in m per direction # var_body_rot : variance of body rotation in degree per axis # ______________________________________________________________________ # # return : offset of car_body to consider for goal position # # ______________________________________________________________________ main_xml_temp = os.path.join(*[ur_path, "ur10_assembly_setup_rand_temp_{}.xml".format(worker_id)]) robot_body_rel = os.path.join("ur10_heg", "ur10_body_heg_rand_temp_{}.xml".format(worker_id)) car_body_rel = os.path.join("ur10_heg", "car_body_rand_temp_{}.xml".format(worker_id)) defaults_rel = os.path.join("ur10_heg", "ur10_default_rand_temp_{}.xml".format(worker_id)) robot_body_xml_temp = os.path.join(ur_path, robot_body_rel) car_body_xml_temp = os.path.join(ur_path, car_body_rel) defaults_xml_temp = os.path.join(ur_path, defaults_rel) # load robot body xml and randomize body masses tree = ET.parse(robot_body_xml) inertials = [] for elem in tree.iter(): if elem.tag == 'inertial': # find inertial elements in xml inertials.append(elem) for inertial in inertials: # change masses inertial.attrib['mass'] = str((np.random.uniform(-var_mass, +var_mass) + 1) * float(inertial.attrib['mass'])) tree.write(robot_body_xml_temp) # save new xml in temp file to be loaded in gym env # load defaults xml and randomize joint damping and surface friction #damping_arm_rand = damping_arm * (1 + np.random.uniform(-var_damp, +var_damp)) #damping_wrist_rand = damping_wrist * (1 + np.random.uniform(-var_damp, +var_damp)) damping_pan_rand = damping_pan * (1 + np.random.uniform(-var_damp, +var_damp)) damping_lift_rand = damping_lift * (1 + np.random.uniform(-var_damp, +var_damp)) damping_elbow_rand = damping_elbow * (1 + np.random.uniform(-var_damp, +var_damp)) damping_wrist1_rand = damping_wrist1 * (1 + np.random.uniform(-var_damp, +var_damp)) damping_wrist2_rand = damping_wrist2 * (1 + np.random.uniform(-var_damp, +var_damp)) damping_wrist3_rand = damping_wrist3 * (1 + np.random.uniform(-var_damp, +var_damp)) friction_body_rand = friction_body * (1 + np.random.uniform(-var_fr, +var_fr, (3,))) friction_heg_rand = friction_heg * (1 + np.random.uniform(-var_fr, +var_fr, (3,))) friction_body_string = "{} {} {}".format(*friction_body_rand) friction_heg_string = "{} {} {}".format(*friction_heg_rand) tree = ET.parse(defaults_xml) for default in tree.findall('default'): if default.attrib['class'] == 'ur10:pan': default.findall('joint')[0].attrib['damping'] = str(damping_pan_rand) if default.attrib['class'] == 'ur10:lift': default.findall('joint')[0].attrib['damping'] = str(damping_lift_rand) if default.attrib['class'] == 'ur10:elbow': default.findall('joint')[0].attrib['damping'] = str(damping_elbow_rand) if default.attrib['class'] == 'ur10:wrist1': default.findall('joint')[0].attrib['damping'] = str(damping_wrist1_rand) if default.attrib['class'] == 'ur10:wrist2': default.findall('joint')[0].attrib['damping'] = str(damping_wrist2_rand) if default.attrib['class'] == 'ur10:wrist3': default.findall('joint')[0].attrib['damping'] = str(damping_wrist3_rand) if default.attrib['class'] == 'body': default.findall('geom')[0].attrib['friction'] = friction_body_string if default.attrib['class'] == 'heg': default.findall('geom')[0].attrib['friction'] = friction_heg_string tree.write(defaults_xml_temp) # load main xml and randomize gravity, also adapt includes to worker_id grav_x = np.random.uniform(-var_grav_x_y, var_grav_x_y) grav_y = np.random.uniform(-var_grav_x_y, var_grav_x_y) grav_z = np.random.uniform(-var_grav_z, var_grav_z) - 9.81 grav_string = "{} {} {}".format(grav_x, grav_y, grav_z) tree = ET.parse(main_xml) tree.findall("option")[0].attrib['gravity'] = grav_string for include in tree.findall("default")[0].iter("include"): include.attrib['file'] = defaults_rel world_temp_files = [car_body_rel, robot_body_rel] for i, include in enumerate(tree.findall("worldbody")[0].iter("include")): include.attrib['file'] = world_temp_files[i] tree.write(main_xml_temp) # load car body xml and change position (offset is returned by this function) body_euler = normalize_rad(rotations.quat2euler(body_quat)) body_pos_offset = np.random.uniform(-var_body_pos, var_body_pos, (3,)) body_euler_offset = np.random.uniform(-var_body_rot/180*np.pi, var_body_rot/180*np.pi, (3,)) body_pos_rand = body_pos + body_pos_offset body_euler_rand = body_euler + body_euler_offset body_quat_rand = rotations.euler2quat(body_euler_rand) body_pos_rand_string = "{} {} {}".format(*body_pos_rand) body_quat_rand_string = "{} {} {} {}".format(*body_quat_rand) tree = ET.parse(car_body_xml) for body in tree.findall('body'): if body.attrib['name'] == 'body_link': body.attrib['pos'] = body_pos_rand_string body.attrib['quat'] = body_quat_rand_string tree.write(car_body_xml_temp) offset = { 'body_pos_offset': body_pos_offset, 'body_euler_offset': body_euler_offset } return offset
total_numsteps) writer.add_scalar('loss/policy', policy_loss, total_numsteps) writer.add_scalar('loss/entropy_loss', ent_loss, total_numsteps) writer.add_scalar('entropy_temprature/alpha', alpha, total_numsteps) updates += 1 ############## if args.pos_control: # print("current pos: ",current_pos) next_a += current_pos elif args.ik_control: # print("orig ", next_a) # print("euler ", next_a[3:-1]) quat = euler2quat(next_a[3:-1]) ee_pos = np.concatenate((next_a[:3], quat)) joint_pos = c.IK(ee_pos) if joint_pos == 0: print("no solution") #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Implement get_current_joint_pos() #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Implement fps() current_joint_pos = np.zeros(8) next_a = current_joint_pos else: print("got solution") next_a = joint_pos + [next_a[-1]] # print("joint_pos ", joint_pos) # print("next_a: ", next_a) for _ in range(args.action_repeat): next_state, reward, done, _ = env.step(next_a) # Step
def predict(self, obs, **kwargs): u = np.zeros(self._env.action_space.shape) new_goal = obs['desired_goal'] if self._goal is None or np.any(self._goal != new_goal): self.reset(new_goal) obj_radius = self._object_size[0] obj_achieved_alt = obs['achieved_goal'].item() obj_achieved_pose = obs['observation'][44:51] if np.all(obj_achieved_pose[3:] == 0): obj_achieved_pose[3] = 1.0 # tf.render_pose(obj_achieved_pose, self._raw_env.viewer, label="O") grp_xrot = 0.9 + obj_achieved_alt * 2.0 curr_grp_poses = { a: np.r_[self._raw_env.sim.data.get_site_xpos(f'gripper_{a}_center'), rotations.mat2quat( self._raw_env.sim.data. get_site_xmat(f'gripper_{a}_center')), ] for a in ('l', 'r') } pos_errors = [] for arm in ('l', 'r'): if arm == 'l': transf = np.r_[0., obj_radius, 0., 1., 0., 0., 0.] if self._phase == 3: transf[1] *= 0.9 transf[2] = 0.05 pose = tf.apply_tf(transf, obj_achieved_pose) # tf.render_pose(pose, self._raw_env.viewer, label="L") grp_target_pos = pose[:3] grp_target_rot = np.r_[np.pi - grp_xrot, 0.01, 0.01] target_pose = np.r_[grp_target_pos, rotations.euler2quat(grp_target_rot)] prev_err = self._prev_err_l curr_q = obs['observation'][:7] u_masked = u[:7] elif arm == 'r': transf = np.r_[0., -obj_radius, 0., 1., 0., 0., 0.] if self._phase == 3: transf[1] *= 0.9 transf[2] = 0.05 pose = tf.apply_tf(transf, obj_achieved_pose) # tf.render_pose(pose, self._raw_env.viewer, label="R") grp_target_pos = pose[:3] grp_target_rot = np.r_[-np.pi + grp_xrot, 0.01, np.pi] target_pose = np.r_[grp_target_pos, rotations.euler2quat(grp_target_rot)] prev_err = self._prev_err_r curr_q = obs['observation'][16:23] u_masked = u[8:15] else: continue u[7] = -1.0 u[15] = -1.0 if self._phase == 0: target_pose[2] += 0.1 target_pose[1] += 0.05 * np.sign(target_pose[1]) elif self._phase == 1: target_pose[2] += 0.01 target_pose[1] += 0.05 * np.sign(target_pose[1]) elif self._phase == 2: target_pose[2] += 0.01 elif self._phase == 3: target_pose[2] += 0.00 if self._raw_env.viewer is not None: tf.render_pose(target_pose.copy(), self._raw_env.viewer) curr_pose = curr_grp_poses[arm].copy() err_pose = curr_pose - target_pose err_pos = np.linalg.norm(err_pose[:3]) pos_errors.append(err_pos) controller_k = 2.0 err_rot = tf.quat_angle_diff(curr_pose[3:], target_pose[3:]) target_q = self._raw_env.mocap_ik(-err_pose, arm) err_q = curr_q - target_q u_masked[:] = self._controller(err_q, prev_err, controller_k) self._phase_steps += 1 if self._phase == 0 and np.all( np.array(pos_errors) < 0.03) and err_rot < 0.1: self._phase = 1 self._phase_steps = 0 elif self._phase == 1 and np.all( np.array(pos_errors) < 0.03) and err_rot < 0.1: self._phase = 2 self._phase_steps = 0 elif self._phase == 2: if self._phase_steps > 30: self._phase = 3 self._phase_steps = 0 u = np.clip(u, self._env.action_space.low, self._env.action_space.high) return u