def _goal_distance(self, goal_a, goal_b): assert goal_a.shape == goal_b.shape assert goal_a.shape[-1] == 7 d_pos = np.zeros_like(goal_a[..., 0]) d_rot = np.zeros_like(goal_b[..., 0]) if self.target_position != 'ignore': delta_pos = goal_a[..., :3] - goal_b[..., :3] d_pos = np.linalg.norm(delta_pos, axis=-1) if self.target_rotation != 'ignore': quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:] if self.ignore_z_target_rotation: # Special case: We want to ignore the Z component of the rotation. # This code here assumes Euler angles with xyz convention. We first transform # to euler, then set the Z component to be equal between the two, and finally # transform back into quaternions. euler_a = rotations.quat2euler(quat_a) euler_b = rotations.quat2euler(quat_b) euler_a[2] = euler_b[2] quat_a = rotations.euler2quat(euler_a) # Subtract quaternions and extract angle between them. quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b)) angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.)) d_rot = angle_diff assert d_pos.shape == d_rot.shape return d_pos, d_rot
def _sample_goal(self): home_path = os.getenv("HOME") goal_path = os.path.join(*[ home_path, "DRL_AI4RoMoCo", "code", "environment", "experiment_configs", "goal_ur10_simpheg_conf2.json"]) with open(goal_path, encoding='utf-8') as file: goal = json.load(file) xpos = goal['xpos'] + self.offset['body_pos_offset'] xquat = goal['xquat'] rpy = normalize_rad(rotations.quat2euler(xquat)+self.offset['body_euler_offset']) return numpy.concatenate([xpos, rpy]).copy()
def _get_obs(self): # positions force = self.sim.data.sensordata x_pos = self.sim.data.get_body_xpos("gripper_dummy_heg") x_quat = self.sim.data.get_body_xquat("gripper_dummy_heg") rpy = rotations.quat2euler(x_quat) obs = np.concatenate( [x_pos - self.goal[:3], rpy - self.goal[3:], force]) return obs '''
def _sample_goal(self): home_path = os.getenv("HOME") goal_path = os.path.join(*[ home_path, "DRL_SetBot-RearVentilation", "experiment_configs", "goal_ur10_simpheg_conf2.json" ]) with open(goal_path, encoding='utf-8') as file: goal = json.load(file) xpos = goal['xpos'] xquat = goal['xquat'] rpy = normalize_rad(rotations.quat2euler(xquat)) return numpy.concatenate([xpos, rpy]).copy()
def rotation_goal_distance(goal_a, goal_b, ignore_z_target_rotation = False): assert goal_a.shape == goal_b.shape assert goal_a.shape[-1] == 7 d_rot = np.zeros_like(goal_b[..., 0]) quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:] if ignore_z_target_rotation: # Special case: We want to ignore the Z component of the rotation. # This code here assumes Euler angles with xyz convention. We first transform # to euler, then set the Z component to be equal between the two, and finally # transform back into quaternions. euler_a = rotations.quat2euler(quat_a) euler_b = rotations.quat2euler(quat_b) euler_a[2] = euler_b[2] quat_a = rotations.euler2quat(euler_a) # Subtract quaternions and extract angle between them. quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b)) angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.)) d_rot = angle_diff return d_rot
def generate_next_action(self, curr_state, end_goal, imagination, env, num_acs=5000, noise=0.0, average=True): if average == False: num_acs = 3 if env.name.startswith("fetch_push") or env.name.startswith( "fetch_pick"): state = env.save_state() quaternions = np.tile(state.qpos[-4:], (len(self.quaternion_invariants), 1)) quaternion_rot = quat_mul(quaternions, self.quaternion_invariants) euler_angles = quat2euler(quaternion_rot) scores = np.linalg.norm(euler_angles[:, :2], axis=-1) + np.abs( euler_angles[:, -1] - np.tile(self.prev_angle[-1], len(euler_angles))) ind = np.argmin(scores) self.prev_angle = euler_angles[ind, :] state.qpos[-4:] = quaternion_rot[ind, :] state.qvel[-3:] = quat_rot_vec( quat_conjugate(self.quaternion_invariants[ind]), state.qvel[-3:]) env.restore_state(state) curr_state = env._get_obs()["observation"] init_states = np.tile(curr_state, (num_acs, 1)) init_states += noise * np.random.randn(*init_states.shape) _, gen_actions = imagination.test_traj_rand_gan(init_states, np.tile( end_goal, (num_acs, 1)), num_steps=1) if average: return np.mean(gen_actions[:, 0, :], axis=0), 1 else: return gen_actions[0, 0, :], 1
def _solve_qp_ik_pos(current_pose, target_pose, jac, joint_pos, joint_lims=None, duration=None, margin=0.2): pos_delta = target_pose[:3] - current_pose[:3] q_diff = rotations.quat_mul(target_pose[3:], rotations.quat_conjugate(current_pose[3:])) ang_delta = rotations.quat2euler(q_diff) vel = np.r_[pos_delta, ang_delta * 2.0] jac += np.random.uniform(size=jac.shape) * 1e-5 qvel, optimal = _solve_qp_ik_vel(vel, jac, joint_pos, joint_lims, duration, margin) qvel = np.clip(qvel, -1.0, 1.0) if not optimal: qvel *= 0.1 new_q = joint_pos + qvel return new_q
def get_ori_diff(self): return self.get_finger_ori() - quat2euler(self.sim.data.get_body_xquat("knob_link"))
def _get_achieved_goal(self): """ Cube center position (3), cube center rotation (4), layer angle (1) total 8. """ # Cube center position and rotation (quaternion). cube_qpos = self.sim.data.get_site_xpos('rubik:cube_center') cube_rot = rotations.mat2quat( self.sim.data.get_site_xmat('rubik:cube_center')) # Cube ball joint angles cube_joint_pos, cube_joint_vel = shadow_get_obs(self.sim, name='rubik') assert cube_joint_pos.shape == (8, ) if self.rot_layer in ["up", "up'"]: box_0_0_1_euler = rotations.quat2euler( np.round(cube_joint_pos[4], 2)) box_1_0_1_euler = rotations.quat2euler( np.round(cube_joint_pos[5], 2)) box_0_1_1_euler = rotations.quat2euler( np.round(cube_joint_pos[6], 2)) box_1_1_1_euler = rotations.quat2euler( np.round(cube_joint_pos[7], 2)) for data in [ box_0_0_1_euler, box_1_0_1_euler, box_0_1_1_euler, box_1_1_1_euler ]: data[data < -3] += 6.28 U_x_angle = [ box_1_1_1_euler[0], box_1_0_1_euler[0], box_0_1_1_euler[0], box_0_0_1_euler[0] ] U_y_angle = [ box_1_1_1_euler[1], box_1_0_1_euler[1], box_0_1_1_euler[1], box_0_0_1_euler[1] ] U_z_angle = [ box_1_1_1_euler[2], box_1_0_1_euler[2], box_0_1_1_euler[2], box_0_0_1_euler[2] ] U_bool = ((np.max(U_x_angle) - np.min(U_x_angle)) < 0.1) & \ ((np.max(U_y_angle) - np.min(U_y_angle)) < 0.1) & \ ((np.max(U_z_angle) - np.min(U_z_angle)) < 0.1) if U_bool: angle = [U_z_angle[0]] else: angle = [0] elif self.rot_layer in ["right", "right'"]: box_0_1_0_euler = rotations.quat2euler( np.round(cube_joint_pos[2], 2)) box_1_1_0_euler = rotations.quat2euler( np.round(cube_joint_pos[3], 2)) box_0_1_1_euler = rotations.quat2euler( np.round(cube_joint_pos[6], 2)) box_1_1_1_euler = rotations.quat2euler( np.round(cube_joint_pos[7], 2)) for data in [ box_0_1_0_euler, box_1_1_0_euler, box_0_1_1_euler, box_1_1_1_euler ]: data[data < -3] += 6.28 R_x_angle = [ box_1_1_1_euler[0], box_1_1_0_euler[0], box_0_1_1_euler[0], box_0_1_0_euler[0] ] R_y_angle = [ box_1_1_1_euler[1], box_1_1_0_euler[1], box_0_1_1_euler[1], box_0_1_0_euler[1] ] R_z_angle = [ box_1_1_1_euler[2], box_1_1_0_euler[2], box_0_1_1_euler[2], box_0_1_0_euler[2] ] R_bool = ((np.max(R_x_angle) - np.min(R_x_angle)) < 0.1) & \ ((np.max(R_y_angle) - np.min(R_y_angle)) < 0.1) & \ ((np.max(R_z_angle) - np.min(R_z_angle)) < 0.1) if R_bool: angle = [R_y_angle[0]] else: angle = [0] elif self.rot_layer in ["front", "front'"]: box_1_0_0_euler = rotations.quat2euler( np.round(cube_joint_pos[1], 2)) box_1_1_0_euler = rotations.quat2euler( np.round(cube_joint_pos[3], 2)) box_1_0_1_euler = rotations.quat2euler( np.round(cube_joint_pos[5], 2)) box_1_1_1_euler = rotations.quat2euler( np.round(cube_joint_pos[7], 2)) for data in [ box_1_0_0_euler, box_1_1_0_euler, box_1_0_1_euler, box_1_1_1_euler ]: data[data < -3] += 6.28 F_x_angle = [ box_1_0_1_euler[0], box_1_0_0_euler[0], box_1_1_1_euler[0], box_1_1_0_euler[0] ] F_y_angle = [ box_1_0_1_euler[1], box_1_0_0_euler[1], box_1_1_1_euler[1], box_1_1_0_euler[1] ] F_z_angle = [ box_1_0_1_euler[2], box_1_0_0_euler[2], box_1_1_1_euler[2], box_1_1_0_euler[2] ] F_bool = ((np.max(F_x_angle) - np.min(F_x_angle)) < 0.1) & \ ((np.max(F_y_angle) - np.min(F_y_angle)) < 0.1) & \ ((np.max(F_z_angle) - np.min(F_z_angle)) < 0.1) if F_bool: angle = [F_x_angle[0]] else: angle = [0] cube_qpos = np.concatenate([cube_qpos, cube_rot, angle]) assert cube_qpos.shape == (8, ) return cube_qpos
def get_finger_ori(self): finger_rel_quat = self.sim.data.get_body_xquat("rightfinger") hand_quat = self.sim.data.get_body_xquat("right_hand") finger_world_quat = quat_mul(finger_rel_quat, hand_quat) # TODO: which one at first? return quat2euler(finger_world_quat)
def generate_next_action(self, curr_state, end_goal, imagination, env, num_acs=50, max_steps=50, num_copies=100, num_reps=5, num_iterations=2, alpha=1.0, osm_frac=0.0, tol=0.05, return_average=True, noise=0.1): if env.name.startswith("fetch_push") or env.name.startswith( "fetch_pick"): state = env.save_state() quaternions = np.tile(state.qpos[-4:], (len(self.quaternion_invariants), 1)) quaternion_rot = quat_mul(quaternions, self.quaternion_invariants) euler_angles = quat2euler(quaternion_rot) scores = np.linalg.norm(euler_angles[:, :2], axis=-1) + np.abs( euler_angles[:, -1] - np.tile(self.prev_angle[-1], len(euler_angles))) ind = np.argmin(scores) self.prev_angle = euler_angles[ind, :] state.qpos[-4:] = quaternion_rot[ind, :] state.qvel[-3:] = quat_rot_vec( quat_conjugate(self.quaternion_invariants[ind]), state.qvel[-3:]) env.restore_state(state) curr_state = env._get_obs()["observation"] gen_states, gen_actions = imagination.test_traj_rand_gan( np.tile(curr_state, (num_acs, 1)), np.tile(end_goal, (num_acs, 1)), num_steps=1, frac_replaced_with_osm_pred=0.0) curr_states = gen_states[:, 0, :] curr_init_actions = gen_actions[:, 0, :] start_states = np.repeat(curr_states, num_copies, axis=0) end_goals = np.tile(end_goal, (start_states.shape[0], 1)) num_osms = len(imagination.one_step_model.networks) best_action = None for it in range(num_iterations): inds = np.array_split(np.random.permutation(start_states.shape[0]), num_osms) rep_acs = np.repeat(curr_init_actions, num_copies, axis=0) success_fracs = [] for j in range(num_reps): next_states = np.zeros_like(start_states) for i in range(num_osms): next_states[inds[i], ...] = imagination.one_step_model.predict( start_states[inds[i], ...], rep_acs[inds[i], ...], osm_ind=i, normed_input=False) gen_states, _ = imagination.test_traj_rand_gan( next_states, end_goals, num_steps=max_steps - 1, frac_replaced_with_osm_pred=osm_frac) success_mat = env.batch_goal_achieved(gen_states, end_goals[:, np.newaxis, :], tol=tol) success_fracs.append( np.sum(success_mat, axis=-1).reshape(num_acs, num_copies) / max_steps) success_fracs = np.concatenate(success_fracs, axis=1) pseudo_rewards = np.mean(success_fracs, axis=-1) succ_frac = pseudo_rewards.copy() if return_average: #pseudo_rewards = np.exp(alpha * np.clip((pseudo_rewards - pseudo_rewards.mean()) / (pseudo_rewards.std() + 1e-4), -2.0, 2.0)) pseudo_rewards = (pseudo_rewards - pseudo_rewards.mean()) / ( pseudo_rewards.std() + 1e-4) pseudo_rewards = np.exp( alpha * (pseudo_rewards - pseudo_rewards.max())) best_action = np.sum( curr_init_actions * pseudo_rewards[:, np.newaxis], axis=0) / np.sum(pseudo_rewards) else: best_action = curr_init_actions[np.argmax(pseudo_rewards), :] curr_init_actions = np.tile(best_action, (num_acs, 1)) curr_init_actions[1:] = np.clip( curr_init_actions[1:] + noise * np.random.randn(*curr_init_actions[1:].shape), -1.0, 1.0) return best_action, 1
def get_finger_ori(self): return quat2euler(self.sim.data.get_body_xquat("robotwrist_rolllink"))
def randomize_ur10_xml(var_mass=0.0, var_damp=0.0, var_fr=0.0, var_grav_x_y=0.0, var_grav_z=0.0, var_body_pos=0.00, var_body_rot=0.0, worker_id=1): # parameters: # # var_mass : mass variance relative to actual mass # # var_damp : damping variance relative to the standard value # var_fr : friction variance relative to the standard value # # var_grav_x_y : gravity variance in x- and y-direction (absolute) # var_grav_z : gravity variance in z-direction (absolute) # # var_body_pos : variance of body position in m per direction # var_body_rot : variance of body rotation in degree per axis # ______________________________________________________________________ # # return : offset of car_body to consider for goal position # # ______________________________________________________________________ main_xml_temp = os.path.join(*[ur_path, "ur10_assembly_setup_rand_temp_{}.xml".format(worker_id)]) robot_body_rel = os.path.join("ur10_heg", "ur10_body_heg_rand_temp_{}.xml".format(worker_id)) car_body_rel = os.path.join("ur10_heg", "car_body_rand_temp_{}.xml".format(worker_id)) defaults_rel = os.path.join("ur10_heg", "ur10_default_rand_temp_{}.xml".format(worker_id)) robot_body_xml_temp = os.path.join(ur_path, robot_body_rel) car_body_xml_temp = os.path.join(ur_path, car_body_rel) defaults_xml_temp = os.path.join(ur_path, defaults_rel) # load robot body xml and randomize body masses tree = ET.parse(robot_body_xml) inertials = [] for elem in tree.iter(): if elem.tag == 'inertial': # find inertial elements in xml inertials.append(elem) for inertial in inertials: # change masses inertial.attrib['mass'] = str((np.random.uniform(-var_mass, +var_mass) + 1) * float(inertial.attrib['mass'])) tree.write(robot_body_xml_temp) # save new xml in temp file to be loaded in gym env # load defaults xml and randomize joint damping and surface friction #damping_arm_rand = damping_arm * (1 + np.random.uniform(-var_damp, +var_damp)) #damping_wrist_rand = damping_wrist * (1 + np.random.uniform(-var_damp, +var_damp)) damping_pan_rand = damping_pan * (1 + np.random.uniform(-var_damp, +var_damp)) damping_lift_rand = damping_lift * (1 + np.random.uniform(-var_damp, +var_damp)) damping_elbow_rand = damping_elbow * (1 + np.random.uniform(-var_damp, +var_damp)) damping_wrist1_rand = damping_wrist1 * (1 + np.random.uniform(-var_damp, +var_damp)) damping_wrist2_rand = damping_wrist2 * (1 + np.random.uniform(-var_damp, +var_damp)) damping_wrist3_rand = damping_wrist3 * (1 + np.random.uniform(-var_damp, +var_damp)) friction_body_rand = friction_body * (1 + np.random.uniform(-var_fr, +var_fr, (3,))) friction_heg_rand = friction_heg * (1 + np.random.uniform(-var_fr, +var_fr, (3,))) friction_body_string = "{} {} {}".format(*friction_body_rand) friction_heg_string = "{} {} {}".format(*friction_heg_rand) tree = ET.parse(defaults_xml) for default in tree.findall('default'): if default.attrib['class'] == 'ur10:pan': default.findall('joint')[0].attrib['damping'] = str(damping_pan_rand) if default.attrib['class'] == 'ur10:lift': default.findall('joint')[0].attrib['damping'] = str(damping_lift_rand) if default.attrib['class'] == 'ur10:elbow': default.findall('joint')[0].attrib['damping'] = str(damping_elbow_rand) if default.attrib['class'] == 'ur10:wrist1': default.findall('joint')[0].attrib['damping'] = str(damping_wrist1_rand) if default.attrib['class'] == 'ur10:wrist2': default.findall('joint')[0].attrib['damping'] = str(damping_wrist2_rand) if default.attrib['class'] == 'ur10:wrist3': default.findall('joint')[0].attrib['damping'] = str(damping_wrist3_rand) if default.attrib['class'] == 'body': default.findall('geom')[0].attrib['friction'] = friction_body_string if default.attrib['class'] == 'heg': default.findall('geom')[0].attrib['friction'] = friction_heg_string tree.write(defaults_xml_temp) # load main xml and randomize gravity, also adapt includes to worker_id grav_x = np.random.uniform(-var_grav_x_y, var_grav_x_y) grav_y = np.random.uniform(-var_grav_x_y, var_grav_x_y) grav_z = np.random.uniform(-var_grav_z, var_grav_z) - 9.81 grav_string = "{} {} {}".format(grav_x, grav_y, grav_z) tree = ET.parse(main_xml) tree.findall("option")[0].attrib['gravity'] = grav_string for include in tree.findall("default")[0].iter("include"): include.attrib['file'] = defaults_rel world_temp_files = [car_body_rel, robot_body_rel] for i, include in enumerate(tree.findall("worldbody")[0].iter("include")): include.attrib['file'] = world_temp_files[i] tree.write(main_xml_temp) # load car body xml and change position (offset is returned by this function) body_euler = normalize_rad(rotations.quat2euler(body_quat)) body_pos_offset = np.random.uniform(-var_body_pos, var_body_pos, (3,)) body_euler_offset = np.random.uniform(-var_body_rot/180*np.pi, var_body_rot/180*np.pi, (3,)) body_pos_rand = body_pos + body_pos_offset body_euler_rand = body_euler + body_euler_offset body_quat_rand = rotations.euler2quat(body_euler_rand) body_pos_rand_string = "{} {} {}".format(*body_pos_rand) body_quat_rand_string = "{} {} {} {}".format(*body_quat_rand) tree = ET.parse(car_body_xml) for body in tree.findall('body'): if body.attrib['name'] == 'body_link': body.attrib['pos'] = body_pos_rand_string body.attrib['quat'] = body_quat_rand_string tree.write(car_body_xml_temp) offset = { 'body_pos_offset': body_pos_offset, 'body_euler_offset': body_euler_offset } return offset
def quat_to_euler(self, quat): return rotations.quat2euler(quat)
def _get_obs(self): # positions grip_pos = self.sim.data.get_site_xpos('robot0:grip') dt = self.sim.nsubsteps * self.sim.model.opt.timestep grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt robot_qpos, robot_qvel = utils.robot_get_obs(self.sim) if self.has_object: object_pos = self.sim.data.get_site_xpos('object0') # rotations object_rot = rotations.mat2euler( self.sim.data.get_site_xmat('object0')) # velocities object_velp = self.sim.data.get_site_xvelp('object0') * dt object_velr = self.sim.data.get_site_xvelr('object0') * dt # gripper state object_rel_pos = object_pos - grip_pos object_velp -= grip_velp else: object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.zeros( 0) gripper_state = robot_qpos[-2:] gripper_vel = robot_qvel[ -2:] * dt # change to a scalar if the gripper is made symmetric if not self.has_object: achieved_goal = grip_pos.copy() else: achieved_goal = np.squeeze(object_pos.copy()) obs = np.concatenate([ grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(), object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel, ]) # obstacle state if self.obstacle_added: # added by ray 4 if self.first_time_in_loop: # First time goes in to init network dimention. self.first_time_in_loop = False for i in range(self.obstacle_num): obstacle_id = "obstacle_" + str(i) self.used_obstacle_bid_list.append( self.model.body_name2id(obstacle_id)) all_obstacles_states = np.array([]) for obstacle_bid in self.used_obstacle_bid_list: obstacle_pos = self.model.body_pos[obstacle_bid].ravel().copy() obstacle_quat = self.model.body_quat[obstacle_bid].ravel( ).copy() obstacle_rot = rotations.quat2euler( obstacle_quat).ravel().copy() temp_all_obstacles_states = np.concatenate( [obstacle_pos, obstacle_rot]) all_obstacles_states = np.concatenate( [all_obstacles_states, temp_all_obstacles_states]) obs = np.concatenate([ grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(), object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel, all_obstacles_states.ravel(), ]) return { 'observation': obs.copy(), 'achieved_goal': achieved_goal.copy(), 'desired_goal': self.goal.copy(), }