def get_tf(world_to_b: np.ndarray, world_to_a: np.ndarray) -> np.ndarray:
    """Return the pose of frame ``b`` expressed in frame ``a`` (a_to_b).

    Args:
        world_to_b: pose(s) of frame b in world coordinates, shape (..., 7)
            laid out as position (3) followed by quaternion (4). The
            quaternion is assumed w-first, matching ``quat_diff[..., 0]``
            usage elsewhere in this file — TODO confirm against the
            ``rotations`` module conventions.
        world_to_a: pose(s) of frame a in world coordinates, shape (..., 7).

    Returns:
        Pose array of shape (..., 7): position and orientation of b in a.
    """
    inv_quat_a = rotations.quat_conjugate(world_to_a[..., 3:])
    # Express the world-frame offset (b - a) in a's coordinates by rotating
    # it with the inverse (conjugate) of a's orientation. The original code
    # multiplied the conjugate by the identity quaternion first, which is a
    # no-op and has been removed.
    pos_tf = rotations.quat_rot_vec(inv_quat_a,
                                    world_to_b[..., :3] - world_to_a[..., :3])
    # Relative orientation: q_b * q_a^{-1}.
    quat_tf = rotations.quat_mul(world_to_b[..., 3:], inv_quat_a)
    return np.concatenate([pos_tf, quat_tf], axis=-1)
def _goal_distance(self, goal_a, goal_b):
    """Return (positional distance, rotational distance) between two goals.

    Args:
        goal_a, goal_b: arrays of shape (..., 7) — 3-D position followed by
            a w-first quaternion.

    Returns:
        Tuple ``(d_pos, d_rot)``; each entry is zero when the corresponding
        target component (``self.target_position`` / ``self.target_rotation``)
        is configured as ``'ignore'``.
    """
    assert goal_a.shape == goal_b.shape
    assert goal_a.shape[-1] == 7

    d_pos = np.zeros_like(goal_a[..., 0])
    d_rot = np.zeros_like(goal_b[..., 0])
    if self.target_position != 'ignore':
        delta_pos = goal_a[..., :3] - goal_b[..., :3]
        d_pos = np.linalg.norm(delta_pos, axis=-1)

    if self.target_rotation != 'ignore':
        quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]

        if self.ignore_z_target_rotation:
            # Special case: We want to ignore the Z component of the rotation.
            # This code here assumes Euler angles with xyz convention. We first
            # transform to euler, then set the Z component to be equal between
            # the two, and finally transform back into quaternions.
            euler_a = rotations.quat2euler(quat_a)
            euler_b = rotations.quat2euler(quat_b)
            # Fix: index the LAST axis so batched goals (with leading dims)
            # are handled correctly. The original ``euler_a[2] = euler_b[2]``
            # indexed axis 0 and was only correct for unbatched (1-D) input;
            # for 1-D input this is behaviorally identical.
            euler_a[..., 2] = euler_b[..., 2]
            quat_a = rotations.euler2quat(euler_a)

        # Subtract quaternions and extract angle between them. The clip
        # guards arccos against float round-off outside [-1, 1].
        quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
        angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
        d_rot = angle_diff
    assert d_pos.shape == d_rot.shape
    return d_pos, d_rot
def _goal_rot_distance(goal_a, goal_b):
    """Return the rotation angle (radians) separating two goal poses.

    Args:
        goal_a, goal_b: arrays of shape (..., 7); only the trailing 4
            components (a w-first quaternion) are used.

    Returns:
        Angle(s) between the two orientations, in [0, 2*pi].
    """
    assert goal_a.shape == goal_b.shape
    assert goal_a.shape[-1] == 7

    quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]
    # Subtract quaternions and extract angle between them: the scalar part
    # of q_a * conj(q_b) encodes half the angle. The clip guards arccos
    # against float round-off outside [-1, 1].
    # (The dead ``d_rot = np.zeros_like(...)`` initializer was removed — it
    # was unconditionally overwritten.)
    quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
    return 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
def _goal_distance(goal_a, goal_b, ignore_target_rotation): assert goal_a.shape == goal_b.shape assert goal_a.shape[-1] == 7 delta_pos = goal_a[..., :3] - goal_b[..., :3] d_pos = np.linalg.norm(delta_pos, axis=-1) d_rot = np.zeros_like(goal_b[..., 0]) if not ignore_target_rotation: quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:] # Subtract quaternions and extract angle between them. quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b)) angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.)) d_rot = angle_diff assert d_pos.shape == d_rot.shape return d_pos, d_rot
def _goal_distance_single(self, goal_a, goal_b):
    """Return (position distance, rotation angle) between two goal poses.

    Args:
        goal_a, goal_b: arrays of shape (..., 7) — 3-D position followed by
            a w-first quaternion.

    Returns:
        Tuple ``(d_pos, d_rot)``; ``d_rot`` is zero when
        ``self.target_rotation`` is ``'ignore'``.
    """
    assert goal_a.shape == goal_b.shape
    assert goal_a.shape[-1] == 7

    d_rot = np.zeros_like(goal_b[..., 0])
    d_pos = np.linalg.norm(goal_a[..., :3] - goal_b[..., :3], axis=-1)
    if self.target_rotation != 'ignore':
        quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]
        if self.ignore_z_target_rotation:
            # Ignore the Z rotation component: go to xyz-convention Euler
            # angles, force the Z components equal, and convert back.
            euler_a = rotations.quat2euler(quat_a)
            euler_b = rotations.quat2euler(quat_b)
            # Fix: index the LAST axis so batched goals work. The original
            # ``euler_a[2] = euler_b[2]`` indexed axis 0 and was only correct
            # for unbatched (1-D) input; for 1-D input this is identical.
            euler_a[..., 2] = euler_b[..., 2]
            quat_a = rotations.euler2quat(euler_a)
        # Angle of the relative quaternion; clip guards arccos round-off.
        quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
        angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
        d_rot = angle_diff
    return d_pos, d_rot
def generate_next_action(self, curr_state, end_goal, imagination, env,
                         num_acs=5000, noise=0.0, average=True):
    """Sample candidate first actions from the imagination GAN and return one.

    Args:
        curr_state: current observation vector. For fetch push/pick envs it
            is replaced by a freshly fetched observation after the object
            quaternion has been canonicalised.
        end_goal: goal vector the action should move toward.
        imagination: model exposing ``test_traj_rand_gan``.
        env: environment; must expose ``name`` and (for the fetch branch)
            ``save_state`` / ``restore_state`` / ``_get_obs``.
        num_acs: number of candidate actions to sample (forced to 3 when
            ``average`` is False, since only one sample is returned).
        noise: scale of Gaussian noise added to the replicated start states.
        average: if True, return the mean over all sampled first actions;
            otherwise return the first sample.

    Returns:
        Tuple ``(action, 1)``.
    """
    if not average:  # idiomatic form of the original ``average == False``
        num_acs = 3
    if env.name.startswith("fetch_push") or env.name.startswith("fetch_pick"):
        # Canonicalise the object quaternion: among the symmetry-equivalent
        # rotations (self.quaternion_invariants), pick the one closest to
        # upright (small xy euler components) and to the previously chosen
        # yaw, then rewrite the sim state accordingly.
        state = env.save_state()
        quaternions = np.tile(state.qpos[-4:],
                              (len(self.quaternion_invariants), 1))
        quaternion_rot = quat_mul(quaternions, self.quaternion_invariants)
        euler_angles = quat2euler(quaternion_rot)
        scores = np.linalg.norm(euler_angles[:, :2], axis=-1) + np.abs(
            euler_angles[:, -1]
            - np.tile(self.prev_angle[-1], len(euler_angles)))
        ind = np.argmin(scores)
        self.prev_angle = euler_angles[ind, :]
        state.qpos[-4:] = quaternion_rot[ind, :]
        # Rotate the angular velocity into the canonicalised frame.
        state.qvel[-3:] = quat_rot_vec(
            quat_conjugate(self.quaternion_invariants[ind]), state.qvel[-3:])
        env.restore_state(state)
        curr_state = env._get_obs()["observation"]
    init_states = np.tile(curr_state, (num_acs, 1))
    init_states += noise * np.random.randn(*init_states.shape)
    _, gen_actions = imagination.test_traj_rand_gan(
        init_states, np.tile(end_goal, (num_acs, 1)), num_steps=1)
    if average:
        return np.mean(gen_actions[:, 0, :], axis=0), 1
    return gen_actions[0, 0, :], 1
def _solve_qp_ik_pos(current_pose, target_pose, jac, joint_pos,
                     joint_lims=None, duration=None, margin=0.2):
    """Compute one IK step toward ``target_pose`` via a QP velocity solve.

    Args:
        current_pose: current end-effector pose, shape (7,) — position then
            w-first quaternion.
        target_pose: desired end-effector pose, shape (7,).
        jac: end-effector Jacobian. Not modified — see note below.
        joint_pos: current joint positions.
        joint_lims: optional joint limits forwarded to ``_solve_qp_ik_vel``.
        duration: optional time horizon forwarded to ``_solve_qp_ik_vel``.
        margin: joint-limit margin forwarded to ``_solve_qp_ik_vel``.

    Returns:
        New joint position vector ``joint_pos + qvel``.
    """
    pos_delta = target_pose[:3] - current_pose[:3]
    # Orientation error as Euler angles of the relative quaternion.
    q_diff = rotations.quat_mul(target_pose[3:],
                                rotations.quat_conjugate(current_pose[3:]))
    ang_delta = rotations.quat2euler(q_diff)
    vel = np.r_[pos_delta, ang_delta * 2.0]
    # Regularise the Jacobian with tiny uniform noise to avoid a singular QP.
    # Fix: use out-of-place addition — the original ``jac += ...`` mutated
    # the caller's array in place as a hidden side effect.
    jac = jac + np.random.uniform(size=jac.shape) * 1e-5
    qvel, optimal = _solve_qp_ik_vel(vel, jac, joint_pos, joint_lims,
                                     duration, margin)
    qvel = np.clip(qvel, -1.0, 1.0)
    if not optimal:
        # Damp the step when the QP solution was not optimal.
        qvel *= 0.1
    new_q = joint_pos + qvel
    return new_q
def _goal_distance(self, goal_a, goal_b):
    """ return: position distance and angle difference. """
    assert goal_a.shape == goal_b.shape
    assert goal_a.shape[-1] == 7

    d_pos = np.zeros_like(goal_a[..., 0])
    d_rot = np.zeros_like(goal_b[..., 0])

    if self.target_position != 'ignore':
        # Distance in the XY plane only — the Z offset is deliberately
        # excluded from the positional distance.
        xy_delta = goal_a[..., :2] - goal_b[..., :2]
        d_pos = np.linalg.norm(xy_delta, axis=-1)

    if self.target_rotation != 'ignore':
        quat_a = goal_a[..., 3:]
        quat_b = goal_b[..., 3:]
        # Angle of the relative quaternion q_a * conj(q_b); the clip keeps
        # float round-off from pushing arccos out of its domain.
        rel = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
        d_rot = 2 * np.arccos(np.clip(rel[..., 0], -1., 1.))

    assert d_pos.shape == d_rot.shape
    return d_pos, d_rot
def rotation_goal_distance(goal_a, goal_b, ignore_z_target_rotation=False):
    """Return the rotation angle (radians) separating two goal poses.

    Args:
        goal_a, goal_b: arrays of shape (..., 7); only the trailing 4
            components (a w-first quaternion) are used.
        ignore_z_target_rotation: when True, the Z (yaw) component is
            excluded from the comparison.

    Returns:
        Angle(s) between the two orientations, in [0, 2*pi].
    """
    assert goal_a.shape == goal_b.shape
    assert goal_a.shape[-1] == 7

    quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]
    if ignore_z_target_rotation:
        # Special case: We want to ignore the Z component of the rotation.
        # This code here assumes Euler angles with xyz convention. We first
        # transform to euler, then set the Z component to be equal between
        # the two, and finally transform back into quaternions.
        euler_a = rotations.quat2euler(quat_a)
        euler_b = rotations.quat2euler(quat_b)
        # Fix: index the LAST axis so batched goals work. The original
        # ``euler_a[2] = euler_b[2]`` indexed axis 0 and was only correct for
        # unbatched (1-D) input; for 1-D input this is identical.
        euler_a[..., 2] = euler_b[..., 2]
        quat_a = rotations.euler2quat(euler_a)
    # Subtract quaternions and extract angle between them; clip guards
    # arccos against float round-off. (The dead ``d_rot = zeros_like(...)``
    # initializer was removed — it was unconditionally overwritten.)
    quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
    return 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
def generate_next_action(self, curr_state, end_goal, imagination, env,
                         num_acs=50, max_steps=50, num_copies=100, num_reps=5,
                         num_iterations=2, alpha=1.0, osm_frac=0.0, tol=0.05,
                         return_average=True, noise=0.1):
    """Choose the next action by iterative refinement over imagined rollouts.

    Candidate first actions are sampled from the imagination GAN; each is
    scored by how often its imagined rollouts reach ``end_goal`` (averaged
    over ``num_copies`` replicas and ``num_reps`` repetitions, with the first
    step predicted by an ensemble of one-step models). Candidates are then
    re-sampled around the current best action for ``num_iterations`` rounds,
    CEM-style.

    Args:
        curr_state: current observation (replaced by a fresh observation for
            fetch push/pick envs after quaternion canonicalisation).
        end_goal: goal vector.
        imagination: provides ``test_traj_rand_gan`` and ``one_step_model``.
        env: environment; provides ``name``, state save/restore,
            ``_get_obs`` and ``batch_goal_achieved``.
        num_acs: number of candidate actions per iteration.
        max_steps: imagined rollout horizon used for scoring.
        num_copies: replicas per candidate for variance reduction.
        num_reps: scoring repetitions per iteration.
        num_iterations: refinement rounds.
        alpha: temperature of the exponential reward weighting.
        osm_frac: fraction of GAN states replaced by one-step-model
            predictions during scoring rollouts.
        tol: goal-achievement tolerance passed to ``batch_goal_achieved``.
        return_average: if True, return the reward-weighted average action;
            otherwise the single best-scoring candidate.
        noise: stddev of exploration noise added to re-sampled candidates.

    Returns:
        Tuple ``(best_action, 1)``.
    """
    if env.name.startswith("fetch_push") or env.name.startswith(
            "fetch_pick"):
        # Canonicalise the object quaternion: among the symmetry-equivalent
        # rotations (self.quaternion_invariants), pick the one closest to
        # upright and to the previously chosen yaw, rewrite the sim state,
        # and refresh the observation.
        state = env.save_state()
        quaternions = np.tile(state.qpos[-4:],
                              (len(self.quaternion_invariants), 1))
        quaternion_rot = quat_mul(quaternions, self.quaternion_invariants)
        euler_angles = quat2euler(quaternion_rot)
        scores = np.linalg.norm(euler_angles[:, :2], axis=-1) + np.abs(
            euler_angles[:, -1] - np.tile(self.prev_angle[-1],
                                          len(euler_angles)))
        ind = np.argmin(scores)
        self.prev_angle = euler_angles[ind, :]
        state.qpos[-4:] = quaternion_rot[ind, :]
        # Rotate the angular velocity into the canonicalised frame.
        state.qvel[-3:] = quat_rot_vec(
            quat_conjugate(self.quaternion_invariants[ind]), state.qvel[-3:])
        env.restore_state(state)
        curr_state = env._get_obs()["observation"]
    # Draw the initial pool of candidate first actions from the GAN.
    gen_states, gen_actions = imagination.test_traj_rand_gan(
        np.tile(curr_state, (num_acs, 1)), np.tile(end_goal, (num_acs, 1)),
        num_steps=1, frac_replaced_with_osm_pred=0.0)
    curr_states = gen_states[:, 0, :]
    curr_init_actions = gen_actions[:, 0, :]
    # Replicate each candidate num_copies times for lower-variance scoring.
    start_states = np.repeat(curr_states, num_copies, axis=0)
    end_goals = np.tile(end_goal, (start_states.shape[0], 1))
    num_osms = len(imagination.one_step_model.networks)
    best_action = None
    for it in range(num_iterations):
        # Randomly assign replicas to ensemble members of the one-step model.
        inds = np.array_split(np.random.permutation(start_states.shape[0]),
                              num_osms)
        rep_acs = np.repeat(curr_init_actions, num_copies, axis=0)
        success_fracs = []
        for j in range(num_reps):
            # First step via the one-step-model ensemble, remaining steps
            # imagined by the GAN.
            next_states = np.zeros_like(start_states)
            for i in range(num_osms):
                next_states[inds[i], ...] = imagination.one_step_model.predict(
                    start_states[inds[i], ...], rep_acs[inds[i], ...],
                    osm_ind=i, normed_input=False)
            gen_states, _ = imagination.test_traj_rand_gan(
                next_states, end_goals, num_steps=max_steps - 1,
                frac_replaced_with_osm_pred=osm_frac)
            success_mat = env.batch_goal_achieved(gen_states,
                                                  end_goals[:, np.newaxis, :],
                                                  tol=tol)
            # NOTE(review): successes are counted over max_steps - 1 imagined
            # steps but divided by max_steps — confirm this off-by-one
            # normalisation is intended.
            success_fracs.append(
                np.sum(success_mat, axis=-1).reshape(num_acs, num_copies)
                / max_steps)
        success_fracs = np.concatenate(success_fracs, axis=1)
        # Per-candidate score: mean success fraction over all replicas/reps.
        pseudo_rewards = np.mean(success_fracs, axis=-1)
        succ_frac = pseudo_rewards.copy()
        if return_average:
            #pseudo_rewards = np.exp(alpha * np.clip((pseudo_rewards - pseudo_rewards.mean()) / (pseudo_rewards.std() + 1e-4), -2.0, 2.0))
            # Standardise, then exponentiate (shifted by the max for
            # numerical stability) to get softmax-style weights.
            pseudo_rewards = (pseudo_rewards - pseudo_rewards.mean()) / (
                pseudo_rewards.std() + 1e-4)
            pseudo_rewards = np.exp(
                alpha * (pseudo_rewards - pseudo_rewards.max()))
            best_action = np.sum(
                curr_init_actions * pseudo_rewards[:, np.newaxis],
                axis=0) / np.sum(pseudo_rewards)
        else:
            best_action = curr_init_actions[np.argmax(pseudo_rewards), :]
        # Re-sample candidates around the chosen action; index 0 is kept
        # noise-free (elitism) so the current best survives the round.
        curr_init_actions = np.tile(best_action, (num_acs, 1))
        curr_init_actions[1:] = np.clip(
            curr_init_actions[1:]
            + noise * np.random.randn(*curr_init_actions[1:].shape),
            -1.0, 1.0)
    return best_action, 1
def quat_angle_diff(quat_a, quat_b):
    """Return the absolute angle (radians, in [0, pi]) between two quaternions.

    Args:
        quat_a, quat_b: w-first quaternion arrays of shape (..., 4).
    """
    # Relative rotation q_a * conj(q_b); its scalar part encodes half the
    # angle. The clip keeps float round-off from leaving arccos's domain.
    rel = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
    angle = 2 * np.arccos(np.clip(rel[..., 0], -1., 1.))
    # Wrap into the principal range, then take the magnitude.
    return np.abs(rotations.normalize_angles(angle))