Example no. 1
0
def get_tf(world_to_b: np.ndarray, world_to_a: np.ndarray) -> np.ndarray:
    """Return the pose of frame ``b`` expressed in frame ``a`` (a_to_b).

    Args:
        world_to_b: pose(s) of frame b in world coordinates, shape (..., 7),
            laid out as position (3) followed by quaternion (4).
        world_to_a: pose(s) of frame a in world coordinates, same layout.

    Returns:
        np.ndarray of shape (..., 7): position and quaternion of b relative
        to frame a.
    """
    # Inverse rotation of frame a (conjugate of a unit quaternion).
    # The original multiplied by quat_identity() first, which is a no-op,
    # so the conjugate is used directly and reused below.
    inv_a = rotations.quat_conjugate(world_to_a[..., 3:])
    # Translation from a to b, re-expressed in a's frame.
    pos_tf = rotations.quat_rot_vec(inv_a,
                                    world_to_b[..., :3] - world_to_a[..., :3])
    # Relative orientation: q_b * q_a^-1.
    quat_tf = rotations.quat_mul(world_to_b[..., 3:], inv_a)
    return np.concatenate([pos_tf, quat_tf], axis=-1)
    def _goal_distance(self, goal_a, goal_b):
        """Return (positional distance, rotational distance) between goals.

        Both goals are 7-D: position (3) followed by quaternion (4).
        Components whose target mode is ``'ignore'`` contribute zero.
        """
        assert goal_a.shape == goal_b.shape
        assert goal_a.shape[-1] == 7

        d_pos = np.zeros_like(goal_a[..., 0])
        d_rot = np.zeros_like(goal_b[..., 0])
        if self.target_position != 'ignore':
            delta_pos = goal_a[..., :3] - goal_b[..., :3]
            d_pos = np.linalg.norm(delta_pos, axis=-1)

        if self.target_rotation != 'ignore':
            quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]

            if self.ignore_z_target_rotation:
                # Special case: ignore the Z component of the rotation.
                # Convert to xyz Euler angles, force the Z angles equal,
                # then convert back to quaternions.
                euler_a = rotations.quat2euler(quat_a)
                euler_b = rotations.quat2euler(quat_b)
                # Index the LAST axis explicitly so batched goals (leading
                # dimensions) are handled correctly; the original
                # `euler_a[2] = euler_b[2]` overwrote a batch row instead
                # of the Z angle. For a single 1-D goal this is identical.
                euler_a[..., 2] = euler_b[..., 2]
                quat_a = rotations.euler2quat(euler_a)

            # Relative rotation q_a * q_b^-1; its scalar part encodes the
            # angle between the two orientations.
            quat_diff = rotations.quat_mul(quat_a,
                                           rotations.quat_conjugate(quat_b))
            angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
            d_rot = angle_diff
        assert d_pos.shape == d_rot.shape
        return d_pos, d_rot
Example no. 3
0
    def _goal_distance(self, goal_a, goal_b):
        """Compute positional and rotational distances between two goals.

        Goals are 7-D: position (3) followed by quaternion (4). A component
        whose target mode is ``'ignore'`` yields zero distance.
        """
        assert goal_a.shape == goal_b.shape
        assert goal_a.shape[-1] == 7

        d_pos = np.zeros_like(goal_a[..., 0])
        d_rot = np.zeros_like(goal_b[..., 0])
        if self.target_position != 'ignore':
            delta_pos = goal_a[..., :3] - goal_b[..., :3]
            d_pos = np.linalg.norm(delta_pos, axis=-1)

        if self.target_rotation != 'ignore':
            quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]

            if self.ignore_z_target_rotation:
                # Ignore the Z rotation: go to xyz Euler angles, equalize
                # the Z angles, then convert back to quaternions.
                euler_a = rotations.quat2euler(quat_a)
                euler_b = rotations.quat2euler(quat_b)
                # Last-axis indexing so batched goals work; the original
                # `euler_a[2] = euler_b[2]` clobbered batch rows rather
                # than the Z angle (identical for a single 1-D goal).
                euler_a[..., 2] = euler_b[..., 2]
                quat_a = rotations.euler2quat(euler_a)

            # Relative rotation q_a * q_b^-1; scalar part gives the angle.
            quat_diff = rotations.quat_mul(quat_a,
                                           rotations.quat_conjugate(quat_b))
            angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
            d_rot = angle_diff
        assert d_pos.shape == d_rot.shape
        return d_pos, d_rot
Example no. 4
0
File: her.py Project: ZiyeHu/myCHER
 def _goal_rot_distance(goal_a, goal_b):
     """Return the rotation angle (radians) between two 7-D goal poses.

     Only the quaternion part (last four components) is compared; the
     position part is ignored.
     """
     assert goal_a.shape == goal_b.shape
     assert goal_a.shape[-1] == 7
     quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]
     # Relative rotation q_a * q_b^-1; its scalar part encodes the angle.
     # (The dead `d_rot = np.zeros_like(...)` initializer was removed: it
     # was unconditionally overwritten before being read.)
     quat_diff = rotations.quat_mul(quat_a,
                                    rotations.quat_conjugate(quat_b))
     return 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
Example no. 5
0
def _goal_distance(goal_a, goal_b, ignore_target_rotation):
    """Compute positional and rotational distances between two 7-D goals.

    Each goal is position (3) followed by quaternion (4). When
    *ignore_target_rotation* is truthy the rotational distance is zero.

    Returns:
        Tuple ``(d_pos, d_rot)`` of arrays with matching shapes.
    """
    assert goal_a.shape == goal_b.shape
    assert goal_a.shape[-1] == 7

    d_pos = np.linalg.norm(goal_a[..., :3] - goal_b[..., :3], axis=-1)

    if ignore_target_rotation:
        d_rot = np.zeros_like(goal_b[..., 0])
    else:
        # Relative rotation q_a * q_b^-1; the scalar component encodes
        # the angle between the two orientations.
        rel = rotations.quat_mul(goal_a[..., 3:],
                                 rotations.quat_conjugate(goal_b[..., 3:]))
        d_rot = 2 * np.arccos(np.clip(rel[..., 0], -1., 1.))

    assert d_pos.shape == d_rot.shape
    return d_pos, d_rot
Example no. 6
0
    def _goal_distance_single(self, goal_a, goal_b):
        """Return (positional, rotational) distance between two 7-D goals.

        Goals are position (3) followed by quaternion (4). The rotational
        distance is zero when ``self.target_rotation == 'ignore'``.
        """
        assert goal_a.shape == goal_b.shape
        assert goal_a.shape[-1] == 7

        d_rot = np.zeros_like(goal_b[..., 0])
        d_pos = np.linalg.norm(goal_a[..., :3] - goal_b[..., :3], axis=-1)

        if self.target_rotation != 'ignore':
            quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]
            if self.ignore_z_target_rotation:
                # Drop the Z rotation: equalize the Z Euler angle before
                # comparing. Last-axis indexing keeps batched goals
                # correct; the original `euler_a[2] = euler_b[2]` would
                # clobber a batch row instead of the Z angle (identical
                # behavior for a single 1-D goal).
                euler_a = rotations.quat2euler(quat_a)
                euler_b = rotations.quat2euler(quat_b)
                euler_a[..., 2] = euler_b[..., 2]
                quat_a = rotations.euler2quat(euler_a)
            # Relative rotation q_a * q_b^-1; scalar part gives the angle.
            quat_diff = rotations.quat_mul(quat_a,
                                           rotations.quat_conjugate(quat_b))
            angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
            d_rot = angle_diff
        return d_pos, d_rot
Example no. 7
0
    def generate_next_action(self,
                             curr_state,
                             end_goal,
                             imagination,
                             env,
                             num_acs=5000,
                             noise=0.0,
                             average=True):
        """Sample candidate first actions from the imagination model and
        return one.

        Args:
            curr_state: current observation vector (replaced by the
                canonicalized observation for fetch push/pick envs).
            end_goal: goal vector tiled across all sampled candidates.
            imagination: model exposing ``test_traj_rand_gan``.
            env: environment; for fetch push/pick it must support
                ``save_state``/``restore_state`` and ``_get_obs``.
            num_acs: number of candidate actions to sample.
            noise: std-dev of Gaussian noise added to the tiled states.
            average: if True, return the mean of all sampled first
                actions; otherwise return the first sampled action.

        Returns:
            Tuple ``(action, 1)``.
        """
        if not average:  # idiomatic truthiness test, was `average == False`
            # No averaging, so only a few samples are needed.
            num_acs = 3

        if env.name.startswith("fetch_push") or env.name.startswith(
                "fetch_pick"):
            # Canonicalize the object orientation: among the stored
            # quaternion invariants, pick the rotation whose Euler angles
            # are closest to flat (small x/y) and to the previous yaw,
            # then rewrite the simulator state to use it.
            state = env.save_state()
            quaternions = np.tile(state.qpos[-4:],
                                  (len(self.quaternion_invariants), 1))
            quaternion_rot = quat_mul(quaternions, self.quaternion_invariants)
            euler_angles = quat2euler(quaternion_rot)
            scores = np.linalg.norm(euler_angles[:, :2], axis=-1) + np.abs(
                euler_angles[:, -1] -
                np.tile(self.prev_angle[-1], len(euler_angles)))
            ind = np.argmin(scores)
            self.prev_angle = euler_angles[ind, :]
            state.qpos[-4:] = quaternion_rot[ind, :]
            # Rotate the angular velocity into the canonical frame.
            state.qvel[-3:] = quat_rot_vec(
                quat_conjugate(self.quaternion_invariants[ind]),
                state.qvel[-3:])
            env.restore_state(state)
            curr_state = env._get_obs()["observation"]

        init_states = np.tile(curr_state, (num_acs, 1))
        init_states += noise * np.random.randn(*init_states.shape)

        _, gen_actions = imagination.test_traj_rand_gan(init_states,
                                                        np.tile(
                                                            end_goal,
                                                            (num_acs, 1)),
                                                        num_steps=1)
        if average:
            return np.mean(gen_actions[:, 0, :], axis=0), 1
        else:
            return gen_actions[0, 0, :], 1
Example no. 8
0
def _solve_qp_ik_pos(current_pose,
                     target_pose,
                     jac,
                     joint_pos,
                     joint_lims=None,
                     duration=None,
                     margin=0.2):
    """One IK step: joint positions that move ``current_pose`` toward
    ``target_pose``.

    Poses are 7-D (position followed by quaternion); ``jac`` is the
    end-effector Jacobian and is NOT modified. The remaining arguments are
    forwarded to ``_solve_qp_ik_vel``.

    Returns:
        New joint position vector ``joint_pos + qvel``, with ``qvel``
        clipped to [-1, 1] and damped when the QP solution is suboptimal.
    """
    pos_delta = target_pose[:3] - current_pose[:3]
    # Orientation error as a relative quaternion, converted to Euler
    # angles and scaled into the 6-D velocity target below.
    q_diff = rotations.quat_mul(target_pose[3:],
                                rotations.quat_conjugate(current_pose[3:]))
    ang_delta = rotations.quat2euler(q_diff)
    vel = np.r_[pos_delta, ang_delta * 2.0]
    # Tiny random jitter regularizes a (near-)singular Jacobian. Use an
    # out-of-place add: the original `jac += ...` mutated the caller's
    # array in place on every call, accumulating noise across calls.
    jac = jac + np.random.uniform(size=jac.shape) * 1e-5
    qvel, optimal = _solve_qp_ik_vel(vel, jac, joint_pos, joint_lims, duration,
                                     margin)
    qvel = np.clip(qvel, -1.0, 1.0)
    if not optimal:
        # Damp the step when the QP did not find an optimal solution.
        qvel *= 0.1
    return joint_pos + qvel
    def _goal_distance(self, goal_a, goal_b):
        """Return (position distance, angle difference) between two goals.

        Goals are 7-D: position (3) followed by quaternion (4). The
        position distance deliberately uses only x/y — the z offset is
        ignored. Components whose target mode is 'ignore' yield zero.
        """
        assert goal_a.shape == goal_b.shape
        assert goal_a.shape[-1] == 7

        d_pos = np.zeros_like(goal_a[..., 0])
        d_rot = np.zeros_like(goal_b[..., 0])

        if self.target_position != 'ignore':
            # Only the x/y plane matters here; the z distance is ignored.
            d_pos = np.linalg.norm(goal_a[..., :2] - goal_b[..., :2],
                                   axis=-1)

        if self.target_rotation != 'ignore':
            # Relative rotation q_a * q_b^-1; its scalar part encodes the
            # angle between the two orientations.
            rel_quat = rotations.quat_mul(
                goal_a[..., 3:], rotations.quat_conjugate(goal_b[..., 3:]))
            d_rot = 2 * np.arccos(np.clip(rel_quat[..., 0], -1., 1.))

        assert d_pos.shape == d_rot.shape
        return d_pos, d_rot
Example no. 10
0
def rotation_goal_distance(goal_a, goal_b, ignore_z_target_rotation=False):
    """Return the rotation angle (radians) between two 7-D goal poses.

    Goals are position (3) followed by quaternion (4); only the quaternion
    part is compared. With ``ignore_z_target_rotation`` the Z Euler angle
    is equalized first so yaw does not contribute to the distance.
    """
    assert goal_a.shape == goal_b.shape
    assert goal_a.shape[-1] == 7
    quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]

    if ignore_z_target_rotation:
        # Ignore the Z rotation: go to xyz Euler angles, force the Z
        # angles equal, then convert back to quaternions. Last-axis
        # indexing keeps batched goals correct; `euler_a[2] = euler_b[2]`
        # would clobber a batch row instead of the Z angle.
        euler_a = rotations.quat2euler(quat_a)
        euler_b = rotations.quat2euler(quat_b)
        euler_a[..., 2] = euler_b[..., 2]
        quat_a = rotations.euler2quat(euler_a)

    # Relative rotation q_a * q_b^-1; its scalar part encodes the angle.
    # (Removed the dead `d_rot = np.zeros_like(...)` initializer, which
    # was unconditionally overwritten before use.)
    quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
    return 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
Example no. 11
0
    def generate_next_action(self,
                             curr_state,
                             end_goal,
                             imagination,
                             env,
                             num_acs=50,
                             max_steps=50,
                             num_copies=100,
                             num_reps=5,
                             num_iterations=2,
                             alpha=1.0,
                             osm_frac=0.0,
                             tol=0.05,
                             return_average=True,
                             noise=0.1):
        """Select the next action by iteratively sampling and scoring
        candidate first actions with imagined rollouts.

        Candidate first actions are drawn via
        ``imagination.test_traj_rand_gan``; each is evaluated by imagining
        ``max_steps - 1`` further steps across an ensemble of one-step
        models, scoring the fraction of imagined timesteps that achieve
        ``end_goal`` within ``tol``. Candidates are then re-seeded around
        the best action plus Gaussian noise and the process repeats — a
        cross-entropy-method-style refinement (TODO confirm intent).

        Args:
            curr_state: current observation (replaced by the canonicalized
                observation for fetch push/pick environments).
            end_goal: goal vector used for all imagined rollouts.
            imagination: model exposing ``test_traj_rand_gan`` and a
                ``one_step_model`` ensemble (``predict``, ``networks``).
            env: environment; must provide ``batch_goal_achieved`` and,
                for fetch push/pick, state save/restore hooks.
            num_acs: number of candidate first actions per iteration.
            max_steps: imagined rollout horizon.
            num_copies: imagined rollouts per candidate per repetition.
            num_reps: scoring repetitions pooled per candidate.
            num_iterations: refinement iterations.
            alpha: temperature of the exponential reward weighting.
            osm_frac: fraction of imagined states replaced by one-step
                model predictions inside ``test_traj_rand_gan``.
            tol: goal-achievement tolerance.
            return_average: if True, return the reward-weighted average
                action; otherwise the single best-scoring action.
            noise: std-dev of the Gaussian perturbation when re-seeding
                candidates around the best action.

        Returns:
            Tuple ``(best_action, 1)``.
        """
        if env.name.startswith("fetch_push") or env.name.startswith(
                "fetch_pick"):
            # Canonicalize the object orientation: among the stored
            # quaternion invariants, pick the rotation whose Euler angles
            # are closest to flat (small x/y) and to the previous yaw,
            # then rewrite the simulator state to use it.
            state = env.save_state()
            quaternions = np.tile(state.qpos[-4:],
                                  (len(self.quaternion_invariants), 1))
            quaternion_rot = quat_mul(quaternions, self.quaternion_invariants)
            euler_angles = quat2euler(quaternion_rot)
            scores = np.linalg.norm(euler_angles[:, :2], axis=-1) + np.abs(
                euler_angles[:, -1] -
                np.tile(self.prev_angle[-1], len(euler_angles)))
            ind = np.argmin(scores)
            self.prev_angle = euler_angles[ind, :]
            state.qpos[-4:] = quaternion_rot[ind, :]
            # Rotate the angular velocity into the canonical frame.
            state.qvel[-3:] = quat_rot_vec(
                quat_conjugate(self.quaternion_invariants[ind]),
                state.qvel[-3:])
            env.restore_state(state)
            curr_state = env._get_obs()["observation"]

        # Initial pool of candidate first actions (one imagined step).
        gen_states, gen_actions = imagination.test_traj_rand_gan(
            np.tile(curr_state, (num_acs, 1)),
            np.tile(end_goal, (num_acs, 1)),
            num_steps=1,
            frac_replaced_with_osm_pred=0.0)
        curr_states = gen_states[:, 0, :]
        curr_init_actions = gen_actions[:, 0, :]
        # Each candidate is evaluated over `num_copies` imagined rollouts.
        start_states = np.repeat(curr_states, num_copies, axis=0)
        end_goals = np.tile(end_goal, (start_states.shape[0], 1))
        num_osms = len(imagination.one_step_model.networks)
        best_action = None

        for it in range(num_iterations):
            # Randomly shard the rollouts across ensemble members.
            inds = np.array_split(np.random.permutation(start_states.shape[0]),
                                  num_osms)
            rep_acs = np.repeat(curr_init_actions, num_copies, axis=0)

            success_fracs = []
            for j in range(num_reps):
                # First imagined step through the one-step-model ensemble.
                next_states = np.zeros_like(start_states)
                for i in range(num_osms):
                    next_states[inds[i],
                                ...] = imagination.one_step_model.predict(
                                    start_states[inds[i], ...],
                                    rep_acs[inds[i], ...],
                                    osm_ind=i,
                                    normed_input=False)
                # Remaining imagined steps.
                gen_states, _ = imagination.test_traj_rand_gan(
                    next_states,
                    end_goals,
                    num_steps=max_steps - 1,
                    frac_replaced_with_osm_pred=osm_frac)
                success_mat = env.batch_goal_achieved(gen_states,
                                                      end_goals[:,
                                                                np.newaxis, :],
                                                      tol=tol)
                # Score: fraction of imagined timesteps on-goal, per
                # (candidate, copy).
                success_fracs.append(
                    np.sum(success_mat, axis=-1).reshape(num_acs, num_copies) /
                    max_steps)
            success_fracs = np.concatenate(success_fracs, axis=1)
            pseudo_rewards = np.mean(success_fracs, axis=-1)
            # NOTE(review): succ_frac is computed but never used here.
            succ_frac = pseudo_rewards.copy()

            if return_average:
                #pseudo_rewards = np.exp(alpha * np.clip((pseudo_rewards - pseudo_rewards.mean()) / (pseudo_rewards.std() + 1e-4), -2.0, 2.0))
                # Standardize, then exponential (softmax-like) weighting;
                # subtracting the max keeps the exp numerically stable.
                pseudo_rewards = (pseudo_rewards - pseudo_rewards.mean()) / (
                    pseudo_rewards.std() + 1e-4)
                pseudo_rewards = np.exp(
                    alpha * (pseudo_rewards - pseudo_rewards.max()))
                best_action = np.sum(
                    curr_init_actions * pseudo_rewards[:, np.newaxis],
                    axis=0) / np.sum(pseudo_rewards)
            else:
                best_action = curr_init_actions[np.argmax(pseudo_rewards), :]

            # Re-seed candidates around the best action; index 0 keeps the
            # unperturbed best action (elitism).
            curr_init_actions = np.tile(best_action, (num_acs, 1))
            curr_init_actions[1:] = np.clip(
                curr_init_actions[1:] +
                noise * np.random.randn(*curr_init_actions[1:].shape), -1.0,
                1.0)

        return best_action, 1
Example no. 12
0
def quat_angle_diff(quat_a, quat_b):
    """Absolute normalized rotation angle (radians) between quaternions.

    Computes the relative rotation q_a * q_b^-1, extracts the angle from
    its scalar part, and normalizes via ``rotations.normalize_angles``.
    """
    rel = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
    half_angle_cos = np.clip(rel[..., 0], -1., 1.)
    return np.abs(rotations.normalize_angles(2 * np.arccos(half_angle_cos)))