Example #1
    def _goal_distance(self, goal_a, goal_b):
        assert goal_a.shape == goal_b.shape
        assert goal_a.shape[-1] == 7

        d_pos = np.zeros_like(goal_a[..., 0])
        d_rot = np.zeros_like(goal_b[..., 0])
        if self.target_position != 'ignore':
            delta_pos = goal_a[..., :3] - goal_b[..., :3]
            d_pos = np.linalg.norm(delta_pos, axis=-1)

        if self.target_rotation != 'ignore':
            quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]

            if self.ignore_z_target_rotation:
                # Special case: We want to ignore the Z component of the rotation.
                # This code here assumes Euler angles with xyz convention. We first transform
                # to euler, then set the Z component to be equal between the two, and finally
                # transform back into quaternions.
                euler_a = rotations.quat2euler(quat_a)
                euler_b = rotations.quat2euler(quat_b)
                euler_a[2] = euler_b[2]
                quat_a = rotations.euler2quat(euler_a)

            # Multiply quat_a by the conjugate of quat_b and extract the angle of the relative rotation.
            quat_diff = rotations.quat_mul(quat_a,
                                           rotations.quat_conjugate(quat_b))
            angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
            d_rot = angle_diff
        assert d_pos.shape == d_rot.shape
        return d_pos, d_rot
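The rotational term above uses the fact that, for unit quaternions, the relative rotation quat_a * conjugate(quat_b) has scalar part cos(theta / 2), so theta = 2 * arccos(w). A minimal, self-contained NumPy sketch of that identity (the quaternion helpers are re-implemented here only for illustration; the environment itself uses gym's rotations module):

import numpy as np

def quat_from_angle_and_axis(angle, axis):
    axis = axis / np.linalg.norm(axis)
    return np.concatenate([[np.cos(angle / 2.)], np.sin(angle / 2.) * axis])

def quat_conjugate(q):
    return q * np.array([1., -1., -1., -1.])

def quat_mul(q, r):
    w0, x0, y0, z0 = q
    w1, x1, y1, z1 = r
    return np.array([
        w0 * w1 - x0 * x1 - y0 * y1 - z0 * z1,
        w0 * x1 + x0 * w1 + y0 * z1 - z0 * y1,
        w0 * y1 - x0 * z1 + y0 * w1 + z0 * x1,
        w0 * z1 + x0 * y1 - y0 * x1 + z0 * w1,
    ])

# Two rotations about the z-axis that differ by 0.3 rad.
quat_a = quat_from_angle_and_axis(0.5, np.array([0., 0., 1.]))
quat_b = quat_from_angle_and_axis(0.2, np.array([0., 0., 1.]))

quat_diff = quat_mul(quat_a, quat_conjugate(quat_b))
angle_diff = 2 * np.arccos(np.clip(quat_diff[0], -1., 1.))
print(angle_diff)  # ~0.3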
Example #2
    def _sample_goal(self):
        # Select a goal for the object position.
        target_pos = None
        if self.target_position == 'random':
            assert self.target_position_range.shape == (3, 2)
            offset = self.np_random.uniform(self.target_position_range[:, 0],
                                            self.target_position_range[:, 1])
            assert offset.shape == (3, )
            target_pos = self.sim.data.get_joint_qpos(
                'object:joint')[:3] + offset
        elif self.target_position in ['ignore', 'fixed']:
            target_pos = self.sim.data.get_joint_qpos('object:joint')[:3]
        else:
            raise error.Error('Unknown target_position option "{}".'.format(
                self.target_position))
        assert target_pos is not None
        assert target_pos.shape == (3, )

        # Select a goal for the object rotation.
        target_quat = None
        if self.target_rotation == 'z':
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = np.array([0., 0., 1.])
            target_quat = quat_from_angle_and_axis(angle, axis)
        elif self.target_rotation == 'parallel':
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = np.array([0., 0., 1.])
            target_quat = quat_from_angle_and_axis(angle, axis)
            parallel_quat = self.parallel_quats[self.np_random.randint(
                len(self.parallel_quats))]
            target_quat = rotations.quat_mul(target_quat, parallel_quat)
        elif self.target_rotation == 'xyz':
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = self.np_random.uniform(-1., 1., size=3)
            target_quat = quat_from_angle_and_axis(angle, axis)
        elif self.target_rotation in ['ignore', 'fixed']:
            target_quat = self.sim.data.get_joint_qpos('object:joint')
        else:
            raise error.Error('Unknown target_rotation option "{}".'.format(
                self.target_rotation))
        assert target_quat is not None
        assert target_quat.shape == (4, )

        target_quat /= np.linalg.norm(target_quat)  # normalized quaternion
        goal = np.concatenate([target_pos, target_quat])
        return goal
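The sampled goal is always a 7-dimensional vector: a 3-D target position followed by a unit target quaternion. A small sketch of how the 'z' branch assembles one, with a hard-coded placeholder standing in for the object position that is normally read from the simulator:

import numpy as np

def quat_from_angle_and_axis(angle, axis):
    axis = axis / np.linalg.norm(axis)
    return np.concatenate([[np.cos(angle / 2.)], np.sin(angle / 2.) * axis])

rng = np.random.RandomState(0)
angle = rng.uniform(-np.pi, np.pi)                  # random rotation about z
target_quat = quat_from_angle_and_axis(angle, np.array([0., 0., 1.]))
target_quat /= np.linalg.norm(target_quat)          # keep it a unit quaternion

target_pos = np.array([1.0, 0.87, 0.2])             # placeholder for the object qpos
goal = np.concatenate([target_pos, target_quat])
assert goal.shape == (7,)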
Example #3
def rollout(env,
            model,
            noise,
            config,
            normalizer=None,
            render=False,
            step=(1, 5),
            boundary_sample=False):
    trajectories = []
    for i_agent in range(2):
        trajectories.append([])

    # monitoring variables
    episode_reward = np.zeros(env.num_envs)
    frames = []

    state_all = env.reset()
    state_all = back_to_dict(state_all, config)

    step_l, step_h = step
    for i_step in range(config['episode_length']):

        model.to_cpu()

        obs = [
            K.tensor(obs, dtype=K.float32) for obs in state_all['observation']
        ]
        goal = K.tensor(state_all['desired_goal'], dtype=K.float32)
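        # On the first step, keep the environment's desired goal with probability
        # objtraj_p (unless boundary_sample is set), or always when the sampled
        # step horizon covers the whole episode; otherwise replace it with
        # per-object sub-goals predicted by the model's obj_traj network from
        # the current achieved goal.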
        if i_step == 0:
            if (np.random.rand() < config['objtraj_p'] and not boundary_sample
                ) or (step_h >= config['episode_length']):
                objtraj_goal = goal
                original_goal = True
            else:
                original_goal = False
                achieved_goal = K.tensor(state_all['achieved_goal'],
                                         dtype=K.float32)

                objtraj_goal = []
                goal_len_per_obj = goal.shape[1] // model.n_objects
                for i_object in range(model.n_objects):

                    obj_start = i_object * goal_len_per_obj
                    obj_end = (i_object + 1) * goal_len_per_obj
                    achieved_goal_per_obj = achieved_goal[:, obj_start:obj_end]
                    goal_per_obj = goal[:, obj_start:obj_end]

                    # Sample a random step horizon in [step_l, step_h] for each env.
                    sampled_step = K.randint(low=step_l, high=step_h + 1,
                                             size=(achieved_goal_per_obj.shape[0], 1))
                    normed_achieved_goal_per_obj, normed_goal_per_obj, normed_step = \
                        normalizer[2].process(achieved_goal_per_obj,
                                              goal_per_obj, sampled_step)
                    with K.no_grad():
                        objtraj_goal_per_obj = model.obj_traj(
                            normed_achieved_goal_per_obj.to('cuda'),
                            normed_goal_per_obj.to('cuda'),
                            normed_step.to('cuda'))

                    objtraj_goal.append(objtraj_goal_per_obj.cpu())
                objtraj_goal = K.cat(objtraj_goal, dim=-1)

        # Observation normalization
        obs_goal = []
        for i_agent in range(2):
            obs_goal.append(K.cat([obs[i_agent], objtraj_goal], dim=-1))
            if normalizer[i_agent] is not None:
                #if not original_goal:
                #    obs_goal[i_agent] = normalizer[i_agent].preprocess(obs_goal[i_agent])
                #else:
                obs_goal[i_agent] = normalizer[i_agent].preprocess_with_update(
                    obs_goal[i_agent])

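        # Agent 0's normalized observation-goal pair drives the action; the
        # policy output is zero-padded below to the full environment action size.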
        action = model.select_action(obs_goal[0], noise).cpu().numpy()

        action_to_env = np.zeros((len(action), len(env.action_space.sample())))
        action_to_env[:, 0:action.shape[1]] = action
        action_to_mem = action_to_env

        next_state_all, reward, done, info = env.step(action_to_env)
        next_state_all = back_to_dict(next_state_all, config)
        reward = K.tensor(reward, dtype=dtype).view(-1, 1)  # 'dtype' is a module-level default (e.g. K.float32)
        episode_reward += reward.squeeze(1).cpu().numpy()

        for i_agent in range(2):
            state = {
                'observation': state_all['observation'][i_agent].copy(),
                'achieved_goal': state_all['achieved_goal'].copy(),
                'desired_goal': objtraj_goal.numpy().copy()
            }

            trajectories[i_agent].append((state.copy(), action_to_mem, reward))

        goal_a = state_all['achieved_goal']
        goal_b = objtraj_goal.numpy().copy()  #state_all['desired_goal']
        ENV_NAME = config['env_id']
        if 'Rotate' in ENV_NAME:
            quat_a = goal_a[:, 3:]
            quat_b = goal_b[:, 3:]

        # Move to the next state
        state_all = next_state_all

        # Record frames
        if render:
            frames.append(env.render(mode='rgb_array')[0])

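    # After the episode, score success from the final achieved goal against the
    # (possibly substituted) goal: angular distance for Rotate tasks, Euclidean
    # distance otherwise.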
    if 'Rotate' in ENV_NAME:
        # Multiply quat_a by the conjugate of quat_b and extract the angle of the relative rotation.
        quat_diff = rotations.quat_mul(quat_a,
                                       rotations.quat_conjugate(quat_b))
        angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
        d_rot = angle_diff
        distance = (d_rot < 0.1).astype(np.float32)
    else:
        distance = np.linalg.norm(goal_a - goal_b, axis=-1)
        distance = (distance < 0.05).astype(np.float32)

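    # Repackage the collected trajectories: both agents' observations are
    # concatenated feature-wise ('o'), achieved goals ('ag'), desired goals ('g')
    # and actions ('u') are taken once, and axes are swapped to (env, step, ...).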
    obs, ags, goals, acts = [], [], [], []

    for trajectory in trajectories:
        obs.append([])
        ags.append([])
        goals.append([])
        acts.append([])
        for i_step in range(config['episode_length']):
            obs[-1].append(trajectory[i_step][0]['observation'])
            ags[-1].append(trajectory[i_step][0]['achieved_goal'])
            goals[-1].append(trajectory[i_step][0]['desired_goal'])
            if (i_step < config['episode_length'] - 1):
                acts[-1].append(trajectory[i_step][1])

    trajectories = {
        'o': np.concatenate(obs, axis=-1).swapaxes(0, 1),
        'ag': np.asarray(ags)[0, ].swapaxes(0, 1),
        'g': np.asarray(goals)[0, ].swapaxes(0, 1),
        'u': np.asarray(acts)[0, ].swapaxes(0, 1),
    }

    info = np.asarray([i_info['is_success'] for i_info in info])

    return trajectories, episode_reward, info, distance
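The packaging step above turns nested Python lists into arrays and swaps the leading axes so each row corresponds to one parallel environment. A toy shape check with made-up sizes (n_agents, T, N and D are hypothetical):

import numpy as np

n_agents, T, N, D = 2, 3, 4, 5            # agents, steps, parallel envs, obs dim
obs = [[np.zeros((N, D)) for _ in range(T)] for _ in range(n_agents)]

stacked = np.concatenate(obs, axis=-1)    # (T, N, n_agents * D)
stacked = stacked.swapaxes(0, 1)          # (N, T, n_agents * D): env-major layout
print(stacked.shape)                      # (4, 3, 10)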
Example #4
    def _reset_sim(self):
        self.sim.set_state(self.initial_state)
        self.sim.forward()

        initial_qpos = self.sim.data.get_joint_qpos('object:joint').copy()
        initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:]
        assert initial_qpos.shape == (7, )
        assert initial_pos.shape == (3, )
        assert initial_quat.shape == (4, )
        initial_qpos = None

        # Randomize the initial rotation.
        if self.randomize_initial_rotation:
            if self.target_rotation == 'z':
                angle = self.np_random.uniform(-np.pi, np.pi)
                axis = np.array([0., 0., 1.])
                offset_quat = quat_from_angle_and_axis(angle, axis)
                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
            elif self.target_rotation == 'parallel':
                angle = self.np_random.uniform(-np.pi, np.pi)
                axis = np.array([0., 0., 1.])
                z_quat = quat_from_angle_and_axis(angle, axis)
                parallel_quat = self.parallel_quats[self.np_random.randint(
                    len(self.parallel_quats))]
                offset_quat = rotations.quat_mul(z_quat, parallel_quat)
                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
            elif self.target_rotation in ['xyz', 'ignore']:
                angle = self.np_random.uniform(-np.pi, np.pi)
                axis = self.np_random.uniform(-1., 1., size=3)
                offset_quat = quat_from_angle_and_axis(angle, axis)
                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
            elif self.target_rotation == 'fixed':
                pass
            else:
                raise error.Error(
                    'Unknown target_rotation option "{}".'.format(
                        self.target_rotation))

        # Randomize initial position.
        if self.randomize_initial_position:
            if self.target_position != 'fixed':
                initial_pos += self.np_random.normal(size=3, scale=0.005)

        initial_quat /= np.linalg.norm(initial_quat)
        initial_qpos = np.concatenate([initial_pos, initial_quat])
        self.sim.data.set_joint_qpos('object:joint', initial_qpos)

        def is_on_palm():
            self.sim.forward()
            cube_middle_idx = self.sim.model.site_name2id('object:center')
            cube_middle_pos = self.sim.data.site_xpos[cube_middle_idx]
            is_on_palm = (cube_middle_pos[2] > 0.04)
            return is_on_palm

        # Run the simulation for a bunch of timesteps to let everything settle in.
        for _ in range(10):
            self._set_action(np.zeros(20))
            try:
                self.sim.step()
            except mujoco_py.MujocoException:
                return False
        return is_on_palm()
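_reset_sim reports whether the randomized reset produced a valid state (the simulation stepped without a MuJoCo exception and the cube is still on the palm). In gym's robotics base environment (RobotEnv), reset() keeps retrying until _reset_sim succeeds; a rough sketch of that loop, with _sample_goal and _get_obs as defined by that base class:

    def reset(self):
        # Keep re-randomizing until _reset_sim reaches a valid initial state.
        did_reset_sim = False
        while not did_reset_sim:
            did_reset_sim = self._reset_sim()
        self.goal = self._sample_goal().copy()
        obs = self._get_obs()
        return obs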