    def _compute_reward(self):
        reward = np.float32(0.0)
        robot_obs, _ = self._robot.get_observation()
        world_obs, _ = self._world.get_observation()

        d1 = goal_distance(np.array(robot_obs[:3]), np.array(world_obs[:3]))
        d2 = goal_distance(np.array(world_obs[:3]), np.array(self._tg_pose))

        if self._reward_type == 0:  # dense reward: negative distance terms plus success bonus
            reward = -d1 - d2
            if d2 <= self._target_dist_min:
                reward += np.float32(1000.0)

        # normalized reward
        elif self._reward_type == 1:

            rew1 = 0.125
            rew2 = 0.25
            if d1 > 0.1:
                reward = rew1 * (1 - d1 / self._init_dist_hand_obj)
                # print("reward 1 ", reward)
            else:
                reward = rew1 * (1 - d1 / self._init_dist_hand_obj) + rew2 * (
                    1 - d2 / self._max_dist_obj_tg)
                # print("reward 2 ", reward)

            if d2 <= self._target_dist_min:
                reward += np.float32(1000.0)

        return reward
    def _compute_reward(self):
        robot_obs, _ = self._robot.get_observation()
        world_obs, _ = self._world.get_observation()

        d1 = goal_distance(np.array(robot_obs[:3]), np.array(world_obs[:3]))
        d2 = goal_distance(np.array(world_obs[:3]),
                           np.array(self._target_pose))

        reward = -d1 - d2
        # print("--------")
        # print(reward)
        # print("--------")
        if d2 <= self._target_dist_min:
            reward = np.float32(1000.0) + (100 - d2 * 80)
        return reward
Example #3
    def reset(self):
        self.reset_simulation()

        # --- sample target pose --- #
        world_obs, _ = self._world.get_observation()
        self._tg_pose = self.sample_tg_pose(world_obs[:3])
        self.debug_gui()

        robot_obs, _ = self._robot.get_observation()
        world_obs, _ = self._world.get_observation()

        self._init_dist_hand_obj = goal_distance(np.array(robot_obs[:3]), np.array(world_obs[:3]))
        self._max_dist_obj_tg = goal_distance(np.array(world_obs[:3]), np.array(self._tg_pose))

        obs = self.get_goal_observation()
        scaled_obs = scale_gym_data(self.observation_space['observation'], obs['observation'])
        obs['observation'] = scaled_obs
        return obs
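The reset above rescales the raw observation with scale_gym_data before returning it. That helper's definition is not part of this listing; a minimal sketch of what such a scaling function might do, assuming it linearly maps each component of a gym Box space from its [low, high] bounds into [-1, 1] (the name and behaviour here are assumptions, not the repository's actual code):

    import numpy as np

    def scale_gym_data_sketch(space, data):
        # Hypothetical stand-in for scale_gym_data: map each component of
        # `data` from the Box bounds [low, high] linearly into [-1, 1].
        low, high = space.low, space.high
        return 2.0 * (np.asarray(data) - low) / (high - low) - 1.0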
Example #4
    def _compute_reward(self):
        robot_obs, _ = self._robot.get_observation()
        world_obs, _ = self._world.get_observation()

        d = goal_distance(np.array(robot_obs[:3]), np.array(world_obs[:3]))

        reward = -d
        if d <= self._target_dist_min:
            reward = np.float32(1000.0) + (100 - d*80)

        return reward
    def _termination(self):
        world_obs, _ = self._world.get_observation()
        d = goal_distance(np.array(world_obs[:3]), np.array(self._target_pose))

        if d <= self._target_dist_min:
            self.terminated = 1
            print('------------->>> success!')
            print('final reward')
            print(self._compute_reward())

            return np.float32(1.0)

        if self.terminated or self._env_step_counter > self._max_steps:
            return np.float32(1.0)

        return np.float32(0.0)
Example #6
    def compute_reward(self, achieved_goal, goal, info):
        # Compute distance between goal and the achieved goal.
        d = goal_distance(achieved_goal[:3], goal[:3])

        return -(d > self._target_dist_min).astype(np.float32)
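The compute_reward above follows gym's goal-conditioned (GoalEnv-style) signature and returns a sparse reward: -1.0 while the achieved goal is farther from the target than _target_dist_min, 0.0 once it is within the threshold. Because the comparison is vectorized, the same expression also handles batches of relabelled goals (e.g. for hindsight experience replay). A small self-contained illustration, with an assumed threshold of 0.05 m:

    import numpy as np

    target_dist_min = 0.05  # assumed success threshold, in metres

    achieved = np.array([[0.10, 0.00, 0.20],
                         [0.50, 0.50, 0.50]])
    desired = np.array([[0.11, 0.00, 0.20],
                        [0.00, 0.00, 0.00]])

    # Euclidean distance per goal pair, then sparse {-1, 0} reward.
    d = np.linalg.norm(achieved - desired, axis=-1)
    rewards = -(d > target_dist_min).astype(np.float32)
    print(rewards)  # [-0. -1.]: first goal reached, second not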
Example #7
    def _is_success(self, achieved_goal, goal):
        # Compute distance between goal and the achieved goal.
        d = goal_distance(achieved_goal[:3], goal[:3])

        return d <= self._target_dist_min
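All of the snippets above rely on a goal_distance helper whose definition is not included in this listing. A minimal sketch consistent with how it is used here (and with the usual gym robotics convention), assuming plain Euclidean distance:

    import numpy as np

    def goal_distance(goal_a, goal_b):
        # Assumed implementation: Euclidean distance between two goals
        # (or two batches of goals, thanks to axis=-1).
        goal_a, goal_b = np.asarray(goal_a), np.asarray(goal_b)
        assert goal_a.shape == goal_b.shape
        return np.linalg.norm(goal_a - goal_b, axis=-1)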