Example #1
0
    def test_evaluate_state_difficulty_4(self):
        """Check evaluate_state at difficulty 4 (position and orientation).

        The cost must be exactly zero for a perfect pose match and strictly
        non-zero whenever the pose differs in translation, rotation or both.
        """
        difficulty = 4
        # reference pose at the origin with default orientation
        pose_origin = move_cube.Pose()
        # pose differing from the origin only in position
        pose_trans = move_cube.Pose(position=[1, 2, 3])
        # pose differing only in orientation (0.42 rad rotation about z)
        pose_rot = move_cube.Pose(
            orientation=Rotation.from_euler("z", 0.42).as_quat()
        )
        # pose differing in both position and orientation
        # (positional args: presumably Pose(position, orientation) — matches
        # the keyword usage above; TODO confirm against move_cube.Pose)
        pose_both = move_cube.Pose(
            [1, 2, 3], Rotation.from_euler("z", 0.42).as_quat()
        )

        # needs to be zero for exact match
        cost = move_cube.evaluate_state(pose_origin, pose_origin, difficulty)
        self.assertEqual(cost, 0)

        # Non-zero if there is translation, rotation or both
        self.assertNotEqual(
            move_cube.evaluate_state(pose_origin, pose_trans, difficulty), 0
        )
        self.assertNotEqual(
            move_cube.evaluate_state(pose_origin, pose_rot, difficulty), 0
        )
        self.assertNotEqual(
            move_cube.evaluate_state(pose_origin, pose_both, difficulty), 0
        )
    def compute_reward(self, achieved_goal, desired_goal, info):
        """Compute the reward for the given achieved and desired goal.

        Args:
            achieved_goal : Current pose of the object.
            desired_goal : Goal pose of the object.
            info : An info dictionary containing a field "difficulty"
                which specifies the difficulty level.

        Returns:
            float: The reward that corresponds to the provided achieved goal
            w.r.t. to the desired goal. Note that the following should always
            hold true::

                ob, reward, done, info = env.step()
                assert reward == env.compute_reward(
                    ob['achieved_goal'],
                    ob['desired_goal'],
                    info,
                )
        """

        if self.sparse_reward:
            # Sparse mode: -1.0 when the pose error exceeds a small
            # threshold (0.01), 0.0 otherwise.
            # NOTE(review): this branch treats the goals as flat arrays
            # (position in [0:3], quaternion orientation in [3:7]) while the
            # dense branch below expects dict-like goals (Pose.from_dict).
            # Confirm that callers really pass different representations
            # depending on self.sparse_reward — this looks inconsistent.
            return -np.float32((move_cube.evaluate_state(
                move_cube.Pose(desired_goal[0:3], desired_goal[3:7]),
                move_cube.Pose(achieved_goal[0:3], achieved_goal[3:7]),
                info['difficulty'],
            ) > 0.01))
        else:
            # Dense mode: the (positive) pose-error cost itself.
            # NOTE(review): returned without negation, unlike the sparse
            # branch and _competition_reward — verify the intended sign.
            return move_cube.evaluate_state(
                move_cube.Pose.from_dict(desired_goal),
                move_cube.Pose.from_dict(achieved_goal),
                info['difficulty'],
            )
def _competition_reward(observation, difficulty):
    """Negated pose-error cost of the observed object w.r.t. its goal.

    Builds the current and goal object poses from the observation dict and
    returns ``-move_cube.evaluate_state(goal, current, difficulty)``, so a
    perfect match yields 0 and any deviation yields a negative reward.
    """
    current_pose = move_cube.Pose(
        observation['object_position'],
        observation['object_orientation'],
    )
    target_pose = move_cube.Pose(
        observation['goal_object_position'],
        observation['goal_object_orientation'],
    )
    cost = move_cube.evaluate_state(target_pose, current_pose, difficulty)
    return -cost
Example #4
0
def main():
    """Replay a logged episode and verify it against the recorded data.

    Loads a pickled action log, re-executes every logged action on a fresh
    TriFingerPlatform simulation, and checks that the robot and object
    observations of the replay match the recording step by step.  Prints the
    accumulated reward; prints "Failed." and exits with status 1 on any
    mismatch, otherwise prints "Passed.".
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--logfile",
        "-l",
        required=True,
        type=str,
        help="Path to the log file.",
    )
    parser.add_argument(
        "--difficulty",
        "-d",
        required=True,
        type=int,
        help="The difficulty level of the goal (for reward computation).",
    )
    parser.add_argument(
        "--initial-pose",
        "-i",
        required=True,
        type=str,
        metavar="JSON",
        help="Initial pose of the cube as JSON string.",
    )
    parser.add_argument(
        "--goal-pose",
        "-g",
        required=True,
        type=str,
        metavar="JSON",
        help="Goal pose of the cube as JSON string.",
    )
    args = parser.parse_args()

    # Load the recorded episode.
    # NOTE: pickle.load can execute arbitrary code — only load trusted logs.
    with open(args.logfile, "rb") as fh:
        log = pickle.load(fh)

    initial_object_pose = move_cube.Pose.from_json(args.initial_pose)
    goal_pose = move_cube.Pose.from_json(args.goal_pose)

    # verify that the initial object pose matches with the one in the log file
    try:
        np.testing.assert_array_almost_equal(
            initial_object_pose.position,
            log["initial_object_pose"].position,
            err_msg=(
                "Given initial object position does not match with log file."))
        np.testing.assert_array_almost_equal(
            initial_object_pose.orientation,
            log["initial_object_pose"].orientation,
            err_msg=(
                "Given initial object orientation does not match with log file."
            ))
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    # Fresh simulation seeded with the same initial object pose as the log.
    platform = trifinger_platform.TriFingerPlatform(
        visualization=False, initial_object_pose=initial_object_pose)

    # verify that the robot is initialized to the same position as in the log
    # file
    initial_robot_position = platform.get_robot_observation(0).position
    try:
        np.testing.assert_array_almost_equal(
            initial_robot_position,
            log["initial_robot_position"],
            err_msg=("Initial robot position does not match with log file."))
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    # verify that the number of logged actions matches with the episode length
    # NOTE(review): `assert` is stripped when Python runs with -O; consider an
    # explicit check + sys.exit(1) like the blocks above.
    n_actions = len(log["actions"])
    assert (
        n_actions == move_cube.episode_length
    ), "Number of actions in log does not match with expected episode length."

    accumulated_reward = 0
    for logged_action in log["actions"]:
        action = logged_action["action"]

        # Apply the logged action; t is the resulting simulation time step.
        t = platform.append_desired_action(action)

        robot_obs = platform.get_robot_observation(t)
        cube_pose = platform.get_object_pose(t)
        # Reward is the negated pose-error cost (0 for a perfect match).
        reward = -move_cube.evaluate_state(goal_pose, cube_pose,
                                           args.difficulty)
        accumulated_reward += reward

        # the replayed time step must line up with the recorded one
        assert logged_action["t"] == t

        # Robot observation of the replay must match the recording.
        np.testing.assert_array_almost_equal(
            robot_obs.position,
            logged_action["robot_observation"].position,
            err_msg=("Step %d: Recorded robot position does not match with"
                     " the one achieved by the replay" % t))
        np.testing.assert_array_almost_equal(
            robot_obs.torque,
            logged_action["robot_observation"].torque,
            err_msg=("Step %d: Recorded robot torque does not match with"
                     " the one achieved by the replay" % t))
        np.testing.assert_array_almost_equal(
            robot_obs.velocity,
            logged_action["robot_observation"].velocity,
            err_msg=("Step %d: Recorded robot velocity does not match with"
                     " the one achieved by the replay" % t))

        # Object pose of the replay must match the recording.
        np.testing.assert_array_almost_equal(
            cube_pose.position,
            logged_action["object_pose"].position,
            err_msg=("Step %d: Recorded object position does not match with"
                     " the one achieved by the replay" % t))
        np.testing.assert_array_almost_equal(
            cube_pose.orientation,
            logged_action["object_pose"].orientation,
            err_msg=("Step %d: Recorded object orientation does not match with"
                     " the one achieved by the replay" % t))

    # t is the step index from the final loop iteration; presumably
    # episode_length > 0 so the loop always runs — TODO confirm.
    cube_pose = platform.get_object_pose(t)
    final_pose = log["final_object_pose"]["pose"]

    print("Accumulated Reward:", accumulated_reward)

    # verify that actual and logged final object pose match
    # (looser tolerance here: 3 decimals instead of the default 6)
    try:
        np.testing.assert_array_almost_equal(
            cube_pose.position,
            final_pose.position,
            decimal=3,
            err_msg=("Recorded object position does not match with the one"
                     " achieved by the replay"))
        np.testing.assert_array_almost_equal(
            cube_pose.orientation,
            final_pose.orientation,
            decimal=3,
            err_msg=("Recorded object orientation does not match with the one"
                     " achieved by the replay"))
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    print("Passed.")