Ejemplo n.º 1
0
    def test_evaluate_state_difficulty_4_rotation_around_y(self):
        """At difficulty 4, rotation about the y-axis must not be penalized."""
        difficulty = 4
        origin = move_cube.Pose()
        rot_only_y = move_cube.Pose(
            orientation=Rotation.from_euler("y", 0.42).as_quat()
        )
        rot_no_y = move_cube.Pose(
            orientation=Rotation.from_euler("yz", [0.0, 0.42]).as_quat()
        )
        rot_incl_y = move_cube.Pose(
            orientation=Rotation.from_euler("yz", [0.2, 0.42]).as_quat()
        )

        # an exact match must have zero cost
        self.assertEqual(
            move_cube.evaluate_state(origin, origin, difficulty), 0
        )

        # a pure y-rotation must also be free
        self.assertEqual(
            move_cube.evaluate_state(origin, rot_only_y, difficulty), 0
        )

        # adding a y-component on top of another rotation must not change
        # the cost
        self.assertAlmostEqual(
            move_cube.evaluate_state(origin, rot_no_y, difficulty),
            move_cube.evaluate_state(origin, rot_incl_y, difficulty),
        )
Ejemplo n.º 2
0
    def test_evaluate_state_difficulty_4(self):
        """At difficulty 4, translation and rotation both incur a cost."""
        difficulty = 4
        origin = move_cube.Pose()
        quat_z = Rotation.from_euler("z", 0.42).as_quat()
        translated = move_cube.Pose(position=[1, 2, 3])
        rotated = move_cube.Pose(orientation=quat_z)
        moved = move_cube.Pose([1, 2, 3], quat_z)

        # an exact match must have zero cost
        self.assertEqual(
            move_cube.evaluate_state(origin, origin, difficulty), 0
        )

        # non-zero if there is translation, rotation or both
        for pose in (translated, rotated, moved):
            self.assertNotEqual(
                move_cube.evaluate_state(origin, pose, difficulty), 0
            )
Ejemplo n.º 3
0
    def compute_reward(self, achieved_goal, desired_goal, info):
        """Compute the reward for the given achieved and desired goal.

        Args:
            achieved_goal (dict): Current pose of the object.
            desired_goal (dict): Goal pose of the object.
            info (dict): An info dictionary containing a field "difficulty"
                which specifies the difficulty level.

        Returns:
            float: The reward that corresponds to the provided achieved goal
            w.r.t. to the desired goal. Note that the following should always
            hold true::

                ob, reward, done, info = env.step()
                assert reward == env.compute_reward(
                    ob['achieved_goal'],
                    ob['desired_goal'],
                    info,
                )
        """
        goal_pose = move_cube.Pose.from_dict(desired_goal)
        actual_pose = move_cube.Pose.from_dict(achieved_goal)
        # evaluate_state returns a cost (lower is better), so negate it
        cost = move_cube.evaluate_state(
            goal_pose, actual_pose, info["difficulty"]
        )
        return -cost
def evaluate_state(
    trajectory: Trajectory, time_index: int, actual_position: Position
):
    """Compute cost of a given cube position at a given time step.

    The cost of the actual position is computed w.r.t. the goal that is
    active at ``time_index``, using :func:`move_cube.evaluate_state` with
    the fixed ``GOAL_DIFFICULTY``.  Lower is better; zero means the actual
    position matches the goal exactly.

    Args:
        trajectory:  The trajectory based on which the cost is computed.
        time_index:  Index of the time step that is evaluated.
        actual_position:  Cube position at the specified time step.

    Returns:
        Cost of the actual position w.r.t. to the goal position of the
        active step in the trajectory.
    """
    goal_position = get_active_goal(trajectory, time_index)

    # evaluate_state expects full poses, so wrap both positions
    return move_cube.evaluate_state(
        move_cube.Pose(position=goal_position),
        move_cube.Pose(position=actual_position),
        GOAL_DIFFICULTY,
    )
Ejemplo n.º 5
0
def get_synced_log_data(logdir, goal, difficulty):
    """Load robot/camera logs and collect observations synced to camera frames.

    Iterates over the robot log, accumulating the (negative) pose cost as a
    reward, and records one observation bundle each time a camera timestamp
    is reached.

    Args:
        logdir:  Directory containing "robot_data.dat" and "camera_data.dat".
        goal:  Goal pose as a dict of keyword arguments for ``move_cube.Pose``.
        difficulty:  Difficulty level used for the reward computation.

    Returns:
        dict: Lists keyed by 'robot', 'cube', 'images', 't',
        'desired_action', 'stamp' and 'acc_reward', one entry per camera
        frame reached during the replay.
    """
    log = robot_fingers.TriFingerPlatformLog(os.path.join(logdir, "robot_data.dat"),
                                             os.path.join(logdir, "camera_data.dat"))
    log_camera = tricamera.LogReader(os.path.join(logdir, "camera_data.dat"))
    stamps = log_camera.timestamps

    obs = {'robot': [], 'cube': [], 'images': [], 't': [], 'desired_action': [],
           'stamp': [], 'acc_reward': []}
    ind = 0
    acc_reward = 0.0
    # NOTE(review): the last time index is excluded here, unlike the other
    # replay loops in this project which iterate to get_last_timeindex() + 1
    # — confirm whether skipping the final step is intentional.
    for t in range(log.get_first_timeindex(), log.get_last_timeindex()):
        camera_observation = log.get_camera_observation(t)
        acc_reward -= move_cube.evaluate_state(
            move_cube.Pose(**goal), camera_observation.filtered_object_pose,
            difficulty
        )
        # Bounds check on ind: without it, stamps[ind] raised IndexError
        # once the robot log ran past the last camera timestamp.
        # (get_timestamp_ms is scaled by 1000, presumably ms -> us to match
        # the camera stamps — TODO confirm units.)
        if ind < len(stamps) and 1000 * log.get_timestamp_ms(t) >= stamps[ind]:
            robot_observation = log.get_robot_observation(t)
            obs['robot'].append(robot_observation)
            obs['cube'].append(camera_observation.filtered_object_pose)
            obs['images'].append([convert_image(camera.image)
                                  for camera in camera_observation.cameras])
            obs['desired_action'].append(log.get_desired_action(t))
            obs['acc_reward'].append(acc_reward)
            obs['t'].append(t)
            obs['stamp'].append(log.get_timestamp_ms(t))
            ind += 1
    return obs
def test_evaluate_state_dict():
    """evaluate_state must accept plain dicts as well as Pose objects."""
    difficulty = 4
    quat = Rotation.from_euler("z", 0.42).as_quat()

    origin_pose = move_cube.Pose()
    origin_dict = {"position": [0, 0, 0], "orientation": [0, 0, 0, 1]}
    target_pose = move_cube.Pose([1, 2, 3], quat)
    target_dict = {"position": [1, 2, 3], "orientation": quat}

    # both representations must yield the same cost
    cost_from_poses = move_cube.evaluate_state(
        origin_pose, target_pose, difficulty
    )
    cost_from_dicts = move_cube.evaluate_state(
        origin_dict, target_dict, difficulty
    )
    assert cost_from_poses == cost_from_dicts
Ejemplo n.º 7
0
def main():
    """Replay a robot/camera log pair and print the cumulative reward.

    Validates that the robot log covers exactly one full episode before
    evaluating; exits with status 1 on any validation failure.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    for flag, help_text in (
        ("--robot-log", "Path to the robot log file."),
        ("--camera-log", "Path to the camera log file."),
        ("--goal-file", "Path to the goal JSON file."),
    ):
        parser.add_argument(flag, required=True, type=str, help=help_text)
    args = parser.parse_args()

    # load goal from json file
    move_cube.validate_goal_file(args.goal_file)
    with open(args.goal_file, "r") as fh:
        goal = json.load(fh)
    difficulty = int(goal["difficulty"])
    goal_pose = move_cube.Pose.from_dict(goal["goal"])

    try:
        log = robot_fingers.TriFingerPlatformLog(
            args.robot_log, args.camera_log
        )
    except Exception as e:
        print("Failed to load logs:", e)
        sys.exit(1)

    # the log must start at time index 0 and span a complete episode
    first_t = log.get_first_timeindex()
    last_t = log.get_last_timeindex()
    expected_last_t = move_cube.episode_length - 1

    if first_t != 0:
        print("Invalid log: First time index in robot log is {}. Expected 0.".
              format(first_t))
        sys.exit(1)

    if last_t != expected_last_t:
        print("Invalid log: Last time index in robot log is {}. Expected {}.".
              format(last_t, expected_last_t))
        sys.exit(1)

    # reward is the negated pose cost, accumulated over every time step
    cumulative_reward = sum(
        -move_cube.evaluate_state(
            goal_pose,
            log.get_camera_observation(t).object_pose,
            difficulty,
        )
        for t in range(first_t, last_t + 1)
    )

    print("Cumulative Reward:", cumulative_reward)
def test_evaluate_state_difficulty_3():
    """At difficulty 3 only the position error matters."""
    difficulty = 3
    origin = move_cube.Pose()
    quat_z = Rotation.from_euler("z", 0.42).as_quat()
    shifted = move_cube.Pose(position=[1, 2, 3])
    turned = move_cube.Pose(orientation=quat_z)
    shifted_and_turned = move_cube.Pose([1, 2, 3], quat_z)

    # an exact match must have zero cost
    assert move_cube.evaluate_state(origin, origin, difficulty) == 0

    # translation is penalized, rotation is ignored
    assert move_cube.evaluate_state(origin, shifted, difficulty) != 0
    assert move_cube.evaluate_state(origin, turned, difficulty) == 0
    assert move_cube.evaluate_state(origin, shifted_and_turned, difficulty) != 0
Ejemplo n.º 9
0
def compute_reward(logdir):
    """Replay the logs in *logdir* and return the accumulated reward.

    Reads the goal from "goal.json" in the log directory and sums the
    negated pose cost over every time step of the robot log.

    Args:
        logdir:  Directory containing "robot_data.dat", "camera_data.dat"
            and "goal.json".

    Returns:
        float: Accumulated (non-positive) reward; zero means a perfect
        match at every step.
    """
    log = robot_fingers.TriFingerPlatformLog(
        os.path.join(logdir, "robot_data.dat"),
        os.path.join(logdir, "camera_data.dat"),
    )
    with open(os.path.join(logdir, "goal.json"), 'r') as f:
        goal = json.load(f)
    difficulty = goal['difficulty']
    goal_pose = move_cube.Pose(
        position=np.array(goal['goal']['position']),
        orientation=np.array(goal['goal']['orientation']),
    )

    total = 0.0
    for t in range(log.get_first_timeindex(), log.get_last_timeindex() + 1):
        cube_pose = log.get_camera_observation(t).filtered_object_pose
        total -= move_cube.evaluate_state(goal_pose, cube_pose, difficulty)
    return total
def replay_action_log(logfile, difficulty, initial_pose, goal_pose):
    """Replay a recorded action log on the simulation and verify it.

    Loads a pickled action log, re-executes every logged action on a fresh
    ``TriFingerPlatform`` instance and asserts that the robot and object
    states produced by the replay match the recorded ones at every step.
    Prints "Failed." and exits the process with status 1 if the initial
    poses or the final object pose do not match; prints "Passed."
    otherwise.

    Args:
        logfile:  Path to the pickled action log file.
        difficulty:  Difficulty level used for the reward computation.
        initial_pose:  Initial object pose as a JSON string.
        goal_pose:  Goal object pose as a JSON string.

    Returns:
        The reward accumulated over the replayed episode.

    Raises:
        AssertionError: If the number of actions, a time index, or any
            per-step robot/object state does not match the log.
    """

    # NOTE(review): pickle.load is only safe on trusted log files.
    with open(logfile, "rb") as fh:
        log = pickle.load(fh)

    initial_object_pose = move_cube.Pose.from_json(initial_pose)
    goal_pose = move_cube.Pose.from_json(goal_pose)

    # verify that the initial object pose matches with the one in the log file
    try:
        np.testing.assert_array_almost_equal(
            initial_object_pose.position,
            log["initial_object_pose"].position,
            err_msg=(
                "Given initial object position does not match with log file."),
        )
        np.testing.assert_array_almost_equal(
            initial_object_pose.orientation,
            log["initial_object_pose"].orientation,
            err_msg=("Given initial object orientation does not match with log"
                     " file."),
        )
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    platform = trifinger_platform.TriFingerPlatform(
        visualization=False, initial_object_pose=initial_object_pose)

    # verify that the robot is initialized to the same position as in the log
    # file
    initial_robot_position = platform.get_robot_observation(0).position
    try:
        np.testing.assert_array_almost_equal(
            initial_robot_position,
            log["initial_robot_position"],
            err_msg=("Initial robot position does not match with log file."),
        )
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    # verify that the number of logged actions matches with the episode length
    n_actions = len(log["actions"])
    assert (
        n_actions == move_cube.episode_length
    ), "Number of actions in log does not match with expected episode length."

    accumulated_reward = 0
    for logged_action in log["actions"]:
        action = logged_action["action"]

        t = platform.append_desired_action(action)

        robot_obs = platform.get_robot_observation(t)
        cube_pose = platform.get_camera_observation(t).filtered_object_pose
        # reward is the negated pose cost (lower cost = higher reward)
        reward = -move_cube.evaluate_state(goal_pose, cube_pose, difficulty)
        accumulated_reward += reward

        # the replayed time index must match the logged one
        assert logged_action["t"] == t

        np.testing.assert_array_almost_equal(
            robot_obs.position,
            logged_action["robot_observation"].position,
            err_msg=("Step %d: Recorded robot position does not match with"
                     " the one achieved by the replay" % t),
        )
        np.testing.assert_array_almost_equal(
            robot_obs.torque,
            logged_action["robot_observation"].torque,
            err_msg=("Step %d: Recorded robot torque does not match with"
                     " the one achieved by the replay" % t),
        )
        np.testing.assert_array_almost_equal(
            robot_obs.velocity,
            logged_action["robot_observation"].velocity,
            err_msg=("Step %d: Recorded robot velocity does not match with"
                     " the one achieved by the replay" % t),
        )

        np.testing.assert_array_almost_equal(
            cube_pose.position,
            logged_action["object_pose"].position,
            err_msg=("Step %d: Recorded object position does not match with"
                     " the one achieved by the replay" % t),
        )
        np.testing.assert_array_almost_equal(
            cube_pose.orientation,
            logged_action["object_pose"].orientation,
            err_msg=("Step %d: Recorded object orientation does not match with"
                     " the one achieved by the replay" % t),
        )

    # final check uses the unfiltered object pose, unlike the per-step
    # checks above which use filtered_object_pose
    cube_pose = platform.get_camera_observation(t).object_pose
    final_pose = log["final_object_pose"]["pose"]

    print("Accumulated Reward:", accumulated_reward)

    # verify that actual and logged final object pose match
    try:
        np.testing.assert_array_almost_equal(
            cube_pose.position,
            final_pose.position,
            decimal=3,
            err_msg=("Recorded object position does not match with the one"
                     " achieved by the replay"),
        )
        np.testing.assert_array_almost_equal(
            cube_pose.orientation,
            final_pose.orientation,
            decimal=3,
            err_msg=("Recorded object orientation does not match with the one"
                     " achieved by the replay"),
        )
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    print("Passed.")

    return accumulated_reward
Ejemplo n.º 11
0
def main():
    """Replay an action log given on the command line and verify it.

    Parses the log file path, difficulty and the initial/goal poses (as
    JSON strings) from the command line, re-executes every logged action
    on a fresh ``TriFingerPlatform`` instance and asserts that the robot
    and object states produced by the replay match the recorded ones.
    Prints "Failed." and exits with status 1 on any mismatch of the
    initial or final poses; prints "Passed." otherwise.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--logfile",
        "-l",
        required=True,
        type=str,
        help="Path to the log file.",
    )
    parser.add_argument(
        "--difficulty",
        "-d",
        required=True,
        type=int,
        help="The difficulty level of the goal (for reward computation).",
    )
    parser.add_argument(
        "--initial-pose",
        "-i",
        required=True,
        type=str,
        metavar="JSON",
        help="Initial pose of the cube as JSON string.",
    )
    parser.add_argument(
        "--goal-pose",
        "-g",
        required=True,
        type=str,
        metavar="JSON",
        help="Goal pose of the cube as JSON string.",
    )
    args = parser.parse_args()

    # NOTE(review): pickle.load is only safe on trusted log files.
    with open(args.logfile, "rb") as fh:
        log = pickle.load(fh)

    initial_object_pose = move_cube.Pose.from_json(args.initial_pose)
    goal_pose = move_cube.Pose.from_json(args.goal_pose)

    # verify that the initial object pose matches with the one in the log file
    try:
        np.testing.assert_array_almost_equal(
            initial_object_pose.position,
            log["initial_object_pose"].position,
            err_msg=(
                "Given initial object position does not match with log file."
            ),
        )
        np.testing.assert_array_almost_equal(
            initial_object_pose.orientation,
            log["initial_object_pose"].orientation,
            err_msg=(
                "Given initial object orientation does not match with log file."
            ),
        )
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    platform = trifinger_platform.TriFingerPlatform(
        visualization=False, initial_object_pose=initial_object_pose
    )

    # verify that the robot is initialized to the same position as in the log
    # file
    initial_robot_position = platform.get_robot_observation(0).position
    try:
        np.testing.assert_array_almost_equal(
            initial_robot_position,
            log["initial_robot_position"],
            err_msg=("Initial robot position does not match with log file."),
        )
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    # verify that the number of logged actions matches with the episode length
    n_actions = len(log["actions"])
    assert (
        n_actions == move_cube.episode_length
    ), "Number of actions in log does not match with expected episode length."

    accumulated_reward = 0
    for logged_action in log["actions"]:
        action = logged_action["action"]

        t = platform.append_desired_action(action)

        robot_obs = platform.get_robot_observation(t)
        cube_pose = platform.get_object_pose(t)
        # reward is the negated pose cost (lower cost = higher reward)
        reward = -move_cube.evaluate_state(
            goal_pose, cube_pose, args.difficulty
        )
        accumulated_reward += reward

        # the replayed time index must match the logged one
        assert logged_action["t"] == t

        np.testing.assert_array_almost_equal(
            robot_obs.position,
            logged_action["robot_observation"].position,
            err_msg=(
                "Step %d: Recorded robot position does not match with"
                " the one achieved by the replay" % t
            ),
        )
        np.testing.assert_array_almost_equal(
            robot_obs.torque,
            logged_action["robot_observation"].torque,
            err_msg=(
                "Step %d: Recorded robot torque does not match with"
                " the one achieved by the replay" % t
            ),
        )
        np.testing.assert_array_almost_equal(
            robot_obs.velocity,
            logged_action["robot_observation"].velocity,
            err_msg=(
                "Step %d: Recorded robot velocity does not match with"
                " the one achieved by the replay" % t
            ),
        )

        np.testing.assert_array_almost_equal(
            cube_pose.position,
            logged_action["object_pose"].position,
            err_msg=(
                "Step %d: Recorded object position does not match with"
                " the one achieved by the replay" % t
            ),
        )
        np.testing.assert_array_almost_equal(
            cube_pose.orientation,
            logged_action["object_pose"].orientation,
            err_msg=(
                "Step %d: Recorded object orientation does not match with"
                " the one achieved by the replay" % t
            ),
        )

    cube_pose = platform.get_object_pose(t)
    final_pose = log["final_object_pose"]["pose"]

    print("Accumulated Reward:", accumulated_reward)

    # verify that actual and logged final object pose match
    try:
        np.testing.assert_array_almost_equal(
            cube_pose.position,
            final_pose.position,
            decimal=3,
            err_msg=(
                "Recorded object position does not match with the one"
                " achieved by the replay"
            ),
        )
        np.testing.assert_array_almost_equal(
            cube_pose.orientation,
            final_pose.orientation,
            decimal=3,
            err_msg=(
                "Recorded object orientation does not match with the one"
                " achieved by the replay"
            ),
        )
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    print("Passed.")
Ejemplo n.º 12
0
def competition_reward(previous_observation, observation, info):
    """Return the negated pose cost of the achieved goal w.r.t. the desired
    goal, at the difficulty level given in ``info["difficulty"]``.

    ``previous_observation`` is accepted for interface compatibility but
    not used.
    """
    goal = move_cube.Pose.from_dict(observation['desired_goal'])
    actual = move_cube.Pose.from_dict(observation['achieved_goal'])
    return -move_cube.evaluate_state(goal, actual, info["difficulty"])