def test_evaluate_state_difficulty_4_rotation_around_y(self):
    """Difficulty 4: a rotation purely around the y axis adds no cost."""
    difficulty = 4
    origin = move_cube.Pose()
    rot_only_y = move_cube.Pose(
        orientation=Rotation.from_euler("y", 0.42).as_quat()
    )
    rot_without_y = move_cube.Pose(
        orientation=Rotation.from_euler("yz", [0.0, 0.42]).as_quat()
    )
    rot_with_y = move_cube.Pose(
        orientation=Rotation.from_euler("yz", [0.2, 0.42]).as_quat()
    )

    # an exact match must have zero cost
    self.assertEqual(
        move_cube.evaluate_state(origin, origin, difficulty), 0
    )

    # a pure y rotation must also have zero cost
    self.assertEqual(
        move_cube.evaluate_state(origin, rot_only_y, difficulty), 0
    )

    # adding a y component on top of another rotation must not change the cost
    cost_without_y = move_cube.evaluate_state(
        origin, rot_without_y, difficulty
    )
    cost_with_y = move_cube.evaluate_state(origin, rot_with_y, difficulty)
    self.assertAlmostEqual(cost_without_y, cost_with_y)
def test_evaluate_state_difficulty_4(self):
    """Difficulty 4: translation and rotation both contribute to the cost."""
    difficulty = 4
    origin = move_cube.Pose()
    translated = move_cube.Pose(position=[1, 2, 3])
    rotated = move_cube.Pose(
        orientation=Rotation.from_euler("z", 0.42).as_quat()
    )
    moved_both = move_cube.Pose(
        [1, 2, 3], Rotation.from_euler("z", 0.42).as_quat()
    )

    # an exact match must have zero cost
    self.assertEqual(
        move_cube.evaluate_state(origin, origin, difficulty), 0
    )

    # non-zero whenever there is translation, rotation or both
    self.assertNotEqual(
        move_cube.evaluate_state(origin, translated, difficulty), 0
    )
    self.assertNotEqual(
        move_cube.evaluate_state(origin, rotated, difficulty), 0
    )
    self.assertNotEqual(
        move_cube.evaluate_state(origin, moved_both, difficulty), 0
    )
def compute_reward(self, achieved_goal, desired_goal, info):
    """Compute the reward for the given achieved and desired goal.

    Args:
        achieved_goal (dict): Current pose of the object.
        desired_goal (dict): Goal pose of the object.
        info (dict): An info dictionary containing a field "difficulty"
            which specifies the difficulty level.

    Returns:
        float: The reward that corresponds to the provided achieved goal
        w.r.t. to the desired goal.  Note that the following should always
        hold true::

            ob, reward, done, info = env.step()
            assert reward == env.compute_reward(
                ob['achieved_goal'],
                ob['desired_goal'],
                info,
            )
    """
    goal = move_cube.Pose.from_dict(desired_goal)
    actual = move_cube.Pose.from_dict(achieved_goal)
    # evaluate_state returns a cost (lower is better), so negate it
    cost = move_cube.evaluate_state(goal, actual, info["difficulty"])
    return -cost
def evaluate_state(
    trajectory: Trajectory, time_index: int, actual_position: Position
):
    """Compute cost of a given cube pose at a given time step.  Less is better.

    The cost is computed using :func:`move_cube.evaluate_state` (difficulty=3)
    for the goal position that is active at the given time_index.

    Args:
        trajectory:  The trajectory based on which the cost is computed.
        time_index:  Index of the time step that is evaluated.
        actual_position:  Cube position at the specified time step.

    Returns:
        Cost of the actual position w.r.t. to the goal position of the active
        step in the trajectory.  Lower value means that the actual pose is
        closer to the goal.  Zero if actual == goal.
    """
    goal_position = get_active_goal(trajectory, time_index)

    # evaluate_state expects Pose objects, so wrap both positions
    return move_cube.evaluate_state(
        move_cube.Pose(position=goal_position),
        move_cube.Pose(position=actual_position),
        GOAL_DIFFICULTY,
    )
def get_synced_log_data(logdir, goal, difficulty):
    """Load robot/camera logs and collect observations synced to camera frames.

    Iterates over the robot log, accumulates the (negative) cost of the
    filtered object pose w.r.t. the goal at every time step, and records one
    observation entry per camera frame — at the first robot step whose
    timestamp reaches that frame's timestamp.

    Args:
        logdir: Directory containing "robot_data.dat" and "camera_data.dat".
        goal: Goal pose as a dict whose entries are passed as keyword
            arguments to ``move_cube.Pose`` (presumably "position" and
            "orientation" — confirm against the goal file schema).
        difficulty: Difficulty level used for the reward computation.

    Returns:
        dict of lists keyed by 'robot', 'cube', 'images', 't',
        'desired_action', 'stamp' and 'acc_reward', one entry per matched
        camera frame.
    """
    log = robot_fingers.TriFingerPlatformLog(
        os.path.join(logdir, "robot_data.dat"),
        os.path.join(logdir, "camera_data.dat"),
    )
    log_camera = tricamera.LogReader(os.path.join(logdir, "camera_data.dat"))
    stamps = log_camera.timestamps

    obs = {'robot': [], 'cube': [], 'images': [], 't': [],
           'desired_action': [], 'stamp': [], 'acc_reward': []}
    ind = 0
    acc_reward = 0.0
    # NOTE(review): the last time index is excluded here, unlike the
    # "+ 1" reward loops elsewhere in this project — confirm this is intended.
    for t in range(log.get_first_timeindex(), log.get_last_timeindex()):
        camera_observation = log.get_camera_observation(t)
        acc_reward -= move_cube.evaluate_state(
            move_cube.Pose(**goal),
            camera_observation.filtered_object_pose,
            difficulty,
        )
        # FIX: bounds check prevents an IndexError on stamps[ind] once all
        # camera frames have been matched before the robot log ends.
        if ind < len(stamps) and 1000 * log.get_timestamp_ms(t) >= stamps[ind]:
            robot_observation = log.get_robot_observation(t)
            obs['robot'].append(robot_observation)
            obs['cube'].append(camera_observation.filtered_object_pose)
            obs['images'].append(
                [convert_image(camera.image)
                 for camera in camera_observation.cameras]
            )
            obs['desired_action'].append(log.get_desired_action(t))
            obs['acc_reward'].append(acc_reward)
            obs['t'].append(t)
            obs['stamp'].append(log.get_timestamp_ms(t))
            ind += 1

    return obs
def test_evaluate_state_dict():
    """Test evaluate state using a dict instead of Pose."""
    difficulty = 4

    origin_pose = move_cube.Pose()
    origin_dict = {"position": [0, 0, 0], "orientation": [0, 0, 0, 1]}

    quat = Rotation.from_euler("z", 0.42).as_quat()
    moved_pose = move_cube.Pose([1, 2, 3], quat)
    moved_dict = {"position": [1, 2, 3], "orientation": quat}

    # both representations must yield exactly the same cost
    cost_from_pose = move_cube.evaluate_state(
        origin_pose, moved_pose, difficulty
    )
    cost_from_dict = move_cube.evaluate_state(
        origin_dict, moved_dict, difficulty
    )
    assert cost_from_pose == cost_from_dict
def main():
    """Compute the cumulative reward of a recorded episode.

    Loads robot/camera log files and a goal file, verifies that the log
    covers the full expected episode length and prints the reward
    accumulated over all time steps.  Exits with code 1 on any error.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--robot-log",
        required=True,
        type=str,
        help="Path to the robot log file.",
    )
    parser.add_argument(
        "--camera-log",
        required=True,
        type=str,
        help="Path to the camera log file.",
    )
    parser.add_argument(
        "--goal-file",
        required=True,
        type=str,
        help="Path to the goal JSON file.",
    )
    args = parser.parse_args()

    # load and validate the goal from the json file
    move_cube.validate_goal_file(args.goal_file)
    with open(args.goal_file, "r") as fh:
        goal = json.load(fh)
    difficulty = int(goal["difficulty"])
    goal_pose = move_cube.Pose.from_dict(goal["goal"])

    try:
        log = robot_fingers.TriFingerPlatformLog(
            args.robot_log, args.camera_log
        )
    except Exception as e:
        print("Failed to load logs:", e)
        sys.exit(1)

    # verify that the log is complete and matches the expected length
    t_first = log.get_first_timeindex()
    t_last = log.get_last_timeindex()
    if t_first != 0:
        print(
            "Invalid log: First time index in robot log is {}. Expected 0.".format(
                t_first
            )
        )
        sys.exit(1)
    expected_last = move_cube.episode_length - 1
    if t_last != expected_last:
        print(
            "Invalid log: Last time index in robot log is {}. Expected {}.".format(
                t_last, expected_last
            )
        )
        sys.exit(1)

    # reward is the negative cost, summed over every time step
    cumulative_reward = 0
    for t in range(t_first, t_last + 1):
        cube_pose = log.get_camera_observation(t).object_pose
        cumulative_reward += -move_cube.evaluate_state(
            goal_pose, cube_pose, difficulty
        )

    print("Cumulative Reward:", cumulative_reward)
def test_evaluate_state_difficulty_3():
    """Difficulty 3: only translation costs, orientation is ignored."""
    difficulty = 3
    origin = move_cube.Pose()
    shifted = move_cube.Pose(position=[1, 2, 3])
    rotated = move_cube.Pose(
        orientation=Rotation.from_euler("z", 0.42).as_quat()
    )
    shifted_and_rotated = move_cube.Pose(
        [1, 2, 3], Rotation.from_euler("z", 0.42).as_quat()
    )

    # an exact match must have zero cost
    assert move_cube.evaluate_state(origin, origin, difficulty) == 0

    # translation costs; pure rotation is free at this difficulty
    assert move_cube.evaluate_state(origin, shifted, difficulty) != 0
    assert move_cube.evaluate_state(origin, rotated, difficulty) == 0
    assert move_cube.evaluate_state(origin, shifted_and_rotated, difficulty) != 0
def compute_reward(logdir):
    """Sum the negative cost over every time step of a logged episode.

    Reads the goal from "goal.json" in *logdir* and evaluates the filtered
    object pose from the robot/camera log against it at every time step.

    Args:
        logdir: Directory containing "robot_data.dat", "camera_data.dat"
            and "goal.json".

    Returns:
        Accumulated (negative) cost over the whole log.
    """
    log = robot_fingers.TriFingerPlatformLog(
        os.path.join(logdir, "robot_data.dat"),
        os.path.join(logdir, "camera_data.dat"),
    )

    with open(os.path.join(logdir, "goal.json"), 'r') as f:
        goal = json.load(f)
    difficulty = goal['difficulty']
    goal_pose = move_cube.Pose(
        position=np.array(goal['goal']['position']),
        orientation=np.array(goal['goal']['orientation']),
    )

    total = 0.0
    for t in range(log.get_first_timeindex(), log.get_last_timeindex() + 1):
        cube_pose = log.get_camera_observation(t).filtered_object_pose
        total -= move_cube.evaluate_state(goal_pose, cube_pose, difficulty)
    return total
def replay_action_log(logfile, difficulty, initial_pose, goal_pose):
    """Replay a logged episode on the simulated platform and verify it.

    Loads a pickled action log, replays every logged action on a fresh
    ``trifinger_platform.TriFingerPlatform`` and asserts at each step that
    the replayed robot and object states match the recorded ones.  Prints
    "Failed." and exits the process with code 1 on any mismatch of the
    initial poses or the final object pose; per-step mismatches raise
    uncaught ``AssertionError``.

    Args:
        logfile: Path to the pickled action log.
        difficulty: Difficulty level used for the reward computation.
        initial_pose: Initial cube pose as a JSON string.
        goal_pose: Goal cube pose as a JSON string.

    Returns:
        The reward accumulated over the replayed episode.
    """
    with open(logfile, "rb") as fh:
        log = pickle.load(fh)

    initial_object_pose = move_cube.Pose.from_json(initial_pose)
    goal_pose = move_cube.Pose.from_json(goal_pose)

    # verify that the initial object pose matches with the one in the log file
    try:
        np.testing.assert_array_almost_equal(
            initial_object_pose.position,
            log["initial_object_pose"].position,
            err_msg=(
                "Given initial object position does not match with log file."),
        )
        np.testing.assert_array_almost_equal(
            initial_object_pose.orientation,
            log["initial_object_pose"].orientation,
            err_msg=("Given initial object orientation does not match with log"
                     " file."),
        )
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    platform = trifinger_platform.TriFingerPlatform(
        visualization=False, initial_object_pose=initial_object_pose)

    # verify that the robot is initialized to the same position as in the log
    # file
    initial_robot_position = platform.get_robot_observation(0).position
    try:
        np.testing.assert_array_almost_equal(
            initial_robot_position,
            log["initial_robot_position"],
            err_msg=("Initial robot position does not match with log file."),
        )
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    # verify that the number of logged actions matches with the episode length
    n_actions = len(log["actions"])
    assert (
        n_actions == move_cube.episode_length
    ), "Number of actions in log does not match with expected episode length."

    accumulated_reward = 0
    for logged_action in log["actions"]:
        action = logged_action["action"]

        t = platform.append_desired_action(action)

        robot_obs = platform.get_robot_observation(t)
        # the *filtered* object pose is compared against the log here
        cube_pose = platform.get_camera_observation(t).filtered_object_pose
        reward = -move_cube.evaluate_state(goal_pose, cube_pose, difficulty)
        accumulated_reward += reward

        # the time index returned by the replay must match the logged one
        assert logged_action["t"] == t

        np.testing.assert_array_almost_equal(
            robot_obs.position,
            logged_action["robot_observation"].position,
            err_msg=("Step %d: Recorded robot position does not match with"
                     " the one achieved by the replay" % t),
        )
        np.testing.assert_array_almost_equal(
            robot_obs.torque,
            logged_action["robot_observation"].torque,
            err_msg=("Step %d: Recorded robot torque does not match with"
                     " the one achieved by the replay" % t),
        )
        np.testing.assert_array_almost_equal(
            robot_obs.velocity,
            logged_action["robot_observation"].velocity,
            err_msg=("Step %d: Recorded robot velocity does not match with"
                     " the one achieved by the replay" % t),
        )
        np.testing.assert_array_almost_equal(
            cube_pose.position,
            logged_action["object_pose"].position,
            err_msg=("Step %d: Recorded object position does not match with"
                     " the one achieved by the replay" % t),
        )
        np.testing.assert_array_almost_equal(
            cube_pose.orientation,
            logged_action["object_pose"].orientation,
            err_msg=("Step %d: Recorded object orientation does not match with"
                     " the one achieved by the replay" % t),
        )

    # final comparison uses the raw (unfiltered) object pose
    cube_pose = platform.get_camera_observation(t).object_pose
    final_pose = log["final_object_pose"]["pose"]

    print("Accumulated Reward:", accumulated_reward)

    # verify that actual and logged final object pose match
    try:
        np.testing.assert_array_almost_equal(
            cube_pose.position,
            final_pose.position,
            decimal=3,
            err_msg=("Recorded object position does not match with the one"
                     " achieved by the replay"),
        )
        np.testing.assert_array_almost_equal(
            cube_pose.orientation,
            final_pose.orientation,
            decimal=3,
            err_msg=("Recorded object orientation does not match with the one"
                     " achieved by the replay"),
        )
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    print("Passed.")

    return accumulated_reward
def main():
    """Command line entry point: replay a logged episode and verify it.

    Parses command line arguments (log file, difficulty, initial and goal
    pose), replays the logged actions on a simulated
    ``trifinger_platform.TriFingerPlatform`` and verifies step by step that
    the replay reproduces the recorded robot and object states.  Prints
    "Failed." and exits with code 1 on a mismatch of the initial poses or
    the final object pose; per-step mismatches raise ``AssertionError``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--logfile",
        "-l",
        required=True,
        type=str,
        help="Path to the log file.",
    )
    parser.add_argument(
        "--difficulty",
        "-d",
        required=True,
        type=int,
        help="The difficulty level of the goal (for reward computation).",
    )
    parser.add_argument(
        "--initial-pose",
        "-i",
        required=True,
        type=str,
        metavar="JSON",
        help="Initial pose of the cube as JSON string.",
    )
    parser.add_argument(
        "--goal-pose",
        "-g",
        required=True,
        type=str,
        metavar="JSON",
        help="Goal pose of the cube as JSON string.",
    )
    args = parser.parse_args()

    with open(args.logfile, "rb") as fh:
        log = pickle.load(fh)

    initial_object_pose = move_cube.Pose.from_json(args.initial_pose)
    goal_pose = move_cube.Pose.from_json(args.goal_pose)

    # verify that the initial object pose matches with the one in the log file
    try:
        np.testing.assert_array_almost_equal(
            initial_object_pose.position,
            log["initial_object_pose"].position,
            err_msg=(
                "Given initial object position does not match with log file."
            ),
        )
        np.testing.assert_array_almost_equal(
            initial_object_pose.orientation,
            log["initial_object_pose"].orientation,
            err_msg=(
                "Given initial object orientation does not match with log file."
            ),
        )
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    platform = trifinger_platform.TriFingerPlatform(
        visualization=False, initial_object_pose=initial_object_pose
    )

    # verify that the robot is initialized to the same position as in the log
    # file
    initial_robot_position = platform.get_robot_observation(0).position
    try:
        np.testing.assert_array_almost_equal(
            initial_robot_position,
            log["initial_robot_position"],
            err_msg=("Initial robot position does not match with log file."),
        )
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    # verify that the number of logged actions matches with the episode length
    n_actions = len(log["actions"])
    assert (
        n_actions == move_cube.episode_length
    ), "Number of actions in log does not match with expected episode length."

    accumulated_reward = 0
    for logged_action in log["actions"]:
        action = logged_action["action"]

        t = platform.append_desired_action(action)

        robot_obs = platform.get_robot_observation(t)
        # NOTE: this variant compares platform.get_object_pose(t), not the
        # camera observation's filtered pose
        cube_pose = platform.get_object_pose(t)
        reward = -move_cube.evaluate_state(
            goal_pose, cube_pose, args.difficulty
        )
        accumulated_reward += reward

        # the time index returned by the replay must match the logged one
        assert logged_action["t"] == t

        np.testing.assert_array_almost_equal(
            robot_obs.position,
            logged_action["robot_observation"].position,
            err_msg=(
                "Step %d: Recorded robot position does not match with"
                " the one achieved by the replay" % t
            ),
        )
        np.testing.assert_array_almost_equal(
            robot_obs.torque,
            logged_action["robot_observation"].torque,
            err_msg=(
                "Step %d: Recorded robot torque does not match with"
                " the one achieved by the replay" % t
            ),
        )
        np.testing.assert_array_almost_equal(
            robot_obs.velocity,
            logged_action["robot_observation"].velocity,
            err_msg=(
                "Step %d: Recorded robot velocity does not match with"
                " the one achieved by the replay" % t
            ),
        )
        np.testing.assert_array_almost_equal(
            cube_pose.position,
            logged_action["object_pose"].position,
            err_msg=(
                "Step %d: Recorded object position does not match with"
                " the one achieved by the replay" % t
            ),
        )
        np.testing.assert_array_almost_equal(
            cube_pose.orientation,
            logged_action["object_pose"].orientation,
            err_msg=(
                "Step %d: Recorded object orientation does not match with"
                " the one achieved by the replay" % t
            ),
        )

    cube_pose = platform.get_object_pose(t)
    final_pose = log["final_object_pose"]["pose"]

    print("Accumulated Reward:", accumulated_reward)

    # verify that actual and logged final object pose match
    try:
        np.testing.assert_array_almost_equal(
            cube_pose.position,
            final_pose.position,
            decimal=3,
            err_msg=(
                "Recorded object position does not match with the one"
                " achieved by the replay"
            ),
        )
        np.testing.assert_array_almost_equal(
            cube_pose.orientation,
            final_pose.orientation,
            decimal=3,
            err_msg=(
                "Recorded object orientation does not match with the one"
                " achieved by the replay"
            ),
        )
    except AssertionError as e:
        print("Failed.", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    print("Passed.")
def competition_reward(previous_observation, observation, info):
    """Return the negative cost of the achieved vs. the desired goal pose.

    Args:
        previous_observation: Unused; kept for interface compatibility.
        observation: Observation dict with 'desired_goal' and 'achieved_goal'
            pose dicts.
        info: Info dict containing the "difficulty" level.

    Returns:
        Negated output of :func:`move_cube.evaluate_state` (higher is better).
    """
    desired = move_cube.Pose.from_dict(observation['desired_goal'])
    achieved = move_cube.Pose.from_dict(observation['achieved_goal'])
    return -move_cube.evaluate_state(desired, achieved, info["difficulty"])