Example #1
def generate_test_set(levels: typing.List[int], samples_per_level: int,
                      logfile_tmpl: str) -> typing.List[TestSample]:
    """Generate random test set for policy evaluation.

    For each difficulty level, a list of samples is generated, each
    consisting of a randomized initial pose and goal pose.

    Args:
        levels:  List of difficulty levels.
        samples_per_level:  How many samples are generated per level.
        logfile_tmpl:  Format string for the log file associated with this
            sample.  Needs to contain placeholders "{level}" and "{iteration}".

    Returns:
        List of ``len(levels) * samples_per_level`` test samples.
    """
    samples = []
    for level in levels:
        for i in range(samples_per_level):
            init = move_cube.sample_goal(-1)
            goal = move_cube.sample_goal(level)
            logfile = logfile_tmpl.format(level=level, iteration=i)

            samples.append(
                TestSample(level, i, init.to_json(), goal.to_json(), logfile))

    return samples
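A minimal usage sketch for generate_test_set follows; the level list and the log file template are illustrative, not taken from the original code.

# hypothetical call; adjust levels and the logfile template to your setup
test_set = generate_test_set(
    levels=[1, 2, 3, 4],
    samples_per_level=10,
    logfile_tmpl="logs/level{level}_iter{iteration}.log",
)
assert len(test_set) == 4 * 10  # one TestSample per (level, iteration) pair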
    def get_initial_state(self):
        """Get a random initial object pose (always on the ground)."""
        init = move_cube.sample_goal(difficulty=-1)
        init.position[:2] = sample_uniform_from_circle(0.07)
        if self.difficulty == 4:
            self.goal = move_cube.sample_goal(self.difficulty)

            # sample orientation
            projected_goal_ori = project_cube_xy_plane(self.goal.orientation)
            z_rot_noise = Rotation.from_euler('z', (np.pi / 2 * 0.70) *
                                              random.random())
            init.orientation = (
                z_rot_noise *
                Rotation.from_quat(projected_goal_ori)).as_quat()
        return init
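Since the initial orientation above is the projected goal orientation composed with a bounded z-rotation, the relative rotation between the two should be a pure yaw of at most 0.7 * pi/2 radians. A small sanity-check sketch, reusing the names from the snippet above:

from scipy.spatial.transform import Rotation
import numpy as np

# relative rotation between the sampled initial orientation and the projected goal
rel = (Rotation.from_quat(init.orientation) *
       Rotation.from_quat(projected_goal_ori).inv())
angle = np.linalg.norm(rel.as_rotvec())  # rotation angle in radians
assert angle <= np.pi / 2 * 0.70 + 1e-6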
Example #3
    def _reset_direct_simulation(self):
        """Reset direct simulation.

        With this, the env can be used without a backend.
        """
        # set this to False to disable pyBullet's visualization
        visualization = True

        # reset simulation
        del self.platform

        # initialize simulation
        initial_object_pose = move_cube.sample_goal(difficulty=-1)
        self.platform = trifinger_simulation.TriFingerPlatform(
            visualization=visualization,
            initial_object_pose=initial_object_pose,
        )

        # visualize the goal
        if visualization:
            self.goal_marker = trifinger_simulation.visual_objects.CubeMarker(
                width=0.065,
                position=self.goal["position"],
                orientation=self.goal["orientation"],
                physicsClientId=self.platform.simfinger._pybullet_client_id,
            )
def compute_reward(logdir, simulation):
    with open(os.path.join(logdir, "goal.json"), 'r') as f:
        goal = json.load(f)
    difficulty = goal['difficulty']
    goal_pose = move_cube.Pose(position=np.array(goal['goal']['position']),
                               orientation=np.array(
                                   goal['goal']['orientation']))

    if (simulation == 0):
        log = robot_fingers.TriFingerPlatformLog(
            os.path.join(logdir, "robot_data.dat"),
            os.path.join(logdir, "camera_data.dat"))
        reward = 0.0
        for t in range(log.get_first_timeindex(),
                       log.get_last_timeindex() + 1):
            camera_observation = log.get_camera_observation(t)
            reward -= evaluate_state(goal_pose,
                                     camera_observation.filtered_object_pose,
                                     difficulty)
        return reward, True
    else:
        path = os.path.join(logdir, 'observations.pkl')
        with open(path, 'rb') as handle:
            observations = pkl.load(handle)
        reward = 0.0
        ex_state = move_cube.sample_goal(difficulty=-1)
        for i in range(len(observations)):
            ex_state.position = observations[i]["achieved_goal"]["position"]
            ex_state.orientation = observations[i]["achieved_goal"][
                "orientation"]
            reward -= evaluate_state(goal_pose, ex_state, difficulty)
        return reward, True
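A hedged usage sketch for compute_reward; the log directory path is a placeholder.

# simulation=0 reads robot_data.dat / camera_data.dat, any other value reads observations.pkl
reward, valid = compute_reward("/path/to/logdir", simulation=1)
print("accumulated reward:", reward, "valid:", valid)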
Example #5
    def _reset_direct_simulation(self):
        """Reset direct simulation.

        With this, the env can be used without a backend.
        """

        # reset simulation
        del self.platform

        # initialize simulation
        if self.initializer is None:
            initial_object_pose = move_cube.sample_goal(difficulty=-1)
        else:
            initial_object_pose = self.initializer.get_initial_state()
            self.goal = self.initializer.get_goal().to_dict()
        self.platform = trifinger_simulation.TriFingerPlatform(
            visualization=self.visualization,
            initial_object_pose=initial_object_pose,
        )
        # use mass of real cube
        p.changeDynamics(
            bodyUniqueId=self.platform.cube.block,
            linkIndex=-1,
            physicsClientId=self.platform.simfinger._pybullet_client_id,
            mass=CUBOID_MASS)
        # visualize the goal
        if self.visualization:
            self.goal_marker = trifinger_simulation.visual_objects.CuboidMarker(
                size=CUBOID_SIZE,
                position=self.goal["position"],
                orientation=self.goal["orientation"],
                pybullet_client_id=self.platform.simfinger._pybullet_client_id,
            )
Example #6
    def _reset_direct_simulation(self):
        """Reset direct simulation.

        With this, the env can be used without a backend.
        """

        # reset simulation
        del self.platform

        # initialize simulation
        if self.initializer is None:
            initial_object_pose = move_cube.sample_goal(difficulty=-1)
        else:
            initial_object_pose = self.initializer.get_initial_state()
            self.goal = self.initializer.get_goal()
        self.platform = trifinger_simulation.TriFingerPlatform(
            visualization=self.visualization,
            initial_object_pose=initial_object_pose,
        )
        # visualize the goal
        if self.visualization:
            self.goal_marker = trifinger_simulation.visual_objects.CubeMarker(
                width=0.065,
                position=self.goal["position"],
                orientation=self.goal["orientation"],
                physicsClientId=self.platform.simfinger._pybullet_client_id,
            )
    def get_initial_state(self):
        """Get the initial pose based on the information from the file.

        The z-coordinate is not read, so the position stays on the ground
        plane.
        """
        ex_state = move_cube.sample_goal(difficulty=-1)
        # read only x and y; the z-coordinate is left unchanged
        ex_state.position[0:2] = self.init_information[0:2]
        ex_state.orientation = self.init_information[3:7]
        return ex_state
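A small sketch of what the flat init_information vector might look like, based only on the slices used here; the values are illustrative.

import numpy as np

# [0:2] initial x, y (index 2, the z-coordinate, is not read so the cube
# stays on the ground); [3:7] initial orientation quaternion (x, y, z, w)
init_information = np.array([0.05, -0.03, 0.0,      # x, y, unused z
                             0.0, 0.0, 0.0, 1.0])   # orientation quaternion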
    def reset(self):
        # reset simulation
        del self.platform

        # initialize simulation
        if self.initializer is None:
            # If no initializer is given (which will be the case during
            # training), we can initialize in any way desired.  Here, we
            # always initialize the cube in the center of the arena instead
            # of randomly, as this appears to help training.
            initial_robot_position = (
                TriFingerPlatform.spaces.robot_position.default
            )
            default_object_position = (
                TriFingerPlatform.spaces.object_position.default
            )
            default_object_orientation = (
                TriFingerPlatform.spaces.object_orientation.default
            )
            initial_object_pose = move_cube.Pose(
                position=default_object_position,
                orientation=default_object_orientation,
            )
            goal_object_pose = move_cube.sample_goal(difficulty=1)
        else:
            # If an initializer is given, i.e. during evaluation, we need to
            # initialize according to it to remain consistent with the standard
            # CubeEnv; otherwise the trajectories produced during evaluation
            # will be invalid.
            initial_robot_position = (
                TriFingerPlatform.spaces.robot_position.default
            )
            initial_object_pose = self.initializer.get_initial_state()
            goal_object_pose = self.initializer.get_goal()

        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=initial_robot_position,
            initial_object_pose=initial_object_pose,
        )

        self.goal = {
            "position": goal_object_pose.position,
            "orientation": goal_object_pose.orientation,
        }
        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=0.065,
                position=goal_object_pose.position,
                orientation=goal_object_pose.orientation,
                pybullet_client_id=self.platform.simfinger._pybullet_client_id,
            )

        self.info = dict()

        self.step_count = 0

        return self._create_observation(0)
    def get_goal(self):
        """Get a random goal depending on the difficulty."""
        ori_error = 100000  # some large value
        # rejection-sample until the orientation error lies between
        # pi/2 * 0.1 and the configured threshold
        while ori_error < np.pi / 2 * 0.1 or \
                ori_error > self.orientation_error_threshold:
            goal = move_cube.sample_goal(difficulty=4)
            # goal.position[:2] = self.init.position[:2]  # TEMP: align x and y
            ori_error = self._weighted_orientation_error(goal)
            # pos_error = self._weighted_position_error(goal)
        return goal
def compute_reward_adaptive_behind(logdir, simulation, TOTALTIMESTEPS):
    with open(os.path.join(logdir, "goal.json"), 'r') as f:
        goal = json.load(f)
    difficulty = goal['difficulty']
    goal_pose = move_cube.Pose(position=np.array(goal['goal']['position']),
                               orientation=np.array(
                                   goal['goal']['orientation']))

    indice = goal['reachstart']
    if (indice == -1):
        return -10000, False

    if (simulation == 0):
        min_length = TOTALTIMESTEPS
        log = robot_fingers.TriFingerPlatformLog(
            os.path.join(logdir, "robot_data.dat"),
            os.path.join(logdir, "camera_data.dat"))
        reward = 0.0
        count = 0
        for t in range(log.get_last_timeindex() - TOTALTIMESTEPS,
                       log.get_last_timeindex() + 1):
            count += 1
            camera_observation = log.get_camera_observation(t)
            reward -= evaluate_state(goal_pose,
                                     camera_observation.filtered_object_pose,
                                     difficulty)
            if (count == min_length):
                break
        if (count == min_length):
            return reward, True
        else:
            return -10000, False

    else:
        min_length = TOTALTIMESTEPS
        path = os.path.join(logdir, 'observations.pkl')
        with open(path, 'rb') as handle:
            observations = pkl.load(handle)
        reward = 0.0
        count = 0
        ex_state = move_cube.sample_goal(difficulty=-1)
        for i in range(
                len(observations) - TOTALTIMESTEPS - 1, len(observations)):
            count += 1
            ex_state.position = observations[i]["achieved_goal"]["position"]
            ex_state.orientation = observations[i]["achieved_goal"][
                "orientation"]
            reward -= evaluate_state(goal_pose, ex_state, difficulty)
            if (count == min_length):
                break
        if (count == min_length):
            return reward, True
        else:
            return -10000, False
    def get_goal(self):
        """Get a random goal depending on the difficulty."""
        if self.difficulty == 4:
            if self.goal is None:
                raise ValueError(
                    "Goal is unset. Call get_initial_state before "
                    "get_goal.")
            else:
                goal = self.goal
                self.goal = None
        else:
            goal = move_cube.sample_goal(difficulty=self.difficulty)
        return goal
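For difficulty 4, this initializer expects get_initial_state to have cached the goal first. A usage sketch of the required call order; the class name RandomInitializer is assumed, not taken from the original code.

initializer = RandomInitializer(difficulty=4)  # hypothetical constructor
init_pose = initializer.get_initial_state()    # samples and caches the goal
goal_pose = initializer.get_goal()             # consumes the cached goal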
def test_sample_goal_difficulty_4_no_initial_pose():
    for i in range(1000):
        goal = move_cube.sample_goal(difficulty=4)
        # verify the goal is valid (i.e. within the allowed ranges)
        try:
            move_cube.validate_goal(goal)
        except move_cube.InvalidGoalError as e:
            pytest.fail(msg="Invalid goal: {}  pose is {}, {}".format(
                e, e.position, e.orientation), )

        # verify the goal height is within the allowed range
        assert goal.position[2] <= move_cube._max_height
        assert goal.position[2] >= move_cube._min_height
def sample_goal() -> Trajectory:
    """Sample a goal trajectory with random steps.

    The number of goals in the trajectory depends on the episode length
    (see :data:`EPISODE_LENGTH`).  The first goal has a duration of
    :data:`FIRST_GOAL_DURATION` steps, the following ones a duration of
    :data:`GOAL_DURATION`.

    Returns:
        Trajectory as a list of tuples ``(t, position)`` where ``t`` marks the
        time step from which the goal is active.
    """
    trajectory = []

    first_goal = move_cube.sample_goal(GOAL_DIFFICULTY)
    trajectory.append((0, first_goal.position))
    t = FIRST_GOAL_DURATION

    while t < EPISODE_LENGTH:
        goal = move_cube.sample_goal(GOAL_DIFFICULTY)
        trajectory.append((t, goal.position))
        t += GOAL_DURATION

    return trajectory
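A short sketch showing how the returned trajectory can be queried: each entry is active from its time step until the next entry starts.

trajectory = sample_goal()

def active_goal_position(trajectory, t):
    # the last entry whose start step is <= t holds the currently active goal
    position = trajectory[0][1]
    for start, pos in trajectory:
        if start <= t:
            position = pos
    return position

print(active_goal_position(trajectory, t=5000))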
Example #14
def main():
    difficulty = int(sys.argv[1])
    if len(sys.argv) == 3:
        goal_pose_json = sys.argv[2]
    else:
        goal_pose = move_cube.sample_goal(difficulty)
        goal_pose_json = json.dumps({
            'position':
            goal_pose.position.tolist(),
            'orientation':
            goal_pose.orientation.tolist()
        })

    env = _init_env(goal_pose_json, difficulty)
    state_machine = TriFingerStateMachine(env)
    state_machine.run()
def test_sample_goal_difficulty_1_no_initial_pose():
    for i in range(1000):
        goal = move_cube.sample_goal(difficulty=1)
        # verify the goal is valid (i.e. within the allowed ranges)
        try:
            move_cube.validate_goal(goal)
        except move_cube.InvalidGoalError as e:
            pytest.fail(msg="Invalid goal: {}  pose is {}, {}".format(
                e, e.position, e.orientation), )

        # verify the goal satisfies conditions of difficulty 1
        # always on ground
        assert goal.position[2] == (move_cube._CUBE_WIDTH / 2)

        # no orientation
        np.testing.assert_array_equal(goal.orientation, [0, 0, 0, 1])
Example #16
    def test_sample_goal_difficulty_4(self):
        for i in range(1000):
            goal = move_cube.sample_goal(difficulty=4)
            # verify the goal is valid (i.e. within the allowed ranges)
            try:
                move_cube.validate_goal(goal)
            except move_cube.InvalidGoalError as e:
                self.fail(
                    msg="Invalid goal: {}  pose is {}, {}".format(
                        e, e.position, e.orientation
                    ),
                )

            # verify the goal height is within the allowed range
            self.assertLessEqual(goal.position[2], move_cube._max_height)
            self.assertGreaterEqual(goal.position[2], move_cube._min_height)
    def __init__(self, difficulty):
        """Initialize.
        Args:
            difficulty (int):  Difficulty level for sampling goals.
        """
        from code.align_rotation import project_cube_xy_plane
        self.difficulty = difficulty
        self._goal = move_cube.sample_goal(difficulty=self.difficulty)
        self._goal.position[:2] = sample_uniform_from_circle(0.05)  # centered
        self._goal.position[2] = min(self._goal.position[2], 0.2)

        projected_ori = project_cube_xy_plane(self._goal.orientation)
        projected_pos = self._goal.position * np.array([1, 1, 0]) + np.array(
            [0, 0, 0.015])
        self._init = move_cube.Pose.from_dict({
            'position': projected_pos,
            'orientation': projected_ori
        })
Example #18
    def set_goal(self, pos=None, orientation=None, log_timestep=True):
        # difficulty -1 ensures the sampled pose is on the ground
        ex_state = move_cube.sample_goal(difficulty=-1)
        if pos is not None:
            self.goal["position"] = ex_state.position
            self.goal["position"][:3] = pos[:3]
        if orientation is not None:
            self.goal["orientation"] = ex_state.orientation
            self.goal["orientation"] = orientation

        if (self.visualization):
            self.cube_viz.goal_viz = None

        if (log_timestep):
            if (self.simulation):
                self.change_goal_last = self.platform.get_current_timeindex()
            else:
                self.change_goal_last = self.real_platform.get_current_timeindex(
                )
Example #19
    def test_sample_goal_difficulty_1(self):
        for i in range(1000):
            goal = move_cube.sample_goal(difficulty=1)
            # verify the goal is valid (i.e. within the allowed ranges)
            try:
                move_cube.validate_goal(goal)
            except move_cube.InvalidGoalError as e:
                self.fail(
                    msg="Invalid goal: {}  pose is {}, {}".format(
                        e, e.position, e.orientation
                    ),
                )

            # verify the goal satisfies conditions of difficulty 1
            # always on ground
            self.assertEqual(goal.position[2], move_cube._CUBOID_HALF_SIZE[2])

            # no orientation
            np.testing.assert_array_equal(goal.orientation, [0, 0, 0, 1])
def main():
    parser = argparse.ArgumentParser('args')
    parser.add_argument('difficulty', type=int)
    parser.add_argument(
        'method',
        type=str,
        help="The method to run. One of 'mp-pg', 'cic-cg', 'cpc-tg'")
    parser.add_argument(
        '--residual',
        default=False,
        action='store_true',
        help=
        "add to use residual policies. Only compatible with difficulties 3 and 4."
    )
    parser.add_argument('--bo',
                        default=False,
                        action='store_true',
                        help="add to use BO optimized parameters.")
    args = parser.parse_args()
    goal_pose = move_cube.sample_goal(args.difficulty)
    goal_pose_dict = {
        'position': goal_pose.position.tolist(),
        'orientation': goal_pose.orientation.tolist()
    }

    env = _init_env(goal_pose_dict, args.difficulty)
    state_machine = create_state_machine(args.difficulty, args.method, env,
                                         args.residual, args.bo)

    #####################
    # Run state machine
    #####################
    obs = env.reset()
    state_machine.reset()

    done = False
    while not done:
        action = state_machine(obs)
        obs, _, done, _ = env.step(action)
Example #21
        goal_rot = Rotation.from_quat(self.obs['goal_object_orientation'])

        y_axis = [0, 1, 0]
        goal_direction_vector = goal_rot.apply(y_axis)
        actual_direction_vector = actual_rot.apply(y_axis)

        quat = get_rotation_between_vecs(actual_direction_vector,
                                         goal_direction_vector)
        return (Rotation.from_quat(quat) * actual_rot).as_quat()


if __name__ == '__main__':
    from code.make_env import make_training_env
    from trifinger_simulation.tasks import move_cube
    import dl
    seed = 0
    dl.rng.seed(seed)

    env = make_training_env(move_cube.sample_goal(-1).to_dict(), 4,
                            reward_fn='competition_reward',
                            termination_fn='position_close_to_goal',
                            initializer='centered_init',
                            action_space='torque_and_position',
                            sim=True,
                            visualization=True,
                            episode_length=10000,
                            rank=seed)

    state_machine = TriFingerStateMachine(env)
    state_machine.run()
def main(args):
    goal = move_cube.sample_goal(args.difficulty)
    goal_dict = {'position': goal.position, 'orientation': goal.orientation}
    eval_config = {
        'cube_goal_pose': goal_dict,
        'goal_difficulty': args.difficulty,
        'action_space':
        'torque' if args.policy == 'fc' else 'torque_and_position',
        'frameskip': 3,
        'reward_fn': 'competition_reward',
        'termination_fn': 'no_termination',
        'initializer': 'training_init',
        'sim': True,
        'monitor': False,
        'rank': args.seed,
        'training': True
    }
    env = make_training_env(visualization=True, **eval_config)

    acc_rewards = []
    wallclock_times = []
    aligning_steps = []
    env_steps = []
    avg_rewards = []
    for i in range(args.num_episodes):
        start = time.time()
        is_done = False
        observation = env.reset()
        accumulated_reward = 0
        aligning_steps.append(env.unwrapped.step_count)

        # hide some GUI panels
        p.configureDebugVisualizer(p.COV_ENABLE_GUI, 0)
        # change camera parameters (you can also rotate the camera with CTRL + drag)
        p.resetDebugVisualizerCamera(cameraDistance=0.6,
                                     cameraYaw=0,
                                     cameraPitch=-40,
                                     cameraTargetPosition=[0, 0, 0])

        step = 0
        while not is_done and step < 2000:
            step += 1
            action = env.action_space.sample()
            if isinstance(action, dict):
                action['torque'] *= 0
                action['position'] *= 0
            else:
                action *= 0
            observation, reward, is_done, info = env.step(action)
            accumulated_reward += reward
        acc_rewards.append(accumulated_reward)
        env_steps.append(env.unwrapped.step_count - aligning_steps[-1])
        avg_rewards.append(accumulated_reward / env_steps[-1])
        print("Episode {}\tAccumulated reward: {}".format(
            i, accumulated_reward))
        print("Episode {}\tAligning steps: {}".format(i, aligning_steps[-1]))
        print("Episode {}\tEnv steps: {}".format(i, env_steps[-1]))
        print("Episode {}\tAvg reward: {}".format(i, avg_rewards[-1]))
        end = time.time()
        print('Elapsed:', end - start)
        wallclock_times.append(end - start)

    env.close()

    def _print_stats(name, data):
        print('======================================')
        print(f'Mean   {name}\t{np.mean(data):.2f}')
        print(f'Max    {name}\t{max(data):.2f}')
        print(f'Min    {name}\t{min(data):.2f}')
        print(f'Median {name}\t{statistics.median(data):.2f}')

    print('Total elapsed time\t{:.2f}'.format(sum(wallclock_times)))
    print('Mean elapsed time\t{:.2f}'.format(
        sum(wallclock_times) / len(wallclock_times)))
    _print_stats('acc reward', acc_rewards)
    _print_stats('aligning steps', aligning_steps)
    _print_stats('step reward', avg_rewards)
        if one_pose:
            cube_path = [cube_path]
        tip_path = _get_tip_path(grasp.cube_tip_pos, cube_path)
        path = Path(cube_path, joint_conf_path, tip_path, grasp)
        path.set_min_height(self.env, path_min_height)
        return path


if __name__ == '__main__':
    from trifinger_simulation.tasks import move_cube
    from env.make_env import make_env

    reward_fn = 'competition_reward'
    termination_fn = 'position_close_to_goal'
    initializer = 'small_rot_init'
    env = make_env(move_cube.sample_goal(-1).to_dict(),
                   4,
                   reward_fn=reward_fn,
                   termination_fn=termination_fn,
                   initializer=initializer,
                   action_space='position',
                   sim=True,
                   visualization=True)

    for i in range(1):
        obs = env.reset()

        pos = obs["object_position"]
        quat = obs["object_orientation"]
        goal_pos = obs["goal_object_position"]
        goal_quat = obs["goal_object_orientation"]
Example #24
def make_training_env(nenv,
                      state_machine,
                      goal_difficulty,
                      action_space,
                      residual_state,
                      frameskip=1,
                      sim=True,
                      visualization=False,
                      reward_fn=None,
                      termination_fn=None,
                      initializer=None,
                      episode_length=100000,
                      monitor=False,
                      seed=0,
                      domain_randomization=False,
                      norm_observations=False,
                      max_torque=0.1):

    # dummy goal dict
    goal = move_cube.sample_goal(goal_difficulty)

    goal_dict = {'position': goal.position, 'orientation': goal.orientation}

    def _env(rank):
        def _thunk():
            env = make_env(
                cube_goal_pose=goal_dict,
                goal_difficulty=goal_difficulty,
                action_space=action_space,
                frameskip=frameskip,
                sim=sim,
                visualization=visualization,
                reward_fn=reward_fn,
                termination_fn=termination_fn,
                initializer=initializer,
                episode_length=10 *
                episode_length,  # make this long enough to ensure that we have "episode_length" steps in the residual_state.
                rank=rank,
                monitor=monitor,
            )
            if domain_randomization:
                env = RandomizedEnvWrapper(env)
            env = ResidualWrapper(env,
                                  state_machine,
                                  frameskip,
                                  max_torque,
                                  residual_state,
                                  max_length=episode_length)
            env = EpisodeInfo(env)
            env.seed(seed + rank)
            return env

        return _thunk

    if nenv > 1:
        env = SubprocVecEnv([_env(i) for i in range(nenv)], context='fork')
    else:
        env = DummyVecEnv([_env(0)])
        env.reward_range = env.envs[0].reward_range

    if norm_observations:
        env = VecObsNormWrapper(env)
        norm_params = torch.load(
            os.path.join(os.path.dirname(__file__), 'obs_norm.pt'))
        env.load_state_dict(norm_params)
        assert env.mean is not None
        assert env.std is not None
    return env
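A usage sketch for make_training_env; state_machine and residual_state stand in for objects defined elsewhere in the repository, and the remaining arguments keep their defaults.

# hypothetical invocation; the returned object is a (Subproc/Dummy)VecEnv
vec_env = make_training_env(
    nenv=4,
    state_machine=state_machine,      # placeholder: an existing state machine
    goal_difficulty=3,
    action_space='torque_and_position',
    residual_state=residual_state,    # placeholder: residual state specification
    visualization=False,
)
obs = vec_env.reset()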
    def get_initial_state(self):
        """Get a random initial object pose (always on the ground)."""
        init = move_cube.sample_goal(difficulty=-1)
        init.position[:2] = sample_uniform_from_circle(0.10)
        return init
    def get_goal(self):
        """Get the goal position based on the information from the file."""
        ex_state = move_cube.sample_goal(difficulty=self.difficulty)
        ex_state.position = self.init_information[7:10]
        ex_state.orientation = self.init_information[10:14]
        return ex_state
    def get_goal(self):
        """Get a random goal depending on the difficulty."""
        return move_cube.sample_goal(difficulty=self.difficulty)
    def get_initial_state(self):
        """Get a random initial object pose (always on the ground)."""
        return move_cube.sample_goal(difficulty=-1)