    def test_buffer_performance(self):
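        # micro-benchmark: time bulk concatenation (add_buff), clearing, and shuffling of large image buffers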
        small_buffer = Buffer()
        small_buffer.add_many(obs=np.zeros([1000, 84, 84, 3], dtype=np.uint8))

        buffer = Buffer()

        t = Timing()

        with t.timeit('add'):
            for i in range(100):
                buffer.add_buff(small_buffer)

        huge_buffer = Buffer()
        with t.timeit('add_huge'):
            huge_buffer.add_buff(buffer)
            huge_buffer.add_buff(buffer)

        with t.timeit('single_add_small'):
            huge_buffer.add_buff(small_buffer)

        with t.timeit('clear_and_add'):
            huge_buffer.clear()
            huge_buffer.add_buff(buffer)
            huge_buffer.add_buff(buffer)

        with t.timeit('shuffle_and_add'):
            huge_buffer.clear()
            huge_buffer.add_buff(buffer)
            huge_buffer.add_buff(small_buffer)
            with t.timeit('shuffle'):
                huge_buffer.shuffle_data()

        log.debug('Timing: %s', t)

    def test_buffer_shuffle(self):
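        # shuffle_data() must apply the same permutation to every column, so a and b stay aligned after each shuffle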
        b = Buffer()
        b.add_many(a=np.arange(10000), b=np.arange(10000))

        for i in range(5):
            self.assertTrue(np.array_equal(b.a, b.b))
            b.shuffle_data()

    def test_buffer(self):
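        # end-to-end check of the Buffer API: add, add_many, trim_at, shuffle_data, add_buff, clear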
        buff = Buffer()

        buff.add(a=1, b='b', c=None, d=3.14)
        self.assertEqual(len(buff), 1)
        self.assertGreaterEqual(buff._capacity, 1)

        self.assertEqual(buff.a[0], 1)
        self.assertEqual(buff.b[0], 'b')

        buff.add_many(a=[2, 3], b=['c', 'd'], c=[None, list()], d=[2.71, 1.62])
        self.assertEqual(len(buff), 3)
        self.assertGreaterEqual(buff._capacity, 3)

        self.assertTrue(np.array_equal(buff.a, [1, 2, 3]))
        self.assertTrue(np.array_equal(buff.b, ['b', 'c', 'd']))

        buff.trim_at(5)
        self.assertTrue(np.array_equal(buff.a, [1, 2, 3]))

        buff.trim_at(2)
        self.assertTrue(np.array_equal(buff.a, [1, 2]))

        buff.add_many(a=[2, 3], b=['c', 'd'], c=[None, list()], d=[2.71, 1.62])

        buff.shuffle_data()
        buff.shuffle_data()
        buff.shuffle_data()

        buff.trim_at(1)
        self.assertIn(buff.a[0], [1, 2, 3])

        self.assertEqual(len(buff), 1)
        self.assertGreaterEqual(buff._capacity, 4)

        buff_temp = Buffer()
        buff_temp.add(a=10, b='e', c=dict(), d=9.81)

        buff.add_buff(buff_temp)

        self.assertEqual(len(buff), 2)

        buff.clear()
        self.assertEqual(len(buff), 0)


class DistanceBuffer:
    """Training data for the distance network (observation pairs and labels)."""
    def __init__(self, params):
        self.buffer = Buffer()
        self.close_buff, self.far_buff = Buffer(), Buffer()
        self.batch_num = 0

        self._vis_dirs = deque([])

        self.num_trajectories_to_process = 20
        self.complete_trajectories = deque([])

        self.params = params

    def extract_data(self, trajectories):
        timing = Timing()

        if len(self.buffer) > self.params.distance_target_buffer_size:
            # already enough data
            return

        close, far = self.params.close_threshold, self.params.far_threshold
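        # pairs fewer than `close` steps apart get label 0 (close); pairs at least `far` steps apart get label 1 (far)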

        num_close, num_far = 0, 0
        data_added = 0

        with timing.timeit('trajectories'):
            for trajectory in trajectories:
                check_tmax = isinstance(trajectory, TmaxTrajectory)

                obs = trajectory.obs

                indices = list(range(len(trajectory)))
                np.random.shuffle(indices)

                for i in indices:
                    if len(self.buffer) > self.params.distance_target_buffer_size // 2:
                        # to limit memory usage
                        if data_added > self.params.distance_target_buffer_size // 4:
                            break

                    if len(self.buffer) > self.params.distance_target_buffer_size:
                        break

                    close_i = min(i + close, len(trajectory))
                    far_i = min(i + far, len(trajectory))

                    # sample close observation pair
                    first_idx = i
                    second_idx = np.random.randint(i, close_i)

                    # in TMAX we do some additional checks
                    add_close = True
                    if check_tmax:
                        both_frames_random = (
                            trajectory.is_random[first_idx] and trajectory.is_random[second_idx]
                        )
                        first_exploration = trajectory.mode[first_idx] == TmaxMode.EXPLORATION
                        second_exploration = trajectory.mode[second_idx] == TmaxMode.EXPLORATION
                        add_close = both_frames_random or (first_exploration and second_exploration)

                    if add_close:
                        if self.params.distance_symmetric and random.random() < 0.5:
                            first_idx, second_idx = second_idx, first_idx

                        self.buffer.add(obs_first=obs[first_idx],
                                        obs_second=obs[second_idx],
                                        labels=0)
                        data_added += 1
                        num_close += 1

                    # sample far observation pair
                    if far_i < len(trajectory):
                        first_idx = i
                        second_idx = np.random.randint(far_i, len(trajectory))

                        add_far = True
                        if check_tmax:
                            both_frames_random = (
                                trajectory.is_random[first_idx] and trajectory.is_random[second_idx]
                            )
                            first_exploration = trajectory.mode[first_idx] == TmaxMode.EXPLORATION
                            second_exploration = trajectory.mode[second_idx] == TmaxMode.EXPLORATION
                            add_far = both_frames_random or (first_exploration and second_exploration)

                        if add_far:
                            if self.params.distance_symmetric and random.random() < 0.5:
                                first_idx, second_idx = second_idx, first_idx

                            self.buffer.add(obs_first=obs[first_idx],
                                            obs_second=obs[second_idx],
                                            labels=1)
                            data_added += 1
                            num_far += 1

        with timing.timeit('finalize'):
            self.buffer.trim_at(self.params.distance_target_buffer_size)

        if self.batch_num % 20 == 0:
            with timing.timeit('visualize'):
                self._visualize_data()

        self.batch_num += 1
        log.info('num close %d, num far %d, distance net timing %s', num_close,
                 num_far, timing)

    def has_enough_data(self):
        len_data = len(self.buffer)
        min_data = self.params.distance_target_buffer_size // 3
        if len_data < min_data:
            log.info('Need to gather more data to train distance net, %d/%d',
                     len_data, min_data)
            return False
        return True

    def shuffle_data(self):
        self.buffer.shuffle_data()

    def reset(self):
        self.buffer.clear()

    def _visualize_data(self):
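        # save a handful of close/far observation pairs as PNGs for manual inspection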
        min_vis = 10
        if len(self.buffer) < min_vis:
            return

        close_examples, far_examples = [], []
        labels = self.buffer.labels
        obs_first, obs_second = self.buffer.obs_first, self.buffer.obs_second

        for i in range(len(labels)):
            if labels[i] == 0 and len(close_examples) < min_vis:
                close_examples.append((obs_first[i], obs_second[i]))
            elif labels[i] == 1 and len(far_examples) < min_vis:
                far_examples.append((obs_first[i], obs_second[i]))

        if len(close_examples) < min_vis or len(far_examples) < min_vis:
            return

        img_folder = vis_dir(self.params.experiment_dir())
        img_folder = ensure_dir_exists(join(img_folder, 'dist'))
        img_folder = ensure_dir_exists(join(img_folder, f'dist_{time.time()}'))

        def save_images(examples, close_or_far):
            for visualize_i in range(len(examples)):
                img_first_name = join(
                    img_folder, f'{close_or_far}_{visualize_i}_first.png')
                img_second_name = join(
                    img_folder, f'{close_or_far}_{visualize_i}_second.png')
                cv2.imwrite(img_first_name, examples[visualize_i][0])
                cv2.imwrite(img_second_name, examples[visualize_i][1])

        save_images(close_examples, 'close')
        save_images(far_examples, 'far')

        self._vis_dirs.append(img_folder)
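        # keep only the most recent visualization dirs, deleting the oldest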
        while len(self._vis_dirs) > 20:
            dir_name = self._vis_dirs.popleft()
            if os.path.isdir(dir_name):
                shutil.rmtree(dir_name)


class LocomotionBuffer:
    """
    Training data for the hindsight experience replay (for locomotion policy).
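
    Rough usage sketch (an assumption about the surrounding training loop, not the
    project's verbatim code):

        loco_buffer = LocomotionBuffer(params)
        loco_buffer.extract_data(trajectories)
        if loco_buffer.has_enough_data():
            loco_buffer.shuffle_data()
            # train on obs_prev / obs_curr / obs_goal / actions / diff, then loco_buffer.reset()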
    """
    def __init__(self, params):
        self.params = params
        self.batch_num = 0
        self.buffer = Buffer()
        self._vis_dirs = deque([])

    def extract_data(self, trajectories):
        timing = Timing()

        if len(trajectories) <= 0:
            return

        if len(self.buffer) > self.params.locomotion_experience_replay_buffer:
            return

        with timing.timeit('trajectories'):
            max_trajectory = self.params.locomotion_max_trajectory

            data_so_far = 0

            trajectories = [t for t in trajectories if len(t) > max_trajectory]

            # train only on random frames
            random_frames = [
                [i for i, is_random in enumerate(t.is_random) if is_random]
                for t in trajectories
            ]

            total_experience = sum(len(frames) for frames in random_frames)
            max_total_experience = 0.75 * total_experience  # max fraction of experience to use
            max_num_segments = int(max_total_experience / max_trajectory)

            log.info(
                '%d total experience from %d trajectories (%d segments)',
                max_total_experience,
                len(trajectories),
                max_num_segments,
            )

            attempts = 0

            while data_so_far < max_total_experience:
                attempts += 1
                if attempts > 100 * max_total_experience:  # just in case
                    break

                trajectory_idx = random.choice(range(len(trajectories)))
                trajectory = trajectories[trajectory_idx]
                if len(random_frames[trajectory_idx]) <= max_trajectory:
                    continue

                first_random_frame = random_frames[trajectory_idx][0]
                if len(trajectory) - first_random_frame < max_trajectory:
                    continue

                # sample random interval in trajectory, treat the last frame as "imaginary" goal, use actions as
                # ground truth
                start_idx = random.randint(first_random_frame, len(trajectory) - 2)
                goal_idx = min(start_idx + max_trajectory, len(trajectory) - 1)
                assert start_idx < goal_idx

                if not trajectory.is_random[start_idx]:
                    continue
                if not trajectory.is_random[goal_idx]:
                    continue

                for i in range(start_idx, goal_idx):
                    if not trajectory.is_random[i]:
                        continue

                    assert 0 < goal_idx - i <= max_trajectory
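                    # obs_goal is the "imaginary" goal (the last frame of the sampled segment);
                    # diff is the number of steps remaining from obs_curr to that goal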
                    self.buffer.add(
                        obs_prev=trajectory.obs[max(0, i - 1)],
                        obs_curr=trajectory.obs[i],
                        obs_goal=trajectory.obs[goal_idx],
                        actions=trajectory.actions[i],
                        mode=trajectory.mode[i],
                        diff=goal_idx - i,
                    )
                    data_so_far += 1

                if len(self.buffer) > self.params.locomotion_experience_replay_buffer:
                    break

        # if self.batch_num % 10 == 0:
        #     with timing.timeit('vis'):
        #         self._visualize_data(training_data)

        # with timing.timeit('finalize'):
        #     for traj_buffer in training_data:
        #         self.buffer.add_buff(traj_buffer)

        # self.shuffle_data()
        # self.buffer.trim_at(self.params.locomotion_experience_replay_buffer)

        self.batch_num += 1
        log.info('Locomotion, buffer size: %d, timing: %s', len(self.buffer),
                 timing)

    def has_enough_data(self):
        len_data = len(self.buffer)
        min_data = self.params.locomotion_experience_replay_buffer // 3
        if len_data < min_data:
            log.info('Need to gather more data to train locomotion net, %d/%d',
                     len_data, min_data)
            return False
        return True

    def shuffle_data(self):
        permutation = self.buffer.shuffle_data(return_permutation=True)
        return permutation

    def reset(self):
        self.buffer.clear()