# Assumed imports for these excerpts; project-level names (Dataset, Experience,
# parse_binary_maps, set_binary_maps_as_target, augment_background_noise,
# augment_background_textured, create_hdf5_file_from_dataset) come from the
# repository's own modules.
import os
from typing import List

import torch
from tqdm import tqdm


def test_dataset_shuffle(self):
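    """Shuffling a Dataset should permute experiences while keeping the
    observation/action/reward/done fields aligned."""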
    run_length = 10
    dataset = Dataset()
    # Build three runs of increasing length; observation and action both
    # store the experience's insertion index so field alignment can be checked.
    for run_index in range(3):
        for step_index in range(run_length + run_index):
            last_step = step_index == run_length + run_index - 1
            dataset.append(
                Experience(observation=torch.as_tensor((len(dataset), )),
                           action=torch.as_tensor((len(dataset), )),
                           reward=torch.as_tensor((0, )),
                           done=torch.as_tensor((1, ) if last_step
                                                else (0, ))))
    self.assertEqual(dataset.observations[0].item(), 0)
    dataset.shuffle()
    # Fields must be permuted together, so matching indices stay equal...
    self.assertEqual(dataset.observations[0], dataset.actions[0])
    # ...and element 0 should have moved (this can fail with probability
    # 1 / len(dataset) if the shuffle happens to leave index 0 in place).
    self.assertNotEqual(dataset.observations[0].item(), 0)

    def _clean(self, filename_tag: str, runs: List[str]) -> None:
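        """Clean raw runs and store them as one or more HDF5 shards.

        Each run is optionally filtered on success, trimmed of its first
        timestamps, subsampled, clipped to a maximum length, and optionally
        augmented before being accumulated and written out to
        '{output_path}/{filename_tag}_{index}.hdf5' shards.
        """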
        total_data_points = 0
        filename_index = 0
        hdf5_data = Dataset()
        for run in tqdm(runs):
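            # optionally keep only runs that contain a 'Success' marker file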
            if self._config.require_success:
                if not os.path.isfile(os.path.join(run, 'Success')):
                    continue
            # load the run into a dataset at the configured input size
            run_dataset = self._data_loader.load_dataset_from_directories(
                [run])
            if len(run_dataset) <= self._config.remove_first_n_timestamps:
                continue
            # drop the first N timestamps (assumes Dataset.pop removes
            # experiences from the front of the run)
            for _ in range(self._config.remove_first_n_timestamps):
                run_dataset.pop()
            # subsample
            run_dataset.subsample(self._config.data_loader_config.subsample)
            # enforce max run length
            if self._config.max_run_length != -1:
                run_dataset.clip(self._config.max_run_length)
                assert len(run_dataset) <= self._config.max_run_length
            # compute binary maps once; they are reused for the binary-map
            # target conversion and the background augmentations below
            binary_maps = None
            if (self._config.augment_background_noise != 0
                    or self._config.augment_background_textured != 0):
                binary_maps = parse_binary_maps(
                    run_dataset.observations,
                    invert=self._config.invert_binary_maps)
            if self._config.binary_maps_as_target:
                run_dataset = set_binary_maps_as_target(
                    run_dataset,
                    invert=self._config.invert_binary_maps,
                    binary_images=binary_maps,
                    smoothen_labels=self._config.smoothen_labels)

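            # apply background augmentations, reusing the precomputed binary maps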
            if self._config.augment_background_noise != 0:
                run_dataset = augment_background_noise(
                    run_dataset,
                    p=self._config.augment_background_noise,
                    binary_images=binary_maps)
            if self._config.augment_background_textured != 0:
                run_dataset = augment_background_textured(
                    run_dataset,
                    texture_directory=self._config.texture_directory,
                    p=self._config.augment_background_textured,
                    p_empty=self._config.augment_empty_images,
                    binary_images=binary_maps)
            # accumulate cleaned runs; write an HDF5 shard once the maximum
            # dataset size is reached
            hdf5_data.extend(run_dataset)
            self._data_loader.empty_dataset()
            if hdf5_data.get_memory_size() > self._config.max_hdf5_size:
                if self._config.shuffle:
                    hdf5_data.shuffle()
                create_hdf5_file_from_dataset(
                    filename=os.path.join(
                        self._config.output_path,
                        f'{filename_tag}_{filename_index}.hdf5'),
                    dataset=hdf5_data)
                filename_index += 1
                total_data_points += len(hdf5_data)
                hdf5_data = Dataset()
        # flush any remaining data into a final shard
        if len(hdf5_data) != 0:
            if self._config.shuffle:
                hdf5_data.shuffle()
            create_hdf5_file_from_dataset(
                filename=os.path.join(
                    self._config.output_path,
                    f'{filename_tag}_{filename_index}.hdf5'),
                dataset=hdf5_data)
            total_data_points += len(hdf5_data)
        print(f'Total data points: {total_data_points}')
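
# Usage sketch (hypothetical): the class name, config object, and directory
# layout below are illustrative assumptions; only _clean's signature comes
# from this file.
#
#     cleaner = DataCleaner(config=cleaner_config)
#     runs = sorted(glob.glob(os.path.join(raw_data_directory, 'run_*')))
#     cleaner._clean(filename_tag='train', runs=runs)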