def test_dataset_shuffle(self):
    """Shuffle must reorder the dataset while keeping observation/action pairs aligned."""
    base_length = 10
    dataset = Dataset()
    # Build 3 runs of increasing length (10, 11, 12); every experience stores
    # its global append index in both observation and action so alignment can
    # be checked after shuffling. Only the last step of each run is `done`.
    for run in range(3):
        steps = base_length + run
        for step in range(steps):
            idx = len(dataset)
            is_last = step == steps - 1
            dataset.append(
                Experience(observation=torch.as_tensor((idx, )),
                           action=torch.as_tensor((idx, )),
                           reward=torch.as_tensor((0, )),
                           done=torch.as_tensor((1, ) if is_last else (0, ))))
    # Entries were appended in index order, so the first observation is 0.
    self.assertEqual(dataset.observations[0].item(), 0)
    dataset.shuffle()
    # Pairs must stay aligned after shuffling...
    self.assertEqual(dataset.observations[0], dataset.actions[0])
    # ...and the order must have changed.
    # NOTE(review): under a uniform shuffle this can spuriously fail with
    # probability ~1/33 (index 0 staying in place) — consider seeding.
    self.assertNotEqual(dataset.observations[0].item(), 0)
def _clean(self, filename_tag: str, runs: List[str]) -> None:
    """Load, filter, augment and re-shard the given runs into HDF5 files.

    Each run directory is loaded, trimmed and augmented according to
    ``self._config``; the resulting experiences are accumulated in memory and
    flushed to ``<output_path>/<filename_tag>_<index>.hdf5`` whenever the
    buffer exceeds ``max_hdf5_size``. A final partial shard is written at the
    end. Prints the total number of data points processed.

    :param filename_tag: prefix used for the generated HDF5 shard filenames.
    :param runs: list of run directory paths to process.
    """
    total_data_points = 0
    filename_index = 0
    hdf5_data = Dataset()
    for run in tqdm(runs):
        # Optionally keep only runs that produced a 'Success' marker file.
        if self._config.require_success and \
                not os.path.isfile(os.path.join(run, 'Success')):
            continue
        # Load the run's data at input size.
        run_dataset = self._data_loader.load_dataset_from_directories([run])
        # Skip runs too short to survive the initial trim.
        if len(run_dataset) <= self._config.remove_first_n_timestamps:
            continue
        # Remove the first N frames.
        # NOTE(review): relies on Dataset.pop() dropping from the *front*
        # of the run — confirm against the Dataset implementation.
        for _ in range(self._config.remove_first_n_timestamps):
            run_dataset.pop()
        # Subsample frames.
        run_dataset.subsample(self._config.data_loader_config.subsample)
        # Enforce the maximum run length, if configured (-1 disables).
        if self._config.max_run_length != -1:
            run_dataset.clip(self._config.max_run_length)
            assert len(run_dataset) <= self._config.max_run_length
        # Binary maps are only needed when some augmentation is enabled.
        binary_maps = parse_binary_maps(run_dataset.observations,
                                        invert=self._config.invert_binary_maps) \
            if self._config.augment_background_noise != 0 \
            or self._config.augment_background_textured != 0 else None
        # Replace targets with (optionally smoothed) binary maps.
        if self._config.binary_maps_as_target:
            run_dataset = set_binary_maps_as_target(
                run_dataset,
                invert=self._config.invert_binary_maps,
                binary_images=binary_maps,
                smoothen_labels=self._config.smoothen_labels)
        # Augment backgrounds with noise and/or textures.
        if self._config.augment_background_noise != 0:
            run_dataset = augment_background_noise(
                run_dataset,
                p=self._config.augment_background_noise,
                binary_images=binary_maps)
        if self._config.augment_background_textured != 0:
            run_dataset = augment_background_textured(
                run_dataset,
                texture_directory=self._config.texture_directory,
                p=self._config.augment_background_textured,
                p_empty=self._config.augment_empty_images,
                binary_images=binary_maps)
        # Accumulate and release the loader's internal copy.
        hdf5_data.extend(run_dataset)
        self._data_loader.empty_dataset()
        # Flush a full shard once the in-memory buffer grows too large.
        if hdf5_data.get_memory_size() > self._config.max_hdf5_size:
            total_data_points += self._write_shard(hdf5_data, filename_tag,
                                                  filename_index)
            filename_index += 1
            hdf5_data = Dataset()
    # Flush the final partial shard, if any data remains.
    if len(hdf5_data) != 0:
        total_data_points += self._write_shard(hdf5_data, filename_tag,
                                               filename_index)
    print(f'Total data points: {total_data_points}')

def _write_shard(self, dataset, filename_tag: str,
                 filename_index: int) -> int:
    """Optionally shuffle ``dataset`` and write it as one HDF5 shard.

    :returns: the number of data points written to the shard.
    """
    if self._config.shuffle:
        dataset.shuffle()
    create_hdf5_file_from_dataset(
        filename=os.path.join(self._config.output_path,
                              f'{filename_tag}_{filename_index}.hdf5'),
        dataset=dataset)
    return len(dataset)