def test_create_dataset_and_clean(self):
    """Generate raw data, run the cleaner, and verify the train/validation ratio.

    With a 0.7 split the resulting ratio of train frames over all frames
    should land in the (0.6, 0.8) band.
    """
    info = generate_random_dataset_in_raw_data(output_dir=self.output_dir,
                                               num_runs=20,
                                               input_size=(100, 100, 3),
                                               output_size=(1, ),
                                               continuous=True,
                                               store_hdf5=False)
    cleaner_config = {
        'output_path': self.output_dir,
        'data_loader_config': {
            'data_directories': info['episode_directories'],
            'input_size': (150, 150, 1)
        },
        'training_validation_split': 0.7,
    }
    DataCleaner(config=DataCleaningConfig().create(
        config_dict=cleaner_config)).clean()

    train_loader = DataLoader(config=DataLoaderConfig().create(
        config_dict={
            'output_path': self.output_dir,
            'hdf5_files': glob(f'{self.output_dir}/train*.hdf5')
        }))
    train_loader.load_dataset()
    validation_loader = DataLoader(config=DataLoaderConfig().create(
        config_dict={
            'output_path': self.output_dir,
            'hdf5_files': glob(f'{self.output_dir}/validation*.hdf5')
        }))
    validation_loader.load_dataset()

    num_train = len(train_loader.get_dataset())
    num_validation = len(validation_loader.get_dataset())
    ratio = num_train / float(num_train + num_validation)
    self.assertTrue(0.6 < ratio < 0.8)
def test_split_hdf5_chunks(self):
    """Check that the cleaner splits hdf5 output into chunks below the size limit."""
    info = generate_random_dataset_in_raw_data(output_dir=self.output_dir,
                                               num_runs=20,
                                               input_size=(100, 100, 3),
                                               output_size=(1, ),
                                               continuous=True,
                                               store_hdf5=False)
    data_cleaner = DataCleaner(config=DataCleaningConfig().create(
        config_dict={
            'output_path': self.output_dir,
            'data_loader_config': {
                'data_directories': info['episode_directories'],
            },
            'training_validation_split': 1.0,
            'max_hdf5_size': 5 * 10**6
        }))
    data_cleaner.clean()
    # every resulting chunk must stay (with some margin) below the configured size
    for chunk_file in glob(f'{self.output_dir}/train*.hdf5'):
        chunk_loader = DataLoader(config=DataLoaderConfig().create(
            config_dict={
                'output_path': self.output_dir,
                'hdf5_files': [chunk_file]
            }))
        chunk_loader.load_dataset()
        self.assertTrue(
            chunk_loader.get_dataset().get_memory_size() < 6 * 10**6)
def test_create_hdf5_files_subsampled_in_time(self):
    """Store a subsampled hdf5 and verify its length against the episode lengths.

    With subsample s, an episode of length el keeps ceil((el - 1) / s) + 1 frames.
    """
    num_runs = 10
    subsample = 3
    saver_config = DataSaverConfig().create(
        config_dict={
            'output_path': self.output_dir,
            'training_validation_split': 1.0,
            'store_hdf5': True,
            'subsample_hdf5': subsample,
            'separate_raw_data_runs': True
        })
    self.data_saver = DataSaver(config=saver_config)
    info = generate_dummy_dataset(self.data_saver, num_runs=num_runs)
    self.data_saver.create_train_validation_hdf5_files()

    training_data_loader = DataLoader(config=DataLoaderConfig().create(
        config_dict={
            'output_path': self.output_dir,
            'hdf5_files': [os.path.join(self.output_dir, 'train.hdf5')]
        }))
    training_data_loader.load_dataset()
    expected_length = sum(
        np.ceil((length - 1) / subsample) + 1
        for length in info['episode_lengths'])
    self.assertEqual(len(training_data_loader.get_dataset()),
                     expected_length)
def create_train_validation_hdf5_files(
        self,
        runs: List[str] = None,
        input_size: List[int] = None) -> None:
    """Split the stored runs into train/validation sets and save each as an hdf5 file.

    :param runs: explicit list of run directories; defaults to all stored runs.
    :param input_size: optional input size forwarded to the data loader config.
    :return: None; writes 'train.hdf5' and 'validation.hdf5' in the output path.
    """
    all_runs = runs if runs is not None else self._get_runs()
    # the first fraction of runs goes to training, the remainder to validation
    number_of_training_runs = int(self._config.training_validation_split *
                                  len(all_runs))
    train_runs = all_runs[0:number_of_training_runs]
    validation_runs = all_runs[number_of_training_runs:]
    # fix: loop variable renamed from `runs` to avoid shadowing the parameter
    for file_name, selected_runs in zip(['train', 'validation'],
                                        [train_runs, validation_runs]):
        config = DataLoaderConfig().create(
            config_dict={
                'data_directories': selected_runs,
                'output_path': self._config.output_path,
                'subsample': self._config.subsample_hdf5,
                'input_size': input_size
            })
        data_loader = DataLoader(config=config)
        data_loader.load_dataset()
        create_hdf5_file_from_dataset(filename=os.path.join(
            self._config.output_path, file_name + '.hdf5'),
                                      dataset=data_loader.get_dataset())
        cprint(f'created {file_name}.hdf5', self._logger)
def test_clip_first_x_frames(self):
    """Removing the first 5 timestamps plus subsample 2 shrinks episodes accordingly.

    Each episode of length el keeps int((el - 5) / 2) + 1 frames.
    """
    info = generate_random_dataset_in_raw_data(output_dir=self.output_dir,
                                               num_runs=20,
                                               input_size=(100, 100, 3),
                                               output_size=(1, ),
                                               continuous=True,
                                               store_hdf5=False)
    cleaner = DataCleaner(config=DataCleaningConfig().create(
        config_dict={
            'output_path': self.output_dir,
            'data_loader_config': {
                'data_directories': info['episode_directories'],
                'subsample': 2
            },
            'training_validation_split': 1.0,
            'remove_first_n_timestamps': 5,
        }))
    cleaner.clean()
    loader = DataLoader(config=DataLoaderConfig().create(
        config_dict={
            'output_path': self.output_dir,
            'hdf5_files': glob(f'{self.output_dir}/train*.hdf5')
        }))
    loader.load_dataset()
    expected_length = sum(
        int((length - 5) / 2) + 1 for length in info['episode_lengths'])
    self.assertEqual(expected_length, len(loader.get_dataset()))
def test_data_loader_from_raw_path_dirs(self):
    """Loading from a raw top-level path should resolve to existing run directories."""
    self.info = generate_dummy_dataset(self.data_saver,
                                       num_runs=20,
                                       input_size=(100, 100, 3),
                                       output_size=(3, ),
                                       continuous=False)
    config_dict = {
        'data_directories': [self.output_dir],
        'output_path': self.output_dir,
    }
    loader = DataLoader(
        config=DataLoaderConfig().create(config_dict=config_dict))
    loader.load_dataset()
    # a fresh config from the same dict must point at directories on disk
    fresh_config = DataLoaderConfig().create(config_dict=config_dict)
    for directory in fresh_config.data_directories:
        self.assertTrue(os.path.isdir(directory))
def test_data_batch(self):
    """The first batch produced by the data loader has the configured batch size."""
    self.info = generate_dummy_dataset(self.data_saver,
                                       num_runs=20,
                                       input_size=(100, 100, 3),
                                       output_size=(3, ),
                                       continuous=False)
    loader_config = {
        'data_directories': self.info['episode_directories'],
        'output_path': self.output_dir,
        'random_seed': 1,
        'batch_size': 3
    }
    loader = DataLoader(
        config=DataLoaderConfig().create(config_dict=loader_config))
    loader.load_dataset()
    # only the first batch is checked
    for batch in loader.get_data_batch():
        self.assertEqual(len(batch), loader_config['batch_size'])
        break
def test_data_loading(self):
    """Load a dummy dataset and check that every dataset field is non-empty.

    Fix: attribute access via `eval(f'...')` replaced with `getattr`, which
    is safer, faster, and does not evaluate arbitrary strings.
    """
    self.info = generate_dummy_dataset(self.data_saver,
                                       num_runs=20,
                                       input_size=(100, 100, 3),
                                       output_size=(3, ),
                                       continuous=False)
    config_dict = {
        'data_directories': self.info['episode_directories'],
        'output_path': self.output_dir,
    }
    config = DataLoaderConfig().create(config_dict=config_dict)
    data_loader = DataLoader(config=config)
    data_loader.load_dataset()
    # assert nothing is empty
    for k in ['observations', 'actions', 'rewards', 'done']:
        data = getattr(data_loader.get_dataset(), k)
        self.assertTrue(len(data) > 0)
        # each element is assumed tensor-like with a .shape — TODO confirm
        self.assertTrue(sum(data[0].shape) > 0)
def test_data_subsample(self):
    """Subsampled dataset length matches ceil((el - 1) / s) + 1 per episode.

    Bug fix: the original called `assertTrue(a, b)`, whose second argument
    is only the failure *message*, so the equality was never actually
    checked. Replaced with `assertEqual` (mirroring the equivalent check in
    test_create_hdf5_files_subsampled_in_time).
    """
    self.info = generate_dummy_dataset(self.data_saver,
                                       num_runs=20,
                                       input_size=(100, 100, 3),
                                       output_size=(3, ),
                                       continuous=False)
    subsample = 4
    config_dict = {
        'data_directories': self.info['episode_directories'],
        'output_path': self.output_dir,
        'random_seed': 1,
        'batch_size': 3,
        'subsample': subsample
    }
    data_loader = DataLoader(config=DataLoaderConfig().create(
        config_dict=config_dict))
    data_loader.load_dataset()
    self.assertEqual(
        sum(
            np.ceil((el - 1) / subsample) + 1
            for el in self.info['episode_lengths']),
        len(data_loader.get_dataset()))
def __init__(self, config: DataSaverConfig):
    """Initialise the data saver: logger, saving directory and frame counter.

    When data is stored on the file system (not RAM-only) and a maximum
    buffer size is configured, the runs already on disk are loaded once to
    initialise the frame counter used for replay-buffer size bookkeeping.
    """
    self._config = config
    self._logger = get_logger(
        name=get_filename_without_extension(__file__),
        output_path=self._config.output_path,
        quiet=False)
    cprint(f'initiate', self._logger)
    # a relative saving directory is interpreted relative to $HOME
    if not self._config.saving_directory.startswith('/'):
        self._config.saving_directory = os.path.join(
            os.environ['HOME'], self._config.saving_directory)
    if self._config.store_on_ram_only:
        self._dataset = Dataset(max_size=self._config.max_size)
    # used to keep track of replay buffer size on file system
    if not self._config.store_on_ram_only \
            and os.path.isdir(os.path.dirname(self._config.saving_directory)) \
            and self._config.max_size != -1:
        # count frames already stored in sibling run directories of the
        # saving directory's parent (sorted for deterministic ordering)
        data_loader = DataLoader(config=DataLoaderConfig().create(
            config_dict={
                'data_directories': [
                    os.path.join(
                        os.path.dirname(self._config.saving_directory),
                        run) for run in sorted(
                            os.listdir(
                                os.path.dirname(
                                    self._config.saving_directory)))
                ],
                'output_path': self._config.output_path,
                'store': False  # don't store config
            }))
        data_loader.load_dataset()
        self._frame_counter = len(data_loader.get_dataset())
    else:
        self._frame_counter = 0
def test_create_train_validation_hdf5_files(self):
    """Train/validation hdf5 files contain exactly the split episode lengths."""
    num_runs = 10
    split = 0.7
    saver_config = DataSaverConfig().create(
        config_dict={
            'output_path': self.output_dir,
            'training_validation_split': split,
            'store_hdf5': True,
            'separate_raw_data_runs': True
        })
    self.data_saver = DataSaver(config=saver_config)
    info = generate_dummy_dataset(self.data_saver, num_runs=num_runs)
    self.data_saver.create_train_validation_hdf5_files()

    def load_hdf5(file_name):
        # helper: load one hdf5 file and return its dataset
        loader = DataLoader(config=DataLoaderConfig().create(
            config_dict={
                'output_path': self.output_dir,
                'hdf5_files': [os.path.join(self.output_dir, file_name)]
            }))
        loader.load_dataset()
        return loader.get_dataset()

    training_data = load_hdf5('train.hdf5')
    validation_data = load_hdf5('validation.hdf5')
    cut = int(split * num_runs)
    self.assertEqual(len(training_data),
                     sum(info['episode_lengths'][:cut]))
    self.assertEqual(len(validation_data),
                     sum(info['episode_lengths'][cut:]))
def test_line_world_augmentation(self):
    """Run the cleaner with the full augmentation pipeline on a fixed line image."""
    # white image with a dark vertical line used as fixed input for every frame
    line_image = np.ones((100, 100, 3))
    line_image[:, 40:43, 0:2] = 0
    info = generate_random_dataset_in_raw_data(
        output_dir=self.output_dir,
        num_runs=20,
        input_size=(100, 100, 3),
        output_size=(1, ),
        continuous=True,
        fixed_input_value=line_image,
        store_hdf5=False)
    cleaner_config = {
        'output_path': self.output_dir,
        'data_loader_config': {
            'data_directories': info['episode_directories'],
            'input_size': (1, 64, 64)
        },
        'training_validation_split': 0.7,
        'remove_first_n_timestamps': 5,
        'binary_maps_as_target': True,
        'invert_binary_maps': True,
        'augment_background_noise': 0.1,
        'augment_background_textured': 0.9,
        'texture_directory': 'textured_dataset',
        'augment_empty_images': 0.1
    }
    DataCleaner(config=DataCleaningConfig().create(
        config_dict=cleaner_config)).clean()
    loader = DataLoader(config=DataLoaderConfig().create(
        config_dict={
            'output_path': self.output_dir,
            'hdf5_files': glob(f'{self.output_dir}/train*.hdf5')
        }))
    loader.load_dataset()
    # visual smoke test of the augmented data
    loader.get_dataset().plot()
def test_generate_random_dataset_in_raw_data(self):
    """Each generated run should terminate exactly once (one non-zero done flag per run)."""
    num_runs = 10
    # build the network whose input/output sizes drive the dummy data shapes
    network = eval(architecture_base_config['architecture']).Net(
        config=ArchitectureConfig().create(
            config_dict=architecture_base_config))
    info = generate_random_dataset_in_raw_data(
        output_dir=self.output_dir,
        num_runs=num_runs,
        input_size=network.input_size,
        output_size=network.output_size,
        continuous=not network.discrete,
    )
    loader = DataLoader(config=DataLoaderConfig().create(
        config_dict={
            'output_path': self.output_dir,
            'data_directories': info['episode_directories'],
        }))
    loader.load_dataset()
    termination_count = sum(
        flag != 0 for flag in loader.get_dataset().done)
    self.assertEqual(termination_count, num_runs)
def test_generate_random_dataset_with_train_validation_hdf5(self):
    """The generated train.hdf5 contains at least one terminated episode."""
    num_runs = 10
    # build the network whose input/output sizes drive the dummy data shapes
    network = eval(architecture_base_config['architecture']).Net(
        config=ArchitectureConfig().create(
            config_dict=architecture_base_config))
    generate_random_dataset_in_raw_data(
        output_dir=self.output_dir,
        num_runs=num_runs,
        input_size=network.input_size,
        output_size=network.output_size,
        continuous=not network.discrete,
        store_hdf5=True)
    loader = DataLoader(config=DataLoaderConfig().create(
        config_dict={
            'output_path': self.output_dir,
            'hdf5_files': [os.path.join(self.output_dir, 'train.hdf5')]
        }))
    loader.load_dataset()
    termination_count = sum(
        flag != 0 for flag in loader.get_dataset().done)
    self.assertNotEqual(termination_count, 0)
def test_sample_batch(self):
    """Shuffled sampling is reproducible for equal seeds and differs across seeds."""
    self.info = generate_dummy_dataset(self.data_saver,
                                       num_runs=20,
                                       input_size=(100, 100, 3),
                                       output_size=(3, ),
                                       continuous=False)
    max_num_batches = 2
    config_dict = {
        'data_directories': self.info['episode_directories'],
        'output_path': self.output_dir,
        'random_seed': 1,
        'batch_size': 3
    }

    def first_sampled_batch(seed):
        # helper: build a loader with the given seed, return a copy of
        # the first shuffled batch it yields
        config_dict['random_seed'] = seed
        loader = DataLoader(config=DataLoaderConfig().create(
            config_dict=config_dict))
        loader.load_dataset()
        for sampled in loader.sample_shuffled_batch(
                max_number_of_batches=max_num_batches):
            return deepcopy(sampled)
        return []

    # seed 1: check batch sizes and that exactly max_num_batches are yielded
    data_loader = DataLoader(config=DataLoaderConfig().create(
        config_dict=config_dict))
    data_loader.load_dataset()
    first_batch = []
    index = 0
    for index, batch in enumerate(
            data_loader.sample_shuffled_batch(
                max_number_of_batches=max_num_batches)):
        if index == 0:
            first_batch = deepcopy(batch)
        self.assertEqual(len(batch), config_dict['batch_size'])
    self.assertEqual(index, max_num_batches - 1)

    # a different seed should sample a different first batch
    second_batch = first_sampled_batch(2)
    self.assertNotEqual(np.sum(np.asarray(first_batch.observations[0])),
                        np.sum(np.asarray(second_batch.observations[0])))

    # the same seed should reproduce the first batch exactly
    third_batch = first_sampled_batch(1)
    self.assertEqual(np.sum(np.asarray(first_batch.observations[0])),
                     np.sum(np.asarray(third_batch.observations[0])))
class DomainAdaptationTrainer(Trainer):
    """Trainer that adds a feature-level domain-adaptation loss to the task loss.

    The total loss is (1 - epsilon) * task_loss + epsilon * domain_loss,
    where the domain loss compares network features of a source batch
    against features of a batch from a separate target data loader.
    """

    def __init__(self, config: TrainerConfig, network: BaseNet, quiet: bool = False):
        # parent is always constructed quiet; logging is set up below when not quiet
        super().__init__(config, network, quiet=True)
        # epsilon weighs domain loss vs task loss; 0.2 when left at "default"
        self._config.epsilon = 0.2 if self._config.epsilon == "default" else self._config.epsilon
        self.target_data_loader = DataLoader(config=self._config.target_data_loader_config)
        self.target_data_loader.load_dataset()
        # NOTE(review): criterion name from config is instantiated via eval —
        # assumes config values are trusted; MMDLossZhao is the default
        self._domain_adaptation_criterion = eval(f'{self._config.domain_adaptation_criterion}()') \
            if not self._config.domain_adaptation_criterion == 'default' else MMDLossZhao()
        self._domain_adaptation_criterion.to(self._device)
        if not quiet:
            self._optimizer = eval(f'torch.optim.{self._config.optimizer}')(params=self._net.parameters(),
                                                                            lr=self._config.learning_rate,
                                                                            weight_decay=self._config.weight_decay)
            # linear learning-rate decay over the configured number of epochs
            lambda_function = lambda f: 1 - f / self._config.scheduler_config.number_of_epochs
            self._scheduler = torch.optim.lr_scheduler.LambdaLR(self._optimizer, lr_lambda=lambda_function) \
                if self._config.scheduler_config is not None else None
            self._logger = get_logger(name=get_filename_without_extension(__file__),
                                      output_path=config.output_path,
                                      quiet=False)
            cprint(f'Started.', self._logger)

    def train(self, epoch: int = -1, writer=None) -> str:
        """Run one epoch over paired source/target batches; return a summary string.

        :param epoch: epoch index, used for periodic tensorboard image output.
        :param writer: optional tensorboard-style writer for distributions/images.
        :return: formatted string with task and domain error statistics.
        """
        self.put_model_on_device()
        total_error = []
        task_error = []
        domain_error = []
        # zip truncates to the shorter of the source and target loaders
        for source_batch, target_batch in zip(self.data_loader.sample_shuffled_batch(),
                                              self.target_data_loader.sample_shuffled_batch()):
            self._optimizer.zero_grad()
            targets = data_to_tensor(source_batch.actions).type(self._net.dtype).to(self._device)
            # task loss
            predictions = self._net.forward(source_batch.observations, train=True)
            task_loss = (1 - self._config.epsilon) * self._criterion(predictions, targets).mean()
            # add domain adaptation loss
            domain_loss = self._config.epsilon * self._domain_adaptation_criterion(
                self._net.get_features(source_batch.observations, train=True),
                self._net.get_features(target_batch.observations, train=True))
            loss = task_loss + domain_loss
            loss.backward()
            # -1 disables gradient clipping
            if self._config.gradient_clip_norm != -1:
                nn.utils.clip_grad_norm_(self._net.parameters(),
                                         self._config.gradient_clip_norm)
            self._optimizer.step()
            self._net.global_step += 1
            task_error.append(task_loss.cpu().detach())
            domain_error.append(domain_loss.cpu().detach())
            total_error.append(loss.cpu().detach())
        self.put_model_back_to_original_device()
        if self._scheduler is not None:
            self._scheduler.step()
        task_error_distribution = Distribution(task_error)
        domain_error_distribution = Distribution(domain_error)
        total_error_distribution = Distribution(total_error)
        if writer is not None:
            writer.set_step(self._net.global_step)
            writer.write_distribution(task_error_distribution, 'training/task_error')
            writer.write_distribution(domain_error_distribution, 'training/domain_error')
            writer.write_distribution(total_error_distribution, 'training/total_error')
            # store example images every 30 epochs to limit tensorboard size
            if self._config.store_output_on_tensorboard and epoch % 30 == 0:
                writer.write_output_image(predictions, 'source/predictions')
                writer.write_output_image(targets, 'source/targets')
                writer.write_output_image(torch.stack(source_batch.observations), 'source/inputs')
                writer.write_output_image(self._net.forward(target_batch.observations, train=True),
                                          'target/predictions')
                writer.write_output_image(torch.stack(target_batch.observations), 'target/inputs')
        return f' training task: {self._config.criterion} {task_error_distribution.mean: 0.3e} ' \
               f'[{task_error_distribution.std:0.2e}]' \
               f' domain: {self._config.domain_adaptation_criterion} {domain_error_distribution.mean: 0.3e} ' \
               f'[{domain_error_distribution.std:0.2e}]'
class Evaluator:
    """Evaluates a network on a dataset and tracks the best (lowest) validation loss."""

    def __init__(self, config: EvaluatorConfig, network: BaseNet, quiet: bool = False):
        self._config = config
        self._net = network
        self.data_loader = DataLoader(config=self._config.data_loader_config)
        if not quiet:
            # only the base Evaluator owns a logger; subclasses get None here
            self._logger = get_logger(
                name=get_filename_without_extension(__file__),
                output_path=config.output_path,
                quiet=False) if type(self) == Evaluator else None
            cprint(f'Started.', self._logger)
        # NOTE(review): reconstructed from collapsed source — the statements
        # below are assumed to run unconditionally (outside the quiet guard),
        # since evaluate() depends on them; confirm against original file.
        self._device = torch.device(
            "cuda" if self._config.device in ['gpu', 'cuda']
            and torch.cuda.is_available() else "cpu")
        # criterion class name and extra args come from config via eval —
        # assumes config values are trusted
        self._criterion = eval(
            f'{self._config.criterion}(reduction=\'none\', {self._config.criterion_args_str})'
        )
        self._criterion.to(self._device)
        self._lowest_validation_loss = None
        self.data_loader.load_dataset()
        self._minimum_error = float(10**6)
        self._original_model_device = self._net.get_device(
        ) if self._net is not None else None

    def put_model_on_device(self, device: str = None):
        """Move the network to the evaluation device, remembering its current one."""
        self._original_model_device = self._net.get_device()
        self._net.set_device(
            torch.device(self._config.device) if device is None else torch.
            device(device))

    def put_model_back_to_original_device(self):
        """Restore the network to the device it was on before evaluation."""
        self._net.set_device(self._original_model_device)

    def evaluate(self, epoch: int = -1, writer=None, tag: str = 'validation') -> Tuple[str, bool]:
        """Run one evaluation pass without gradients.

        :param epoch: epoch index, used for periodic tensorboard image output.
        :param writer: optional tensorboard-style writer.
        :param tag: label for logged distributions/images ('validation' or 'test').
        :return: (summary message, True if this is the lowest loss seen so far).
        """
        self.put_model_on_device()
        total_error = []
        # for batch in tqdm(self.data_loader.get_data_batch(), ascii=True, desc='evaluate'):
        for batch in self.data_loader.get_data_batch():
            with torch.no_grad():
                predictions = self._net.forward(batch.observations, train=False)
                targets = data_to_tensor(batch.actions).type(
                    self._net.dtype).to(self._device)
                error = self._criterion(predictions, targets).mean()
                total_error.append(error)
        error_distribution = Distribution(total_error)
        self.put_model_back_to_original_device()
        if writer is not None:
            writer.write_distribution(error_distribution, tag)
            # store example images every 30 epochs, and always for the test tag
            if self._config.store_output_on_tensorboard and (epoch % 30 == 0 or tag == 'test'):
                writer.write_output_image(predictions, f'{tag}/predictions')
                writer.write_output_image(targets, f'{tag}/targets')
                writer.write_output_image(torch.stack(batch.observations), f'{tag}/inputs')
        msg = f' {tag} {self._config.criterion} {error_distribution.mean: 0.3e} [{error_distribution.std:0.2e}]'
        # track the best checkpoint by lowest mean error seen so far
        best_checkpoint = False
        if self._lowest_validation_loss is None or error_distribution.mean < self._lowest_validation_loss:
            self._lowest_validation_loss = error_distribution.mean
            best_checkpoint = True
        return msg, best_checkpoint

    def evaluate_extensive(self) -> None:
        """
        Extra offline evaluation methods for an extensive evaluation at the end of training
        :return: None
        """
        # run on cpu and subsample the dataset to keep this affordable
        self.put_model_on_device('cpu')
        self.data_loader.get_dataset().subsample(10)
        dataset = self.data_loader.get_dataset()
        predictions = self._net.forward(dataset.observations,
                                        train=False).detach().cpu()
        #error = predictions - torch.stack(dataset.actions)
        self.put_model_back_to_original_device()
        # save_output_plots(output_dir=self._config.output_path,
        #                   data={'expert': np.stack(dataset.actions),
        #                         'network': predictions.numpy(),
        #                         'difference': error.numpy()})
        # create_output_video(output_dir=self._config.output_path,
        #                     observations=dataset.observations,
        #                     actions={'expert': np.stack(dataset.actions),
        #                              'network': predictions.numpy()})
        create_output_video_segmentation_network(
            output_dir=self._config.output_path,
            observations=torch.stack(dataset.observations).numpy(),
            predictions=predictions.numpy())

    def remove(self):
        """Release the data loader and close all logger handlers."""
        self.data_loader.remove()
        [h.close() for h in self._logger.handlers]