def test_create_dataset_and_clean(self):
     info = generate_random_dataset_in_raw_data(output_dir=self.output_dir,
                                                num_runs=20,
                                                input_size=(100, 100, 3),
                                                output_size=(1, ),
                                                continuous=True,
                                                store_hdf5=False)
     cleaner_config_dict = {
         'output_path': self.output_dir,
         'data_loader_config': {
             'data_directories': info['episode_directories'],
             'input_size': (150, 150, 1)
         },
         'training_validation_split': 0.7,
     }
     data_cleaner = DataCleaner(config=DataCleaningConfig().create(
         config_dict=cleaner_config_dict))
     data_cleaner.clean()
     data_loader_train = DataLoader(config=DataLoaderConfig().create(
         config_dict={
             'output_path': self.output_dir,
             'hdf5_files': glob(f'{self.output_dir}/train*.hdf5')
         }))
     data_loader_train.load_dataset()
     data_loader_validation = DataLoader(config=DataLoaderConfig().create(
         config_dict={
             'output_path': self.output_dir,
             'hdf5_files': glob(f'{self.output_dir}/validation*.hdf5')
         }))
     data_loader_validation.load_dataset()
     ratio = len(data_loader_train.get_dataset()) / (
         len(data_loader_train.get_dataset()) +
         len(data_loader_validation.get_dataset()))
     self.assertTrue(ratio > 0.6)
     self.assertTrue(ratio < 0.8)
 def test_split_hdf5_chunks(self):
     info = generate_random_dataset_in_raw_data(output_dir=self.output_dir,
                                                num_runs=20,
                                                input_size=(100, 100, 3),
                                                output_size=(1, ),
                                                continuous=True,
                                                store_hdf5=False)
     cleaner_config_dict = {
         'output_path': self.output_dir,
         'data_loader_config': {
             'data_directories': info['episode_directories'],
         },
         'training_validation_split': 1.0,
         'max_hdf5_size': 5 * 10**6
     }
     data_cleaner = DataCleaner(config=DataCleaningConfig().create(
         config_dict=cleaner_config_dict))
     data_cleaner.clean()
     for hdf5_file in glob(f'{self.output_dir}/train*.hdf5'):
         data_loader = DataLoader(config=DataLoaderConfig().create(
             config_dict={
                 'output_path': self.output_dir,
                 'hdf5_files': [hdf5_file]
             }))
         data_loader.load_dataset()
         self.assertTrue(
             data_loader.get_dataset().get_memory_size() < 6 * 10**6)
    def test_create_hdf5_files_subsampled_in_time(self):
        num_runs = 10
        split = 1.0
        subsample = 3
        config_dict = {
            'output_path': self.output_dir,
            'training_validation_split': split,
            'store_hdf5': True,
            'subsample_hdf5': subsample,
            'separate_raw_data_runs': True
        }
        config = DataSaverConfig().create(config_dict=config_dict)
        self.data_saver = DataSaver(config=config)
        info = generate_dummy_dataset(self.data_saver, num_runs=num_runs)
        self.data_saver.create_train_validation_hdf5_files()

        config = DataLoaderConfig().create(
            config_dict={
                'output_path': self.output_dir,
                'hdf5_files': [os.path.join(self.output_dir, 'train.hdf5')]
            })
        training_data_loader = DataLoader(config=config)
        training_data_loader.load_dataset()
        training_data = training_data_loader.get_dataset()

        self.assertEqual(
            len(training_data),
            sum([
                np.ceil((el - 1) / subsample) + 1
                for el in info['episode_lengths']
            ]))
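        # Worked check of the expected-length formula above: an episode of
        # length 11 subsampled by 3 keeps on-grid frames 0, 3, 6, 9 plus the
        # final frame 10, i.e. np.ceil((11 - 1) / 3) + 1 = 5.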
    def create_train_validation_hdf5_files(
            self,
            runs: List[str] = None,
            input_size: List[int] = None) -> None:
        all_runs = runs if runs is not None else self._get_runs()

        number_of_training_runs = int(self._config.training_validation_split *
                                      len(all_runs))
        train_runs = all_runs[0:number_of_training_runs]
        validation_runs = all_runs[number_of_training_runs:]

        for file_name, file_runs in zip(['train', 'validation'],
                                        [train_runs, validation_runs]):
            config = DataLoaderConfig().create(
                config_dict={
                    'data_directories': file_runs,
                    'output_path': self._config.output_path,
                    'subsample': self._config.subsample_hdf5,
                    'input_size': input_size
                })
            data_loader = DataLoader(config=config)
            data_loader.load_dataset()
            create_hdf5_file_from_dataset(filename=os.path.join(
                self._config.output_path, file_name + '.hdf5'),
                                          dataset=data_loader.get_dataset())
            cprint(f'created {file_name}.hdf5', self._logger)
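
# A minimal usage sketch of the method above, assuming the DataSaver /
# DataSaverConfig API exercised by the surrounding tests; '/tmp/demo_output'
# is a hypothetical output path.
config = DataSaverConfig().create(config_dict={
    'output_path': '/tmp/demo_output',
    'training_validation_split': 0.7,
    'store_hdf5': True,
    'separate_raw_data_runs': True,
})
data_saver = DataSaver(config=config)
generate_dummy_dataset(data_saver, num_runs=10)
data_saver.create_train_validation_hdf5_files()
# expected artifacts: /tmp/demo_output/train.hdf5 (7 runs) and
# /tmp/demo_output/validation.hdf5 (3 runs).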
 def test_clip_first_x_frames(self):
     info = generate_random_dataset_in_raw_data(output_dir=self.output_dir,
                                                num_runs=20,
                                                input_size=(100, 100, 3),
                                                output_size=(1, ),
                                                continuous=True,
                                                store_hdf5=False)
     cleaner_config_dict = {
         'output_path': self.output_dir,
         'data_loader_config': {
             'data_directories': info['episode_directories'],
             'subsample': 2
         },
         'training_validation_split': 1.0,
         'remove_first_n_timestamps': 5,
     }
     data_cleaner = DataCleaner(config=DataCleaningConfig().create(
         config_dict=cleaner_config_dict))
     data_cleaner.clean()
     data_loader = DataLoader(config=DataLoaderConfig().create(
         config_dict={
             'output_path': self.output_dir,
             'hdf5_files': glob(f'{self.output_dir}/train*.hdf5')
         }))
     data_loader.load_dataset()
     self.assertEqual(
         sum(int((e - 5) / 2) + 1 for e in info['episode_lengths']),
         len(data_loader.get_dataset()))
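     # Worked check of the expected-length formula above: an episode of
     # length 15 loses its first 5 frames; subsampling the remaining 10 by 2
     # keeps frames 0, 2, 4, 6, 8 plus the final frame, so
     # int((15 - 5) / 2) + 1 = 6.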
    def test_data_loader_from_raw_path_dirs(self):
        self.info = generate_dummy_dataset(self.data_saver,
                                           num_runs=20,
                                           input_size=(100, 100, 3),
                                           output_size=(3, ),
                                           continuous=False)
        config_dict = {
            'data_directories': [self.output_dir],
            'output_path': self.output_dir,
        }
        config = DataLoaderConfig().create(config_dict=config_dict)
        data_loader = DataLoader(config=config)
        data_loader.load_dataset()

        config = DataLoaderConfig().create(config_dict=config_dict)
        for d in config.data_directories:
            self.assertTrue(os.path.isdir(d))
    def test_data_batch(self):
        self.info = generate_dummy_dataset(self.data_saver,
                                           num_runs=20,
                                           input_size=(100, 100, 3),
                                           output_size=(3, ),
                                           continuous=False)
        config_dict = {
            'data_directories': self.info['episode_directories'],
            'output_path': self.output_dir,
            'random_seed': 1,
            'batch_size': 3
        }
        data_loader = DataLoader(config=DataLoaderConfig().create(
            config_dict=config_dict))
        data_loader.load_dataset()

        for batch in data_loader.get_data_batch():
            self.assertEqual(len(batch), config_dict['batch_size'])
            break
    def test_data_loading(self):
        self.info = generate_dummy_dataset(self.data_saver,
                                           num_runs=20,
                                           input_size=(100, 100, 3),
                                           output_size=(3, ),
                                           continuous=False)
        config_dict = {
            'data_directories': self.info['episode_directories'],
            'output_path': self.output_dir,
        }
        config = DataLoaderConfig().create(config_dict=config_dict)
        data_loader = DataLoader(config=config)
        data_loader.load_dataset()

        # assert nothing is empty
        for k in ['observations', 'actions', 'rewards', 'done']:
            data = getattr(data_loader.get_dataset(), k)
            self.assertTrue(len(data) > 0)
            self.assertTrue(sum(data[0].shape) > 0)
 def test_data_subsample(self):
     self.info = generate_dummy_dataset(self.data_saver,
                                        num_runs=20,
                                        input_size=(100, 100, 3),
                                        output_size=(3, ),
                                        continuous=False)
     subsample = 4
     config_dict = {
         'data_directories': self.info['episode_directories'],
         'output_path': self.output_dir,
         'random_seed': 1,
         'batch_size': 3,
         'subsample': subsample
     }
     data_loader = DataLoader(config=DataLoaderConfig().create(
         config_dict=config_dict))
     data_loader.load_dataset()
     self.assertEqual(
         sum([
             np.ceil((el - 1) / subsample) + 1
             for el in self.info['episode_lengths']
         ]), len(data_loader.get_dataset()))
    def __init__(self, config: DataSaverConfig):
        self._config = config
        self._logger = get_logger(
            name=get_filename_without_extension(__file__),
            output_path=self._config.output_path,
            quiet=False)
        cprint('initiate', self._logger)

        if not self._config.saving_directory.startswith('/'):
            self._config.saving_directory = os.path.join(
                os.environ['HOME'], self._config.saving_directory)
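        # e.g. (illustrative path) a relative saving_directory such as
        # 'experiments/run_1' resolves to $HOME/experiments/run_1 here.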

        if self._config.store_on_ram_only:
            self._dataset = Dataset(max_size=self._config.max_size)

        # used to keep track of replay buffer size on file system
        if not self._config.store_on_ram_only \
                and os.path.isdir(os.path.dirname(self._config.saving_directory)) \
                and self._config.max_size != -1:
            data_loader = DataLoader(config=DataLoaderConfig().create(
                config_dict={
                    'data_directories': [
                        os.path.join(
                            os.path.dirname(self._config.saving_directory),
                            run) for run in sorted(
                                os.listdir(
                                    os.path.dirname(
                                        self._config.saving_directory)))
                    ],
                    'output_path':
                    self._config.output_path,
                    'store':
                    False  # don't store config
                }))
            data_loader.load_dataset()
            self._frame_counter = len(data_loader.get_dataset())
        else:
            self._frame_counter = 0
    def test_create_train_validation_hdf5_files(self):
        num_runs = 10
        split = 0.7
        config_dict = {
            'output_path': self.output_dir,
            'training_validation_split': split,
            'store_hdf5': True,
            'separate_raw_data_runs': True
        }
        config = DataSaverConfig().create(config_dict=config_dict)
        self.data_saver = DataSaver(config=config)
        info = generate_dummy_dataset(self.data_saver, num_runs=num_runs)
        self.data_saver.create_train_validation_hdf5_files()

        config = DataLoaderConfig().create(
            config_dict={
                'output_path': self.output_dir,
                'hdf5_files': [os.path.join(self.output_dir, 'train.hdf5')]
            })
        training_data_loader = DataLoader(config=config)
        training_data_loader.load_dataset()
        training_data = training_data_loader.get_dataset()

        config = DataLoaderConfig().create(
            config_dict={
                'output_path': self.output_dir,
                'hdf5_files':
                [os.path.join(self.output_dir, 'validation.hdf5')]
            })
        validation_data_loader = DataLoader(config=config)
        validation_data_loader.load_dataset()
        validation_data = validation_data_loader.get_dataset()

        self.assertEqual(len(training_data),
                         sum(info['episode_lengths'][:int(split * num_runs)]))
        self.assertEqual(len(validation_data),
                         sum(info['episode_lengths'][int(split * num_runs):]))
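        # e.g. with split = 0.7 and num_runs = 10, int(0.7 * 10) = 7 runs end
        # up in train.hdf5 and the remaining 3 in validation.hdf5.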
 def test_line_world_augmentation(self):
     line_image = np.ones((100, 100, 3))
     line_image[:, 40:43, 0:2] = 0
     info = generate_random_dataset_in_raw_data(
         output_dir=self.output_dir,
         num_runs=20,
         input_size=(100, 100, 3),
         output_size=(1, ),
         continuous=True,
         fixed_input_value=line_image,
         store_hdf5=False)
     cleaner_config_dict = {
         'output_path': self.output_dir,
         'data_loader_config': {
             'data_directories': info['episode_directories'],
             'input_size': (1, 64, 64)
         },
         'training_validation_split': 0.7,
         'remove_first_n_timestamps': 5,
         'binary_maps_as_target': True,
         'invert_binary_maps': True,
         'augment_background_noise': 0.1,
         'augment_background_textured': 0.9,
         'texture_directory': 'textured_dataset',
         'augment_empty_images': 0.1
     }
     data_cleaner = DataCleaner(config=DataCleaningConfig().create(
         config_dict=cleaner_config_dict))
     data_cleaner.clean()
     data_loader = DataLoader(config=DataLoaderConfig().create(
         config_dict={
             'output_path': self.output_dir,
             'hdf5_files': glob(f'{self.output_dir}/train*.hdf5')
         }))
     data_loader.load_dataset()
     data_loader.get_dataset().plot()
    def test_generate_random_dataset_in_raw_data(self):
        num_runs = 10
        # generate network
        network = eval(architecture_base_config['architecture']).Net(
            config=ArchitectureConfig().create(
                config_dict=architecture_base_config))

        # generate dummy dataset
        info = generate_random_dataset_in_raw_data(
            output_dir=self.output_dir,
            num_runs=num_runs,
            input_size=network.input_size,
            output_size=network.output_size,
            continuous=not network.discrete,
        )
        data_loader_config = {
            'output_path': self.output_dir,
            'data_directories': info['episode_directories'],
        }
        data_loader = DataLoader(config=DataLoaderConfig().create(
            config_dict=data_loader_config))
        data_loader.load_dataset()
        self.assertEqual(sum(d != 0 for d in data_loader.get_dataset().done),
                         num_runs)
    def test_generate_random_dataset_with_train_validation_hdf5(self):
        num_runs = 10
        # generate network
        network = eval(architecture_base_config['architecture']).Net(
            config=ArchitectureConfig().create(
                config_dict=architecture_base_config))

        # generate dummy dataset
        info = generate_random_dataset_in_raw_data(
            output_dir=self.output_dir,
            num_runs=num_runs,
            input_size=network.input_size,
            output_size=network.output_size,
            continuous=not network.discrete,
            store_hdf5=True)
        data_loader_config = {
            'output_path': self.output_dir,
            'hdf5_files': [os.path.join(self.output_dir, 'train.hdf5')]
        }
        data_loader = DataLoader(config=DataLoaderConfig().create(
            config_dict=data_loader_config))
        data_loader.load_dataset()
        self.assertNotEqual(
            sum(d != 0 for d in data_loader.get_dataset().done), 0)
    def test_sample_batch(self):
        self.info = generate_dummy_dataset(self.data_saver,
                                           num_runs=20,
                                           input_size=(100, 100, 3),
                                           output_size=(3, ),
                                           continuous=False)
        max_num_batches = 2
        config_dict = {
            'data_directories': self.info['episode_directories'],
            'output_path': self.output_dir,
            'random_seed': 1,
            'batch_size': 3
        }
        data_loader = DataLoader(config=DataLoaderConfig().create(
            config_dict=config_dict))
        data_loader.load_dataset()
        first_batch = []
        index = 0
        for index, batch in enumerate(
                data_loader.sample_shuffled_batch(
                    max_number_of_batches=max_num_batches)):
            if index == 0:
                first_batch = deepcopy(batch)
            self.assertEqual(len(batch), config_dict['batch_size'])
        self.assertEqual(index, max_num_batches - 1)

        # test sampling seed for reproduction
        config_dict['random_seed'] = 2
        data_loader = DataLoader(config=DataLoaderConfig().create(
            config_dict=config_dict))
        data_loader.load_dataset()
        second_batch = []
        for index, batch in enumerate(
                data_loader.sample_shuffled_batch(
                    max_number_of_batches=max_num_batches)):
            second_batch = deepcopy(batch)
            break
        self.assertNotEqual(np.sum(np.asarray(first_batch.observations[0])),
                            np.sum(np.asarray(second_batch.observations[0])))
        config_dict['random_seed'] = 1
        data_loader = DataLoader(config=DataLoaderConfig().create(
            config_dict=config_dict))
        data_loader.load_dataset()
        third_batch = []
        for index, batch in enumerate(
                data_loader.sample_shuffled_batch(
                    max_number_of_batches=max_num_batches)):
            third_batch = deepcopy(batch)
            break
        self.assertEqual(np.sum(np.asarray(first_batch.observations[0])),
                         np.sum(np.asarray(third_batch.observations[0])))
class DomainAdaptationTrainer(Trainer):

    def __init__(self, config: TrainerConfig, network: BaseNet, quiet: bool = False):
        super().__init__(config, network, quiet=True)

        self._config.epsilon = 0.2 if self._config.epsilon == "default" else self._config.epsilon

        self.target_data_loader = DataLoader(config=self._config.target_data_loader_config)
        self.target_data_loader.load_dataset()
        self._domain_adaptation_criterion = eval(f'{self._config.domain_adaptation_criterion}()') \
            if self._config.domain_adaptation_criterion != 'default' else MMDLossZhao()
        self._domain_adaptation_criterion.to(self._device)

        if not quiet:
            self._optimizer = eval(f'torch.optim.{self._config.optimizer}')(params=self._net.parameters(),
                                                                            lr=self._config.learning_rate,
                                                                            weight_decay=self._config.weight_decay)

            lambda_function = lambda f: 1 - f / self._config.scheduler_config.number_of_epochs
            self._scheduler = torch.optim.lr_scheduler.LambdaLR(self._optimizer, lr_lambda=lambda_function) \
                if self._config.scheduler_config is not None else None

            self._logger = get_logger(name=get_filename_without_extension(__file__),
                                      output_path=config.output_path,
                                      quiet=False)
            cprint('Started.', self._logger)

    def train(self, epoch: int = -1, writer=None) -> str:
        self.put_model_on_device()
        total_error = []
        task_error = []
        domain_error = []
        for source_batch, target_batch in zip(self.data_loader.sample_shuffled_batch(),
                                              self.target_data_loader.sample_shuffled_batch()):
            self._optimizer.zero_grad()
            targets = data_to_tensor(source_batch.actions).type(self._net.dtype).to(self._device)
            # task loss
            predictions = self._net.forward(source_batch.observations, train=True)
            task_loss = (1 - self._config.epsilon) * self._criterion(predictions, targets).mean()

            # add domain adaptation loss
            domain_loss = self._config.epsilon * self._domain_adaptation_criterion(
                self._net.get_features(source_batch.observations, train=True),
                self._net.get_features(target_batch.observations, train=True))

            loss = task_loss + domain_loss
            loss.backward()
            if self._config.gradient_clip_norm != -1:
                nn.utils.clip_grad_norm_(self._net.parameters(),
                                         self._config.gradient_clip_norm)
            self._optimizer.step()
            self._net.global_step += 1
            task_error.append(task_loss.cpu().detach())
            domain_error.append(domain_loss.cpu().detach())
            total_error.append(loss.cpu().detach())
        self.put_model_back_to_original_device()

        if self._scheduler is not None:
            self._scheduler.step()

        task_error_distribution = Distribution(task_error)
        domain_error_distribution = Distribution(domain_error)
        total_error_distribution = Distribution(total_error)
        if writer is not None:
            writer.set_step(self._net.global_step)
            writer.write_distribution(task_error_distribution, 'training/task_error')
            writer.write_distribution(domain_error_distribution, 'training/domain_error')
            writer.write_distribution(total_error_distribution, 'training/total_error')
            if self._config.store_output_on_tensorboard and epoch % 30 == 0:
                writer.write_output_image(predictions, 'source/predictions')
                writer.write_output_image(targets, 'source/targets')
                writer.write_output_image(torch.stack(source_batch.observations), 'source/inputs')
                writer.write_output_image(self._net.forward(target_batch.observations, train=True),
                                          'target/predictions')
                writer.write_output_image(torch.stack(target_batch.observations), 'target/inputs')

        return f' training task: {self._config.criterion} {task_error_distribution.mean: 0.3e} ' \
               f'[{task_error_distribution.std:0.2e}]' \
               f' domain: {self._config.domain_adaptation_criterion} {domain_error_distribution.mean: 0.3e} ' \
               f'[{domain_error_distribution.std:0.2e}]'
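
# Sketch of the objective optimised in train() above: with the default
# epsilon of 0.2,
#   loss = (1 - 0.2) * criterion(predictions, targets).mean()
#        + 0.2 * domain_adaptation_criterion(source_features, target_features)
# so epsilon trades source-domain task accuracy against source/target
# feature alignment (MMD by default).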
class Evaluator:
    def __init__(self,
                 config: EvaluatorConfig,
                 network: BaseNet,
                 quiet: bool = False):
        self._config = config
        self._net = network
        self.data_loader = DataLoader(config=self._config.data_loader_config)

        if not quiet:
            self._logger = get_logger(
                name=get_filename_without_extension(__file__),
                output_path=config.output_path,
                quiet=False) if type(self) == Evaluator else None
            cprint('Started.', self._logger)

        self._device = torch.device(
            "cuda" if self._config.device in ['gpu', 'cuda']
            and torch.cuda.is_available() else "cpu")
        self._criterion = eval(
            f'{self._config.criterion}(reduction=\'none\', {self._config.criterion_args_str})'
        )
        self._criterion.to(self._device)
        self._lowest_validation_loss = None
        self.data_loader.load_dataset()

        self._minimum_error = float(10**6)
        self._original_model_device = self._net.get_device(
        ) if self._net is not None else None

    def put_model_on_device(self, device: str = None):
        self._original_model_device = self._net.get_device()
        self._net.set_device(
            torch.device(self._config.device)
            if device is None else torch.device(device))

    def put_model_back_to_original_device(self):
        self._net.set_device(self._original_model_device)

    def evaluate(self,
                 epoch: int = -1,
                 writer=None,
                 tag: str = 'validation') -> Tuple[str, bool]:
        self.put_model_on_device()
        total_error = []
        #        for batch in tqdm(self.data_loader.get_data_batch(), ascii=True, desc='evaluate'):
        for batch in self.data_loader.get_data_batch():
            with torch.no_grad():
                predictions = self._net.forward(batch.observations,
                                                train=False)
                targets = data_to_tensor(batch.actions).type(
                    self._net.dtype).to(self._device)
                error = self._criterion(predictions, targets).mean()
                total_error.append(error)
        error_distribution = Distribution(total_error)
        self.put_model_back_to_original_device()
        if writer is not None:
            writer.write_distribution(error_distribution, tag)
            if self._config.store_output_on_tensorboard and (epoch % 30 == 0
                                                             or tag == 'test'):
                writer.write_output_image(predictions, f'{tag}/predictions')
                writer.write_output_image(targets, f'{tag}/targets')
                writer.write_output_image(torch.stack(batch.observations),
                                          f'{tag}/inputs')

        msg = f' {tag} {self._config.criterion} {error_distribution.mean: 0.3e} [{error_distribution.std:0.2e}]'

        best_checkpoint = False
        if self._lowest_validation_loss is None or error_distribution.mean < self._lowest_validation_loss:
            self._lowest_validation_loss = error_distribution.mean
            best_checkpoint = True
        return msg, best_checkpoint

    def evaluate_extensive(self) -> None:
        """
        Extra offline evaluation methods for an extensive evaluation at the end of training
        :return: None
        """
        self.put_model_on_device('cpu')
        self.data_loader.get_dataset().subsample(10)
        dataset = self.data_loader.get_dataset()
        predictions = self._net.forward(dataset.observations,
                                        train=False).detach().cpu()
        #error = predictions - torch.stack(dataset.actions)
        self.put_model_back_to_original_device()

        # save_output_plots(output_dir=self._config.output_path,
        #                   data={'expert': np.stack(dataset.actions),
        #                         'network': predictions.numpy(),
        #                         'difference': error.numpy()})
        # create_output_video(output_dir=self._config.output_path,
        #                     observations=dataset.observations,
        #                     actions={'expert': np.stack(dataset.actions),
        #                              'network': predictions.numpy()})
        create_output_video_segmentation_network(
            output_dir=self._config.output_path,
            observations=torch.stack(dataset.observations).numpy(),
            predictions=predictions.numpy())

    def remove(self):
        self.data_loader.remove()
        # _logger is only created when quiet=False, so guard against the
        # attribute being absent or None before closing its handlers.
        if getattr(self, '_logger', None) is not None:
            for handler in self._logger.handlers:
                handler.close()
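
# A minimal usage sketch, assuming the EvaluatorConfig fields referenced in
# __init__ above and the same Config().create(config_dict=...) pattern as the
# other configs in these examples; 'net' stands in for a trained BaseNet and
# '/tmp/demo_output' for a directory containing validation.hdf5.
evaluator = Evaluator(config=EvaluatorConfig().create(config_dict={
    'output_path': '/tmp/demo_output',
    'criterion': 'MSELoss',
    'device': 'cpu',
    'data_loader_config': {
        'output_path': '/tmp/demo_output',
        'hdf5_files': ['/tmp/demo_output/validation.hdf5']
    }
}), network=net)
msg, is_best_checkpoint = evaluator.evaluate(epoch=0)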