def test_clip_first_x_frames(self):
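     # Cleaning removes the first 5 timestamps of each run and keeps every 2nd frame,
     # so each episode should contribute int((length - 5) / 2) + 1 samples after cleaning.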
     info = generate_random_dataset_in_raw_data(output_dir=self.output_dir,
                                                num_runs=20,
                                                input_size=(100, 100, 3),
                                                output_size=(1, ),
                                                continuous=True,
                                                store_hdf5=False)
     cleaner_config_dict = {
         'output_path': self.output_dir,
         'data_loader_config': {
             'data_directories': info['episode_directories'],
             'subsample': 2
         },
         'training_validation_split': 1.0,
         'remove_first_n_timestamps': 5,
     }
     data_cleaner = DataCleaner(config=DataCleaningConfig().create(
         config_dict=cleaner_config_dict))
     data_cleaner.clean()
     data_loader = DataLoader(config=DataLoaderConfig().create(
         config_dict={
             'output_path': self.output_dir,
             'hdf5_files': glob(f'{self.output_dir}/train*.hdf5')
         }))
     data_loader.load_dataset()
     self.assertEqual(
         sum(int((e - 5) / 2) + 1 for e in info['episode_lengths']),
         len(data_loader.get_dataset()))
 def test_create_dataset_and_clean(self):
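     # With a 0.7 training/validation split, roughly 70% of the cleaned samples should
     # end up in the train*.hdf5 files (checked loosely between 0.6 and 0.8).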
     info = generate_random_dataset_in_raw_data(output_dir=self.output_dir,
                                                num_runs=20,
                                                input_size=(100, 100, 3),
                                                output_size=(1, ),
                                                continuous=True,
                                                store_hdf5=False)
     cleaner_config_dict = {
         'output_path': self.output_dir,
         'data_loader_config': {
             'data_directories': info['episode_directories'],
             'input_size': (150, 150, 1)
         },
         'training_validation_split': 0.7,
     }
     data_cleaner = DataCleaner(config=DataCleaningConfig().create(
         config_dict=cleaner_config_dict))
     data_cleaner.clean()
     data_loader_train = DataLoader(config=DataLoaderConfig().create(
         config_dict={
             'output_path': self.output_dir,
             'hdf5_files': glob(f'{self.output_dir}/train*.hdf5')
         }))
     data_loader_train.load_dataset()
     data_loader_validation = DataLoader(config=DataLoaderConfig().create(
         config_dict={
             'output_path': self.output_dir,
             'hdf5_files': glob(f'{self.output_dir}/validation*.hdf5')
         }))
     data_loader_validation.load_dataset()
     ratio = len(data_loader_train.get_dataset()) / (
         len(data_loader_train.get_dataset()) +
         len(data_loader_validation.get_dataset()))
     self.assertTrue(ratio > 0.6)
     self.assertTrue(ratio < 0.8)
 def test_split_hdf5_chunks(self):
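     # Limiting max_hdf5_size to ~5 MB should split the cleaned training data over
     # several train*.hdf5 chunks, each loading into less than 6 MB of memory.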
     info = generate_random_dataset_in_raw_data(output_dir=self.output_dir,
                                                num_runs=20,
                                                input_size=(100, 100, 3),
                                                output_size=(1, ),
                                                continuous=True,
                                                store_hdf5=False)
     cleaner_config_dict = {
         'output_path': self.output_dir,
         'data_loader_config': {
             'data_directories': info['episode_directories'],
         },
         'training_validation_split': 1.0,
         'max_hdf5_size': 5 * 10**6
     }
     data_cleaner = DataCleaner(config=DataCleaningConfig().create(
         config_dict=cleaner_config_dict))
     data_cleaner.clean()
     for hdf5_file in glob(f'{self.output_dir}/train*.hdf5'):
         data_loader = DataLoader(config=DataLoaderConfig().create(
             config_dict={
                 'output_path': self.output_dir,
                 'hdf5_files': [hdf5_file]
             }))
         data_loader.load_dataset()
         self.assertTrue(
             data_loader.get_dataset().get_memory_size() < 6 * 10**6)
    def test_train_model_on_external_dataset_as_hdf5(self):
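        # Point the trainer and evaluator at train/validation hdf5 files generated outside
        # the experiment's output_path and check that checkpoints are still written locally.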
        network = eval(self.experiment_config['architecture_config']['architecture']).Net(
            config=ArchitectureConfig().create(config_dict=self.experiment_config['architecture_config'])
        )
        external_dataset = f'{os.environ["PWD"]}/test_dir/external_dataset'
        os.makedirs(external_dataset, exist_ok=True)
        info = generate_random_dataset_in_raw_data(output_dir=external_dataset,
                                                   num_runs=5,
                                                   input_size=network.input_size,
                                                   output_size=network.output_size,
                                                   continuous=not network.discrete,
                                                   store_hdf5=True)
        self.assertTrue(os.path.isfile(os.path.join(external_dataset, 'train.hdf5')))
        self.assertTrue(os.path.isfile(os.path.join(external_dataset, 'validation.hdf5')))

        self.experiment_config["trainer_config"]["data_loader_config"]["hdf5_files"] = [os.path.join(external_dataset,
                                                                                                     'train.hdf5')]
        self.experiment_config["evaluator_config"]["data_loader_config"]["hdf5_files"] = [os.path.join(external_dataset,
                                                                                                       'validation.hdf5')]
        experiment = Experiment(config=ExperimentConfig().create(config_dict=self.experiment_config))
        experiment.run()

        # check that the expected number of checkpoints was stored in torch_checkpoints
        self.assertEqual(
            len([f for f in os.listdir(os.path.join(self.output_dir, 'torch_checkpoints'))
                 if f.endswith('ckpt')]), 4)
        shutil.rmtree(external_dataset, ignore_errors=True)
 def test_train_model_on_generated_dataset_with_tensorboard(self):
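     # With tensorboard enabled, running the experiment should write at least one
     # events.* summary file into the output directory.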
     network = eval(self.experiment_config['architecture_config']['architecture']).Net(
         config=ArchitectureConfig().create(config_dict=self.experiment_config['architecture_config'])
     )
     info = generate_random_dataset_in_raw_data(output_dir=self.output_dir,
                                                num_runs=5,
                                                input_size=network.input_size,
                                                output_size=network.output_size,
                                                continuous=not network.discrete,
                                                store_hdf5=True)
     self.experiment_config['tensorboard'] = True
     experiment = Experiment(config=ExperimentConfig().create(config_dict=self.experiment_config))
     experiment.run()
     self.assertGreater(len(glob(os.path.join(self.output_dir, 'events.*'))), 0)
    def test_train_model_on_generated_dataset(self):
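        # Run a full experiment on a freshly generated hdf5 dataset and check that
        # the expected number of checkpoints is stored in torch_checkpoints.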
        network = eval(self.experiment_config['architecture_config']['architecture']).Net(
            config=ArchitectureConfig().create(config_dict=self.experiment_config['architecture_config'])
        )
        info = generate_random_dataset_in_raw_data(output_dir=self.output_dir,
                                                   num_runs=5,
                                                   input_size=network.input_size,
                                                   output_size=network.output_size,
                                                   continuous=not network.discrete,
                                                   store_hdf5=True)
        experiment = Experiment(config=ExperimentConfig().create(config_dict=self.experiment_config))
        experiment.run()

        print(os.listdir(os.path.join(self.output_dir, 'torch_checkpoints')))
        # check if checkpoints were stored in torch_checkpoints
        self.assertEqual(5, len([f for f in os.listdir(os.path.join(self.output_dir, 'torch_checkpoints'))
                                 if f.endswith('ckpt')]))
    def setUp(self) -> None:
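        # Create the output directory, build the network and generate a small raw dataset
        # whose episode directories are passed to the trainer's data loader.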
        self.output_dir = f'{os.environ["PWD"]}/test_dir/{get_filename_without_extension(__file__)}'
        os.makedirs(self.output_dir, exist_ok=True)
        architecture_base_config['output_path'] = self.output_dir
        trainer_base_config['output_path'] = self.output_dir
        self.network = eval(architecture_base_config['architecture']).Net(
            config=ArchitectureConfig().create(config_dict=architecture_base_config)
        )
        # checksum network

        info = generate_random_dataset_in_raw_data(output_dir=self.output_dir,
                                                   num_runs=5,
                                                   input_size=self.network.input_size,
                                                   output_size=self.network.output_size,
                                                   continuous=not self.network.discrete)
        trainer_base_config['data_loader_config'] = {
            'data_directories': info['episode_directories'],
        }
    def test_evaluate_model_on_dataset(self):
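        # Evaluate the network on a freshly generated dataset and make sure the
        # reported error message does not contain any NaN values.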
        network = eval(architecture_base_config['architecture']).Net(
            config=ArchitectureConfig().create(
                config_dict=architecture_base_config))
        info = generate_random_dataset_in_raw_data(
            output_dir=self.output_dir,
            input_size=network.input_size,
            output_size=network.output_size,
            continuous=not network.discrete)

        # generate evaluator with correct data-loader
        evaluator_base_config['data_loader_config'] = {
            'data_directories': info['episode_directories'],
            'batch_size': 5
        }
        evaluator = Evaluator(
            config=EvaluatorConfig().create(config_dict=evaluator_base_config),
            network=network)
        # evaluate
        error_msg = evaluator.evaluate()
        self.assertFalse('nan' in error_msg)
    def test_generate_random_dataset_with_train_validation_hdf5(self):
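        # Generating the dataset with store_hdf5=True should produce a train.hdf5 file
        # containing at least one non-zero 'done' flag marking an episode end.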
        num_runs = 10
        # generate network
        network = eval(architecture_base_config['architecture']).Net(
            config=ArchitectureConfig().create(
                config_dict=architecture_base_config))

        # generate dummy dataset
        info = generate_random_dataset_in_raw_data(
            output_dir=self.output_dir,
            num_runs=num_runs,
            input_size=network.input_size,
            output_size=network.output_size,
            continuous=not network.discrete,
            store_hdf5=True)
        data_loader_config = {
            'output_path': self.output_dir,
            'hdf5_files': [os.path.join(self.output_dir, 'train.hdf5')]
        }
        data_loader = DataLoader(config=DataLoaderConfig().create(
            config_dict=data_loader_config))
        data_loader.load_dataset()
        self.assertNotEqual(
            sum(d != 0 for d in data_loader.get_dataset().done), 0)
    def test_generate_random_dataset_in_raw_data(self):
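        # Loading the raw episode directories directly should yield exactly one
        # non-zero 'done' flag per run, i.e. num_runs episode terminations.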
        num_runs = 10
        # generate network
        network = eval(architecture_base_config['architecture']).Net(
            config=ArchitectureConfig().create(
                config_dict=architecture_base_config))

        # generate dummy dataset
        info = generate_random_dataset_in_raw_data(
            output_dir=self.output_dir,
            num_runs=num_runs,
            input_size=network.input_size,
            output_size=network.output_size,
            continuous=not network.discrete,
        )
        data_loader_config = {
            'output_path': self.output_dir,
            'data_directories': info['episode_directories'],
        }
        data_loader = DataLoader(config=DataLoaderConfig().create(
            config_dict=data_loader_config))
        data_loader.load_dataset()
        self.assertEqual(sum(d != 0 for d in data_loader.get_dataset().done),
                         num_runs)
 def test_line_world_augmentation(self):
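     # Clean a fixed line-world image dataset into 64x64 inverted binary target maps,
     # applying background noise, textured-background and empty-image augmentations,
     # then plot the cleaned training data.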
     line_image = np.ones((100, 100, 3))
     line_image[:, 40:43, 0:2] = 0
     info = generate_random_dataset_in_raw_data(
         output_dir=self.output_dir,
         num_runs=20,
         input_size=(100, 100, 3),
         output_size=(1, ),
         continuous=True,
         fixed_input_value=line_image,
         store_hdf5=False)
     cleaner_config_dict = {
         'output_path': self.output_dir,
         'data_loader_config': {
             'data_directories': info['episode_directories'],
             'input_size': (1, 64, 64)
         },
         'training_validation_split': 0.7,
         'remove_first_n_timestamps': 5,
         'binary_maps_as_target': True,
         'invert_binary_maps': True,
         'augment_background_noise': 0.1,
         'augment_background_textured': 0.9,
         'texture_directory': 'textured_dataset',
         'augment_empty_images': 0.1
     }
     data_cleaner = DataCleaner(config=DataCleaningConfig().create(
         config_dict=cleaner_config_dict))
     data_cleaner.clean()
     data_loader = DataLoader(config=DataLoaderConfig().create(
         config_dict={
             'output_path': self.output_dir,
             'hdf5_files': glob(f'{self.output_dir}/train*.hdf5')
         }))
     data_loader.load_dataset()
     data_loader.get_dataset().plot()
    def test_local_hdf5_file(self):
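        # Submit a condor job with save_locally=True and verify that every hdf5 file in the
        # config was copied to the job's local home directory with its file size unchanged.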
        # create fake hdf5 files
        info_0 = generate_random_dataset_in_raw_data(os.path.join(
            self.output_dir, 'fake_data_0'),
                                                     num_runs=2,
                                                     store_hdf5=True)
        info_1 = generate_random_dataset_in_raw_data(os.path.join(
            self.output_dir, 'fake_data_1'),
                                                     num_runs=3,
                                                     store_hdf5=True)
        # create experiment config using hdf5 files
        os.makedirs(os.path.join(self.output_dir, 'experiment_output'),
                    exist_ok=True)
        experiment_config = {
            'output_path': os.path.join(self.output_dir, 'experiment_output'),
            'fake_key_a': [1, 2, 3],
            'fake_key_b': {
                'fake_key_b_0': 'ok',
                'fake_key_b_1': {
                    'hdf5_files': [
                        os.path.join(self.output_dir, 'fake_data_0',
                                     'train.hdf5'),
                        os.path.join(self.output_dir, 'fake_data_1',
                                     'train.hdf5')
                    ]
                }
            },
            'fake_key_c': {
                'hdf5_files': []
            },
            'fake_key_d': {
                'hdf5_files': [
                    os.path.join(self.output_dir, 'fake_data_0',
                                 'validation.hdf5'),
                    os.path.join(self.output_dir, 'fake_data_1',
                                 'validation.hdf5')
                ]
            }
        }
        original_sizes = [
            os.path.getsize(v) for v in [
                os.path.join(self.output_dir, 'fake_data_0', 'train.hdf5'),
                os.path.join(self.output_dir, 'fake_data_1', 'train.hdf5'),
                os.path.join(self.output_dir, 'fake_data_0',
                             'validation.hdf5'),
                os.path.join(self.output_dir, 'fake_data_1', 'validation.hdf5')
            ]
        ]
        print(f'original_sizes: {original_sizes}')

        with open(os.path.join(self.output_dir, 'experiment_config.yml'),
                  'w') as f:
            yaml.dump(experiment_config, f)

        # create and submit condor job using hdf5 files but save them locally
        job_config = {
            'config_file': os.path.join(self.output_dir,
                                        'experiment_config.yml'),
            'output_path': self.output_dir,
            'command':
            'python src/condor/test/dummy_python_script_check_hdf5.py',
            'wall_time_s': 60,
            'save_locally': True
        }
        condor_job = CondorJob(config=CondorJobConfig().create(
            config_dict=job_config))
        condor_job.write_job_file()
        condor_job.write_executable_file()
        condor_job.submit()
        wait_for_job_to_finish(condor_job.log_file)

        self.assertTrue(
            glob(os.path.join(condor_job.output_dir,
                              'FINISHED*'))[0].endswith('0'))
        # check jobs output file to control hdf5 files were loaded locally
        # compare file sizes to ensure same hdf5 files were copied
        with open(os.path.join(condor_job.output_dir, 'job.output'), 'r') as f:
            output_lines = f.readlines()

        hdf5_files = [
            l.split(' ')[1] for l in output_lines if l.startswith('HDF5_FILE')
        ]
        hdf5_file_sizes = [
            int(l.split(' ')[2]) for l in output_lines
            if l.startswith('HDF5_FILE')
        ]
        print(f'hdf5_files: {hdf5_files}')
        print(f'hdf5_file_sizes: {hdf5_file_sizes}')
        self.assertEqual(len(hdf5_file_sizes), len(original_sizes))
        for f in hdf5_files:
            self.assertTrue(f.startswith(condor_job.local_home))
        for js, rs in zip(hdf5_file_sizes, original_sizes):
            self.assertEqual(js, rs)