Beispiel #1
0
 def test_eval_preparation(self):
     dl = DataloaderImages((149, 117), ignore_useless_states=False)
     st = ModelTrainer(
         lambda: DeconvModelEfficient(),
         self.test_src_dir,
         self.eval_output_path,
         load_datasets_path=self.test_split_dir,
         cache_path=None,
         batch_size=2,
         train_print_frequency=10,
         epochs=0,
         num_workers=4,
         num_validation_samples=2,
         num_test_samples=self.num_test_samples,
         data_processing_function=dl.get_sensordata_and_flowfront,
         data_gather_function=get_filelist_within_folder_blacklisted,
         classification_evaluator_function=lambda summary_writer:
         SensorToFlowfrontEvaluator(summary_writer=summary_writer),
         data_root=test_resources.test_src_dir,
     )
     st.start_training()
     dirs = [e for e in self.eval_output_path.iterdir() if e.is_dir()]
     code_dir = dirs[0] / 'rtm-predictions'
     slurm_script = dirs[0] / 'run_model_eval.sh'
     self.assertTrue(os.path.isdir(code_dir))
     self.assertTrue(os.path.isfile(slurm_script))
     with open(slurm_script) as f:
         lines = f.read().splitlines()
         tokens = lines[-1].split()
         self.assertEqual(dirs[0], Path(tokens[-3]))
     st.writer.flush()
     st.writer.close()
Beispiel #2
0
 def test_training(self):
     num_epochs = 2
     dl = DataloaderImages((149, 117), ignore_useless_states=False)
     st = ModelTrainer(
         lambda: DeconvModelEfficient(),
         self.test_src_dir,
         self.training_save_path,
         load_datasets_path=self.test_split_dir,
         cache_path=None,
         batch_size=16,
         train_print_frequency=10,
         epochs=num_epochs,
         num_workers=4,
         num_validation_samples=2,
         num_test_samples=self.num_test_samples,
         data_processing_function=dl.get_sensordata_and_flowfront,
         data_gather_function=get_filelist_within_folder_blacklisted,
         loss_criterion=torch.nn.BCELoss(),
         classification_evaluator_function=lambda summary_writer:
         SensorToFlowfrontEvaluator(summary_writer=summary_writer),
         data_root=test_resources.test_src_dir,
     )
     st.start_training()
     dirs = [e for e in self.training_save_path.iterdir() if e.is_dir()]
     with open(dirs[0] / 'output.log') as f:
         content = f.read()
         epochs = re.findall('Mean Loss on Eval', content)
         self.assertEqual(num_epochs, len(epochs))
         # Check if steps are growing / if there are doubled steps in the output
         steps = [
             int(re.findall(r'\d+', x)[0])
             for x in re.findall(r'Duration of step.+\d:', content)
         ]
         self.assertEqual(len(set(steps)), len(steps))
Beispiel #3
0
 def test_training_load_optimizer(self):
     dl = DataloaderImages((149, 117), ignore_useless_states=False)
     st = ModelTrainer(
         lambda: DeconvModelEfficient(),
         self.test_src_dir,
         self.training_save_path,
         load_datasets_path=self.test_split_dir,
         cache_path=None,
         batch_size=16,
         train_print_frequency=10,
         epochs=self.expected_num_epochs_during_training,
         num_workers=4,
         num_validation_samples=2,
         num_test_samples=self.num_test_samples,
         data_processing_function=dl.get_sensordata_and_flowfront,
         data_gather_function=get_filelist_within_folder_blacklisted,
         loss_criterion=torch.nn.BCELoss(),
         optimizer_path=self.checkpoint,
         classification_evaluator_function=lambda summary_writer:
         SensorToFlowfrontEvaluator(summary_writer=summary_writer),
         data_root=test_resources.test_src_dir,
     )
     st.start_training()
     after = len(st.optimizer.state.keys())
     """ Optimizer has now more than 0 states, therefore was loaded """
     self.assertGreater(after, 0)
Beispiel #4
0
    def test_save_load_training(self):
        num_epochs = 2
        dl = DataloaderImages((149, 117), ignore_useless_states=False)
        st = ModelTrainer(
            lambda: DeconvModelEfficient(),
            self.test_src_dir,
            self.training_save_path,
            load_datasets_path=self.test_split_dir,
            cache_path=None,
            batch_size=16,
            train_print_frequency=10,
            epochs=num_epochs,
            num_workers=4,
            num_validation_samples=2,
            num_test_samples=self.num_test_samples,
            data_processing_function=dl.get_sensordata_and_flowfront,
            data_gather_function=get_filelist_within_folder_blacklisted,
            loss_criterion=torch.nn.BCELoss(),
            classification_evaluator_function=lambda summary_writer:
            SensorToFlowfrontEvaluator(summary_writer=summary_writer),
            data_root=test_resources.test_src_dir,
        )
        st.start_training()

        num_epochs = 2
        dl = DataloaderImages((149, 117), ignore_useless_states=False)
        st = ModelTrainer(
            lambda: DeconvModelEfficient(),
            self.test_src_dir,
            self.training_save_path,
            load_datasets_path=self.test_split_dir,
            cache_path=None,
            batch_size=16,
            train_print_frequency=10,
            epochs=num_epochs,
            num_workers=4,
            num_validation_samples=2,
            num_test_samples=self.num_test_samples,
            data_processing_function=dl.get_sensordata_and_flowfront,
            data_gather_function=get_filelist_within_folder_blacklisted,
            loss_criterion=torch.nn.BCELoss(),
            classification_evaluator_function=lambda summary_writer:
            SensorToFlowfrontEvaluator(summary_writer=summary_writer),
            data_root=test_resources.test_src_dir,
        )
        st.start_training()
Beispiel #5
0
    def test_eval(self):
        dl = DataloaderImages((149, 117), ignore_useless_states=False)
        st = ModelTrainer(
            lambda: DeconvModelEfficient(),
            self.test_src_dir,
            self.eval_output_path,
            load_datasets_path=self.test_split_dir,
            cache_path=None,
            batch_size=2,
            train_print_frequency=10,
            epochs=self.expected_num_epochs_during_training,
            num_workers=10,
            num_validation_samples=2,
            num_test_samples=self.num_test_samples,
            data_processing_function=dl.get_sensordata_and_flowfront,
            data_gather_function=get_filelist_within_folder_blacklisted,
            loss_criterion=torch.nn.BCELoss(),
            classification_evaluator_function=lambda summary_writer:
            SensorToFlowfrontEvaluator(self.eval_output_path /
                                       "eval_on_test_set",
                                       skip_images=False,
                                       summary_writer=summary_writer),
            data_root=test_resources.test_src_dir,
        )

        st.inference_on_test_set(
            self.eval_output_path,
            self.checkpoint,
            classification_evaluator_function=lambda summary_writer:
            SensorToFlowfrontEvaluator(self.eval_output_path /
                                       "eval_on_test_set",
                                       skip_images=False,
                                       summary_writer=summary_writer))

        with open(self.eval_output_path / "eval_on_test_set" /
                  "test_output.log") as f:
            content = f.read()
            loss = float(
                re.findall(r'\d+.\d+',
                           re.findall(r'Eval: \d+\.\d+', content)[0])[0])
            self.assertEqual(np.round(loss, 4), self.expected_loss)
        img_path = self.eval_output_path / 'eval_on_test_set' / 'images'
        list_all_imgs = list(img_path.glob('**/*.jpg'))
        self.assertEqual(len(list_all_imgs), self.expected_num_frames)
Beispiel #6
0
        data_source_paths=r.get_data_paths_base_0(),
        save_path=r.save_path if args.demo is None else Path(args.demo),
        load_datasets_path=r.datasets_dryspots,
        cache_path=r.cache_path,
        batch_size=128,
        train_print_frequency=100,
        epochs=1000,
        num_workers=75,
        num_validation_samples=131072,
        num_test_samples=1048576,
        data_processing_function=dl.get_sensordata_and_flowfront,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.MSELoss(),
        optimizer_function=lambda params: torch.optim.AdamW(params, lr=0.0001),
        classification_evaluator_function=lambda summary_writer:
        SensorToFlowfrontEvaluator(summary_writer=summary_writer),
        demo_path=args.demo,
        run_eval_step_before_training=True,
        resize_label_to=img_size if args.demo is not None else (0, 0))

    if not args.run_eval:
        m.start_training()
    else:
        m.inference_on_test_set(
            Path(args.eval),
            Path(args.checkpoint_path),
            lambda summary_writer: SensorToFlowfrontEvaluator(
                Path(args.eval) / "eval_on_test_set",
                skip_images=False,
                sensors_shape=(10, 8),
                print_n_images=5000),
    checkpoint_p = r.chkp_S1140_to_ff_0_basepr
    adv_output_dir = checkpoint_p.parent / "advanced_eval"

    m = ModelTrainer(
        lambda: DeconvModelEfficient(),
        data_source_paths=r.get_data_paths_base_0(),
        save_path=r.save_path,
        load_datasets_path=r.datasets_dryspots,
        cache_path=r.cache_path,
        batch_size=2048,
        train_print_frequency=10,
        epochs=1000,
        num_workers=75,
        num_validation_samples=131072,
        num_test_samples=1048576,
        data_processing_function=dl.get_sensordata_and_flowfront,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.MSELoss(),
        optimizer_function=lambda params: torch.optim.AdamW(params, lr=0.0001),
        classification_evaluator_function=lambda summary_writer:
        SensorToFlowfrontEvaluator(summary_writer=summary_writer),
    )

    adv_output_dir.mkdir(exist_ok=True)
    m.inference_on_test_set(
        output_path=adv_output_dir,
        checkpoint_path=checkpoint_p,
        classification_evaluator_function=lambda summary_writer:
        SensorToFlowfrontEvaluator(
            adv_output_dir, skip_images=False, print_n_images=5000))
                                        if args.demo is not None else False),
        data_source_paths=r.get_data_paths_base_0(),
        save_path=r.save_path if args.demo is None else Path(args.demo),
        dataset_split_path=r.dataset_split,
        cache_path=r.cache_path,
        batch_size=128,
        train_print_frequency=100,
        epochs=1000,
        num_workers=75,
        num_validation_samples=131072,
        num_test_samples=1048576,
        data_processing_function=dl.get_sensordata_and_flowfront,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.MSELoss(),
        optimizer_function=lambda params: torch.optim.AdamW(params, lr=0.001),
        classification_evaluator_function=lambda: SensorToFlowfrontEvaluator(),
        demo_path=args.demo,
        run_eval_step_before_training=True,
        resize_label_to=img_size if args.demo is not None else (0, 0))

    if not args.run_eval:
        m.start_training()
    else:
        m.inference_on_test_set(
            Path(args.eval),
            Path(args.checkpoint_path),
            lambda: SensorToFlowfrontEvaluator(Path(args.eval) /
                                               "eval_on_test_set",
                                               skip_images=False,
                                               sensors_shape=(5, 4),
                                               print_n_images=5000),
Beispiel #9
0
        dataset_paths,
        r.save_path,
        dataset_split_path=r.dataset_split,
        cache_path=r.cache_path,
        batch_size=batch_size,
        epochs=10,
        num_workers=num_workers,
        num_validation_samples=num_val,
        num_test_samples=num_test,
        data_root=r.data_root_every_step,
        data_processing_function=dl.get_sensor_to_perm_map,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.MSELoss(),
        classification_evaluator_function=lambda: SensorToFlowfrontEvaluator(
            skip_images=False,
            ignore_inp=True,
            save_path=Path("/cfs/home/s/c/schroeni/Images"
                           "/SensorFiber/")),
        dummy_epoch=False)

    if not args.eval:
        m.start_training()
    else:
        m.inference_on_test_set(
            Path(args.eval),
            Path(args.checkpoint_path),
            lambda: SensorToFlowfrontEvaluator(
                save_path=Path(args.eval) / "eval_on_test_set",
                skip_images=False,
                ignore_inp=True,
            ),
                                         freeze_nlayers=9,
                                         checkpoint_path=checkpoint_p,
                                         round_at=.8),
        data_source_paths=r.get_data_paths_base_0(),
        save_path=r.save_path,
        dataset_split_path=r.dataset_split,
        cache_path=r.cache_path,
        batch_size=2048,
        train_print_frequency=100,
        epochs=1000,
        num_workers=75,
        num_validation_samples=131072,
        num_test_samples=1048576,
        data_processing_function=dl.get_sensordata_and_flowfront,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.MSELoss(),
        optimizer_function=lambda params: torch.optim.AdamW(params, lr=0.0001),
        classification_evaluator_function=lambda: SensorToFlowfrontEvaluator(),
    )

    output_path = r.chkp_S80_to_ff2.parent
    m.inference_on_test_set(
        output_path / "eval_on_test_set_rounded.5",
        r.chkp_S80_to_ff2,
        lambda: SensorToFlowfrontEvaluator(output_path /
                                           "eval_on_test_set_rounded.5",
                                           skip_images=False,
                                           sensors_shape=(10, 8),
                                           print_n_images=5000),
    )
        dataset_paths,
        save_path,
        cache_path=None,
        batch_size=batch_size,
        epochs=150,
        num_workers=num_workers,
        num_validation_samples=num_val,
        num_test_samples=num_test,
        data_processing_function=dl.get_flowfront_to_perm_map,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.MSELoss(),
        data_root=data_root,
        demo_path=data_folder,
        classification_evaluator_function=lambda: SensorToFlowfrontEvaluator(
            skip_images=False,
            ignore_inp=False,
            sensors_shape=(143, 111),
            save_path=save_path),
        dummy_epoch=False)

    if not eval:
        m.start_training()
    else:
        m.inference_on_test_set(
            Path(save_path) / Path(mode),
            Path(model_chkpts) / ("checkpoint" + mode + ".pth"),
            lambda: SensorToFlowfrontEvaluator(save_path=Path(save_path) /
                                               Path(mode) / "eval_on_test_set",
                                               skip_images=False,
                                               ignore_inp=True,
                                               sensors_shape=(143, 111)),
        lambda: DeconvModelEfficient(),
        data_source_paths=r.get_data_paths_base_0(),
        save_path=r.save_path if args.demo is None else Path(args.demo),
        load_datasets_path=r.datasets_dryspots,
        cache_path=r.cache_path,
        batch_size=2048,
        train_print_frequency=100,
        epochs=1000,
        num_workers=75,
        num_validation_samples=131072,
        num_test_samples=1048576,
        data_processing_function=dl.get_sensordata_and_flowfront,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.MSELoss(),
        optimizer_function=lambda params: torch.optim.AdamW(params, lr=1e-4),
        classification_evaluator_function=lambda summary_writer:
        SensorToFlowfrontEvaluator(summary_writer=summary_writer),
        demo_path=args.demo,
        # run_eval_step_before_training=True
    )

    if not args.run_eval:
        m.start_training()
    else:
        m.inference_on_test_set(
            Path(args.eval),
            Path(args.checkpoint_path),
            lambda summary_writer: SensorToFlowfrontEvaluator(
                Path(args.eval) / "eval_on_test_set", skip_images=False),
        )
Beispiel #13
0
    dl = DataloaderImages(image_size=(149, 117))
    m = ModelTrainer(
        lambda: DeconvModelEfficient(),
        data_source_paths=r.get_data_paths_base_0(),
        save_path=r.save_path if args.demo is None else Path(args.demo),
        dataset_split_path=r.dataset_split,
        cache_path=r.cache_path,
        batch_size=2048,
        train_print_frequency=100,
        epochs=1000,
        num_workers=75,
        num_validation_samples=131072,
        num_test_samples=1048576,
        data_processing_function=dl.get_sensordata_and_flowfront,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.MSELoss(),
        optimizer_function=lambda params: torch.optim.AdamW(params, lr=1e-4),
        classification_evaluator_function=lambda: SensorToFlowfrontEvaluator(),
        demo_path=args.demo,
        # run_eval_step_before_training=True
    )

    if not args.run_eval:
        m.start_training()
    else:
        m.inference_on_test_set(
            Path(args.eval), Path(args.checkpoint_path),
            lambda: SensorToFlowfrontEvaluator(
                Path(args.eval) / "eval_on_test_set", skip_images=False))
Beispiel #14
0
        lambda: OptimusPrime_c2D(batch_size),
        dataset_paths,
        r.save_path,
        cache_path=r.cache_path,
        batch_size=batch_size,
        epochs=150,
        num_workers=num_workers,
        num_validation_samples=num_val,
        num_test_samples=num_test,
        data_processing_function=dl.get_flowfront_to_perm_map,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.MSELoss(),
        data_root=data_root,
        classification_evaluator_function=lambda: SensorToFlowfrontEvaluator(
            skip_images=False,
            ignore_inp=False,
            sensors_shape=(143, 111),
            save_path=img_save_path),
        dummy_epoch=False)

    if not args.eval:
        m.start_training()
    else:
        m.inference_on_test_set(
            Path(args.eval),
            Path(args.checkpoint_path),
            lambda summary_writer: SensorToFlowfrontEvaluator(
                save_path=Path(args.eval) / "eval_on_test_set",
                skip_images=False,
                ignore_inp=True,
                sensors_shape=(143, 111)),
if __name__ == "__main__":
    """
    Producing data only.

    torch_datasets_chunk_size = 300 000: each chunk = ca. 22 GB 
    torch_datasets_chunk_size = 75 000: each chunk = ca. 5.5 GB 
    """
    Utils.custom_mlflow.logging = False

    dl = DataloaderImages(image_size=(149, 117))
    m = ModelTrainer(
        lambda: DeconvModelEfficient(),
        data_source_paths=r.get_data_paths_base_0(),
        save_path=r.save_path,
        dataset_split_path=r.dataset_split,
        cache_path=r.cache_path,
        batch_size=2048,
        num_workers=1,
        num_validation_samples=131072,
        num_test_samples=1048576,
        data_processing_function=dl.get_sensordata_and_flowfront,
        data_gather_function=get_filelist_within_folder_blacklisted,
        classification_evaluator_function=lambda: SensorToFlowfrontEvaluator(),
        produce_torch_datasets_only=True,
        sampler=lambda data_source: torch.utils.data.SequentialSampler(
            data_source=data_source),
        torch_datasets_chunk_size=75000)

    m.start_training()
if __name__ == "__main__":
    dl = DataloaderImages((149, 117))

    checkpoint_p = r.chkp_S1140_to_ff_0_basepr
    adv_output_dir = checkpoint_p.parent / "advanced_eval"

    m = ModelTrainer(
        lambda: DeconvModelEfficient(),
        data_source_paths=r.get_data_paths_base_0(),
        save_path=r.save_path,
        dataset_split_path=r.dataset_split,
        cache_path=r.cache_path,
        batch_size=2048,
        train_print_frequency=10,
        epochs=1000,
        num_workers=75,
        num_validation_samples=131072,
        num_test_samples=1048576,
        data_processing_function=dl.get_sensordata_and_flowfront,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.MSELoss(),
        optimizer_function=lambda params: torch.optim.AdamW(params, lr=0.0001),
        classification_evaluator_function=lambda: SensorToFlowfrontEvaluator())

    adv_output_dir.mkdir(exist_ok=True)
    m.inference_on_test_set(
        output_path=adv_output_dir,
        checkpoint_path=checkpoint_p,
        classification_evaluator_function=lambda: SensorToFlowfrontEvaluator(
            adv_output_dir, skip_images=False, print_n_images=5000))
Beispiel #17
0
    m = ModelTrainer(
        lambda: Bumblebee2(),
        dataset_paths,
        r.save_path,
        cache_path=r.cache_path,
        batch_size=batch_size,
        epochs=150,
        num_workers=num_workers,
        num_validation_samples=num_val,
        num_test_samples=num_test,
        data_processing_function=dl.get_flowfront_to_flowfront,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.MSELoss(),
        data_root=data_root,
        classification_evaluator_function=lambda: None,
        dummy_epoch=False)

    if not args.eval:
        m.start_training()
    else:
        m.inference_on_test_set(
            Path(args.eval),
            Path(args.checkpoint_path),
            lambda summary_writer: SensorToFlowfrontEvaluator(
                summary_writer=summary_writer,
                save_path=Path(args.eval) / "eval_on_test_set",
                skip_images=False,
                ignore_inp=True,
            ),
        )
Beispiel #18
0
                                         round_at=.8),
        data_source_paths=r.get_data_paths_base_0(),
        save_path=r.save_path,
        load_datasets_path=r.datasets_dryspots,
        cache_path=r.cache_path,
        batch_size=2048,
        train_print_frequency=100,
        epochs=1000,
        num_workers=75,
        num_validation_samples=131072,
        num_test_samples=1048576,
        data_processing_function=dl.get_sensordata_and_flowfront,
        data_gather_function=get_filelist_within_folder_blacklisted,
        loss_criterion=torch.nn.MSELoss(),
        optimizer_function=lambda params: torch.optim.AdamW(params, lr=0.0001),
        classification_evaluator_function=lambda summary_writer:
        SensorToFlowfrontEvaluator(summary_writer=summary_writer),
    )

    output_path = r.chkp_S80_to_ff2.parent
    m.inference_on_test_set(
        output_path / "eval_on_test_set_rounded.5",
        r.chkp_S80_to_ff2,
        lambda summary_writer: SensorToFlowfrontEvaluator(
            output_path / "eval_on_test_set_rounded.5",
            skip_images=False,
            sensors_shape=(10, 8),
            print_n_images=5000
        ),
    )