Exemple #1
0
    def test_model_ckpt_using_pointnet2ms(self,):
        # Create a checkpt
        name = "model"
        self.run_path = os.path.join(DIR, "checkpt")
        print(self.run_path)
        if not os.path.exists(self.run_path):
            os.makedirs(self.run_path)

        model_checkpoint = ModelCheckpoint(self.run_path, name, "test", run_config=self.config, resume=False)
        dataset = MockDatasetGeometric(5)
        model = instantiate_model(self.config, dataset)
        model.set_input(dataset[0], "cpu")
        model.instantiate_optimizers(self.config)

        mock_metrics = {"current_metrics": {"acc": 12}, "stage": "test", "epoch": 10}
        model_checkpoint.save_best_models_under_current_metrics(model, mock_metrics)

        # Load checkpoint and initialize model
        model_checkpoint = ModelCheckpoint(self.run_path, name, "test", self.config, resume=True)
        model2 = model_checkpoint.create_model(dataset, weight_name="acc")

        self.assertEqual(str(model.optimizer.__class__.__name__), str(model2.optimizer.__class__.__name__))
        self.assertEqual(model.optimizer.defaults, model2.optimizer.defaults)
        self.assertEqual(model.schedulers["lr_scheduler"].state_dict(), model2.schedulers["lr_scheduler"].state_dict())
        self.assertEqual(model.schedulers["bn_scheduler"].state_dict(), model2.schedulers["bn_scheduler"].state_dict())

        shutil.rmtree(self.run_path)
        remove(os.path.join(ROOT, "{}.pt".format(name)))
        remove(os.path.join(DIR, "{}.pt".format(name)))
Exemple #2
0
def main(cfg):
    OmegaConf.set_struct(cfg, False)

    # Get device
    device = torch.device("cuda" if (torch.cuda.is_available() and cfg.cuda) else "cpu")
    log.info("DEVICE : {}".format(device))

    # Enable CUDNN BACKEND
    torch.backends.cudnn.enabled = cfg.enable_cudnn

    # Checkpoint
    checkpoint = ModelCheckpoint(cfg.checkpoint_dir, cfg.model_name, cfg.weight_name, strict=True)

    # Create model and datasets
    dataset = instantiate_dataset(checkpoint.data_config)
    model = checkpoint.create_model(dataset, weight_name=cfg.weight_name)
    log.info(model)
    log.info("Model size = %i", sum(param.numel() for param in model.parameters() if param.requires_grad))

    # Set dataloaders
    dataset.create_dataloaders(
        model, cfg.batch_size, cfg.shuffle, cfg.num_workers, cfg.precompute_multi_scale,
    )
    log.info(dataset)

    model.eval()
    if cfg.enable_dropout:
        model.enable_dropout_in_eval()
    model = model.to(device)

    tracker: BaseTracker = dataset.get_tracker(model, dataset, False, False)

    # Run training / evaluation
    run(cfg, model, dataset, device, tracker, checkpoint)
def main(cfg):
    OmegaConf.set_struct(cfg, False)

    # Get device
    device = torch.device("cuda" if (
        torch.cuda.is_available() and cfg.cuda) else "cpu")
    log.info("DEVICE : {}".format(device))

    # Enable CUDNN BACKEND
    torch.backends.cudnn.enabled = cfg.enable_cudnn

    # Checkpoint
    checkpoint = ModelCheckpoint(cfg.checkpoint_dir,
                                 cfg.model_name,
                                 cfg.weight_name,
                                 strict=True)

    # Setup the dataset config
    # Generic config
    train_dataset_cls = get_dataset_class(checkpoint.data_config)
    setattr(checkpoint.data_config, "class", train_dataset_cls.FORWARD_CLASS)
    setattr(checkpoint.data_config, "dataroot", cfg.input_path)

    # Datset specific configs
    if cfg.data:
        for key, value in cfg.data.items():
            checkpoint.data_config.update(key, value)

    # Create dataset and mdoel
    dataset = instantiate_dataset(checkpoint.data_config)
    model = checkpoint.create_model(dataset, weight_name=cfg.weight_name)
    log.info(model)
    log.info(
        "Model size = %i",
        sum(param.numel() for param in model.parameters()
            if param.requires_grad))

    # Set dataloaders
    dataset.create_dataloaders(
        model,
        cfg.batch_size,
        cfg.shuffle,
        cfg.num_workers,
        False,
    )
    log.info(dataset)

    model.eval()
    if cfg.enable_dropout:
        model.enable_dropout_in_eval()
    model = model.to(device)

    # Run training / evaluation
    if not os.path.exists(cfg.output_path):
        os.makedirs(cfg.output_path)

    run(model, dataset, device, cfg.output_path)
def main(cfg):
    OmegaConf.set_struct(cfg, False)

    # Get device
    device = torch.device("cuda" if (
        torch.cuda.is_available() and cfg.cuda) else "cpu")
    log.info("DEVICE : {}".format(device))

    # Enable CUDNN BACKEND
    torch.backends.cudnn.enabled = cfg.enable_cudnn

    # Checkpoint
    checkpoint = ModelCheckpoint(cfg.checkpoint_dir,
                                 cfg.model_name,
                                 cfg.weight_name,
                                 strict=True)

    # Setup the dataset config
    # Generic config

    dataset = instantiate_dataset(cfg.data)
    model = checkpoint.create_model(dataset, weight_name=cfg.weight_name)
    log.info(model)
    log.info(
        "Model size = %i",
        sum(param.numel() for param in model.parameters()
            if param.requires_grad))

    log.info(dataset)

    model.eval()
    if cfg.enable_dropout:
        model.enable_dropout_in_eval()
    model = model.to(device)

    # Run training / evaluation
    output_path = os.path.join(cfg.checkpoint_dir, cfg.data.name, "features")
    if not os.path.exists(output_path):
        os.makedirs(output_path, exist_ok=True)

    run(model, dataset, device, output_path, cfg)
Exemple #5
0
def main(cfg):
    OmegaConf.set_struct(
        cfg,
        False)  # This allows getattr and hasattr methods to function correctly
    if cfg.pretty_print:
        print(cfg.pretty())

    # Get device
    device = torch.device("cuda" if (
        torch.cuda.is_available() and cfg.training.cuda) else "cpu")
    log.info("DEVICE : {}".format(device))

    # Enable CUDNN BACKEND
    torch.backends.cudnn.enabled = cfg.training.enable_cudnn

    # Start Wandb if public
    launch_wandb(cfg, cfg.wandb.public and cfg.wandb.log)

    # Checkpoint
    checkpoint = ModelCheckpoint(
        cfg.training.checkpoint_dir,
        cfg.model_name,
        cfg.training.weight_name,
        run_config=cfg,
        resume=bool(cfg.training.checkpoint_dir),
    )

    # Create model and datasets
    if not checkpoint.is_empty:
        dataset = instantiate_dataset(checkpoint.data_config)
        model = checkpoint.create_model(dataset,
                                        weight_name=cfg.training.weight_name)
    else:
        dataset = instantiate_dataset(cfg.data)
        model = instantiate_model(cfg, dataset)
        model.instantiate_optimizers(cfg)
    log.info(model)
    model.log_optimizers()
    log.info(
        "Model size = %i",
        sum(param.numel() for param in model.parameters()
            if param.requires_grad))

    # Set dataloaders
    dataset.create_dataloaders(
        model,
        cfg.training.batch_size,
        cfg.training.shuffle,
        cfg.training.num_workers,
        cfg.training.precompute_multi_scale,
    )
    log.info(dataset)

    # Choose selection stage
    selection_stage = getattr(cfg, "selection_stage", "")
    checkpoint.selection_stage = dataset.resolve_saving_stage(selection_stage)
    tracker: BaseTracker = dataset.get_tracker(model, dataset, cfg.wandb.log,
                                               cfg.tensorboard.log)

    launch_wandb(cfg, not cfg.wandb.public and cfg.wandb.log)

    # Run training / evaluation
    model = model.to(device)
    visualizer = Visualizer(cfg.visualization, dataset.num_batches,
                            dataset.batch_size, os.getcwd())
    run(cfg, model, dataset, device, tracker, checkpoint, visualizer)