def test_pytorch_lightning_pruning_callback_monitor_is_invalid() -> None:

    study = optuna.create_study(pruner=DeterministicPruner(True))
    trial = study.ask()
    callback = PyTorchLightningPruningCallback(trial, "InvalidMonitor")

    trainer = pl.Trainer(
        max_epochs=1,
        enable_checkpointing=False,
        callbacks=[callback],
    )
    model = Model()

    with pytest.warns(UserWarning):
        callback.on_validation_end(trainer, model)
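
The DeterministicPruner used in this test is a helper that is not shown on this page. A minimal sketch consistent with how it is called (a pruner that always returns the flag it was constructed with) might look like this:

class DeterministicPruner(optuna.pruners.BasePruner):
    # Assumed test helper: prune() unconditionally returns the constructor
    # flag, so a study either always prunes or never does.
    def __init__(self, is_pruning: bool) -> None:
        self.is_pruning = is_pruning

    def prune(self, study: optuna.study.Study, trial: optuna.trial.FrozenTrial) -> bool:
        return self.is_pruning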
Example #2
    def __call__(self, trial: optuna.trial.Trial) -> float:
        data = TreeDataModule(
            self._filename,
            batch_size=trial.suggest_int("batch_size", 32, 160, 32),
        )
        kwargs = {
            "lstm_size": trial.suggest_categorical("lstm_size", [512, 1024, 2048]),
            "dropout_prob": trial.suggest_float("dropout", 0.1, 0.5, step=0.1),
            "learning_rate": trial.suggest_float("lr", 1e-3, 1e-1, log=True),
            "weight_decay": trial.suggest_float("weight_decay", 1e-3, 1e-1, log=True),
        }
        model = RouteDistanceModel(**kwargs)

        gpus = int(torch.cuda.is_available())
        pruning_callback = PyTorchLightningPruningCallback(
            trial, monitor="val_monitor")
        trainer = Trainer(
            gpus=gpus,
            logger=True,  # True defaults to a TensorBoard logger
            checkpoint_callback=False,
            callbacks=[pruning_callback],  # type: ignore
            max_epochs=EPOCHS,
        )
        trainer.fit(model, datamodule=data)
        return trainer.callback_metrics["val_monitor"].item()
Example #3
def objective(trial: optuna.trial.Trial) -> float:

    # We optimize the number of layers, hidden units in each layer and dropouts.
    n_layers = trial.suggest_int("n_layers", 1, 3)
    dropout = trial.suggest_float("dropout", 0.2, 0.5)
    output_dims = [
        trial.suggest_int("n_units_l{}".format(i), 4, 128, log=True)
        for i in range(n_layers)
    ]

    model = LightningNet(dropout, output_dims)
    datamodule = MNISTDataModule(data_dir=DIR, batch_size=BATCHSIZE)

    trainer = pl.Trainer(
        logger=True,
        limit_val_batches=PERCENT_VALID_EXAMPLES,
        checkpoint_callback=False,
        max_epochs=EPOCHS,
        gpus=-1 if torch.cuda.is_available() else None,
        callbacks=[PyTorchLightningPruningCallback(trial, monitor="val_acc")],
    )
    hyperparameters = dict(n_layers=n_layers,
                           dropout=dropout,
                           output_dims=output_dims)
    trainer.logger.log_hyperparams(hyperparameters)
    trainer.fit(model, datamodule=datamodule)

    return trainer.callback_metrics["val_acc"].item()
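
An objective like the one above is then handed to Optuna's optimizer. A minimal driver, assuming the objective and constants defined above, could look like:

if __name__ == "__main__":
    # "val_acc" is a score to maximize; the pruner choice here is illustrative.
    study = optuna.create_study(
        direction="maximize",
        pruner=optuna.pruners.MedianPruner(),
    )
    study.optimize(objective, n_trials=100, timeout=600)
    print("Best trial:", study.best_trial.params)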
Example #4
def objective(trial: optuna.Trial):
    # Filenames for each trial must be made unique in order to access each checkpoint.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join(MODEL_DIR, "trial_{}".format(trial.number), "{epoch}"),
        monitor="val_acc",
    )

    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We don't use any logger here as it requires us to implement several abstract
    # methods. Instead we set up a simple callback that saves metrics from each validation step.
    metrics_callback = MetricsCallback()
    trainer = pl.Trainer(logger=False,
                         checkpoint_callback=checkpoint_callback,
                         max_epochs=50,
                         gpus=args.gpu if torch.cuda.is_available() else None,
                         callbacks=[metrics_callback],
                         early_stop_callback=PyTorchLightningPruningCallback(
                             trial, monitor="val_acc"))

    model = LeNet5(trial)
    bsz = trial.suggest_int("bsz", 32, 128, 32)
    train_loader = DataLoader(data_train, batch_size=bsz, shuffle=True)
    val_loader = DataLoader(data_val, batch_size=1)
    trainer.fit(model,
                train_dataloader=train_loader,
                val_dataloaders=val_loader)

    return metrics_callback.metrics[-1]["val_acc"]
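
Several examples on this page instantiate a MetricsCallback whose definition is not shown. Judging from how it is read back (metrics_callback.metrics[-1]["val_acc"]), a minimal sketch would be:

import pytorch_lightning as pl


class MetricsCallback(pl.Callback):
    # Assumed helper: snapshots trainer.callback_metrics after every
    # validation run so the objective can read the final values.
    def __init__(self):
        super().__init__()
        self.metrics = []

    def on_validation_end(self, trainer, pl_module):
        self.metrics.append(trainer.callback_metrics)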
Example #5
def test_pytorch_lightning_pruning_callback_monitor_is_invalid() -> None:

    study = optuna.create_study(pruner=DeterministicPruner(True))
    trial = create_running_trial(study, 1.0)
    callback = PyTorchLightningPruningCallback(trial, "InvalidMonitor")

    trainer = pl.Trainer(
        min_epochs=0,  # Required to fire the callback after the first epoch.
        max_epochs=1,
        checkpoint_callback=False,
        callbacks=[callback],
    )
    model = Model()

    with pytest.warns(UserWarning):
        callback.on_validation_end(trainer, model)
Example #6
def objective(trial):

    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join(MODEL_DIR, "trial_{}".format(trial.number)),
        monitor="accuracy",
    )

    trainer = pl.Trainer(
        auto_select_gpus=True,
        gpus=1,
        precision=16,
        profiler=False,
        max_epochs=1,
        checkpoint_callback=checkpoint_callback,  # attach the per-trial checkpoint created above
        callbacks=[
            pl.callbacks.ProgressBar(),
            PyTorchLightningPruningCallback(trial, monitor="val_acc"),
        ],
        automatic_optimization=True,
        enable_pl_optimizer=True,
        logger=logger,
        accelerator="ddp",
        plugins="ddp_sharded",
    )

    model = LitModel(trial, num_classes=num_classes)
    dm = ImDataModule(trial,
                      df,
                      batch_size=batch_size,
                      num_classes=num_classes,
                      img_size=img_size)
    trainer.fit(model, dm)

    return trainer.callback_metrics["val_acc_step"].item()
Example #7
def objective(trial):

    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join('checkpoints', "trial_{}".format(trial.number)),
        monitor="val_loss",
    )

    metrics_callback = MetricsCallback()
    run_name = create_run_name(args)
    logger = TensorBoardLogger(save_dir='runs_pl_temp/', name=run_name)

    trainer = pl.Trainer(
        gpus=1,
        logger=logger,
        max_epochs=args.max_epochs,
        gradient_clip_val=trial.suggest_uniform("clip", 0.1, 0.9),
        checkpoint_callback=checkpoint_callback,
        early_stop_callback=PyTorchLightningPruningCallback(
            trial, monitor="val_loss"),
        callbacks=[metrics_callback],
    )

    mlp = MLPGenreClassifierModel(args, trial)
    trainer.fit(mlp)

    return metrics_callback.metrics[-1]["val_loss"].item()
Example #8
def optimize(trial: optuna.Trial, data_dict):
    gts = PurgedGroupTimeSeriesSplit(n_splits=5, group_gap=5)
    input_size = data_dict['data'].shape[-1]
    output_size = 1
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join('models/', "trial_resnet_{}".format(trial.number)), monitor="val_mse", mode='min')
    logger = MetricsCallback()
    metrics = []
    sizes = []
    # trial_file = 'HPO/nn_hpo_2021-01-05.pkl'
    trial_file = None
    p = create_param_dict(trial, trial_file)
    p['batch_size'] = trial.suggest_int('batch_size', 8000, 15000)
    for i, (train_idx, val_idx) in enumerate(gts.split(data_dict['data'], groups=data_dict['era'])):
        model = Classifier(input_size, output_size, params=p)
        # model.apply(init_weights)
        dataset = FinData(
            data=data_dict['data'], target=data_dict['target'], era=data_dict['era'])
        dataloaders = create_dataloaders(
            dataset, indexes={'train': train_idx, 'val': val_idx}, batch_size=p['batch_size'])
        es = EarlyStopping(monitor='val_mse', patience=10,
                           min_delta=0.0005, mode='min')
        trainer = pl.Trainer(logger=False,
                             max_epochs=500,
                             gpus=1,
                             callbacks=[checkpoint_callback, logger, PyTorchLightningPruningCallback(
                                 trial, monitor='val_mse'), es],
                             precision=16)
        trainer.fit(
            model, train_dataloader=dataloaders['train'], val_dataloaders=dataloaders['val'])
        val_loss = logger.metrics[-1]['val_loss'].item()
        metrics.append(val_loss)
        sizes.append(len(train_idx))
    metrics_mean = weighted_mean(metrics, sizes)
    return metrics_mean
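
The weighted_mean helper used above is not defined on this page; a plausible sketch matching its call site (per-fold validation losses weighted by training-set size) is:

def weighted_mean(values, weights):
    # Weighted average: here, fold losses weighted by the number of training rows.
    return sum(v * w for v, w in zip(values, weights)) / sum(weights)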
Example #9
    def __call__(self, trial):
        # Filenames for each trial must be made unique in order to access each checkpoint.
        ckpt_path = os.path.join(self.hparams.output,
                                 trial.study.study_name,
                                 "trial_{}".format(trial.number),
                                 "{epoch:03d}")
        checkpoint_callback = pl.callbacks.ModelCheckpoint(ckpt_path, monitor=self.monitor_metric)

        # set hyperparameters under optimization
        hparams = copy.copy(self.hparams)
        for k, v in self.get_hparams(trial).items():
            setattr(hparams, k, v)
        model = self.model_cls(hparams)
        model.set_dataset(self.dataset)

        # The default logger in PyTorch Lightning writes to event files to be consumed by
        # TensorBoard. We don't use any logger here as it requires us to implement several abstract
        # methods. Instead we set up a simple callback that saves metrics from each validation step.
        metrics_callback = MetricsCallback()

        # set up arguments required for integrating with optuna
        _targs = dict(
            logger=False,
            checkpoint_callback=checkpoint_callback,
            callbacks=[metrics_callback],
            early_stop_callback=PyTorchLightningPruningCallback(trial, monitor=self.monitor_metric),
        )
        _targs.update(self.targs)

        trainer = pl.Trainer(**_targs)
        trainer.fit(model)

        return metrics_callback.metrics[-1][self.monitor_metric]
Example #10
def objective(trial):
    # PyTorch Lightning will try to restore model parameters from previous trials if checkpoint
    # filenames match. Therefore, the filenames for each trial must be made unique.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join(MODEL_DIR, 'trial_{}'.format(trial.number)),
        save_best_only=False,
    )

    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We create a simple logger instead that holds the log in memory so that the
    # final accuracy can be obtained after optimization. When using the default logger, the
    # final accuracy could be stored in an attribute of the `Trainer` instead.
    logger = DictLogger(trial.number)

    trainer = pl.Trainer(logger=logger,
                         val_percent_check=PERCENT_TEST_EXAMPLES,
                         checkpoint_callback=checkpoint_callback,
                         max_nb_epochs=EPOCHS,
                         gpus=1 if torch.cuda.is_available() else None,  # use one GPU when available
                         early_stop_callback=PyTorchLightningPruningCallback(
                             trial, monitor='accuracy'))

    model = LightningNet(trial)
    trainer.fit(model)

    return logger.metrics[-1]['accuracy']
Example #11
def create_trainer(cfg, tags=None, trial=None, callbacks=None):
    if trial:
        checkpoint_callback = pl.callbacks.ModelCheckpoint(
            f'trial#{trial.number}')
        new_callbacks = [PyTorchLightningPruningCallback(trial, 'val_loss')]
        if callbacks:
            new_callbacks.extend(callbacks)

        trainer = pl.Trainer(logger=False,
                             callbacks=new_callbacks,
                             checkpoint_callback=checkpoint_callback,
                             max_epochs=400,
                             progress_bar_refresh_rate=0,
                             weights_summary=None)
    else:
        trainer = pl.Trainer(
            logger=NeptuneLogger(project_name='yoniosin/amygdala',
                                 tags=tags,
                                 params=flatten(cfg, reducer='path')),
            max_epochs=cfg.learner.max_epochs,
            # callbacks=[pl.callbacks.EarlyStopping('val_loss', patience=200)]
            # fast_dev_run=True
        )

    return trainer
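
One way to wire the tuning branch of create_trainer into an Optuna study (a sketch; cfg and the model come from the surrounding project, which is not shown here):

def objective(trial):
    trainer = create_trainer(cfg, trial=trial)
    trainer.fit(model)  # model construction omitted
    return trainer.callback_metrics['val_loss'].item()

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)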
Example #12
def objective(trial):
    # Filenames for each trial must be made unique in order to access each checkpoint.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join(MODEL_DIR, "trial_{}".format(trial.number), "{epoch}"),
        monitor="val_acc",
    )

    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We don't use any logger here as it requires us to implement several abstract
    # methods. Instead we set up a simple callback that saves metrics from each validation step.
    metrics_callback = MetricsCallback()
    trainer = pl.Trainer(
        logger=False,
        limit_val_batches=PERCENT_VALID_EXAMPLES,
        checkpoint_callback=checkpoint_callback,
        max_epochs=EPOCHS,
        gpus=1 if torch.cuda.is_available() else None,
        callbacks=[
            metrics_callback,
            PyTorchLightningPruningCallback(trial, monitor="val_acc")
        ],
    )

    model = LightningNet(trial)
    trainer.fit(model)

    return metrics_callback.metrics[-1]["val_acc"].item()
Example #13
def objective(trial, args):
    params = get_trial_params(trial)
    params['hidden_size'] = 2**params['hidden_size']
    params['acc_grads'] = 2**params['acc_grads']

    early_stopper = EarlyStopping(
        monitor='val_loss', min_delta=0.005, patience=3, mode='min')
    callbacks = [early_stopper, PyTorchLightningPruningCallback(
        trial, monitor="val_loss")]

    if args.model_type == 'attnlstm':
        params['attn_width'] = trial.suggest_int("attn_width", 3, 64)

    if 'split' in args.val_mode:
        dataset_hour = args.data.split('_')[-1]
        logger = MLFlowLogger(experiment_name=f'Optuna_{dataset_hour}h_{args.val_mode[-1]}_split')
        print(f'Optuna_{dataset_hour}_{args.val_mode[-1]}_split')
        val_losses = []
        for _split_id in range(int(args.val_mode[-1])):
            print(f"Split {_split_id} Trial {trial.number}")
            args.__dict__["split_id"] = 0
            for key in params:
                args.__dict__[str(key)] = params.get(key)
            model = LitLSTM(args)
            trainer = Trainer(
                logger=logger,
                callbacks=callbacks,
                **get_trainer_params(args),
            )
            logger.log_hyperparams(model.args)
            args.__dict__["val_mode"] = args.val_mode
            args.__dict__["split_id"] = _split_id
            model._get_data(args, data_mode='init')
            trainer.fit(model)
            trainer.test(model, test_dataloaders=model.test_dataloader())
            # logger.finalize()
            val_losses.append(model.metrics['val_loss'])

        # log mean val loss for later retrieval of best model
        mean_val_loss = torch.stack(val_losses).mean().item()  # plain float for the logger and for Optuna
        logger.log_metrics({"mean_val_loss": mean_val_loss}, step=0)
        logger.finalize()
        return mean_val_loss

    elif args.val_mode == 'full':
        logger = MLFlowLogger(experiment_name='Optuna_full')
        for key in params:
            args.__dict__[str(key)] = params.get(key)
        model = LitLSTM(args)
        trainer = Trainer(
            logger=logger,
            callbacks=callbacks,
            **get_trainer_params(args),
        )
        logger.log_hyperparams(model.args)
        trainer.fit(model)
        trainer.test(model, test_dataloaders=model.test_dataloader())
        model.save_preds_and_targets(to_disk=True)
        logger.finalize()
        return model.metrics['val_loss']
Example #14
def build_trainer(run_config, hyperparameters, trial=None):
    '''
    Set up the Optuna trainer.
    '''

    if 'progress_bar_refresh_rate' in hyperparameters:
        p_refresh = hyperparameters['progress_bar_refresh_rate']
    else:
        p_refresh = 5

    # set epochs, gpus, gradient clipping, etc.
    # if 'no_gpu' is in run_config, then use the CPU
    trainer_kwargs = {
        'max_epochs': hyperparameters['max_epochs'],
        "gpus": 0 if 'no_gpu' in run_config else 1,
        "num_sanity_val_steps": 0,
        "progress_bar_refresh_rate": p_refresh,
        "gradient_clip_val": hyperparameters['grad_clip']
    }

    # set auto learning rate finder param
    if 'auto_lr_find' in hyperparameters and hyperparameters['auto_lr_find']:
        trainer_kwargs['auto_lr_find'] = hyperparameters['auto_lr_find']

    # Create tensorboard logger
    lgdir = os.path.join(run_config['tb']['dir_full'],
                         run_config['tb']['name'])
    if not os.path.exists(lgdir):
        os.makedirs(lgdir)
    logger = TensorBoardLogger(run_config['tb']['dir_full'],
                               name=run_config['tb']['name'],
                               version="version_" + str(random.randint(0, 10000000)))
    if not os.path.exists(logger.log_dir):
        os.makedirs(logger.log_dir)
    print("Tensorboard logging at ", logger.log_dir)
    trainer_kwargs["logger"] = logger

    # Save top three model checkpoints
    trainer_kwargs["checkpoint_callback"] = ModelCheckpoint(
        filepath=os.path.join(
            logger.log_dir,
            "{epoch}-{val_micro_f1:.2f}-{val_acc:.2f}-{val_auroc:.2f}"),
        save_top_k=3,
        verbose=True,
        monitor=run_config['optuna']['monitor_metric'],
        mode='max')

    # If pruning is enabled, attach the PyTorch Lightning pruning callback.
    if run_config["optuna"]['pruning']:
        trainer_kwargs['early_stop_callback'] = PyTorchLightningPruningCallback(
            trial, monitor=run_config['optuna']['monitor_metric'])

    trainer = pl.Trainer(**trainer_kwargs)

    return trainer, trainer_kwargs, logger.log_dir
Example #15
def objective(trial, **kwargs):
    # # Categorical parameter
    # optimizer = trial.suggest_categorical('optimizer', ['MomentumSGD', 'Adam'])
    # # Int parameter
    # num_layers = trial.suggest_int('num_layers', 1, 3)
    # # Uniform parameter
    dropout_prob = trial.suggest_uniform('dropout_prob', 0.0, 1.0)
    # # Loguniform parameter
    # learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
    # # Discrete-uniform parameter
    # drop_path_rate = trial.suggest_discrete_uniform('drop_path_rate', 0.0, 1.0, 0.01)
    print("dropout_prob: {}".format(dropout_prob))
    kwargs.update(dropout_prob=dropout_prob)

    # Filenames for each trial must be made unique in order to access each checkpoint.
    # checkpoint_callback = pl.callbacks.ModelCheckpoint(
    #     os.path.join(MODEL_DIR, "trial_{}".format(trial.number), "{epoch}"), monitor="val_acc"
    # )

    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We don't use any logger here as it requires us to implement several abstract
    # methods. Instead we set up a simple callback that saves metrics from each validation step.
    metrics_callback = MetricsCallback()
    """ Main training routine specific for this project. """
    # ------------------------
    # 1 INIT a model and the LIGHTNING Experiment class
    # ------------------------
    model = MLP(**kwargs)
    experiment = ImageClassificationExperiment(model=model, **kwargs)

    # ------------------------
    # 2 INIT TRAINER
    # ------------------------
    kwargs.update({
        "logger": False,
        # "checkpoint_callback": checkpoint_callback,
        "callbacks": [metrics_callback],
        "early_stop_callback": PyTorchLightningPruningCallback(trial, monitor="val_loss"),
    })

    valid_kwargs = inspect.signature(pl.Trainer.__init__).parameters
    trainer_kwargs = dict(
        (name, kwargs[name]) for name in valid_kwargs if name in kwargs)

    trainer = pl.Trainer(**trainer_kwargs)
    # ------------------------
    # 3 START TRAINING
    # ------------------------

    trainer.fit(experiment)

    return metrics_callback.metrics[-1]["val_loss"].item()
Example #16
def get_callbacks(trial):
    metrics_callback = MetricsCallback()
    early_stop_callback = pl.callbacks.EarlyStopping(monitor='val_R%_@1',
                                                     patience=5,
                                                     strict=True,
                                                     verbose=False,
                                                     mode='max')
    return [
        metrics_callback, early_stop_callback,
        PyTorchLightningPruningCallback(trial, monitor="val_R%_@1")
    ]
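
The returned list plugs straight into the Trainer, e.g. (a sketch; max_epochs and the model are illustrative):

trainer = pl.Trainer(max_epochs=50, callbacks=get_callbacks(trial))
trainer.fit(model)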
Example #17
def objective(trial):
    config = Module.Config()

    config.output_dir = None

    module = Module(config)
    # Filenames for each trial must be made unique in order to access each
    # checkpoint.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join("TODO", "trial_{}".format(trial.number), "{epoch}"),
        monitor="val_acc",
    )

    # A simple callback records the metrics from each validation step so the
    # final value can be returned after training.
    metrics_callback = MetricsCallback()

    logger = CustomLogger(
        save_dir=config.training_config.output_dir,
        name=config.training_config.experiment_name,
        version=f"seed={config.training_config.seed}",
    )

    period = max(1, config.training_config.n_epochs // 5)
    trainer = pl.Trainer(
        logger=logger,
        checkpoint_callback=checkpoint_callback,
        gpus=1,
        gradient_clip_val=50.0,
        max_epochs=config.training_config.n_epochs,
        check_val_every_n_epoch=period,
        num_sanity_val_steps=0,
        callbacks=[metrics_callback],
        early_stop_callback=PyTorchLightningPruningCallback(
            trial, monitor="val_acc"),
    )

    trainer.fit(module)

    return metrics_callback.metrics[-1]["val_acc"].item()
Example #18
    def objective(trial: optuna.trial.Trial) -> float:

        trainer = pl.Trainer(
            max_epochs=2,
            enable_checkpointing=False,
            callbacks=[PyTorchLightningPruningCallback(trial, monitor="accuracy")],
        )

        model = Model()
        trainer.fit(model)

        return 1.0
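
For reference, the callback used throughout these examples boils down to reporting the monitored metric after each validation pass and aborting the trial when the pruner fires. A simplified sketch of that behavior (not optuna's exact source):

import warnings

import optuna
import pytorch_lightning as pl


class SimplifiedPruningCallback(pl.Callback):
    def __init__(self, trial: optuna.trial.Trial, monitor: str) -> None:
        self._trial = trial
        self.monitor = monitor

    def on_validation_end(self, trainer, pl_module):
        score = trainer.callback_metrics.get(self.monitor)
        if score is None:
            # This is the invalid-monitor case the tests above expect a UserWarning for.
            warnings.warn("Metric '{}' was not found.".format(self.monitor))
            return
        epoch = trainer.current_epoch
        self._trial.report(float(score), step=epoch)
        if self._trial.should_prune():
            raise optuna.TrialPruned("Trial was pruned at epoch {}.".format(epoch))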
Example #19
def objective(trial):
    """
    Optuna function to optimize
    
    See https://github.com/optuna/optuna/blob/master/examples/pytorch_lightning_simple.py
    """
    # sample
    hidden_size_exp = trial.suggest_int("hidden_size_exp", 1, 8)
    hidden_size = 2**hidden_size_exp

    layers = trial.suggest_int("layers", 1, 12)

    # Load model
    pt_model = m_fn(xs, ys, hidden_size, layers)
    model_name = type(pt_model).__name__

    # Wrap in lightning
    patience = 2
    model = PL_MODEL(
        pt_model,
        lr=3e-4,
        patience=patience,
    ).to(device)

    save_dir = f"../outputs/{timestamp}/{dataset_name}_{model_name}/{trial.number}"
    Path(save_dir).mkdir(exist_ok=True, parents=True)
    trainer = pl.Trainer(
        # Training length
        min_epochs=2,
        max_epochs=40,
        limit_train_batches=max_iters // batch_size,
        limit_val_batches=max_iters // batch_size // 5,
        # Misc
        gradient_clip_val=20,
        terminate_on_nan=True,
        # GPU
        gpus=1,
        amp_level='O1',
        precision=16,
        # Callbacks
        default_root_dir=save_dir,
        logger=False,
        callbacks=[
            EarlyStopping(monitor='loss/val', patience=patience * 2),
            PyTorchLightningPruningCallback(trial, monitor="loss/val")
        ],
    )
    trainer.fit(model, dl_train, dl_val)

    # Run on all val data, using test mode
    r = trainer.test(model, test_dataloaders=dl_val, verbose=False)
    return r[0]['loss/test']
Example #20
def objective(trial):
    # sample
    hidden_size_exp = trial.suggest_int("hidden_size_exp", 2, 8)
    hidden_size = 2**hidden_size_exp
    
    layers = trial.suggest_int("layers", 2, 12)
    
    # Load model
    pt_model = m_fn(xs, ys, hidden_size, layers)
    model_name = type(pt_model).__name__
    
    # Wrap in lightning
    patience = 2
    model = PL_MODEL(
        pt_model,
        lr=3e-4,
        patience=patience,
        weight_decay=4e-5,
    ).to(device)

    
    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We don't use any logger here as it requires us to implement several abstract
    # methods. Instead we set up a simple callback that saves metrics from each validation step.
#     metrics_callback = MetricsCallback()
    
    save_dir = f"../outputs/{timestamp}/{dataset_name}_{model_name}/{trial.number}"
    Path(save_dir).mkdir(exist_ok=True, parents=True)
    trainer = pl.Trainer(
        # Training length
        min_epochs=2,
        max_epochs=100,
        limit_train_batches=max_iters//batch_size,
        limit_val_batches=max_iters//batch_size//5,
        # Misc
        gradient_clip_val=20,
        terminate_on_nan=True,
        # GPU
        gpus=1,
        amp_level='O1',
        precision=16,
        # Callbacks
        default_root_dir=save_dir,
        logger=False,
        callbacks=[
            # metrics_callback,
            EarlyStopping(monitor='loss/val', patience=patience * 2),
            PyTorchLightningPruningCallback(trial, monitor="loss/val"),
        ],
    )
    trainer.fit(model, dl_train, dl_val)
    
    # Run on all val data, using test mode
    r = trainer.test(model, test_dataloaders=dl_val, verbose=False)
    return r[0]['loss/test']
Example #21
    def objective(trial: optuna.trial.Trial) -> float:

        trainer = pl.Trainer(
            max_epochs=1,
            accelerator="ddp_cpu",
            num_processes=2,
            checkpoint_callback=False,
            callbacks=[PyTorchLightningPruningCallback(trial, monitor="accuracy")],
        )

        model = ModelDDP()
        trainer.fit(model)

        return 1.0
Example #22
    def objective(trial):
        # type: (optuna.trial.Trial) -> float

        trainer = pl.Trainer(
            early_stop_callback=PyTorchLightningPruningCallback(trial, monitor="accuracy"),
            min_epochs=0,  # Required to fire the callback after the first epoch.
            max_epochs=2,
            checkpoint_callback=False,
        )

        model = Model()
        trainer.fit(model)

        return 1.0
Example #23
    def __call__(self, trial):

        # The default logger in PyTorch Lightning writes to event files
        # to be consumed by TensorBoard. We don't use any logger here as
        # it requires us to implement several abstract methods. Instead
        # we set up a simple callback that saves metrics from each
        # validation step.
        metrics_callback = MetricsCallback()

        # Define parameters
        parameters = {
            'n_input': self.n_input,
            'n_classes': self.n_classes,
            'n_layers': trial.suggest_int('n_layers', *self.bounds['n_layers']),
            'dropout': trial.suggest_uniform('dropout', *self.bounds['dropout']),
            'batch_size': trial.suggest_int('batch_size', *self.bounds['batch_size']),
            'learning_rate': trial.suggest_float('learning_rate',
                                                 *self.bounds['learning_rate'],
                                                 log=True),
            'max_epochs': trial.suggest_int('max_epochs', *self.bounds['max_epochs']),
        }
        for i in range(parameters['n_layers']):
            parameters['n_units_l{}'.format(i)] = trial.suggest_int(
                'n_units_l{}'.format(i), *self.bounds['n_units_l'], log=True)

        # Construct trainer object and train
        trainer = Trainer(
            logger=False,
            checkpoint_callback=False,
            distributed_backend='dp',
            max_epochs=parameters['max_epochs'],
            gpus=-1 if self.use_gpu else None,
            callbacks=[metrics_callback],
            early_stop_callback=PyTorchLightningPruningCallback(
                trial, monitor="val_loss"),
        )

        model = LightningNet(parameters, self.data)
        trainer.fit(model)

        return metrics_callback.metrics[-1]["val_loss"]
Example #24
    def objective(trial):
        # type: (optuna.trial.Trial) -> float

        trainer = pl.Trainer(
            early_stop_callback=PyTorchLightningPruningCallback(
                trial, monitor='accuracy'),
            min_nb_epochs=0,  # Required to fire the callback after the first epoch.
            max_nb_epochs=2,
        )
        trainer.checkpoint_callback = None  # Disable unrelated checkpoint callbacks.

        model = Model()
        trainer.fit(model)

        return 1.0
Example #25
def objective(trial: Trial):
    model = EEGNetHPO(trial)
    data = EEGData()

    checkpoint_callback = pl.callbacks.ModelCheckpoint(f'trial#{trial.number}')
    metrics_callback = MetricCallback()
    trainer = pl.Trainer(
        logger=False,
        callbacks=[metrics_callback, PyTorchLightningPruningCallback(trial, 'val_loss')],
        checkpoint_callback=checkpoint_callback,
        max_epochs=400,
        progress_bar_refresh_rate=0,
        weights_summary=None
    )

    trainer.fit(model, datamodule=data)
    return metrics_callback.metric
Example #26
    def objective(trial: optuna.Trial):
        log_dir = os.path.join(args.log_dir, 'trial_{}'.format(trial.number))
        checkpoint_callback = pl.callbacks.ModelCheckpoint(dirpath=log_dir,
                                                           monitor='val_ce',
                                                           mode='min')

        data = SemEvalDataModule(path_train=args.path_train,
                                 path_val=args.path_val,
                                 batch_size=trial.suggest_categorical(
                                     'batch_size', choices=[16, 32, 64]),
                                 num_workers=args.workers)
        data.prepare_data()
        data.setup('fit')

        epochs = trial.suggest_categorical('epochs', choices=[3, 4, 5])
        lr_bert = trial.suggest_loguniform('lr_bert', 1e-6, 1e-4)
        lr_class = trial.suggest_loguniform('lr_class', 1e-5, 1e-3)
        weight_decay = trial.suggest_loguniform('weight_decay', 1e-3, 1e-1)

        total_steps = epochs * len(data.data_train)
        # max() guards against a zero divisor when args.gpus == 0 (CPU run).
        effective_steps = total_steps // (max(args.gpus, 1) * args.num_nodes *
                                          args.accumulate_grad_batches)

        model = SentBert(out_classes=3,
                         lr_bert=lr_bert,
                         lr_class=lr_class,
                         weight_decay=weight_decay,
                         train_steps=effective_steps)
        metrics_callback = MetricsCallback()
        pruning_callback = PyTorchLightningPruningCallback(trial,
                                                           monitor='val_ce')
        trainer = pl.Trainer.from_argparse_args(args,
                                                default_root_dir=args.log_dir,
                                                max_epochs=epochs,
                                                checkpoint_callback=True,
                                                accelerator='ddp',
                                                auto_select_gpus=True,
                                                num_sanity_val_steps=0,
                                                profiler='simple',
                                                callbacks=[
                                                    checkpoint_callback,
                                                    metrics_callback,
                                                    pruning_callback
                                                ])
        trainer.fit(model=model, datamodule=data)
        return metrics_callback.metrics[-1]['val_ce'].item()
Example #27
def objective_for_binary_unet(args, trial: optuna.trial.Trial):
    args.lr = trial.suggest_loguniform("lr", low=1e-5, high=1e-2)
    args.edge_weight = trial.suggest_uniform("edge_weight", low=1, high=5)
    args.wf = trial.suggest_int("wf", low=2, high=4)
    args.depth = trial.suggest_int("depth", low=4, high=6)

    pl_pruning_callback = PyTorchLightningPruningCallback(
        trial, "val/f1_score")
    ckpt_callback = train_binary_unet_model(args,
                                            callbacks=[pl_pruning_callback])

    best_f1_score = ckpt_callback.best_model_score.detach().cpu().numpy().item()
    trial.set_user_attr("best_val_f1", best_f1_score)
    trial.set_user_attr("best_model_path", ckpt_callback.best_model_path)

    return best_f1_score
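
The user attributes stored above survive the study, so the best checkpoint can be recovered after optimization (a sketch, assuming the enclosing study object):

best = study.best_trial
print(best.user_attrs["best_val_f1"])
print(best.user_attrs["best_model_path"])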
Example #28
def objective(trial: optuna.trial.Trial) -> float:

    dataset = wds.WebDataset("/run/media/jacob/data/FACT_Dataset/fact-gamma-10-{0000..0062}.tar").shuffle(20000).decode()
    dataset_2 = wds.WebDataset("/run/media/jacob/data/FACT_Dataset/fact-proton-10-{0000..0010}.tar").shuffle(20000).decode()
    test_dataset_2 = wds.WebDataset("/run/media/jacob/data/FACT_Dataset/fact-gamma-10-{0063..0072}.tar").decode()
    test_dataset = wds.WebDataset("/run/media/jacob/data/FACT_Dataset/fact-proton-10-{0011..0013}.tar").decode()
    dataset = SampleEqually([dataset, dataset_2])
    test_dataset = SampleEqually([test_dataset_2, test_dataset])

    train_loader = DataLoader(dataset, num_workers=16, batch_size=4, pin_memory=True)
    test_loader = DataLoader(test_dataset, num_workers=4, batch_size=1, pin_memory=True)

    # We optimize the sampling ratios and radii, the fully connected layer sizes, and the dropout.
    config = {
        "sample_ratio_one": trial.suggest_uniform("sample_ratio_one", 0.1, 0.9),
        "sample_radius_one": trial.suggest_uniform("sample_radius_one", 0.1, 0.9),
        "sample_max_neighbor": trial.suggest_int("sample_max_neighbor", 8, 72),
        "sample_ratio_two": trial.suggest_uniform("sample_ratio_two", 0.1, 0.9),
        "sample_radius_two": trial.suggest_uniform("sample_radius_two", 0.1, 0.9),
        "fc_1": trial.suggest_int("fc_1", 128, 256),
        "fc_1_out": trial.suggest_int("fc_1_out", 32, 128),
        "fc_2_out": trial.suggest_int("fc_2_out", 16, 96),
        "dropout": trial.suggest_uniform("dropout", 0.1, 0.9),
    }

    num_classes = 2
    import pytorch_lightning as pl
    model = LitPointNet2(num_classes, lr=0.0001, config=config)

    trainer = pl.Trainer(
        logger=True,
        limit_val_batches=10000,
        limit_train_batches=10000,
        checkpoint_callback=False,
        auto_lr_find=True,
        max_epochs=20,
        gpus=1,
        callbacks=[PyTorchLightningPruningCallback(trial, monitor="val/loss")],
    )
    trainer.logger.log_hyperparams(config)
    trainer.tune(model=model, train_dataloader=train_loader, val_dataloaders=test_loader)
    trainer.fit(model=model, train_dataloader=train_loader, val_dataloaders=test_loader)

    return trainer.callback_metrics["val/loss"].item()
Example #29
def objective(trial):
    # Filenames for each trial must be made unique in order to access each checkpoint.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join(MODEL_DIR, "trial_{}".format(trial.number), "{epoch}"), monitor="val_acc"
    )

    trainer = pl.Trainer(
        logger=False,
        limit_val_batches=PERCENT_VALID_EXAMPLES,
        checkpoint_callback=checkpoint_callback,
        max_epochs=EPOCHS,
        gpus=1 if torch.cuda.is_available() else None,
        callbacks=[PyTorchLightningPruningCallback(trial, monitor="val_acc", mode="max")],
    )

    model = LightningNet(trial)
    trainer.fit(model)

    return trainer.callback_metrics["val_acc"].item()
Example #30
        def objective(trial):
            model = BaseMLPModel(
                trial=trial,
                hparams=hparams,
                input_size=sample_size * len(train_features),
                sample_size=sample_size,
                output_size=output_size,
                station_name=station_name,
                target=target,
                features=train_features,
                features_periodic=train_features_periodic,
                features_nonperiodic=train_features_nonperiodic,
                train_dataset=train_dataset,
                val_dataset=val_dataset,
                test_dataset=test_dataset,
                scaler_X=train_valid_dataset.scaler_X,
                scaler_Y=train_valid_dataset.scaler_Y,
                output_dir=output_dir)

            # most basic trainer, uses good defaults
            trainer = Trainer(gpus=1 if torch.cuda.is_available() else None,
                              precision=32,
                              min_epochs=1,
                              max_epochs=20,
                              default_root_dir=output_dir,
                              fast_dev_run=fast_dev_run,
                              logger=True,
                              checkpoint_callback=False,
                              callbacks=[
                                  PyTorchLightningPruningCallback(
                                      trial, monitor="valid/MSE")
                              ])

            trainer.fit(model)

            # Don't Log
            # hyperparameters = model.hparams
            # trainer.logger.log_hyperparams(hyperparameters)

            return trainer.callback_metrics.get("valid/MSE")