Example 1
    def test_train_validate(self, cfg, trainer):
        for key in ["test"]:
            if key in cfg.dataset.keys():
                del cfg.dataset[key]

        model = HydraMixin.create_model(cfg)
        trainer.fit(model)
Example 2
def test_schedule_length_correct(torch, cfg, hydra, gpus, gpu_count,
                                 accum_grad_batches, num_nodes):
    cfg["trainer"]["params"]["gpus"] = gpus
    cfg["trainer"]["params"]["accumulate_grad_batches"] = accum_grad_batches
    cfg["trainer"]["params"]["num_nodes"] = num_nodes
    cfg["trainer"]["params"]["fast_dev_run"] = False
    cfg["trainer"]["params"]["max_epochs"] = 10
    cfg["dataset"]["train"]["params"]["size"] = 10000
    cfg["dataset"]["validate"]["params"]["size"] = 10000
    cfg["dataset"]["test"]["params"]["size"] = 10000
    cfg["model"]["params"]

    model = HydraMixin.create_model(cfg)
    model.get_datasets()
    _, schedulers = model.configure_optimizers()
    schedule = schedulers[0]
    assert schedule["interval"] == "step"
    assert schedule["frequency"] == 1

    scheduler = schedule["scheduler"]

    max_lr = cfg.schedule["params"]["max_lr"]
    div_factor = cfg.schedule["params"]["div_factor"]
    cfg.schedule["params"]["final_div_factor"]
    num_epochs = cfg.trainer["params"]["max_epochs"]
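    # each optimizer step consumes gpu_count * accum_grad_batches * num_nodes
    # training batches, so the schedule must cover steps-per-epoch * max_epochs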
    expected_steps = (math.ceil(
        len(model.train_dataloader()) /
        (gpu_count * accum_grad_batches * num_nodes)) * num_epochs)
    cfg.schedule["params"]["pct_start"]

    assert abs(scheduler.get_last_lr()[0] - max_lr / div_factor) <= 1e-5
    assert scheduler.total_steps == expected_steps
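For instance, with 100 training batches per epoch, gpu_count=2, accum_grad_batches=2, num_nodes=1, and max_epochs=10, the scheduler should span ceil(100 / 4) * 10 = 250 total steps.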
Example 3
def test_configure_multiple_optimizers(torch, cfg, hydra, scheduled):
    num_optimizers = 2
    cfg["optimizer"] = [
        cfg["optimizer"],
    ] * num_optimizers

    if not scheduled:
        del cfg["schedule"]
    else:
        cfg["schedule"] = [
            cfg["schedule"],
        ] * num_optimizers

    model = HydraMixin.create_model(cfg)
    model.get_optimizer_parameters = lambda idx: model.parameters()
    model.get_datasets()

    if not scheduled:
        optims = model.configure_optimizers()
        assert len(optims) == 2
        assert isinstance(optims[0], torch.optim.Adam)
        assert isinstance(optims[1], torch.optim.Adam)
    else:
        optims, schedulers = model.configure_optimizers()
        assert len(optims) == 2
        assert len(schedulers) == 2
        assert isinstance(optims[0], torch.optim.Adam)
        assert isinstance(optims[1], torch.optim.Adam)
        assert isinstance(schedulers[0], dict)
        assert isinstance(schedulers[0]["scheduler"],
                          torch.optim.lr_scheduler.OneCycleLR)
        assert isinstance(schedulers[1], dict)
        assert isinstance(schedulers[1]["scheduler"],
                          torch.optim.lr_scheduler.OneCycleLR)
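The asserted shape follows PyTorch Lightning's multi-optimizer contract: a list of optimizers paired with a parallel list of scheduler dicts. A minimal self-contained sketch of one such pair, with illustrative parameter and hyperparameter values:

import torch

# one (optimizer, scheduler-dict) pair in the shape the test asserts
params = [torch.nn.Parameter(torch.zeros(1))]
opt = torch.optim.Adam(params, lr=1e-3)
sched = torch.optim.lr_scheduler.OneCycleLR(opt, max_lr=1e-2, total_steps=100)
optims = [opt]
schedulers = [{"scheduler": sched, "interval": "step", "frequency": 1}]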
Example 4
def test_get_lr(scheduled, cfg, hydra):
    if not scheduled:
        del cfg["schedule"]

    model = HydraMixin.create_model(cfg)
    model.get_datasets()
    model.configure_optimizers()
    assert model.get_lr() == cfg["optimizer"]["params"]["lr"]
Example 5
def test_configure_optimizer_warn_no_monitor_key(torch, cfg, hydra):
    cfg["model"]["params"]
    del cfg["schedule"]["monitor"]
    model = HydraMixin.create_model(cfg)
    model.get_datasets()

    with pytest.warns(UserWarning):
        optims, schedulers = model.configure_optimizers()
Example 6
def test_configure_optimizer_missing_keys(torch, cfg, hydra, missing):
    cfg["model"]["params"]
    del cfg["schedule"][missing]
    model = HydraMixin.create_model(cfg)
    model.get_datasets()

    with pytest.raises(pl.utilities.exceptions.MisconfigurationException):
        optims, schedulers = model.configure_optimizers()
Example 7
def auto_lr_find(cfg: DictConfig,
                 model: pl.LightningModule) -> Optional[float]:
    r"""Performs automatic learning rate selection using PyTorch Lightning.
    This is essentially a wrapper function that invokes PyTorch Lightning's
    auto LR selection using Hydra inputs. The model's learning rate is
    automatically set to the selected learning rate, and the selected
    learning rate is logged. If possible, a plot of the learning rate
    selection curve will also be produced.

    Args:

        cfg (DictConfig):
            The Hydra config

        model (LightningModule):
            The model to select a learning rate for.

    Returns:
        The learning rate if one was found, otherwise ``None``.
    """
    lr = None
    try:
        model.prepare_data()
        lr_trainer: pl.Trainer = HydraMixin.instantiate(cfg.trainer)
        if hasattr(lr_trainer, "tuner"):
            # pl >= 1.0.0
            lr_finder = lr_trainer.tuner.lr_find(model)
        else:
            # pl < 1.0.0
            lr_finder = lr_trainer.lr_find(model)  # type: ignore
        lr = lr_finder.suggestion()
        log.info("Found learning rate %f", lr)
        cfg.optimizer["params"]["lr"] = lr

        # save lr curve figure
        try:
            cwd = os.getcwd()
            path = os.path.join(cwd, "lr_curve.png")
            fig = lr_finder.plot(suggest=True)
            log.info("Saving LR curve to %s", path)
            fig.savefig(path)
            plt.close()
        except Exception as err:
            log.exception(err)
            log.info("No learning rate curve was saved")

    except Exception as err:
        log.exception(err)
        log.info(
            "Learning rate auto-find failed, using learning rate specified in config"
        )
        _exceptions.append(err)
    finally:
        cfg.trainer["params"]["auto_lr_find"] = False

    return lr
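A sketch of how ``auto_lr_find`` might be wired into a Hydra entry point. The entry point ``run`` is hypothetical; the config keys (``trainer.params.auto_lr_find``, ``optimizer.params.lr``) are the ones the function above reads and writes:

import hydra
from omegaconf import DictConfig

@hydra.main(config_path="./conf", config_name="config")
def run(cfg: DictConfig) -> None:
    model = HydraMixin.create_model(cfg)
    if cfg.trainer.params.get("auto_lr_find", False):
        # on success this also overwrites cfg.optimizer["params"]["lr"]
        lr = auto_lr_find(cfg, model)
        if lr is None:
            log.info("LR find failed; keeping configured learning rate")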
Example 8
def test_get_datasets_forced(cfg, force, mocker):
    model = HydraMixin.create_model(cfg)
    mock = mocker.MagicMock(spec_set=bool, name="train_ds")
    model.get_datasets()
    model.train_ds = mock
    model.get_datasets(force=force)

    if force:
        assert model.train_ds != mock
    else:
        assert model.train_ds == mock
Example 9
def test_test_dataloader(cfg, present):
    if not present:
        del cfg.dataset["test"]
    model = HydraMixin.create_model(cfg)
    model.get_datasets()
    dataloader = model.test_dataloader()

    if present:
        assert isinstance(dataloader, torch.utils.data.DataLoader)
    else:
        assert dataloader is None
Example 10
def test_recursive_instantiate(cfg, params, key):
    cfg["model"]["params"]["criterion"] = {
        key: "torch.nn.BCELoss",
        "params": {}
    }

    if params:
        cfg["model"]["params"]["criterion"]["params"] = {"reduction": "none"}

    model = HydraMixin.create_model(cfg)
    assert isinstance(model.criterion, torch.nn.BCELoss)
    assert model.hparams.keys() == cfg.keys()
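When ``params`` is supplied, the instantiated criterion should be equivalent to calling ``torch.nn.BCELoss(reduction="none")`` directly.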
Example 11
def test_configure_optimizer(torch, cfg, hydra, scheduled):
    if not scheduled:
        del cfg["schedule"]
    model = HydraMixin.create_model(cfg)
    model.get_datasets()

    if not scheduled:
        optim = model.configure_optimizers()
        assert isinstance(optim, torch.optim.Adam)
    else:
        optims, schedulers = model.configure_optimizers()
        assert isinstance(optims[0], torch.optim.Adam)
        assert isinstance(schedulers[0], dict)
        assert isinstance(schedulers[0]["scheduler"],
                          torch.optim.lr_scheduler.OneCycleLR)
Example 12
def test_dataloader_override_batch_size(cfg, subset):
    model_batch_size = cfg.dataset.train["batch_size"]
    new_batch_size = model_batch_size + 1

    cfg.dataset[subset]["batch_size"] = new_batch_size

    model = HydraMixin.create_model(cfg)
    model.get_datasets()

    if subset == "test":
        dataloader = model.test_dataloader()
    else:
        dataloader = model.val_dataloader()
    train_dl = model.train_dataloader()

    assert train_dl.batch_size == model_batch_size
    assert dataloader.batch_size == new_batch_size
Example 13
def test_get_lr_multiple_optimizers(scheduled, cfg, hydra):
    num_optimizers = 2
    cfg["optimizer"] = [
        cfg["optimizer"],
    ] * num_optimizers

    if not scheduled:
        del cfg["schedule"]
    else:
        cfg["schedule"] = [
            cfg["schedule"],
        ] * num_optimizers

    model = HydraMixin.create_model(cfg)
    model.get_optimizer_parameters = lambda idx: model.parameters()
    model.configure_optimizers()
    lr1 = model.get_lr(0, 0)
    lr2 = model.get_lr(1, 0)
    assert lr1 == cfg["optimizer"][0]["params"]["lr"]
    assert lr2 == cfg["optimizer"][1]["params"]["lr"]
Example 14
def test_dataloader_from_subset(cfg, subset, split):
    if subset == "test":
        if split:
            cfg.dataset["test"] = 10
        del cfg.dataset["validate"]
    else:
        if split:
            cfg.dataset["validate"] = 10
        del cfg.dataset["test"]

    model = HydraMixin.create_model(cfg)
    model.get_datasets()

    if subset == "test":
        dataloader = model.test_dataloader()
    else:
        dataloader = model.val_dataloader()
    train_dl = model.train_dataloader()

    # TODO should check shuffle=False, but this is hidden in dataloader.sampler
    assert isinstance(dataloader, torch.utils.data.DataLoader)
    for key in ["pin_memory", "batch_size", "num_workers", "drop_last"]:
        assert getattr(dataloader, key) == getattr(train_dl, key)
Example 15
def test_recursive_instantiate_list(hydra, key):
    cfg = {
        key: "torch.nn.Sequential",
        "params": [
            {
                key: "torch.nn.Linear",
                "params": {
                    "in_features": 10,
                    "out_features": 10,
                },
            },
            {
                key: "torch.nn.Linear",
                "params": {
                    "in_features": 10,
                    "out_features": 10,
                },
            },
        ],
    }

    model = HydraMixin.instantiate(cfg)
    assert isinstance(model, torch.nn.Sequential)
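If instantiation succeeds, the result should be equivalent to constructing the module directly; a sanity-check sketch (not part of the test):

import torch

expected = torch.nn.Sequential(
    torch.nn.Linear(in_features=10, out_features=10),
    torch.nn.Linear(in_features=10, out_features=10),
)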
Example 16
    def test_train_validate_test(self, cfg, trainer):
        model = HydraMixin.create_model(cfg)
        trainer.fit(model)
Example 17
def test_recursive_instantiate_preserves_cfg(cfg):
    key = {"_target_": "torch.nn.BCELoss", "params": {"reduction": "none"}}
    cfg["model"]["params"]["criterion"] = key
    model = HydraMixin.create_model(cfg)
    assert "criterion" in model.hparams["model"]["params"].keys()
    assert model.hparams["model"]["params"]["criterion"] == key
Example 18
    def trainer(self, cfg):
        return HydraMixin.instantiate(cfg.trainer)
Example 19
def main(
    cfg: DictConfig,
    process_results_fn: Optional[Callable[[Any, Any], Any]] = None) -> Any:
    r"""Main method for training/testing of a model using PyTorch Lightning and Hydra.

    This method is robust to exceptions (other than :class:`SystemExit` or :class:`KeyboardInterrupt`),
    making it useful when using Hydra's multirun feature. If one combination of hyperparameters results in
    an exception, other combinations will still be attempted. This behavior can be overridden by providing
    a ``catch_exceptions`` bool value under ``config.trainer``. Such an override is useful when writing tests.

    Automatic learning rate selection is handled using :func:`auto_lr_find`.

    Training / testing is automatically performed based on the configuration keys present in ``config.dataset``.

    Additionally, the following Hydra overrides are supported:
        * ``trainer.load_from_checkpoint`` - Load model weights (but not training state) from a checkpoint
        * ``trainer.test_only`` - Skip training even if a training dataset is given

    Args:

        cfg (DictConfig):
            The Hydra config

        process_results_fn (callable, optional):
            If given, call ``process_results_fn`` on the ``(train_results, test_results)`` tuple returned by
            this method. This is useful for processing training/testing results into a scalar return value
            when using an optimization sweeper (like Ax).

    Example::

        >>> # define main method as per Hydra that calls combustion.main()
        >>> @hydra.main(config_path="./conf", config_name="config")
        >>> def main(cfg):
        >>>     combustion.main(cfg)
        >>>
        >>> if __name__ == "__main__":
        >>>     main()

    Example (multirun from config file)::

        >>> combustion.initialize(config_path="./conf", config_name="config")
        >>>
        >>> @hydra.main(config_path="./conf", config_name="config")
        >>> def main(cfg):
        >>>     return combustion.main(cfg)
        >>>
        >>> if __name__ == "__main__":
        >>>     main()
        >>>     combustion.check_exceptions()

    Example (inference-time command)::
        ``python -m my_module trainer.load_from_checkpoint=foo.ckpt trainer.test_only=True``
    """
    train_results, test_results = None, None
    model: pl.LightningModule
    trainer: Optional[pl.Trainer] = None

    try:
        _log_versions()
        log.info("Configuration: \n%s", OmegaConf.to_yaml(cfg))

        deterministic = cfg.trainer.params.get("deterministic", False)
        if deterministic:
            seed_val = 42
            log.info(
                "Determinstic training requested, seeding everything with %d",
                seed_val)
            pl.seed_everything(seed_val)

        # instantiate model (and optimizer) selected in yaml
        # see pytorch lightning docs: https://pytorch-lightning.rtfd.io/en/latest
        try:
            model = HydraMixin.create_model(cfg)
        except RuntimeError:
            log.error("Failed to instantiate model")
            log.error("Model Config:\n%s", cfg.model.to_yaml())
            raise

        # preprocess data
        preprocess_training_path = cfg.trainer.get("preprocess_train_path",
                                                   None)
        preprocess_training_epochs = cfg.trainer.get("preprocess_train_epochs",
                                                     1)
        if preprocess_training_path is not None:
            if not os.path.isdir(preprocess_training_path):
                raise NotADirectoryError(preprocess_training_path)

            # clean non-empty directory
            if os.listdir(preprocess_training_path):
                pattern = os.path.join(preprocess_training_path, "*example*.pth")
                files = glob(pattern)
                log.info("Cleaning destination directory")
                for f in files:
                    os.remove(f)

            log.info("Writing preprocessed training set to %s",
                     preprocess_training_path)
            model.prepare_data()
            train_ds = model.train_dataloader().dataset  # type: ignore
            for i in range(preprocess_training_epochs):
                save_torch(train_ds, preprocess_training_path,
                           f"epoch_{i}_example_")
            log.info(
                "Finished writing preprocessed training set. Update Hydra config and rerun training."
            )
            return

        # load model checkpoint if requested and not resume_from_checkpoint
        load_from_checkpoint = cfg.trainer.get("load_from_checkpoint", None)
        resume_from_checkpoint = cfg.trainer.params.get(
            "resume_from_checkpoint", None)
        if load_from_checkpoint is not None:
            if resume_from_checkpoint is not None:
                log.info(
                    "Skipping checkpoint loading because resume_from_checkpoint was given"
                )
            else:
                log.info("Loading checkpoint %s", load_from_checkpoint)
                # TODO this still needs some work
                model = model.__class__.load_from_checkpoint(
                    load_from_checkpoint)
                model._hparams = cfg

        # run auto learning rate find if requested
        lr_find = cfg.trainer.params.get("auto_lr_find", False)
        fast_dev_run = cfg.trainer.params.get("fast_dev_run", False)
        test_only = cfg.trainer.get("test_only", False)
        if lr_find:
            if fast_dev_run:
                log.info(
                    "Skipping auto learning rate find when fast_dev_run is true"
                )
            elif test_only:
                log.info(
                    "Skipping auto learning rate find when test_only is true")
            else:
                auto_lr_find(cfg, model)

        # instantiate trainer with params as selected in yaml
        # handles tensorboard, checkpointing, etc
        trainer = HydraMixin.instantiate(cfg.trainer)
        trainer = typing.cast(pl.Trainer, trainer)

        # train
        if "train" in cfg.dataset:
            if test_only:
                log.info("test_only flag was set, skipping training")
            else:
                log.info("Starting training")
                train_results = trainer.fit(model)
                log.info("Train results: %s", train_results)
        else:
            log.info("No training dataset given")

        # test
        if "test" in cfg.dataset:
            log.info("Starting testing")
            test_results = trainer.test(model)
            log.info("Test results: %s", test_results)
        else:
            log.info("No test dataset given")

        log.info("Finished!")

    # guard to continue when using Hydra multiruns
    # SystemExit/KeyboardInterrupt are not caught and will trigger shutdown
    except Exception as err:
        catch_exceptions = cfg.trainer.get("catch_exceptions", True)
        if catch_exceptions:
            log.exception(err)
            _exceptions.append(err)
        else:
            raise err

    finally:
        # flush logger to ensure free memory for next run
        if trainer is not None:
            experiment = trainer.logger.experiment  # type: ignore
            if experiment is not None and hasattr(experiment, "flush"):
                experiment.flush()
                log.debug("Flushed experiment to disk")
            if experiment is not None and hasattr(experiment, "close"):
                experiment.close()
                log.debug("Closed experiment writer")

    # postprocess results if desired (e.g. to scalars for bayesian optimization)
    if process_results_fn is not None:
        log.debug("Running results postprocessing")
        output = process_results_fn(train_results, test_results)
    else:
        output = train_results, test_results

    return output
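For orientation, a config shape consistent with the keys read by ``main`` and mutated throughout the tests above. The ``my_module`` targets, ``val_loss`` monitor, dataset sizes, and batch sizes are illustrative assumptions; only the optimizer and scheduler classes are the ones the tests assert on:

from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "model": {"_target_": "my_module.MyModel", "params": {}},  # hypothetical model class
    "optimizer": {"_target_": "torch.optim.Adam", "params": {"lr": 1e-3}},
    "schedule": {
        "_target_": "torch.optim.lr_scheduler.OneCycleLR",
        "monitor": "val_loss",  # assumed metric name
        "params": {"max_lr": 1e-2, "div_factor": 25.0,
                   "final_div_factor": 1e4, "pct_start": 0.3},
    },
    "trainer": {"_target_": "pytorch_lightning.Trainer", "params": {"max_epochs": 10}},
    "dataset": {
        # hypothetical dataset class; sizes/batch sizes are placeholders
        "train": {"_target_": "my_module.MyDataset", "batch_size": 32,
                  "params": {"size": 1000}},
        "validate": {"_target_": "my_module.MyDataset", "batch_size": 32,
                     "params": {"size": 100}},
        "test": {"_target_": "my_module.MyDataset", "batch_size": 32,
                 "params": {"size": 100}},
    },
})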
Example 20
def test_instantiate_report_error(hydra, target, exception):
    cfg = {"_target_": target}
    with pytest.raises(exception):
        HydraMixin.instantiate(cfg)
Example 21
def test_get_datasets_missing_items(cfg, missing, present):
    for k in missing:
        del cfg.dataset[k]
    model = HydraMixin.create_model(cfg)
    model.get_datasets()
Example 22
def test_get_datasets(cfg, check):
    model = HydraMixin.create_model(cfg)
    model.get_datasets()
    assert hasattr(model, check)
    assert isinstance(getattr(model, check), torch.utils.data.Dataset)
Example 23
def test_create_model(cfg, hydra):
    model = HydraMixin.create_model(cfg)
    assert isinstance(model, Subclass)
    assert model.hparams.keys() == cfg.keys()