Example 1
def auto_lr_find(cfg: DictConfig,
                 model: pl.LightningModule) -> Optional[float]:
    r"""Performs automatic learning rate selection using PyTorch Lightning.
    This is essentially a wrapper function that invokes PyTorch Lightning's
    auto LR selection using Hydra inputs. The model's learning rate is
    automatically set to the selected learning rate, and the selected
    learning rate is logged. If possible, a plot of the learning rate
    selection curve will also be produced.

    Args:

        cfg (DictConfig):
            The Hydra config

        model (LightningModule):
            The model to select a learning rate for.

    Returns:
        The learning rate if one was found, otherwise ``None``.
    """
    lr = None
    try:
        model.prepare_data()
        lr_trainer: pl.Trainer = HydraMixin.instantiate(cfg.trainer)
        if hasattr(lr_trainer, "tuner"):
            # pl >= 1.0.0
            lr_finder = lr_trainer.tuner.lr_find(model)
        else:
            # pl < 1.0.0
            lr_finder = lr_trainer.lr_find(model)  # type: ignore
        lr = lr_finder.suggestion()
        log.info("Found learning rate %f", lr)
        cfg.optimizer["params"]["lr"] = lr

        # save lr curve figure
        try:
            cwd = os.getcwd()
            path = os.path.join(cwd, "lr_curve.png")
            fig = lr_finder.plot(suggest=True)
            log.info("Saving LR curve to %s", path)
            fig.savefig(path)
            plt.close()
        except Exception as err:
            log.exception(err)
            log.info("No learning rate curve was saved")

    except Exception as err:
        log.exception(err)
        log.info(
            "Learning rate auto-find failed, using learning rate specified in config"
        )
        _exceptions.append(err)
    finally:
        cfg.trainer["params"]["auto_lr_find"] = False

    return lr
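
A minimal sketch of driving auto_lr_find directly, assuming an OmegaConf config that carries the keys the function reads and writes (cfg.trainer, cfg.trainer.params.auto_lr_find, cfg.optimizer.params.lr); the target paths, config values, and MyLightningModule are illustrative assumptions rather than the library's actual config schema.

import pytorch_lightning as pl
from omegaconf import OmegaConf

# Hypothetical config shaped after the keys auto_lr_find touches.
cfg = OmegaConf.create({
    "trainer": {
        "_target_": "pytorch_lightning.Trainer",
        "params": {"auto_lr_find": True, "max_epochs": 10},
    },
    "optimizer": {
        "_target_": "torch.optim.Adam",
        "params": {"lr": 1e-3},
    },
})

model: pl.LightningModule = MyLightningModule()  # hypothetical module defining its own dataloaders

lr = auto_lr_find(cfg, model)
if lr is None:
    # selection failed; the learning rate in the config is left untouched
    print("Falling back to lr =", cfg.optimizer["params"]["lr"])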
Example 2
def test_recursive_instantiate_list(hydra, key):
    cfg = {
        key: "torch.nn.Sequential",
        "params": [
            {
                key: "torch.nn.Linear",
                "params": {
                    "in_features": 10,
                    "out_features": 10,
                },
            },
            {
                key: "torch.nn.Linear",
                "params": {
                    "in_features": 10,
                    "out_features": 10,
                },
            },
        ],
    }

    model = HydraMixin.instantiate(cfg)
    assert isinstance(model, torch.nn.Sequential)
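
For orientation, the nested config above is expected to produce the same structure one would build by hand with torch; a short sketch of that hand-written equivalent (my reading of the "params" list as positional constructor arguments, not taken from the library docs).

import torch

# Hand-built counterpart of the recursively instantiated config: each entry of
# the "params" list becomes one positional argument to torch.nn.Sequential.
expected = torch.nn.Sequential(
    torch.nn.Linear(in_features=10, out_features=10),
    torch.nn.Linear(in_features=10, out_features=10),
)
assert isinstance(expected, torch.nn.Sequential)
assert len(expected) == 2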
Example 3
def main(
    cfg: DictConfig,
    process_results_fn: Optional[Callable[[Any, Any], Any]] = None) -> Any:
    r"""Main method for training/testing of a model using PyTorch Lightning and Hydra.

    This method is robust to exceptions (other than :class:`SystemExit` or :class:`KeyboardInterrupt`),
    making it useful when using Hydra's multirun feature. If one combination of hyperparameters results in
    an exception, other combinations will still be attempted. This behavior can be overridden by providing
    a ``catch_exceptions`` bool value under ``config.trainer``. Such an override is useful when writing tests.

    Automatic learning rate selection is handled using :func:`auto_lr_find`.

    Training / testing is automatically performed based on the configuration keys present in ``config.dataset``.

    Additionally, the following Hydra overrides are supported:
        * ``trainer.load_from_checkpoint`` - Load model weights (but not training state) from a checkpoint
        * ``trainer.test_only`` - Skip training even if a training dataset is given

    Args:

        cfg (DictConfig):
            The Hydra config

        process_results_fn (callable, optional):
            If given, ``process_results_fn`` is called with ``(train_results, test_results)`` before this
            method returns. This is useful for processing training/testing results into a scalar return value
            when using an optimization sweeper (like Ax).

    Example::

        >>> # define main method as per Hydra that calls combustion.main()
        >>> @hydra.main(config_path="./conf", config_name="config")
        >>> def main(cfg):
        >>>     combustion.main(cfg)
        >>>
        >>> if __name__ == "__main__":
        >>>     main()

    Example (multirun from config file)::

        >>> combustion.initialize(config_path="./conf", config_name="config")
        >>>
        >>> @hydra.main(config_path="./conf", config_name="config")
        >>> def main(cfg):
        >>>     return combustion.main(cfg)
        >>>
        >>> if __name__ == "__main__":
        >>>     main()
        >>>     combustion.check_exceptions()

    Example (inference-time command)::
        ``python -m my_module trainer.load_from_checkpoint=foo.ckpt trainer.test_only=True``
    """
    train_results, test_results = None, None
    model: pl.LightningModule
    trainer: Optional[pl.Trainer] = None

    try:
        _log_versions()
        log.info("Configuration: \n%s", OmegaConf.to_yaml(cfg))

        deterministic = cfg.trainer.params.get("deterministic", False)
        if deterministic:
            seed_val = 42
            log.info(
                "Determinstic training requested, seeding everything with %d",
                seed_val)
            pl.seed_everything(seed_val)

        # instantiate model (and optimizer) selected in yaml
        # see pytorch lightning docs: https://pytorch-lightning.rtfd.io/en/latest
        try:
            model = HydraMixin.create_model(cfg)
        except RuntimeError:
            log.error("Failed to instantiate model")
            log.error("Model Config:\n%s", cfg.model.to_yaml())
            raise

        # preprocess data
        preprocess_training_path = cfg.trainer.get("preprocess_train_path",
                                                   None)
        preprocess_training_epochs = cfg.trainer.get("preprocess_train_epochs",
                                                     1)
        if preprocess_training_path is not None:
            if not os.path.isdir(preprocess_training_path):
                raise NotADirectoryError(preprocess_training_path)

            # clean non-empty directory
            if os.listdir(preprocess_training_path):
                pattern = os.path.join(preprocess_training_path, "*example*.pth")
                files = glob(pattern)
                log.info("Cleaning destination directory")
                for f in files:
                    os.remove(f)

            log.info("Writing preprocessed training set to %s",
                     preprocess_training_path)
            model.prepare_data()
            train_ds = model.train_dataloader().dataset  # type: ignore
            for i in range(preprocess_training_epochs):
                save_torch(train_ds, preprocess_training_path,
                           f"epoch_{i}_example_")
            log.info(
                "Finished writing preprocessed training set. Update Hydra config and rerun training."
            )
            return

        # load model checkpoint if requested and not resume_from_checkpoint
        load_from_checkpoint = cfg.trainer.get("load_from_checkpoint", None)
        resume_from_checkpoint = cfg.trainer.params.get(
            "resume_from_checkpoint", None)
        if load_from_checkpoint is not None:
            if resume_from_checkpoint is not None:
                log.info(
                    "Skipping checkpoint loading because resume_from_checkpoint was given"
                )
            else:
                log.info("Loading checkpoint %s", load_from_checkpoint)
                # TODO this still needs some work
                model = model.__class__.load_from_checkpoint(
                    load_from_checkpoint)
                model._hparams = cfg

        # run auto learning rate find if requested
        lr_find = cfg.trainer.params.get("auto_lr_find", False)
        fast_dev_run = cfg.trainer.params.get("fast_dev_run", False)
        test_only = cfg.trainer.get("test_only", False)
        if lr_find:
            if fast_dev_run:
                log.info(
                    "Skipping auto learning rate find when fast_dev_run is true"
                )
            elif test_only:
                log.info(
                    "Skipping auto learning rate find when test_only is true")
            else:
                auto_lr_find(cfg, model)

        # instantiate trainer with params as selected in yaml
        # handles tensorboard, checkpointing, etc
        trainer = HydraMixin.instantiate(cfg.trainer)
        trainer = typing.cast(pl.Trainer, trainer)

        # train
        if "train" in cfg.dataset:
            if test_only:
                log.info("test_only flag was set, skipping training")
            else:
                log.info("Starting training")
                train_results = trainer.fit(model)
                log.info("Train results: %s", train_results)
        else:
            log.info("No training dataset given")

        # test
        if "test" in cfg.dataset:
            log.info("Starting testing")
            test_results = trainer.test(model)
            log.info("Test results: %s", test_results)
        else:
            log.info("No test dataset given")

        log.info("Finished!")

    # guard to continue when using Hydra multiruns
    # SystemExit/KeyboardInterrupt are not caught and will trigger shutdown
    except Exception as err:
        catch_exceptions = cfg.trainer.get("catch_exceptions", True)
        if catch_exceptions:
            log.exception(err)
            _exceptions.append(err)
        else:
            raise err

    finally:
        # flush logger to ensure free memory for next run
        if trainer is not None:
            experiment = trainer.logger.experiment  # type: ignore
            if experiment is not None and hasattr(experiment, "flush"):
                experiment.flush()
                log.debug("Flushed experiment to disk")
            if experiment is not None and hasattr(experiment, "close"):
                experiment.close()
                log.debug("Closed experiment writer")

    # postprocess results if desired (e.g. to scalars for bayesian optimization)
    if process_results_fn is not None:
        log.debug("Running results postprocessing")
        output = process_results_fn(train_results,
                                    test_results)  # type: ignore
    else:
        output = train_results, test_results

    return output  # type: ignore
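
A sketch of wiring process_results_fn into a sweep, following the two-argument call convention used in the body above; the "test_loss" key and the list-of-dicts shape of the test results are assumptions that depend on what the LightningModule logs.

# Hypothetical reduction of (train_results, test_results) to a single scalar
# for an optimization sweeper such as Ax. Adjust the metric key to match your
# own logging.
def to_scalar(train_results, test_results):
    if test_results:
        return float(test_results[0]["test_loss"])
    return float("inf")

# inside a @hydra.main-decorated entry point:
#     return combustion.main(cfg, process_results_fn=to_scalar)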
Example 4
def trainer(self, cfg):
    return HydraMixin.instantiate(cfg.trainer)
Example 5
def test_instantiate_report_error(hydra, target, exception):
    cfg = {"_target_": target}
    with pytest.raises(exception):
        HydraMixin.instantiate(cfg)