limit_train_batches=30,
    # val_check_interval=20,
    # limit_val_batches=1,
    # fast_dev_run=True,
    # logger=logger,
    # profiler=True,
    callbacks=[lr_logger],
)

# Build the Temporal Fusion Transformer from the `training` dataset.
tft = TemporalFusionTransformer.from_dataset(
    training,
    # --- network size / regularisation ---
    hidden_size=16,
    hidden_continuous_size=8,
    attention_head_size=1,
    dropout=0.1,
    # --- loss and output head ---
    loss=QuantileLoss(),
    output_size=7,
    # --- optimisation ---
    learning_rate=0.03,
    reduce_on_plateau_patience=3,
    # --- logging cadence ---
    log_interval=10,
    log_val_interval=1,
)
# Report the model size in thousands of parameters.
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

# # find optimal learning rate
# # remove logging and artificial epoch size
# tft.hparams.log_interval = -1
# tft.hparams.log_val_interval = -1
# trainer.limit_train_batches = 1.0
# # run learning rate finder
# res = trainer.tuner.lr_find(
Beispiel #2
0
Datei: TFT.py Projekt: NHQ/tempy
    max_epochs=1000,
    #min_epochs=100,
    gpus=0,
    weights_summary="top",
    gradient_clip_val=0.14578,
    limit_train_batches=30,
    # val_check_interval=20,
    # limit_val_batches=1,
    # fast_dev_run=True,
    # logger=logger,
    # profiler=True,
    callbacks=[lr_logger, early_stop_callback],
)

# Restore a previously trained Temporal Fusion Transformer from a checkpoint
# file on disk instead of building a fresh model.
# NOTE(review): the path is machine-specific and hard-coded. Alternatives that
# were tried before:
#   /home/johnny/tempy/lightning_logs/version_31/checkpoints/epoch=20-step=83.ckpt
#   best_model_path
tft = TemporalFusionTransformer.load_from_checkpoint(
    "/home/johnny/tempy/lightning_logs/version_36/checkpoints/epoch=157-step=4739.ckpt"
)
"""
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.02,
    hidden_size=12,
    attention_head_size=6,
    dropout=0.1,
    hidden_continuous_size=12,
    output_size=8,
    loss=QuantileLoss(),
    log_interval=10,
    #log_val_interval=1,
    reduce_on_plateau_patience=10,
)
Beispiel #3
0
    def train(
        self,
        max_epochs=25,
        hidden_size=16,
        lstm_layers=1,
        dropout=0.1,
        attention_head_size=4,
        reduce_on_plateau_patience=4,
        hidden_continuous_size=8,
        learning_rate=1e-3,
        gradient_clip_val=0.1,
        batch_size=128,
    ):
        """Build a Temporal Fusion Transformer and fit it on the internal datasets.

        The fitted network is stored in ``self.model``; nothing is returned.

        Args:
            max_epochs: Upper bound on training epochs, passed to ``pl.Trainer``.
                Early stopping on ``val_loss`` may end training sooner.
            hidden_size: Forwarded to ``TemporalFusionTransformer.from_dataset``.
            lstm_layers: Forwarded to ``TemporalFusionTransformer.from_dataset``.
            dropout: Forwarded to ``TemporalFusionTransformer.from_dataset``.
            attention_head_size: Forwarded to
                ``TemporalFusionTransformer.from_dataset``.
            reduce_on_plateau_patience: Forwarded to
                ``TemporalFusionTransformer.from_dataset``.
            hidden_continuous_size: Forwarded to
                ``TemporalFusionTransformer.from_dataset``.
            learning_rate: Forwarded to
                ``TemporalFusionTransformer.from_dataset``.
            gradient_clip_val: Gradient clipping value passed to ``pl.Trainer``.
            batch_size: Batch size of the training dataloader. The validation
                dataloader uses ten times this value. Defaults to 128, the
                value that used to be hard-coded.
        """
        # Create the dataloaders for the model.
        # NOTE(review): the training set is read from ``self.intern_training``
        # while the validation set is read from ``self._intern_validation``
        # (leading underscore) -- confirm both attributes exist on the class.
        train_dataloader = self.intern_training.to_dataloader(
            train=True, batch_size=batch_size)
        val_dataloader = self._intern_validation.to_dataloader(
            train=False, batch_size=batch_size * 10)

        # Fixed seed so that repeated runs are comparable.
        pl.seed_everything(42)

        # Configure the trainer: stop once val_loss has not improved by at
        # least 1e-4 for 10 consecutive checks.
        early_stop_callback = EarlyStopping(monitor="val_loss",
                                            min_delta=1e-4,
                                            patience=10,
                                            verbose=False,
                                            mode="min")
        # lr_logger = LearningRateMonitor()

        trainer = pl.Trainer(
            max_epochs=max_epochs,
            gpus=0,
            weights_summary=None,
            gradient_clip_val=gradient_clip_val,
            # limit_train_batches=30,  # comment in for training, running validation every 30 batches
            # fast_dev_run=True,  # comment in to check that network or dataset has no serious bugs
            callbacks=[early_stop_callback],
        )

        # Configure the network from the training dataset.
        self.model = TemporalFusionTransformer.from_dataset(
            self.intern_training,
            learning_rate=learning_rate,
            hidden_size=hidden_size,
            attention_head_size=attention_head_size,
            dropout=dropout,
            hidden_continuous_size=hidden_continuous_size,
            lstm_layers=lstm_layers,
            output_size=len(self.quantiles),  # one output per quantile
            loss=QuantileLoss(self.quantiles),
            reduce_on_plateau_patience=reduce_on_plateau_patience,
        )

        # Optional alternative (disabled): run the learning rate finder and
        # rebuild the model with the suggested learning rate.
        #
        # res = trainer.tuner.lr_find(
        #     self.model,
        #     train_dataloader=train_dataloader,
        #     val_dataloaders=val_dataloader,
        #     max_lr=10.0,
        #     min_lr=1e-6,
        # )
        #
        # self.model = TemporalFusionTransformer.from_dataset(
        #     self.intern_training,
        #     learning_rate=res.suggestion(),  # using the suggested learning rate
        #     hidden_size=hidden_size,
        #     attention_head_size=attention_head_size,
        #     dropout=dropout,
        #     hidden_continuous_size=hidden_continuous_size,
        #     output_size=len(self.quantiles),
        #     loss=QuantileLoss(self.quantiles),
        #     reduce_on_plateau_patience=reduce_on_plateau_patience,
        # )

        # Fit the network.
        trainer.fit(
            self.model,
            train_dataloader=train_dataloader,
            val_dataloaders=val_dataloader,
        )
    def objective(trial: optuna.Trial) -> float:
        """Train one Temporal Fusion Transformer for an Optuna trial.

        Samples hyperparameters from ``trial``, fits the model and returns the
        ``val_loss`` recorded last by the metrics callback.

        Several names used here (``model_path``, ``log_dir``, ``max_epochs``,
        ``trainer_kwargs``, ``kwargs``, the ``*_range`` tuples,
        ``use_learning_rate_finder``, ``train_dataloader``, ``val_dataloader``)
        are not defined in this function; they are expected to come from the
        enclosing scope.
        """
        # Each trial writes its checkpoints below its own "trial_<n>" folder so
        # that the checkpoints of different trials can be told apart.
        checkpoint_pattern = os.path.join(
            model_path, f"trial_{trial.number}", "{epoch}")
        checkpointer = pl.callbacks.ModelCheckpoint(checkpoint_pattern,
                                                    monitor="val_loss")

        # MetricsCallback keeps the validation metrics that are read back at
        # the end of this function; a TensorBoard logger is attached as well.
        metrics_recorder = MetricsCallback()
        lr_monitor = LearningRateMonitor()
        tb_logger = TensorBoardLogger(log_dir,
                                      name="optuna",
                                      version=trial.number)

        clip_value = trial.suggest_loguniform("gradient_clip_val",
                                              *gradient_clip_val_range)

        # Use the first GPU when CUDA is available, otherwise pass None.
        if torch.cuda.is_available():
            trainer_gpus = [0]
        else:
            trainer_gpus = None
        trainer = pl.Trainer(
            checkpoint_callback=checkpointer,
            max_epochs=max_epochs,
            gradient_clip_val=clip_value,
            gpus=trainer_gpus,
            callbacks=[
                metrics_recorder,
                lr_monitor,
                PyTorchLightningPruningCallback(trial, monitor="val_loss"),
            ],
            logger=tb_logger,
            **trainer_kwargs,
        )

        # Sample the architecture hyperparameters. The order of the suggest
        # calls is kept: hidden_size, dropout, hidden_continuous_size,
        # attention_head_size.
        hidden_size = trial.suggest_int("hidden_size",
                                        *hidden_size_range,
                                        log=True)
        dropout_rate = trial.suggest_uniform("dropout", *dropout_range)
        # The continuous hidden size is capped by the sampled hidden_size.
        continuous_upper = min(hidden_continuous_size_range[1], hidden_size)
        continuous_size = trial.suggest_int(
            "hidden_continuous_size",
            hidden_continuous_size_range[0],
            continuous_upper,
            log=True,
        )
        head_count = trial.suggest_int("attention_head_size",
                                       *attention_head_size_range)

        # Create the model.
        model = TemporalFusionTransformer.from_dataset(
            train_dataloader.dataset,
            dropout=dropout_rate,
            hidden_size=hidden_size,
            hidden_continuous_size=continuous_size,
            attention_head_size=head_count,
            log_interval=-1,
            **kwargs,
        )

        # Choose the learning rate: either sample it directly or derive it
        # from a learning rate finder run.
        if not use_learning_rate_finder:
            # NOTE(review): the parameter is registered under the name
            # "learning_rate_range" rather than "learning_rate" -- confirm
            # this key is what consumers of the study expect.
            model.hparams.learning_rate = trial.suggest_loguniform(
                "learning_rate_range", *learning_rate_range)
        else:
            finder_trainer = pl.Trainer(
                gradient_clip_val=clip_value,
                gpus=[0] if torch.cuda.is_available() else None,
                logger=False,
            )
            res = finder_trainer.tuner.lr_find(
                model,
                train_dataloader=train_dataloader,
                val_dataloaders=val_dataloader,
                early_stop_threshold=10000.0,
                min_lr=learning_rate_range[0],
                num_training=100,
                max_lr=learning_rate_range[1],
            )

            # Drop non-finite losses, smooth loss over learning rate and pick
            # the learning rate where the smoothed loss decreases fastest.
            # presumably `sm` is statsmodels.api and lowess returns
            # (lr, smoothed loss) rows -- verify against the file's imports.
            finite_mask = np.isfinite(res.results["loss"])
            finite_losses = np.asarray(res.results["loss"])[finite_mask]
            finite_lrs = np.asarray(res.results["lr"])[finite_mask]
            smoothed = sm.nonparametric.lowess(
                finite_losses,
                finite_lrs,
                frac=1.0 / 10.0,
            )
            # The first ten rows and the last row are discarded.
            lr_smoothed, loss_smoothed = smoothed[10:-1].T
            steepest = np.gradient(loss_smoothed).argmin()
            optimal_lr = lr_smoothed[steepest]
            print(f"Using learning rate of {optimal_lr:.3g}")
            model.hparams.learning_rate = optimal_lr

        # Fit the model.
        trainer.fit(model,
                    train_dataloader=train_dataloader,
                    val_dataloaders=val_dataloader)

        # Report the validation loss stored last by the metrics callback.
        last_metrics = metrics_recorder.metrics[-1]
        return last_metrics["val_loss"].item()
Beispiel #5
0
    # of the gradient for recurrent neural networks
    gradient_clip_val=1e-3,
    limit_train_batches=30,
    # fast_dev_run=True,
    early_stop_callback=early_stop_callback,
    callbacks=[lr_logger],
)

# Build the Temporal Fusion Transformer from the `training` dataset.
tft = TemporalFusionTransformer.from_dataset(
    training,
    # --- network size / regularisation ---
    # hidden_size is the most important hyperparameter besides the learning rate
    hidden_size=16,
    # keep this <= hidden_size
    hidden_continuous_size=8,
    # number of attention heads; up to 4 can be used for large datasets
    attention_head_size=1,
    # values between 0.1 and 0.3 are good choices
    dropout=0.1,
    # --- loss and output head ---
    loss=QuantileLoss(),
    # 7 quantiles by default
    output_size=7,
    # --- optimisation ---
    # irrelevant while searching for the learning rate, very important otherwise
    learning_rate=0.15,
    # lower the learning rate when validation loss stalls for x epochs (disabled)
    # reduce_on_plateau_patience=4,
    # --- logging cadence ---
    log_interval=10,
)
# Report the model size in thousands of parameters.
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

# find optimal learning rate
# res = trainer.lr_find(
#     tft,
#     train_dataloader=train_dataloader,
#     val_dataloaders=val_dataloader,
#     max_lr=10.0,