Example #1
0
    def training(self,
                 manager,
                 nb_epochs,
                 checkpointdir=None,
                 fold_index=None,
                 scheduler=None,
                 with_validation=True,
                 save_after_epochs=1,
                 add_labels=False):
        """ Train the model.

        For each selected fold the model weights are reset, then the model
        is trained for 'nb_epochs' epochs. Observers are notified with the
        'before_epoch' and 'after_epoch' signals around each epoch.

        Parameters
        ----------
        manager: a pynet DataManager
            a manager containing the train and validation data.
        nb_epochs: int
            the number of epochs.
        checkpointdir: str, default None
            a destination folder where intermediate models/histories will be
            saved. The folder, including missing parents, is created if it
            does not exist.
        fold_index: int, default None
            the index of the fold to use for the training, default use all the
            available folds.
        scheduler: torch.optim.lr_scheduler, default None
            a scheduler used to reduce the learning rate. Its 'step' method
            is called with the training loss after each epoch, and its state
            is restored from the checkpoint when resuming.
        with_validation: bool, default True
            if set use the validation dataset.
        save_after_epochs: int, default 1
            determines when the model is saved and represents the number of
            epochs before saving.
        add_labels: bool, default False
            currently not used by this method.

        Returns
        -------
        train_history, valid_history: History
            the train/validation history.
        """
        # The saved scheduler state can only be restored if a scheduler was
        # actually provided by the caller.
        if (self.resume and "scheduler" in self.checkpoint
                and scheduler is not None):
            scheduler.load_state_dict(self.checkpoint["scheduler"])
        if checkpointdir is not None and not os.path.isdir(checkpointdir):
            # makedirs also creates the missing parent folders.
            os.makedirs(checkpointdir)
        train_history = History(name="train")
        # NOTE(review): this test is true for any boolean, so a validation
        # history is returned even when 'with_validation' is False. Kept
        # as is to preserve the returned value; confirm the intent.
        if with_validation is not None:
            valid_history = History(name="validation")
        else:
            valid_history = None
        logger.info("Loss function {0}.".format(self.loss))
        logger.info("Optimizer function {0}.".format(self.optimizer))
        folds = range(manager.number_of_folds)
        if fold_index is not None:
            folds = [fold_index]
        for fold in folds:
            logger.debug("Running fold {0}...".format(fold))
            reset_weights(self.model, self.checkpoint)
            loaders = manager.get_dataloader(train=True,
                                             validation=with_validation,
                                             fold_index=fold)
            for epoch in range(nb_epochs):
                logger.debug("Running epoch {0}:".format(epoch))
                logger.debug("  notify observers with signal 'before_epoch'.")
                self.notify_observers("before_epoch", epoch=epoch, fold=fold)
                # Values collected here are forwarded to the 'after_epoch'
                # observers.
                observers_kwargs = {}
                logger.debug("  train.")
                loss, values = self.train(loaders.train)
                observers_kwargs["loss"] = loss
                observers_kwargs.update(values)
                if scheduler is not None:
                    logger.debug("  update scheduler.")
                    scheduler.step(loss)
                logger.debug("  update train history.")
                train_history.log((fold, epoch), loss=loss, **values)
                train_history.summary()
                # Same saving rule for the model and both histories.
                save_now = (checkpointdir is not None
                            and epoch % save_after_epochs == 0)
                if save_now:
                    logger.debug("  create checkpoint.")
                    checkpoint(model=self.model,
                               epoch=epoch,
                               fold=fold,
                               outdir=checkpointdir,
                               optimizer=self.optimizer,
                               scheduler=scheduler)
                    train_history.save(outdir=checkpointdir,
                                       epoch=epoch,
                                       fold=fold)
                if with_validation:
                    logger.debug("  validation.")
                    y_pred, loss, values = self.test(loaders.validation)
                    observers_kwargs["val_loss"] = loss
                    # Validation metrics are exposed with a 'val_' prefix.
                    observers_kwargs.update(
                        {"val_{0}".format(key): val
                         for key, val in values.items()})
                    observers_kwargs["val_pred"] = y_pred
                    logger.debug("  update validation history.")
                    valid_history.log((fold, epoch), loss=loss, **values)
                    valid_history.summary()
                    if save_now:
                        logger.debug("  create checkpoint.")
                        valid_history.save(outdir=checkpointdir,
                                           epoch=epoch,
                                           fold=fold)
                logger.debug("  notify observers with signal 'after_epoch'.")
                self.notify_observers("after_epoch",
                                      epoch=epoch,
                                      fold=fold,
                                      **observers_kwargs)
                logger.debug("End epoch.")
            logger.debug("End fold.")
        return train_history, valid_history
Example #2
0
    def training(self, manager: AbstractDataManager, nb_epochs: int, checkpointdir=None,
                 fold_index=None, epoch_index=None,
                 scheduler=None, with_validation=True, with_visualization=False,
                 nb_epochs_per_saving=1, exp_name=None, standard_optim=True,
                 gpu_time_profiling=False, **kwargs_train):
        """ Train the model.

        Before each fold the model, optimizer and (if given) scheduler are
        restored to the state they had when this method was called.
        Observers are notified with the 'before_epoch' and 'after_epoch'
        signals around each epoch.

        Parameters
        ----------
        manager: a pynet DataManager
            a manager containing the train and validation data.
        nb_epochs: int
            the number of epochs.
        checkpointdir: str, default None
            a destination folder where intermediate models/histories will be
            saved.
        fold_index: int or [int] default None
            the index(es) of the fold(s) to use for the training, default use all the
            available folds. A list or a tuple of indexes is accepted.
        epoch_index: int, default None
            the iteration where to start the counting from: this offset is
            added to the epoch number stored in the histories and in the
            checkpoints (observers receive the epoch number without offset).
        scheduler: torch.optim.lr_scheduler, default None
            a scheduler used to reduce the learning rate. Its 'step' method
            is called without argument after each epoch.
        with_validation: bool, default True
            if set use the validation dataset.
        with_visualization: bool, default False,
            whether it uses a visualizer that will plot the losses/metrics/images in a WebApp framework
            during the training process
        nb_epochs_per_saving: int, default 1,
            the number of epochs after which the model+optimizer's parameters are saved
        exp_name: str, default None
            the experience name that will be launched
        standard_optim: bool, default True
            forwarded as is to the 'train' and 'test' methods.
        gpu_time_profiling: bool, default False
            forwarded as is to the 'train' method.
        kwargs_train: dict
            extra keyword arguments forwarded to the 'train' and 'test'
            methods.

        Returns
        -------
        train_history, valid_history: History
            the train/validation history.
        """

        train_history = History(name="Train_%s"%(exp_name or ""))
        # NOTE(review): this test is true for any boolean, so a validation
        # history is returned even when 'with_validation' is False. Kept
        # as is to preserve the returned value; confirm the intent.
        if with_validation is not None:
            valid_history = History(name="Validation_%s"%(exp_name or ""))
        else:
            valid_history = None
        train_visualizer, valid_visualizer = None, None
        if with_visualization:
            train_visualizer = Visualizer(train_history)
            if with_validation:
                valid_visualizer = Visualizer(valid_history, offset_win=10)
        print(self.loss)
        print(self.optimizer)
        folds = range(manager.get_nb_folds())
        if fold_index is not None:
            if isinstance(fold_index, int):
                folds = [fold_index]
            elif isinstance(fold_index, (list, tuple)):
                # A tuple used to be silently ignored, which trained on
                # every fold instead of the requested ones.
                folds = fold_index
            # Any other type keeps the default: all the folds are used.
        if epoch_index is None:
            epoch_index = 0
        # Snapshot of the initial states, restored at the start of each fold.
        init_optim_state = deepcopy(self.optimizer.state_dict())
        init_model_state = deepcopy(self.model.state_dict())
        if scheduler is not None:
            init_scheduler_state = deepcopy(scheduler.state_dict())
        for fold in folds:
            # Initialize everything before optimizing on a new fold
            self.optimizer.load_state_dict(init_optim_state)
            self.model.load_state_dict(init_model_state)
            if scheduler is not None:
                scheduler.load_state_dict(init_scheduler_state)
            # NOTE(review): the validation loader is requested even when
            # 'with_validation' is False; confirm whether this is intended.
            loader = manager.get_dataloader(
                train=True,
                validation=True,
                fold_index=fold)
            for epoch in range(nb_epochs):
                self.notify_observers("before_epoch", epoch=epoch, fold=fold)
                loss, values = self.train(loader.train, train_visualizer, fold, epoch,
                                          standard_optim=standard_optim,
                                          gpu_time_profiling=gpu_time_profiling, **kwargs_train)

                train_history.log((fold, epoch+epoch_index), loss=loss, **values)
                train_history.summary()
                if scheduler is not None:
                    scheduler.step()
                    # 'get_lr' is meant to be called from within 'step' and
                    # may report a wrong value elsewhere: prefer
                    # 'get_last_lr' when the scheduler provides it.
                    if hasattr(scheduler, "get_last_lr"):
                        scheduler_lr = scheduler.get_last_lr()
                    else:
                        scheduler_lr = scheduler.get_lr()
                    print('Scheduler lr: {}'.format(scheduler_lr), flush=True)
                    print('Optimizer lr: %f'%self.optimizer.param_groups[0]['lr'], flush=True)
                # Save every 'nb_epochs_per_saving' epochs and at the last
                # epoch, but never at epoch 0 (even if it is the only one).
                save_now = (checkpointdir is not None
                            and (epoch % nb_epochs_per_saving == 0 or epoch == nb_epochs-1)
                            and epoch > 0)
                if save_now:
                    checkpoint(
                        model=self.model,
                        epoch=epoch+epoch_index,
                        fold=fold,
                        outdir=checkpointdir,
                        name=exp_name,
                        optimizer=self.optimizer)
                    train_history.save(
                        outdir=checkpointdir,
                        epoch=epoch+epoch_index,
                        fold=fold)
                if with_validation:
                    # Only the loss and the metrics returned by 'test' are
                    # used here.
                    _, _, _, loss, values = self.test(loader.validation,
                                                      standard_optim=standard_optim, **kwargs_train)
                    valid_history.log((fold, epoch+epoch_index), validation_loss=loss, **values)
                    valid_history.summary()
                    if valid_visualizer is not None:
                        valid_visualizer.refresh_current_metrics()
                    if save_now:
                        valid_history.save(
                            outdir=checkpointdir,
                            epoch=epoch+epoch_index,
                            fold=fold)
                self.notify_observers("after_epoch", epoch=epoch, fold=fold)
        return train_history, valid_history
Example #3
0
    def training(self,
                 manager,
                 nb_epochs,
                 checkpointdir=None,
                 fold_index=None,
                 scheduler=None,
                 with_validation=True):
        """ Train the model.

        For each selected fold the model weights are reset, then the model
        is trained for 'nb_epochs' epochs. Observers are notified with the
        'before_epoch' and 'after_epoch' signals around each epoch.

        Parameters
        ----------
        manager: a pynet DataManager
            a manager containing the train and validation data.
        nb_epochs: int
            the number of epochs.
        checkpointdir: str, default None
            a destination folder where intermediate models/histories will be
            saved after each epoch. The folder, including missing parents,
            is created if it does not exist.
        fold_index: int, default None
            the index of the fold to use for the training, default use all the
            available folds.
        scheduler: torch.optim.lr_scheduler, default None
            a scheduler used to reduce the learning rate. Its 'step' method
            is called with the training loss after each epoch.
        with_validation: bool, default True
            if set use the validation dataset.

        Returns
        -------
        train_history, valid_history: History
            the train/validation history.
        """
        if checkpointdir is not None and not os.path.isdir(checkpointdir):
            # makedirs also creates the missing parent folders.
            os.makedirs(checkpointdir)
        train_history = History(name="train")
        # NOTE(review): this test is true for any boolean, so a validation
        # history is returned even when 'with_validation' is False. Kept
        # as is to preserve the returned value; confirm the intent.
        if with_validation is not None:
            valid_history = History(name="validation")
        else:
            valid_history = None
        print(self.loss)
        print(self.optimizer)
        folds = range(manager.number_of_folds)
        if fold_index is not None:
            folds = [fold_index]
        for fold in folds:
            reset_weights(self.model)
            # Only request the validation loader when it is going to be used.
            loaders = manager.get_dataloader(train=True,
                                             validation=with_validation,
                                             fold_index=fold)
            for epoch in range(nb_epochs):
                self.notify_observers("before_epoch", epoch=epoch, fold=fold)
                # Values collected here are forwarded to the 'after_epoch'
                # observers.
                observers_kwargs = {}
                loss, values = self.train(loaders.train)
                observers_kwargs["loss"] = loss
                observers_kwargs.update(values)
                if scheduler is not None:
                    scheduler.step(loss)
                train_history.log((fold, epoch), loss=loss, **values)
                train_history.summary()
                if checkpointdir is not None:
                    checkpoint(model=self.model,
                               epoch=epoch,
                               fold=fold,
                               outdir=checkpointdir,
                               optimizer=self.optimizer)
                    train_history.save(outdir=checkpointdir,
                                       epoch=epoch,
                                       fold=fold)
                if with_validation:
                    # The first value returned by 'test' is not used here.
                    _, loss, values = self.test(loaders.validation)
                    observers_kwargs["val_loss"] = loss
                    # Validation metrics are exposed with a 'val_' prefix.
                    observers_kwargs.update(
                        {"val_{0}".format(key): val
                         for key, val in values.items()})
                    valid_history.log((fold, epoch), loss=loss, **values)
                    valid_history.summary()
                    if checkpointdir is not None:
                        valid_history.save(outdir=checkpointdir,
                                           epoch=epoch,
                                           fold=fold)
                self.notify_observers("after_epoch",
                                      epoch=epoch,
                                      fold=fold,
                                      **observers_kwargs)
        return train_history, valid_history