def training(self, manager, nb_epochs, checkpointdir=None, fold_index=None,
             scheduler=None, with_validation=True, save_after_epochs=1,
             add_labels=False):
    """ Train the model.

    Parameters
    ----------
    manager: a pynet DataManager
        a manager containing the train and validation data.
    nb_epochs: int
        the number of epochs.
    checkpointdir: str, default None
        a destination folder where intermediate models/histories will be
        saved. The folder (and any missing parent) is created if needed.
    fold_index: int, default None
        the index of the fold to use for the training, default use all the
        available folds.
    scheduler: torch.optim.lr_scheduler, default None
        a scheduler used to reduce the learning rate. It is stepped with
        the training loss after each epoch.
    with_validation: bool, default True
        if set use the validation dataset.
    save_after_epochs: int, default 1
        determines when the model is saved and represents the number of
        epochs before saving.
    add_labels: bool, default False
        not used by this method; kept so that existing callers keep
        working.

    Returns
    -------
    train_history, valid_history: History
        the train/validation history. 'valid_history' is None when
        'with_validation' is not set.
    """
    # Only restore the scheduler state when there is a scheduler to restore
    # into: resuming without a scheduler must not crash.
    if (self.resume and scheduler is not None
            and "scheduler" in self.checkpoint):
        scheduler.load_state_dict(self.checkpoint["scheduler"])
    if checkpointdir is not None:
        # makedirs also handles nested destinations and an existing folder.
        os.makedirs(checkpointdir, exist_ok=True)

    train_history = History(name="train")
    valid_history = History(name="validation") if with_validation else None
    logger.info("Loss function %s.", self.loss)
    logger.info("Optimizer function %s.", self.optimizer)

    if fold_index is not None:
        folds = [fold_index]
    else:
        folds = range(manager.number_of_folds)

    for fold in folds:
        logger.debug("Running fold %s...", fold)
        reset_weights(self.model, self.checkpoint)
        loaders = manager.get_dataloader(
            train=True,
            validation=with_validation,
            fold_index=fold)
        for epoch in range(nb_epochs):
            logger.debug("Running epoch %s:", epoch)
            logger.debug(" notify observers with signal 'before_epoch'.")
            self.notify_observers("before_epoch", epoch=epoch, fold=fold)
            observers_kwargs = {}

            logger.debug(" train.")
            loss, values = self.train(loaders.train)
            observers_kwargs["loss"] = loss
            observers_kwargs.update(values)
            if scheduler is not None:
                logger.debug(" update scheduler.")
                scheduler.step(loss)
            logger.debug(" update train history.")
            train_history.log((fold, epoch), loss=loss, **values)
            train_history.summary()

            # Same saving rule for the model and for both histories.
            save_now = (checkpointdir is not None
                        and epoch % save_after_epochs == 0)
            if save_now:
                logger.debug(" create checkpoint.")
                checkpoint(
                    model=self.model,
                    epoch=epoch,
                    fold=fold,
                    outdir=checkpointdir,
                    optimizer=self.optimizer,
                    scheduler=scheduler)
                train_history.save(
                    outdir=checkpointdir,
                    epoch=epoch,
                    fold=fold)

            if with_validation:
                logger.debug(" validation.")
                y_pred, loss, values = self.test(loaders.validation)
                observers_kwargs["val_loss"] = loss
                observers_kwargs.update({
                    "val_{0}".format(key): val
                    for key, val in values.items()})
                observers_kwargs["val_pred"] = y_pred
                logger.debug(" update validation history.")
                valid_history.log((fold, epoch), loss=loss, **values)
                valid_history.summary()
                if save_now:
                    logger.debug(" create checkpoint.")
                    valid_history.save(
                        outdir=checkpointdir,
                        epoch=epoch,
                        fold=fold)

            logger.debug(" notify observers with signal 'after_epoch'.")
            self.notify_observers("after_epoch", epoch=epoch, fold=fold,
                                  **observers_kwargs)
            logger.debug("End epoch.")
        logger.debug("End fold.")
    return train_history, valid_history
def training(self, manager: AbstractDataManager, nb_epochs: int,
             checkpointdir=None, fold_index=None, epoch_index=None,
             scheduler=None, with_validation=True, with_visualization=False,
             nb_epochs_per_saving=1, exp_name=None, standard_optim=True,
             gpu_time_profiling=False, **kwargs_train):
    """ Train the model.

    Parameters
    ----------
    manager: a pynet DataManager
        a manager containing the train and validation data.
    nb_epochs: int
        the number of epochs.
    checkpointdir: str, default None
        a destination folder where intermediate models/histories will be
        saved.
    fold_index: int or [int], default None
        the index(es) of the fold(s) to use for the training, default use
        all the available folds.
    epoch_index: int, default None
        the iteration where to start the counting from. It offsets the
        epoch recorded in the histories and checkpoints.
    scheduler: torch.optim.lr_scheduler, default None
        a scheduler used to reduce the learning rate.
    with_validation: bool, default True
        if set use the validation dataset.
    with_visualization: bool, default False
        if set, the histories are wrapped in Visualizer objects: the train
        one is handed to 'self.train' and the validation one is refreshed
        after each validation pass.
    nb_epochs_per_saving: int, default 1
        the number of epochs after which the model+optimizer's parameters
        are saved. The last epoch is always saved.
    exp_name: str, default None
        the experiment name, used to name the histories and checkpoints.
    standard_optim: bool, default True
        forwarded as is to 'self.train' and 'self.test'.
    gpu_time_profiling: bool, default False
        forwarded as is to 'self.train'.
    kwargs_train: dict
        extra keyword arguments forwarded to 'self.train' and 'self.test'.

    Returns
    -------
    train_history, valid_history: History
        the train/validation history. 'valid_history' is None when
        'with_validation' is not set.
    """
    history_suffix = exp_name or ""
    train_history = History(name="Train_%s" % history_suffix)
    valid_history = None
    if with_validation:
        valid_history = History(name="Validation_%s" % history_suffix)

    train_visualizer, valid_visualizer = None, None
    if with_visualization:
        train_visualizer = Visualizer(train_history)
        if with_validation:
            valid_visualizer = Visualizer(valid_history, offset_win=10)

    print(self.loss)
    print(self.optimizer)

    folds = range(manager.get_nb_folds())
    if fold_index is not None:
        if isinstance(fold_index, int):
            folds = [fold_index]
        elif isinstance(fold_index, (list, tuple)):
            # A tuple of indexes used to be silently ignored (all folds ran).
            folds = list(fold_index)
    if epoch_index is None:
        epoch_index = 0

    # Snapshot the initial states so that every fold starts from scratch.
    init_optim_state = deepcopy(self.optimizer.state_dict())
    init_model_state = deepcopy(self.model.state_dict())
    init_scheduler_state = None
    if scheduler is not None:
        init_scheduler_state = deepcopy(scheduler.state_dict())

    for fold in folds:
        # Initialize everything before optimizing on a new fold
        self.optimizer.load_state_dict(init_optim_state)
        self.model.load_state_dict(init_model_state)
        if scheduler is not None:
            scheduler.load_state_dict(init_scheduler_state)
        loader = manager.get_dataloader(
            train=True,
            validation=True,
            fold_index=fold)
        for epoch in range(nb_epochs):
            global_epoch = epoch + epoch_index
            self.notify_observers("before_epoch", epoch=epoch, fold=fold)
            loss, values = self.train(
                loader.train, train_visualizer, fold, epoch,
                standard_optim=standard_optim,
                gpu_time_profiling=gpu_time_profiling,
                **kwargs_train)
            train_history.log((fold, global_epoch), loss=loss, **values)
            train_history.summary()

            if scheduler is not None:
                scheduler.step()
                # get_lr() is not meant to be called outside step(); use
                # get_last_lr() when the installed torch provides it.
                read_lr = getattr(scheduler, "get_last_lr", None)
                if read_lr is None:
                    read_lr = scheduler.get_lr
                print('Scheduler lr: {}'.format(read_lr()), flush=True)
                print('Optimizer lr: %f'
                      % self.optimizer.param_groups[0]['lr'], flush=True)

            # Skip the periodic save at epoch 0, but never skip the final
            # epoch (the previous guard dropped it when nb_epochs == 1).
            is_last_epoch = epoch == nb_epochs - 1
            save_now = checkpointdir is not None and (
                is_last_epoch
                or (epoch > 0 and epoch % nb_epochs_per_saving == 0))
            if save_now:
                checkpoint(
                    model=self.model,
                    epoch=global_epoch,
                    fold=fold,
                    outdir=checkpointdir,
                    name=exp_name,
                    optimizer=self.optimizer)
                train_history.save(
                    outdir=checkpointdir,
                    epoch=global_epoch,
                    fold=fold)

            if with_validation:
                _, _, _, loss, values = self.test(
                    loader.validation,
                    standard_optim=standard_optim,
                    **kwargs_train)
                valid_history.log((fold, global_epoch),
                                  validation_loss=loss, **values)
                valid_history.summary()
                if valid_visualizer is not None:
                    valid_visualizer.refresh_current_metrics()
                if save_now:
                    valid_history.save(
                        outdir=checkpointdir,
                        epoch=global_epoch,
                        fold=fold)
            self.notify_observers("after_epoch", epoch=epoch, fold=fold)
    return train_history, valid_history
def training(self, manager, nb_epochs, checkpointdir=None, fold_index=None,
             scheduler=None, with_validation=True):
    """ Train the model.

    Parameters
    ----------
    manager: a pynet DataManager
        a manager containing the train and validation data.
    nb_epochs: int
        the number of epochs.
    checkpointdir: str, default None
        a destination folder where intermediate models/histories will be
        saved after every epoch. The folder (and any missing parent) is
        created if needed.
    fold_index: int, default None
        the index of the fold to use for the training, default use all the
        available folds.
    scheduler: torch.optim.lr_scheduler, default None
        a scheduler used to reduce the learning rate. It is stepped with
        the training loss after each epoch.
    with_validation: bool, default True
        if set use the validation dataset.

    Returns
    -------
    train_history, valid_history: History
        the train/validation history. 'valid_history' is None when
        'with_validation' is not set.
    """
    save_outputs = checkpointdir is not None
    if save_outputs:
        # makedirs also handles nested destinations and an existing folder.
        os.makedirs(checkpointdir, exist_ok=True)

    train_history = History(name="train")
    valid_history = History(name="validation") if with_validation else None
    print(self.loss)
    print(self.optimizer)

    if fold_index is not None:
        folds = [fold_index]
    else:
        folds = range(manager.number_of_folds)

    for fold in folds:
        reset_weights(self.model)
        # Only request the validation loader when it is going to be used.
        loaders = manager.get_dataloader(
            train=True,
            validation=with_validation,
            fold_index=fold)
        for epoch in range(nb_epochs):
            self.notify_observers("before_epoch", epoch=epoch, fold=fold)
            observers_kwargs = {}

            loss, values = self.train(loaders.train)
            observers_kwargs["loss"] = loss
            observers_kwargs.update(values)
            if scheduler is not None:
                scheduler.step(loss)
            train_history.log((fold, epoch), loss=loss, **values)
            train_history.summary()
            if save_outputs:
                checkpoint(
                    model=self.model,
                    epoch=epoch,
                    fold=fold,
                    outdir=checkpointdir,
                    optimizer=self.optimizer)
                train_history.save(
                    outdir=checkpointdir,
                    epoch=epoch,
                    fold=fold)

            if with_validation:
                _, loss, values = self.test(loaders.validation)
                observers_kwargs["val_loss"] = loss
                observers_kwargs.update({
                    "val_{0}".format(key): val
                    for key, val in values.items()})
                valid_history.log((fold, epoch), loss=loss, **values)
                valid_history.summary()
                if save_outputs:
                    valid_history.save(
                        outdir=checkpointdir,
                        epoch=epoch,
                        fold=fold)

            self.notify_observers("after_epoch", epoch=epoch, fold=fold,
                                  **observers_kwargs)
    return train_history, valid_history