    def test_integration(self):
        train_gen = some_data_generator(OptimizerCheckpointTest.batch_size)
        valid_gen = some_data_generator(OptimizerCheckpointTest.batch_size)
        checkpointer = OptimizerCheckpoint(self.checkpoint_filename, period=1)
        self.model.fit_generator(train_gen,
                                 valid_gen,
                                 epochs=OptimizerCheckpointTest.epochs,
                                 steps_per_epoch=5,
                                 callbacks=[checkpointer])
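
For reference, `some_data_generator` is a test utility that is not shown here; a minimal sketch of what such a generator might yield (random input/target tensors for a toy regression task, an assumption for illustration only) is:

import torch

def some_data_generator(batch_size):
    # Infinite generator yielding (inputs, targets) batches of random data.
    while True:
        x = torch.rand(batch_size, 1)
        y = torch.rand(batch_size, 1)
        yield x, y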
Example 2
    def train(self,
              train_loader,
              valid_loader=None,
              *,
              callbacks=None,
              lr_schedulers=None,
              save_every_epoch=False,
              disable_tensorboard=False,
              epochs=1000,
              steps_per_epoch=None,
              validation_steps=None,
              seed=42):
        if seed is not None:
            # Make training deterministic.
            random.seed(seed)
            np.random.seed(seed)
            torch.manual_seed(seed)

        # Use empty lists as defaults and copy the callback list so the
        # caller's arguments are never mutated.
        callbacks = [] if callbacks is None else list(callbacks)
        lr_schedulers = [] if lr_schedulers is None else lr_schedulers

        tensorboard_writer = None
        initial_epoch = 1
        if self.logging:
            if not os.path.exists(self.directory):
                os.makedirs(self.directory)

            # Restarting optimization if needed.
            initial_epoch = self._load_epoch_state(lr_schedulers)

            callbacks += [
                CSVLogger(self.log_filename,
                          separator='\t',
                          append=initial_epoch != 1)
            ]

            callbacks += self._init_model_restoring_callbacks(
                initial_epoch, save_every_epoch)
            callbacks += [
                ModelCheckpoint(
                    self.model_checkpoint_filename,
                    verbose=False,
                    temporary_filename=self.model_checkpoint_tmp_filename)
            ]
            callbacks += [
                OptimizerCheckpoint(
                    self.optimizer_checkpoint_filename,
                    verbose=False,
                    temporary_filename=self.optimizer_checkpoint_tmp_filename)
            ]

            # We save the last epoch number at the end of each epoch so that
            # _load_epoch_state() knows from which epoch to restart the optimization.
            callbacks += [
                PeriodicSaveLambda(
                    lambda fd, epoch, logs: print(epoch, file=fd),
                    self.epoch_filename,
                    temporary_filename=self.epoch_tmp_filename,
                    open_mode='w')
            ]

            tensorboard_writer, cb_list = self._init_tensorboard_callbacks(
                disable_tensorboard)
            callbacks += cb_list

        # This method returns callbacks that checkpoint the LR schedulers if logging is enabled.
        # Otherwise, it just returns the list of LR schedulers with a BestModelRestore callback.
        callbacks += self._init_lr_scheduler_callbacks(lr_schedulers)

        try:
            return self.model.fit_generator(train_loader,
                                            valid_loader,
                                            epochs=epochs,
                                            steps_per_epoch=steps_per_epoch,
                                            validation_steps=validation_steps,
                                            initial_epoch=initial_epoch,
                                            callbacks=callbacks)
        finally:
            if tensorboard_writer is not None:
                tensorboard_writer.close()
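
The `_load_epoch_state()` helper is not shown in this snippet. As a minimal sketch of its epoch-numbering side only (file name and format assumed from the `PeriodicSaveLambda` callback above, which prints the last finished epoch number to `self.epoch_filename`; the real helper presumably also reloads model and optimizer weights), it could look like:

import os

def _load_epoch_state_sketch(epoch_filename):
    # Resume from the epoch following the last saved one, or start at epoch 1.
    initial_epoch = 1
    if os.path.isfile(epoch_filename):
        with open(epoch_filename, 'r') as fd:
            initial_epoch = int(fd.read().strip()) + 1
    return initial_epoch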
    def test_checkpoints(self):
        checkpointer = OptimizerCheckpoint(self.checkpoint_filename, period=1)
        self._test_checkpointer(checkpointer)
Example 4
    def train(self,
              train_generator,
              valid_generator=None,
              *,
              callbacks=None,
              lr_schedulers=None,
              save_every_epoch=False,
              disable_tensorboard=False,
              epochs=1000,
              steps_per_epoch=None,
              validation_steps=None,
              batches_per_step=1,
              seed=42):
        # pylint: disable=too-many-locals
        """
        Trains or fine-tunes the model attribute on a dataset using a generator. If a previous training has already
        occurred and lasted a total of `n_previous` epochs, then the model's weights will be set to the last checkpoint
        and the training will be resumed for the epoch range (`n_previous`, `epochs`].

        If the Experiment has logging enabled (i.e. self.logging is True), numerous callbacks will be automatically
        included. Notably, two :class:`~callbacks.ModelCheckpoint` objects will take care of saving the last and every
        new best (according to monitor mode) model weights in appropriate checkpoint files.
        :class:`~callbacks.OptimizerCheckpoint` and :class:`~callbacks.LRSchedulerCheckpoint` will also respectively
        handle the saving of the optimizer and LR scheduler's respective states for future retrieval. Moreover, a
        :class:`~callbacks.AtomicCSVLogger` will save all available epoch statistics in an output .tsv file. Lastly, a
        :class:`~callbacks.TensorBoardLogger` handles automatic TensorBoard logging of various neural network
        statistics.

        Args:
            train_generator: Generator-like object for the training set. See :func:`~Model.fit_generator()`
                for details on the types of generators supported.
            valid_generator (optional): Generator-like object for the validation set. See
                :func:`~Model.fit_generator()` for details on the types of generators supported.
                (Default value = None)
            callbacks (List[~poutyne.framework.callbacks.Callback]): List of callbacks that will be called during
                training.
                (Default value = None)
            lr_schedulers (List[~poutyne.framework.callbacks.lr_scheduler._PyTorchLRSchedulerWrapper]): List of
                learning rate schedulers.
                (Default value = None)
            save_every_epoch (bool, optional): Whether or not to save the experiment model's weights after
                every epoch.
                (Default value = False)
            disable_tensorboard (bool, optional): Whether or not to disable the automatic tensorboard logging
                callbacks.
                (Default value = False)
            epochs (int): Number of times the entire training dataset is seen.
                (Default value = 1000)
            steps_per_epoch (int, optional): Number of batches used during one epoch. Using this argument
                may cause one epoch not to see the entire training dataset or to see it multiple times.
                (Defaults to the number of steps needed to see the entire training dataset)
            validation_steps (int, optional): Same as for ``steps_per_epoch`` but for the validation dataset.
                (Defaults to ``steps_per_epoch`` if provided or the number of steps needed to see the entire
                validation dataset)
            batches_per_step (int): Number of batches on which to compute the running loss before
                backpropagating it through the network. Note that the total loss used for backpropagation is
                the mean of the `batches_per_step` batch losses.
                (Default value = 1)
            seed (int, optional): Seed used to make the sampling deterministic.
                (Default value = 42)

        Returns:
            List of dict containing the history of each epoch.
        """
        set_seeds(seed)

        callbacks = [] if callbacks is None else callbacks
        lr_schedulers = [] if lr_schedulers is None else lr_schedulers

        # Copy callback list.
        callbacks = list(callbacks)

        tensorboard_writer = None
        initial_epoch = 1
        if self.logging:
            if not os.path.exists(self.directory):
                os.makedirs(self.directory)

            # Restarting optimization if needed.
            initial_epoch = self._load_epoch_state(lr_schedulers)

            callbacks += [
                AtomicCSVLogger(self.log_filename,
                                separator='\t',
                                append=initial_epoch != 1,
                                temporary_filename=self.log_tmp_filename)
            ]

            callbacks += self._init_model_restoring_callbacks(
                initial_epoch, save_every_epoch)
            callbacks += [
                ModelCheckpoint(
                    self.model_checkpoint_filename,
                    verbose=False,
                    temporary_filename=self.model_checkpoint_tmp_filename)
            ]
            callbacks += [
                OptimizerCheckpoint(
                    self.optimizer_checkpoint_filename,
                    verbose=False,
                    temporary_filename=self.optimizer_checkpoint_tmp_filename)
            ]

            # We save the last epoch number at the end of each epoch so that
            # _load_epoch_state() knows from which epoch to restart the optimization.
            callbacks += [
                PeriodicSaveLambda(
                    lambda fd, epoch, logs: print(epoch, file=fd),
                    self.epoch_filename,
                    temporary_filename=self.epoch_tmp_filename,
                    open_mode='w')
            ]

            tensorboard_writer, cb_list = self._init_tensorboard_callbacks(
                disable_tensorboard)
            callbacks += cb_list

        # This method returns callbacks that checkpoint the LR schedulers if logging is enabled.
        # Otherwise, it just returns the list of LR schedulers with a BestModelRestore callback.
        callbacks += self._init_lr_scheduler_callbacks(lr_schedulers)

        try:
            return self.model.fit_generator(train_generator,
                                            valid_generator,
                                            epochs=epochs,
                                            steps_per_epoch=steps_per_epoch,
                                            validation_steps=validation_steps,
                                            batches_per_step=batches_per_step,
                                            initial_epoch=initial_epoch,
                                            callbacks=callbacks)
        finally:
            if tensorboard_writer is not None:
                tensorboard_writer.close()
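
A hedged usage sketch of this second `train()` variant, assuming an `Experiment`-style wrapper around a PyTorch network and a standard `DataLoader` (the import path, network, directory, optimizer, and loss choices below are illustrative assumptions, not taken from the code above):

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from poutyne.framework import Experiment  # assumed import path for the wrapper exposing train()

network = nn.Linear(10, 1)
dataset = TensorDataset(torch.rand(256, 10), torch.rand(256, 1))
train_loader = DataLoader(dataset, batch_size=32)

# Assumed Experiment-like wrapper around the network; logging goes to './expt_dir'.
expt = Experiment('./expt_dir', network, optimizer='sgd', loss_function='mse')
history = expt.train(train_loader,
                     epochs=10,
                     batches_per_step=2,  # average the loss of 2 batches before each optimizer step
                     seed=42)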