Example #1
0
    def send(self, signal: Signal) -> None:
        """
        AUTHORS:
        --------

        :author: Alix Leroy

        DESCRIPTION:
        ------------

        Send the signal to every receiver connected to the signal's event.

        Each connection may declare the arguments it expects; in that case the signal
        arguments are filtered through `keep_arguments` for that connection only.
        The signal's original arguments are left untouched, so every connection is
        filtered from the full set and not from a previous connection's subset.

        If the event has no entry in `self.connections`, an error notification is displayed.

        PARAMETERS:
        -----------

        :param signal: (Signal): A signal to send

        RETURN:
        -------

        :return: None
        """
        # Get event and arguments from the signal to send
        event = signal.get_event()
        arguments = signal.get_arguments()
        event_index = event.get_index()

        # No receiver registered for this event => display an error notification
        if event_index not in self.connections:
            Notification(
                DEEP_NOTIF_ERROR,
                "The following event '%s' is not connected to any receiver." %
                str(event.get_description()))
            return

        # Broadcast the signal to all the connections of the event
        for connection in self.connections[event_index]:
            receiver = connection.get_receiver()
            expected_arguments = connection.get_expected_arguments()

            # Filter into a per-connection variable: overwriting `arguments` here would
            # hand an already reduced set of arguments to the following connections
            receiver_arguments = arguments
            if expected_arguments is not None:
                receiver_arguments = self.keep_arguments(
                    receiver=receiver,
                    expected_arguments=expected_arguments,
                    arguments=arguments)

            # Need twice the brackets because of the weak method reference
            # NOTE(review): if the referent is gone, receiver() presumably returns None
            # and this call raises TypeError - confirm how dead receivers should be handled
            receiver()(**receiver_arguments)
Example #2
0
 def send_epoch_end_signal(self, **kwargs):
     """
     DESCRIPTION:
     ------------

     Send the epoch end signal, completed with the training statistics of the instance

     PARAMETERS:
     -----------

     :param kwargs: Extra arguments sent with the signal. The keys "epoch_index", "loss",
                    "losses" and "metrics" are overwritten with the values held by the instance

     RETURN:
     -------

     :return: None
     """
     # Add (or overwrite) the training statistics in the arguments of the signal
     kwargs.update(
         epoch_index=self.epoch,
         loss=self.train_loss,
         losses=self.train_losses,
         metrics=self.train_metrics,
     )

     thalamus = Thalamus()
     thalamus.add_signal(
         signal=Signal(event=DEEP_EVENT_EPOCH_END, args=kwargs))
Example #3
0
    def send_save_params(self, inp=None) -> None:
        """
        AUTHORS:
        --------

        :author: Samuel Westlake
        :author: Alix Leroy

        DESCRIPTION:
        ------------

        Send the saving parameters (model, optimizer, epoch index, validation loss and
        input size) through a DEEP_EVENT_SEND_SAVE_PARAMS_FROM_TRAINER signal

        PARAMETERS:
        -----------

        :param inp: The input size of the model (required for ONNX models)

        RETURN:
        -------

        :return: None
        """
        thalamus = Thalamus()

        # Everything required to save the current state of the training
        save_params = {
            "model": self.model,
            "optimizer": self.optimizer,
            "epoch_index": self.epoch,
            "validation_loss": self.validation_loss,
            "inp": inp
        }

        thalamus.add_signal(
            Signal(event=DEEP_EVENT_SEND_SAVE_PARAMS_FROM_TRAINER,
                   args=save_params))
Example #4
0
    def saving_required(self, saving_required: bool):
        """
        AUTHORS:
        --------

        :author: Alix Leroy

        DESCRIPTION:
        ------------

        Send the signal requesting the model to be saved, if saving is required
        NB : Contains a signal, cannot be static

        PARAMETERS:
        -----------

        :param saving_required: (bool): Whether saving the model is required or not.
                                        Only the value True triggers the signal

        RETURN:
        -------

        None
        """
        # Nothing to do unless saving was explicitly required
        if saving_required is not True:
            return

        thalamus = Thalamus()
        thalamus.add_signal(
            signal=Signal(event=DEEP_EVENT_SAVE_MODEL, args={}))
Example #5
0
 def send_training_loss(self):
     """
     DESCRIPTION:
     ------------

     Send a DEEP_EVENT_SEND_TRAINING_LOSS signal carrying one entry of the loss data

     RETURN:
     -------

     :return: None
     """
     thalamus = Thalamus()

     # NOTE(review): the training loss signal is keyed with (and reads) the VALIDATION
     # log name - confirm this is intended and should not be the training log
     key = DEEP_LOG_VALIDATION.var_name
     loss_args = {key: self._loss_data[key]}

     thalamus.add_signal(
         Signal(event=DEEP_EVENT_SEND_TRAINING_LOSS, args=loss_args))
Example #6
0
 def save_model(self):
     """
     DESCRIPTION:
     ------------

     Send the DEEP_EVENT_SAVE_MODEL signal (without any argument)

     RETURN:
     -------

     :return: None
     """
     thalamus = Thalamus()
     save_signal_args = {}
     thalamus.add_signal(
         signal=Signal(event=DEEP_EVENT_SAVE_MODEL, args=save_signal_args))
Example #7
0
    def saving_required(self, saving_required: bool):
        """
        DESCRIPTION:
        ------------

        Send the DEEP_EVENT_SAVE_MODEL signal together with the model, if saving is required

        PARAMETERS:
        -----------

        :param saving_required: (bool): Whether saving the model is required or not.
                                        Only the value True triggers the signal

        RETURN:
        -------

        :return: None
        """
        # Nothing to do unless saving was explicitly required
        if saving_required is not True:
            return

        thalamus = Thalamus()
        thalamus.add_signal(
            signal=Signal(event=DEEP_EVENT_SAVE_MODEL,
                          args={"model": self.model}))
Example #8
0
    def __compute_overwatch_metric(self, num_minibatches_training,
                                   running_total_loss, running_losses,
                                   running_metrics, total_validation_loss,
                                   result_validation_losses,
                                   result_validation_metrics) -> None:
        """
        DESCRIPTION:
        ------------

        Update the value of the overwatch metric and send a copy of it through a
        DEEP_EVENT_OVERWATCH_METRIC_COMPUTED signal.

        Without validation (total_validation_loss is None) the candidate values are the
        running training values averaged over the training minibatches. With validation,
        the validation results are used as given: the total loss and the individual losses
        were already used unscaled, and the metrics are now treated the same way instead of
        being divided by the number of TRAINING minibatches.

        If the name of the overwatch metric is not among the candidates, its value is left unchanged.

        PARAMETERS:
        -----------

        :param num_minibatches_training: Number of training minibatches used to average the running values
        :param running_total_loss: Sum of the total training loss over the minibatches
        :param running_losses: (dict): Summed training losses, values must expose `.item()`
        :param running_metrics: (dict): Summed training metrics
        :param total_validation_loss: Total validation loss, or None if there is no validation
        :param result_validation_losses: (dict): Validation losses, values must expose `.item()`
        :param result_validation_metrics: (dict): Validation metrics

        RETURN:
        -------

        :return: None
        """
        # If the validation loss is None (No validation) we take the metric from the training as overwatch metric
        if total_validation_loss is None:
            data = {TOTAL_LOSS: running_total_loss / num_minibatches_training}
            data.update(
                (loss_name, value.item() / num_minibatches_training)
                for loss_name, value in running_losses.items())
            data.update(
                (metric_name, value / num_minibatches_training)
                for metric_name, value in running_metrics.items())
        else:
            data = {TOTAL_LOSS: total_validation_loss}
            data.update(
                (loss_name, value.item())
                for loss_name, value in result_validation_losses.items())
            # Validation metrics are not rescaled by the number of training minibatches
            data.update(result_validation_metrics)

        # Update the overwatch metric only if it is one of the computed values
        overwatch_name = self.overwatch_metric.get_name()
        if overwatch_name in data:
            self.overwatch_metric.set_value(data[overwatch_name])

        # Send a copy so that later updates of the metric do not alter the value sent
        Thalamus().add_signal(
            Signal(event=DEEP_EVENT_OVERWATCH_METRIC_COMPUTED,
                   args={
                       "current_overwatch_metric":
                       copy.deepcopy(self.overwatch_metric)
                   }))
Example #9
0
 def send_validation_end_signal(**kwargs):
     """
     DESCRIPTION:
     ------------

     Send the DEEP_EVENT_VALIDATION_END signal

     PARAMETERS:
     -----------

     :param kwargs: Arguments sent with the signal

     RETURN:
     -------

     :return: None
     """
     thalamus = Thalamus()
     thalamus.add_signal(
         signal=Signal(event=DEEP_EVENT_VALIDATION_END, args=kwargs))
Example #10
0
 def send_training_end_signal(**kwargs):
     """
     DESCRIPTION:
     ------------

     Send the DEEP_EVENT_TRAINING_END signal

     PARAMETERS:
     -----------

     :param kwargs: Arguments sent with the signal

     RETURN:
     -------

     :return: None
     """
     thalamus = Thalamus()
     thalamus.add_signal(
         signal=Signal(event=DEEP_EVENT_TRAINING_END, args=kwargs))
Example #11
0
 def send_batch_end_signal(**kwargs):
     """
     DESCRIPTION:
     ------------

     Send the DEEP_EVENT_BATCH_END signal

     PARAMETERS:
     -----------

     :param kwargs: Arguments sent with the signal

     RETURN:
     -------

     :return: None
     """
     thalamus = Thalamus()
     thalamus.add_signal(
         signal=Signal(event=DEEP_EVENT_BATCH_END, args=kwargs))
Example #12
0
 def send_batch_start_signal(**kwargs):
     """
     DESCRIPTION:
     ------------

     Send the DEEP_EVENT_BATCH_START signal

     PARAMETERS:
     -----------

     :param kwargs: Arguments sent with the signal

     RETURN:
     -------

     :return: None
     """
     thalamus = Thalamus()
     thalamus.add_signal(
         signal=Signal(event=DEEP_EVENT_BATCH_START, args=kwargs))
Example #13
0
    def __compute_overwatch_metric(self, num_minibatches_training,
                                   running_total_loss, running_losses,
                                   running_metrics, total_validation_loss,
                                   result_validation_losses,
                                   result_validation_metrics) -> None:
        """
        :author: Alix Leroy

        DESCRIPTION:
        ------------

        Compute the overwatch metric and send a copy of it through a
        DEEP_EVENT_OVERWATCH_METRIC_COMPUTED signal.

        Without validation (total_validation_loss is None) the candidate values are the
        running training values averaged over the training minibatches. With validation,
        the validation results are used as given: the total loss and the individual losses
        were already used unscaled, and the metrics are now treated the same way instead of
        being divided by the number of TRAINING minibatches.

        If the name of the overwatch metric is not among the candidates, its value is left unchanged.

        PARAMETERS:
        -----------

        :param num_minibatches_training: Number of training minibatches used to average the running values
        :param running_total_loss: Sum of the total training loss over the minibatches
        :param running_losses: (dict): Summed training losses, values must expose `.item()`
        :param running_metrics: (dict): Summed training metrics
        :param total_validation_loss: Total validation loss, or None if there is no validation
        :param result_validation_losses: (dict): Validation losses, values must expose `.item()`
        :param result_validation_metrics: (dict): Validation metrics


        RETURN:
        -------

        :return: None
        """

        # If the validation loss is None (No validation) we take the metric from the training as overwatch metric
        if total_validation_loss is None:
            data = {TOTAL_LOSS: running_total_loss / num_minibatches_training}
            data.update(
                (loss_name, value.item() / num_minibatches_training)
                for loss_name, value in running_losses.items())
            data.update(
                (metric_name, value / num_minibatches_training)
                for metric_name, value in running_metrics.items())
        else:
            data = {TOTAL_LOSS: total_validation_loss}
            data.update(
                (loss_name, value.item())
                for loss_name, value in result_validation_losses.items())
            # Validation metrics are not rescaled by the number of training minibatches
            data.update(result_validation_metrics)

        # Update the overwatch metric only if it is one of the computed values
        overwatch_name = self.overwatch_metric.get_name()
        if overwatch_name in data:
            self.overwatch_metric.set_value(data[overwatch_name])

        # Send a copy so that later updates of the metric do not alter the value sent
        Thalamus().add_signal(
            Signal(event=DEEP_EVENT_OVERWATCH_METRIC_COMPUTED,
                   args={
                       "current_overwatch_metric":
                       copy.deepcopy(self.overwatch_metric)
                   }))
Example #14
0
    def __train(self, first_training: bool = True) -> None:
        """
        AUTHORS:
        --------

        :author: Alix Leroy

        DESCRIPTION:
        ------------

        Loop over the dataset to train the network.

        For every epoch from `initial_epoch + 1` to `num_epochs` (both included) the method:
            - sends the epoch start signal
            - shuffles the dataset if a shuffle method is set
            - runs one optimisation step per minibatch and sends a batch end signal for each
            - resets the dataset and evaluates the model
            - sends the epoch end signal
        Once all the epochs are done, the training end signal is sent.

        PARAMETERS:
        -----------

        :param first_training (bool): Whether more epochs have been required after initial training or not.
                                      The training start signal is only sent when it is True

        RETURN:
        -------

        :return: None
        """
        # The training start signal is only sent for the initial training
        if first_training is True:
            Thalamus().add_signal(
                signal=Signal(event=DEEP_EVENT_ON_TRAINING_START, args={}))

        # The loop variable is the instance attribute itself: self.epoch always holds the current epoch index
        for self.epoch in range(self.initial_epoch + 1, self.num_epochs + 1):

            Thalamus().add_signal(
                signal=Signal(event=DEEP_EVENT_ON_EPOCH_START,
                              args={
                                  "epoch_index": self.epoch,
                                  "num_epochs": self.num_epochs
                              }))

            # Shuffle the data if required
            if self.shuffle_method is not None:
                self.dataset.shuffle(self.shuffle_method)

            # Put model into train mode for the start of the epoch
            self.model.train()

            for minibatch_index, minibatch in enumerate(self.dataloader, 0):

                # Clean the given data
                inputs, labels, additional_data = self.clean_single_element_list(
                    minibatch)

                # zero the parameter gradients
                self.optimizer.zero_grad()

                # Set the data to the corresponding device
                inputs = self.to_device(inputs, self.model.device)
                labels = self.to_device(labels, self.model.device)
                additional_data = self.to_device(additional_data,
                                                 self.model.device)

                # Infer the output of the batch
                # NOTE(review): if the fatal notification does not stop the execution,
                # `outputs` is unbound below - confirm that DEEP_NOTIF_FATAL terminates
                try:
                    outputs = self.model(*inputs)
                except RuntimeError as e:
                    Notification(DEEP_NOTIF_FATAL,
                                 "RuntimeError : %s" % str(e))
                except TypeError as e:
                    Notification(DEEP_NOTIF_FATAL, "TypeError : %s" % str(e))

                # Compute losses and metrics
                result_losses = self.compute_metrics(self.losses, inputs,
                                                     outputs, labels,
                                                     additional_data)
                result_metrics = self.compute_metrics(self.metrics, inputs,
                                                      outputs, labels,
                                                      additional_data)

                # Add weights to losses
                result_losses = dict_utils.apply_weight(
                    result_losses, vars(self.losses))

                # Sum all the result of the losses
                total_loss = sum_dict(result_losses)

                # Accumulates the gradient (by addition) for each parameter
                total_loss.backward()

                # Performs a parameter update based on the current gradient (stored in .grad attribute of a parameter)
                # and the update rule
                self.optimizer.step()

                # Detach the tensors from the network
                outputs, total_loss, result_losses, result_metrics = self.detach(
                    outputs=outputs,
                    total_loss=total_loss,
                    result_losses=result_losses,
                    result_metrics=result_metrics)

                # Send signal batch end (the minibatch index sent is 1-based)
                Thalamus().add_signal(
                    Signal(event=DEEP_EVENT_ON_BATCH_END,
                           args={
                               "minibatch_index": minibatch_index + 1,
                               "num_minibatches": self.num_minibatches,
                               "epoch_index": self.epoch,
                               "total_loss": total_loss.item(),
                               "result_losses": result_losses,
                               "result_metrics": result_metrics
                           }))

            # Reset the dataset (transforms cache)
            self.dataset.reset()

            # Evaluate the model (the total validation loss is kept on the instance)
            self.validation_loss, result_validation_losses, result_validation_metrics = self.__evaluate_epoch(
            )

            # Without tester there is no number of validation minibatches to report
            if self.tester is not None:
                num_minibatches_validation = self.tester.get_num_minibatches()
            else:
                num_minibatches_validation = None

            # Send signal epoch end
            # NOTE(review): the model is sent as a weak reference here whereas the training end
            # signal below sends the model itself - confirm the receivers expect this difference
            Thalamus().add_signal(
                Signal(event=DEEP_EVENT_ON_EPOCH_END,
                       args={
                           "epoch_index":
                           self.epoch,
                           "num_epochs":
                           self.num_epochs,
                           "model":
                           weakref.ref(self.model),
                           "num_minibatches":
                           self.num_minibatches,
                           "total_validation_loss":
                           self.validation_loss,
                           "result_validation_losses":
                           result_validation_losses,
                           "result_validation_metrics":
                           result_validation_metrics,
                           "num_minibatches_validation":
                           num_minibatches_validation,
                       }))

        # Send signal end training
        Thalamus().add_signal(
            Signal(event=DEEP_EVENT_ON_TRAINING_END,
                   args={"model": self.model}))
Example #15
0
    def is_saving_required(self,
                           current_overwatch_metric: OverWatchMetric) -> bool:
        """
        AUTHORS:
        --------

        :author: Alix Leroy

        DESCRIPTION:
        ------------

        Check if saving the model is required, send the decision through a
        DEEP_EVENT_SAVING_REQUIRED signal and return it.

        The first metric received only becomes the reference (no saving). Afterwards
        saving is required when the current value is strictly better than the best one
        according to the condition of the metric (smaller or bigger); the current metric
        then becomes the new best one.

        PARAMETERS:
        -----------

        :param current_overwatch_metric: (OverWatchMetric): The metric to over watch, providing its value and its comparison condition

        RETURN:
        -------

        :return->bool: Whether the model should be saved or not
        """
        save = False

        # Do not save at the first epoch: the first metric only becomes the reference
        # (compared with itself below, it can never be strictly better)
        if self.best_overwatch_metric is None:
            self.best_overwatch_metric = current_overwatch_metric

        condition = current_overwatch_metric.get_condition()

        # If the new metric has to be smaller than the best one
        if condition == DEEP_COMPARE_SMALLER:
            if self.best_overwatch_metric.get_value(
            ) > current_overwatch_metric.get_value():
                save = True

        # If the new metric has to be bigger than the best one (e.g. The accuracy of a classification)
        elif condition == DEEP_COMPARE_BIGGER:
            if self.best_overwatch_metric.get_value(
            ) < current_overwatch_metric.get_value():
                save = True

        # Unknown condition: report the offending condition itself
        else:
            Notification(
                DEEP_NOTIF_FATAL,
                "The following saving condition does not exist : " +
                str(condition))

        # The model improved => the current metric becomes the new reference
        if save:
            self.best_overwatch_metric = current_overwatch_metric

        Thalamus().add_signal(signal=Signal(event=DEEP_EVENT_SAVING_REQUIRED,
                                            args={"saving_required": save}))

        # Honour the declared return type (the method used to return None)
        return save
Example #16
0
    def on_batch_end(self, minibatch_index: int, num_minibatches: int,
                     epoch_index: int, total_loss: int, result_losses: dict,
                     result_metrics: dict):
        """
        AUTHORS:
        --------

        :author: Alix Leroy

        DESCRIPTION:
        ------------

        Called at the end of every batch.
        Accumulates the running loss and metrics, optionally requests the batch statistics
        to be printed, optionally records the batch in the history and saves the batch
        history once more than 10 entries are queued.

        PARAMETERS:
        -----------

        :param minibatch_index: int: Index of the current minibatch
        :param num_minibatches: int: Number of minibatches per epoch
        :param epoch_index: int: Index of the current epoch
        :param total_loss: int: The total loss
        :param result_losses: dict: Resulting losses, values must expose `.item()`
        :param result_metrics: dict: Resulting metrics

        RETURN:
        -------

        :return: None
        """
        # Accumulate the running loss and metrics
        self.running_total_loss = self.running_total_loss + total_loss
        self.running_losses = merge_sum_dict(self.running_losses,
                                             result_losses)
        self.running_metrics = merge_sum_dict(self.running_metrics,
                                              result_metrics)

        # Print training loss and metrics on batch end if the user asked for batch statistics
        if DEEP_VERBOSE_BATCH.corresponds(self.verbose):
            thalamus = Thalamus()
            batch_report = {
                "losses": result_losses,
                "total_loss": total_loss,
                "metrics": result_metrics,
                "num_minibatches": num_minibatches,
                "minibatch_index": minibatch_index
            }
            thalamus.add_signal(
                Signal(event=DEEP_EVENT_PRINT_TRAINING_BATCH_END,
                       args=batch_report))

        # Keep the batch in the history held in memory
        if DEEP_MEMORIZE_BATCHES.corresponds(self.memorize):
            row = [
                datetime.datetime.now().strftime(TIME_FORMAT),
                self.__time(),
                epoch_index,
                minibatch_index,
                total_loss,
            ]
            row.extend(loss.item() for loss in result_losses.values())
            row.extend(result_metrics.values())
            self.train_batches_history.put(row)

        # Flush the batch history once more than 10 entries are queued
        if self.train_batches_history.qsize() > 10:
            self.save(only_batches=True)
Example #17
0
    def on_epoch_end(self, epoch_index: int, num_epochs: int,
                     num_minibatches: int, total_validation_loss: int,
                     result_validation_losses: dict,
                     result_validation_metrics: dict,
                     num_minibatches_validation: int):
        """
        AUTHORS:
        --------

        :author: Alix Leroy
        :author: Samuel Westlake

        DESCRIPTION:
        ------------

        Method for managing history at the end of each epoch.
        Prints and records the averaged training values, prints and records the validation
        values (if any), triggers the save signal logic, resets the running values and
        saves the history.

        The running values are reset only AFTER the overwatch metric has been computed:
        resetting them before made the training based overwatch metric always be computed
        from 0 and empty dictionaries.

        PARAMETERS:
        -----------

        :param epoch_index: int: current epoch index
        :param num_epochs: int: total number of epoch
        :param num_minibatches: int: number of minibatches per epoch
        :param total_validation_loss: total validation loss, None if there is no validation
        :param result_validation_losses: dict: validation losses, values must expose `.item()`
        :param result_validation_metrics: dict: validation metrics
        :param num_minibatches_validation: number of validation minibatches (not used by this method)

        RETURN:
        -------

        :return: None
        """
        print_epoch = DEEP_VERBOSE_EPOCH.corresponds(
            self.verbose) or DEEP_VERBOSE_BATCH.corresponds(self.verbose)
        memorize_epoch = DEEP_MEMORIZE_BATCHES.corresponds(
            self.memorize) or DEEP_MEMORIZE_EPOCHS.corresponds(self.memorize)

        # MANAGE TRAINING HISTORY
        if print_epoch:
            # Print the training loss and metrics on epoch end
            Thalamus().add_signal(
                Signal(event=DEEP_EVENT_PRINT_TRAINING_EPOCH_END,
                       args={
                           "losses": {
                               key: value / num_minibatches
                               for key, value in self.running_losses.items()
                           },
                           "total_loss":
                           self.running_total_loss / num_minibatches,
                           "metrics": {
                               key: value / num_minibatches
                               for key, value in self.running_metrics.items()
                           },
                       }))

        # If recording on batch or epoch
        if memorize_epoch:
            data = [
                datetime.datetime.now().strftime(TIME_FORMAT),
                self.__time(),
                epoch_index,
                self.running_total_loss / num_minibatches
            ]
            data += [value.item() / num_minibatches
                     for value in self.running_losses.values()]
            data += [value / num_minibatches
                     for value in self.running_metrics.values()]
            self.train_epochs_history.put(data)

        # MANAGE VALIDATION HISTORY
        if total_validation_loss is not None:
            if print_epoch:
                # Print the validation loss and metrics on epoch end
                Thalamus().add_signal(
                    Signal(event=DEEP_EVENT_PRINT_VALIDATION_EPOCH_END,
                           args={
                               "losses": result_validation_losses,
                               "total_loss": total_validation_loss,
                               "metrics": result_validation_metrics,
                           }))

            if memorize_epoch:
                data = [
                    datetime.datetime.now().strftime(TIME_FORMAT),
                    self.__time(),
                    epoch_index,
                    total_validation_loss
                ]
                data += [value.item()
                         for value in result_validation_losses.values()]
                data += list(result_validation_metrics.values())
                self.validation_history.put(data)

        # MANAGE SAVE SIGNAL (needs the running values, so it happens before the reset)
        if DEEP_SAVE_SIGNAL_AUTO.corresponds(self.save_signal):
            self.__compute_overwatch_metric(
                num_minibatches_training=num_minibatches,
                running_total_loss=self.running_total_loss,
                running_losses=self.running_losses,
                running_metrics=self.running_metrics,
                total_validation_loss=total_validation_loss,
                result_validation_losses=result_validation_losses,
                result_validation_metrics=result_validation_metrics)
        elif DEEP_SAVE_SIGNAL_END_EPOCH.corresponds(self.save_signal):
            Thalamus().add_signal(Signal(event=DEEP_EVENT_SAVE_MODEL, args={}))

        # Reset the running values for the next epoch
        self.running_total_loss = 0
        self.running_losses = {}
        self.running_metrics = {}

        Notification(DEEP_NOTIF_SUCCESS, EPOCH_END % (epoch_index, num_epochs))
        self.save()
Example #18
0
    def save_model(self) -> None:
        """
        AUTHORS:
        --------

        :author: Alix Leroy
        :author: Samuel Westlake

        DESCRIPTION:
        ------------

        Save the model.
        Requests the training loss and the saving parameters through signals, then writes
        the model, optimizer, epoch and losses to the file path in the PyTorch format.
        Saving in the ONNX format is not implemented and raises a fatal notification.
        The success notification is only displayed when a file has actually been written.

        PARAMETERS:
        -----------

        RETURN:
        -------

        :return: None
        """
        # The arguments of a signal are unpacked as keyword arguments when the signal is
        # dispatched (`**args`), so an empty mapping is required here, not an empty list

        # Set training_loss
        Thalamus().add_signal(
            Signal(event=DEEP_EVENT_REQUEST_TRAINING_LOSS, args={}))

        # Set model and stuff
        Thalamus().add_signal(
            Signal(event=DEEP_EVENT_REQUEST_SAVE_PARAMS_FROM_TRAINER, args={}))

        file_path = self.__get_file_path()

        # If we want to save to the pytorch format
        if DEEP_SAVE_FORMAT_PYTORCH.corresponds(self.method):
            # TODO: Finish try except statements here after testing...
            # try:
            torch.save(
                {
                    "model_state_dict": self.model.state_dict(),
                    "epoch": self.epoch_index,
                    "training_loss": self.training_loss,
                    "validation_loss": self.validation_loss,
                    "optimizer_state_dict": self.optimizer.state_dict()
                }, file_path)
            # except:
            #     Notification(DEEP_NOTIF_ERROR, "Error while saving the pytorch model and weights" )
            #     self.__handle_error_saving(model)

            # Only report a success once the file has been written
            Notification(DEEP_NOTIF_SUCCESS, DEEP_MSG_MODEL_SAVED % file_path)

        # If we want to save to the ONNX format
        elif DEEP_SAVE_FORMAT_ONNX.corresponds(self.method):
            # TODO: and here. Also fix onnx export function
            Notification(DEEP_NOTIF_FATAL,
                         "Save as onnx format not implemented yet")
            # try:
            # torch.onnx._export(model, inp, file_path,
            #                    export_params=True,
            #                    verbose=True,
            #                    input_names=input_names,
            #                    output_names=output_names)
            # except:
            #     Notification(DEEP_NOTIF_ERROR, "Error while saving the ONNX model and weights" )
            #     self.__handle_error_saving(model)
Example #19
0
 def send_training_loss(self):
     """
     DESCRIPTION:
     ------------

     Send the running total loss through a DEEP_EVENT_SEND_TRAINING_LOSS signal

     RETURN:
     -------

     :return: None
     """
     thalamus = Thalamus()
     loss_args = {"training_loss": self.running_total_loss}
     thalamus.add_signal(
         Signal(event=DEEP_EVENT_SEND_TRAINING_LOSS, args=loss_args))
Example #20
0
    def __train(self, first_training: bool = True) -> None:
        """
        AUTHORS:
        --------

        :author: Alix Leroy

        DESCRIPTION:
        ------------

        Loop over the dataset to train the network.

        For every epoch from `initial_epoch + 1` to `num_epochs` (both included), runs one
        optimisation step per minibatch (sending a batch end signal for each), shuffles and
        resets the dataset, evaluates the model and sends the epoch end signal.
        The training end signal is sent once all the epochs are done and the callbacks are paused.

        When there is no validation (the evaluation returns None as total loss) or no tester,
        None is sent in the epoch end signal instead of raising an AttributeError.

        PARAMETERS:
        -----------

        :param first_training->bool: Whether more epochs have been required after initial training or not.
                                     When True the training start signal is sent, otherwise the callbacks are unpaused

        RETURN:
        -------

        :return: None
        """

        if first_training is True:
            Thalamus().add_signal(signal=Signal(event=DEEP_EVENT_ON_TRAINING_START, args={}))
        else:
            self.callbacks.unpause()

        for epoch in range(self.initial_epoch+1, self.num_epochs+1):  # loop over the dataset multiple times

            Thalamus().add_signal(signal=Signal(event=DEEP_EVENT_ON_EPOCH_START, args={"epoch_index": epoch,
                                                                                       "num_epochs": self.num_epochs}))

            for minibatch_index, minibatch in enumerate(self.dataloader, 0):

                # Clean the given data
                inputs, labels, additional_data = self.clean_single_element_list(minibatch)

                # zero the parameter gradients
                self.optimizer.zero_grad()

                # Infer the output of the batch
                outputs = self.model(*inputs)

                # Compute losses and metrics
                result_losses = self.compute_metrics(self.losses, inputs, outputs, labels, additional_data)
                result_metrics = self.compute_metrics(self.metrics, inputs, outputs, labels, additional_data)

                # Add weights to losses
                result_losses = apply_weight(result_losses, self.losses)

                # Sum all the result of the losses
                total_loss = sum_dict(result_losses)

                # Accumulates the gradient (by addition) for each parameter
                total_loss.backward()

                # Performs a parameter update based on the current gradient (stored in .grad attribute of a parameter)
                # and the update rule
                self.optimizer.step()

                # Detach the tensors from the network
                outputs, total_loss, result_losses, result_metrics = self.detach(outputs=outputs,
                                                                                 total_loss=total_loss,
                                                                                 result_losses=result_losses,
                                                                                 result_metrics=result_metrics)

                # Send signal batch end (the minibatch index sent is 1-based)
                Thalamus().add_signal(Signal(event=DEEP_EVENT_ON_BATCH_END,
                                             args={"minibatch_index": minibatch_index+1,
                                                   "num_minibatches": self.num_minibatches,
                                                   "epoch_index": epoch,
                                                   "total_loss": total_loss.item(),
                                                   "result_losses": result_losses,
                                                   "result_metrics": result_metrics
                                                   }))

            # Shuffle the data if required
            if self.shuffle is not None:
                self.dataset.shuffle(self.shuffle)

            # Reset the dataset (transforms cache)
            self.dataset.reset()

            # Evaluate the model
            total_validation_loss, result_validation_losses, result_validation_metrics = self.__evaluate_epoch()

            # No validation => keep None instead of calling .item() on it
            if total_validation_loss is not None:
                total_validation_loss = total_validation_loss.item()

            # No tester => no number of validation minibatches to report
            if self.tester is not None:
                num_minibatches_validation = self.tester.get_num_minibatches()
            else:
                num_minibatches_validation = None

            # Send signal epoch end
            Thalamus().add_signal(Signal(event=DEEP_EVENT_ON_EPOCH_END,
                                         args={"epoch_index": epoch,
                                               "num_epochs": self.num_epochs,
                                               "model": self.model,
                                               "num_minibatches": self.num_minibatches,
                                               "total_validation_loss": total_validation_loss,
                                               "result_validation_losses": result_validation_losses,
                                               "result_validation_metrics": result_validation_metrics,
                                               "num_minibatches_validation": num_minibatches_validation
                                               }))

        # Send signal end training
        Thalamus().add_signal(Signal(event=DEEP_EVENT_ON_TRAINING_END,
                                     args={"model": self.model}))

        # Pause callbacks which compute time
        self.callbacks.pause()