def load_memory(self):
    """
    AUTHORS:
    --------
    :author: Alix Leroy

    DESCRIPTION:
    ------------
    Load the memory: instantiate the Hippocampus (history + saver)
    once both losses and metrics are available.

    PARAMETERS:
    -----------
    None

    RETURN:
    -------
    :return: None
    """
    # Nothing to memorize until both losses and metrics exist.
    if self.losses is None or self.metrics is None:
        return

    watched_metric = OverWatchMetric(**self.config.training.overwatch.get())

    # The hippocampus (brain/memory/hippocampus) temporarily handles
    # the saver and the history.
    session_root = (get_main_path(), self.config.project.session)
    history_directory = "/".join(session_root + ("history",))
    weights_directory = "/".join(session_root + ("weights",))

    self.hippocampus = Hippocampus(
        losses=self.losses,
        metrics=self.metrics,
        model_name=self.config.model.name,
        verbose=self.config.history.verbose,
        memorize=self.config.history.memorize,
        history_directory=history_directory,
        overwatch_metric=watched_metric,
        **self.config.training.saver.get(),
        save_model_directory=weights_directory)
def __init__(
        self,
        # History
        losses: dict,
        metrics: dict,
        model_name: str = None,
        verbose: Flag = DEEP_VERBOSE_BATCH,
        memorize: Flag = DEEP_MEMORIZE_BATCHES,
        history_directory: str = "history",
        overwatch_metric: OverWatchMetric = None,
        # Saver
        save_signal: Flag = DEEP_SAVE_SIGNAL_AUTO,
        method: Flag = DEEP_SAVE_FORMAT_PYTORCH,
        overwrite: bool = False,
        save_model_directory: str = "weights"):
    """
    Initialize the Hippocampus: wire up the training History and the
    model Saver so that both share the same validated save signal.

    :param losses: dict of losses to track in the history.
    :param metrics: dict of metrics to track in the history.
    :param model_name: name of the model; when None a fresh random
        alphanumeric name is generated for this instance.
    :param verbose: verbosity flag for the history.
    :param memorize: memorization granularity flag (batches / epochs).
    :param history_directory: directory where history CSV files go.
    :param overwatch_metric: metric watched to decide model saving;
        defaults to a new OverWatchMetric on the total loss.
    :param save_signal: when to save, validated against DEEP_LIST_SAVE_SIGNAL.
    :param method: serialization format flag (e.g. PyTorch format).
    :param overwrite: whether the saver may overwrite existing weights.
    :param save_model_directory: directory where model weights are saved.
    """
    # Build per-call defaults. The previous signature evaluated
    # generate_random_alphanumeric(...) and OverWatchMetric(...) at
    # function-definition time, so every Hippocampus created without
    # these arguments shared one "random" name and one mutable metric
    # instance.
    if model_name is None:
        model_name = generate_random_alphanumeric(size=10)
    if overwatch_metric is None:
        overwatch_metric = OverWatchMetric(
            name=TOTAL_LOSS,
            condition=DEEP_SAVE_CONDITION_LESS)

    # Normalize the save signal to a known flag (falls back to AUTO).
    save_signal = get_corresponding_flag(
        DEEP_LIST_SAVE_SIGNAL,
        info=save_signal,
        default=DEEP_SAVE_SIGNAL_AUTO)

    #
    # HISTORY
    #
    self.__initialize_history(
        name=model_name,
        metrics=metrics,
        losses=losses,
        log_dir=history_directory,
        verbose=verbose,
        memorize=memorize,
        save_signal=save_signal,
        overwatch_metric=overwatch_metric)

    #
    # SAVER
    #
    self.__initialize_saver(
        name=model_name,
        save_directory=save_model_directory,
        save_signal=save_signal,
        method=method,
        overwrite=overwrite)
def load_memory(self):
    """
    AUTHORS:
    --------
    :author: Alix Leroy

    DESCRIPTION:
    ------------
    Load the memory: build the Hippocampus from the configuration once
    losses and metrics have been defined.

    PARAMETERS:
    -----------
    None

    RETURN:
    -------
    :return: None
    """
    # Without losses and metrics there is nothing to memorize yet.
    if self.losses is None or self.metrics is None:
        return

    training_cfg = self.config.training
    watched_metric = OverWatchMetric(
        name=training_cfg.overwatch_metric,
        condition=training_cfg.overwatch_condition)

    self.hippocampus = Hippocampus(
        losses=self.losses,
        metrics=self.metrics,
        model_name=self.config.model.name,
        verbose=self.config.history.verbose,
        memorize=self.config.history.memorize,
        history_directory=DEEP_PATH_HISTORY,
        overwatch_metric=watched_metric,
        save_model_condition=training_cfg.save_condition,
        save_model_directory=DEEP_PATH_SAVE_MODEL,
        save_model_method=training_cfg.save_method)
def __init__(
        self,
        # History
        losses: dict,
        metrics: dict,
        model_name: str = None,
        verbose: int = DEEP_VERBOSE_BATCH,
        memorize: int = DEEP_MEMORIZE_BATCHES,
        history_directory: str = DEEP_PATH_HISTORY,
        overwatch_metric: OverWatchMetric = None,
        # Saver
        save_model_condition: int = DEEP_SAVE_CONDITION_AUTO,
        save_model_method: int = DEEP_SAVE_NET_FORMAT_PYTORCH,
        save_model_directory: str = DEEP_PATH_SAVE_MODEL,
        ):
    """
    Initialize the Hippocampus: set up the training History and the
    model Saver.

    :param losses: dict of losses to track in the history.
    :param metrics: dict of metrics to track in the history.
    :param model_name: name of the model; when None a fresh random
        alphanumeric name is generated for this instance.
    :param verbose: verbosity level for the history.
    :param memorize: memorization granularity (batches / epochs).
    :param history_directory: directory where history CSV files go.
    :param overwatch_metric: metric watched to decide model saving;
        defaults to a new OverWatchMetric on the total loss.
    :param save_model_condition: when to save the model.
    :param save_model_method: serialization format (e.g. PyTorch).
    :param save_model_directory: directory where weights are saved.
    """
    # Build per-call defaults. The previous signature evaluated
    # generate_random_alphanumeric(...) and OverWatchMetric(...) at
    # function-definition time, so every instance built without these
    # arguments shared one "random" model name and one mutable metric
    # object.
    if model_name is None:
        model_name = generate_random_alphanumeric(size=10)
    if overwatch_metric is None:
        overwatch_metric = OverWatchMetric(
            name=TOTAL_LOSS,
            condition=DEEP_COMPARE_SMALLER)

    #
    # HISTORY
    #
    self.__initialize_history(
        name=model_name,
        metrics=metrics,
        losses=losses,
        log_dir=history_directory,
        verbose=verbose,
        memorize=memorize,
        overwatch_metric=overwatch_metric)

    #
    # SAVER
    #
    self.__initialize_saver(
        name=model_name,
        save_directory=save_model_directory,
        save_condition=save_model_condition,
        save_method=save_model_method)
def __init__(self,
             metrics: dict,
             losses: dict,
             log_dir: str = "history",
             train_batches_filename: str = "history_batches_training.csv",
             train_epochs_filename: str = "history_epochs_training.csv",
             validation_filename: str = "history_validation.csv",
             verbose: Flag = DEEP_VERBOSE_BATCH,
             memorize: Flag = DEEP_MEMORIZE_BATCHES,
             save_signal: Flag = DEEP_SAVE_SIGNAL_END_EPOCH,
             overwatch_metric: OverWatchMetric = None):
    """
    Initialize the training history: create the CSV history files with
    their headers, set up process-safe queues for batch/epoch/validation
    rows, reload any existing histories and subscribe to the training
    events on the Thalamus.

    :param metrics: metrics to track (column names come from them).
    :param losses: losses to track (column names come from them).
    :param log_dir: directory where history CSV files are created.
    :param train_batches_filename: filename of the per-batch history.
    :param train_epochs_filename: filename of the per-epoch history.
    :param validation_filename: filename of the validation history.
    :param verbose: verbosity flag.
    :param memorize: memorization granularity flag (batches / epochs).
    :param save_signal: when the model should be saved.
    :param overwatch_metric: metric watched for model saving; defaults
        to a new OverWatchMetric on the total loss.
    """
    # Per-call default: the previous signature created one shared
    # OverWatchMetric instance at definition time (mutable default).
    if overwatch_metric is None:
        overwatch_metric = OverWatchMetric(
            name=TOTAL_LOSS,
            condition=DEEP_SAVE_CONDITION_LESS)

    self.log_dir = log_dir
    self.verbose = verbose
    self.metrics = metrics
    self.losses = losses
    # Validate the memorize flag against the known granularities.
    self.memorize = get_corresponding_flag(
        [DEEP_MEMORIZE_BATCHES, DEEP_MEMORIZE_EPOCHS],
        info=memorize)
    self.save_signal = save_signal
    self.overwatch_metric = overwatch_metric

    # Running metrics
    self.running_total_loss = 0
    self.running_losses = {}
    self.running_metrics = {}

    # Process-safe queues buffering rows before they hit the CSV files.
    self.train_batches_history = multiprocessing.Manager().Queue()
    self.train_epochs_history = multiprocessing.Manager().Queue()
    self.validation_history = multiprocessing.Manager().Queue()

    # Add headers to history files.
    # NOTE(review): vars(losses)/vars(metrics) implies these arguments
    # are attribute-carrying objects despite the `dict` annotation (an
    # older revision used losses.keys() directly) — confirm which is
    # intended.
    loss_metric_names = (list(vars(losses).keys())
                         + list(vars(metrics).keys()))
    train_batches_headers = ",".join(
        [WALL_TIME, RELATIVE_TIME, EPOCH, BATCH, TOTAL_LOSS]
        + loss_metric_names)
    # Epoch-level and validation files share the same header layout.
    epoch_level_headers = ",".join(
        [WALL_TIME, RELATIVE_TIME, EPOCH, TOTAL_LOSS]
        + loss_metric_names)
    train_epochs_headers = epoch_level_headers
    validation_headers = epoch_level_headers

    # Create the history files
    self.__add_logs("history_train_batches", log_dir, ".csv", train_batches_headers)
    self.__add_logs("history_train_epochs", log_dir, ".csv", train_epochs_headers)
    self.__add_logs("history_validation", log_dir, ".csv", validation_headers)

    self.start_time = 0
    self.paused = False

    # Filepaths (log_dir is already stored above; the duplicate
    # assignment was removed).
    self.train_batches_filename = train_batches_filename
    self.train_epochs_filename = train_epochs_filename
    self.validation_filename = validation_filename

    # Load histories
    self.__load_histories()

    # Connect to signals
    Thalamus().connect(
        receiver=self.on_batch_end,
        event=DEEP_EVENT_ON_BATCH_END,
        expected_arguments=[
            "minibatch_index",
            "num_minibatches",
            "epoch_index",
            "total_loss",
            "result_losses",
            "result_metrics"])
    Thalamus().connect(
        receiver=self.on_epoch_end,
        event=DEEP_EVENT_ON_EPOCH_END,
        expected_arguments=[
            "epoch_index",
            "num_epochs",
            "num_minibatches",
            "total_validation_loss",
            "result_validation_losses",
            "result_validation_metrics",
            "num_minibatches_validation"])
    Thalamus().connect(
        receiver=self.on_train_begin,
        event=DEEP_EVENT_ON_TRAINING_START,
        expected_arguments=[])
    Thalamus().connect(
        receiver=self.on_train_end,
        event=DEEP_EVENT_ON_TRAINING_END,
        expected_arguments=[])
    Thalamus().connect(
        receiver=self.on_epoch_start,
        event=DEEP_EVENT_ON_EPOCH_START,
        expected_arguments=["epoch_index", "num_epochs"])
    Thalamus().connect(
        receiver=self.send_training_loss,
        event=DEEP_EVENT_REQUEST_TRAINING_LOSS,
        expected_arguments=[])
def on_overwatch_metric_computed(
        self,
        current_overwatch_metric: OverWatchMetric):
    """
    AUTHORS:
    --------
    :author: Alix Leroy
    :author: Samuel Westlake

    DESCRIPTION:
    ------------
    Compare the freshly computed over-watched metric against the best
    one seen so far and save the model whenever it improved.

    PARAMETERS:
    -----------
    :param current_overwatch_metric: OverWatchMetric: The metric to over watch

    RETURN:
    -------
    :return: None
    """
    best = self.best_overwatch_metric

    # No reference yet: adopt the metric and save unconditionally.
    if best is None:
        self.best_overwatch_metric = current_overwatch_metric
        self.save_model()
        return

    metric_name = current_overwatch_metric.name
    condition = current_overwatch_metric.get_condition()
    improved = False

    if DEEP_SAVE_CONDITION_LESS.corresponds(condition):
        # Lower is better (e.g. a loss).
        if best.get_value() > current_overwatch_metric.get_value():
            gain = Decimal(best.get_value()
                           - current_overwatch_metric.get_value())
            Notification(
                DEEP_NOTIF_SUCCESS,
                DEEP_MSG_SAVER_IMPROVED % (metric_name, "%.4e" % gain))
            self.best_overwatch_metric = current_overwatch_metric
            improved = True
        else:
            Notification(
                DEEP_NOTIF_INFO,
                DEEP_MSG_SAVER_NOT_IMPROVED % metric_name)
    elif DEEP_SAVE_CONDITION_GREATER.corresponds(condition):
        # Higher is better (e.g. the accuracy of a classification).
        if best.get_value() < current_overwatch_metric.get_value():
            gain = Decimal(current_overwatch_metric.get_value()
                           - best.get_value())
            Notification(
                DEEP_NOTIF_SUCCESS,
                DEEP_MSG_SAVER_IMPROVED % (metric_name, "%.4e" % gain))
            self.best_overwatch_metric = current_overwatch_metric
            improved = True
        else:
            Notification(
                DEEP_NOTIF_INFO,
                DEEP_MSG_SAVER_NOT_IMPROVED % metric_name)
    else:
        Notification(
            DEEP_NOTIF_FATAL,
            "The following saving condition does not exist : %s"
            % condition)

    if improved:
        self.save_model()
def is_saving_required(self, current_overwatch_metric: OverWatchMetric) -> bool:
    """
    AUTHORS:
    --------
    :author: Alix Leroy

    DESCRIPTION:
    ------------
    Check if saving the model is required and broadcast the decision
    through the Thalamus on the DEEP_EVENT_SAVING_REQUIRED event.

    PARAMETERS:
    -----------
    :param current_overwatch_metric: OverWatchMetric: The metric to over watch

    RETURN:
    -------
    :return: None — the boolean decision is sent via the Thalamus
        signal; the ``-> bool`` annotation is kept for interface
        compatibility.
    """
    save = False

    # Do not save at the first epoch: remember the metric and fall
    # through (an equal value never counts as an improvement below).
    if self.best_overwatch_metric is None:
        self.best_overwatch_metric = current_overwatch_metric
        save = False

    # If the new metric has to be smaller than the best one
    if current_overwatch_metric.get_condition() == DEEP_COMPARE_SMALLER:
        # If the model improved since last batch => Save
        if self.best_overwatch_metric.get_value() > current_overwatch_metric.get_value():
            self.best_overwatch_metric = current_overwatch_metric
            save = True
        # No improvement => Return False
        else:
            save = False

    # If the new metric has to be bigger than the best one
    # (e.g. the accuracy of a classification)
    elif current_overwatch_metric.get_condition() == DEEP_COMPARE_BIGGER:
        # If the model improved since last batch => Save
        if self.best_overwatch_metric.get_value() < current_overwatch_metric.get_value():
            self.best_overwatch_metric = current_overwatch_metric
            save = True
        # No improvement => Return False
        else:
            save = False

    else:
        # BUG FIX: the error message previously reported the literal
        # str("test") instead of the offending condition.
        Notification(
            DEEP_NOTIF_FATAL,
            "The following saving condition does not exist : "
            + str(current_overwatch_metric.get_condition()))

    Thalamus().add_signal(
        signal=Signal(
            event=DEEP_EVENT_SAVING_REQUIRED,
            args={"saving_required": save}))
def __init__(
        self,
        metrics: dict,
        losses: dict,
        log_dir: str = DEEP_PATH_HISTORY,
        train_batches_filename: str = "history_batches_training.csv",
        train_epochs_filename: str = "history_epochs_training.csv",
        validation_filename: str = "history_validation.csv",
        verbose: int = DEEP_VERBOSE_BATCH,
        memorize: int = DEEP_MEMORIZE_BATCHES,
        save_condition: int = DEEP_SAVE_CONDITION_END_EPOCH,
        overwatch_metric: OverWatchMetric = None,
        ):
    """
    Initialize the training history: create the CSV history files with
    their headers, set up process-safe queues for batch/epoch/validation
    rows, reload any existing histories and subscribe to the training
    events on the Thalamus.

    :param metrics: dict of metrics to track (column names from keys).
    :param losses: dict of losses to track (column names from keys).
    :param log_dir: directory where history CSV files are created.
    :param train_batches_filename: filename of the per-batch history.
    :param train_epochs_filename: filename of the per-epoch history.
    :param validation_filename: filename of the validation history.
    :param verbose: verbosity level.
    :param memorize: memorization granularity (batches / epochs).
    :param save_condition: when to save, e.g.
        DEEP_SAVE_CONDITION_END_TRAINING to save at the end of training,
        DEEP_SAVE_CONDITION_END_EPOCH to save at the end of each epoch.
    :param overwatch_metric: metric watched for model saving; defaults
        to a new OverWatchMetric on the total loss.
    """
    # Per-call default: the previous signature created one shared
    # OverWatchMetric instance at definition time (mutable default).
    if overwatch_metric is None:
        overwatch_metric = OverWatchMetric(
            name=TOTAL_LOSS,
            condition=DEEP_COMPARE_SMALLER)

    self.log_dir = log_dir
    self.verbose = verbose
    self.metrics = metrics
    self.losses = losses
    self.memorize = memorize
    self.save_condition = save_condition
    self.overwatch_metric = overwatch_metric

    # Running metrics
    self.running_total_loss = 0
    self.running_losses = {}
    self.running_metrics = {}

    # Process-safe queues buffering rows before they hit the CSV files.
    self.train_batches_history = multiprocessing.Manager().Queue()
    self.train_epochs_history = multiprocessing.Manager().Queue()
    self.validation_history = multiprocessing.Manager().Queue()

    # Add headers to history files.
    loss_metric_names = list(losses.keys()) + list(metrics.keys())
    train_batches_headers = ",".join(
        [WALL_TIME, RELATIVE_TIME, EPOCH, BATCH, TOTAL_LOSS]
        + loss_metric_names)
    # Epoch-level and validation files share the same header layout.
    epoch_level_headers = ",".join(
        [WALL_TIME, RELATIVE_TIME, EPOCH, TOTAL_LOSS]
        + loss_metric_names)
    train_epochs_headers = epoch_level_headers
    validation_headers = epoch_level_headers

    self.__add_logs("history_train_batches", log_dir, ".csv", train_batches_headers)
    self.__add_logs("history_train_epochs", log_dir, ".csv", train_epochs_headers)
    self.__add_logs("history_validation", log_dir, ".csv", validation_headers)

    self.start_time = 0
    self.paused = False

    # Filepaths (log_dir is already stored above; the duplicate
    # assignment was removed).
    self.train_batches_filename = train_batches_filename
    self.train_epochs_filename = train_epochs_filename
    self.validation_filename = validation_filename

    # Load histories
    self.__load_histories()

    # Connect to signals
    Thalamus().connect(
        receiver=self.on_batch_end,
        event=DEEP_EVENT_ON_BATCH_END)
    Thalamus().connect(
        receiver=self.on_epoch_end,
        event=DEEP_EVENT_ON_EPOCH_END,
        expected_arguments=[
            "epoch_index",
            "num_epochs",
            "num_minibatches",
            "total_validation_loss",
            "result_validation_losses",
            "result_validation_metrics",
            "num_minibatches_validation"])
    Thalamus().connect(
        receiver=self.on_train_begin,
        event=DEEP_EVENT_ON_TRAINING_START,
        expected_arguments=[])
    Thalamus().connect(
        receiver=self.on_train_end,
        event=DEEP_EVENT_ON_TRAINING_END,
        expected_arguments=[])
    Thalamus().connect(
        receiver=self.on_epoch_start,
        event=DEEP_EVENT_ON_EPOCH_START,
        expected_arguments=["epoch_index", "num_epochs"])