コード例 #1
0
    def init(self):
        """
        Init method, initialize member variables and other program parts.

        Creates the controller logger, loads the controller config from the configured config file path and
        builds the TfRecordHandler from the loaded config. Errors raised while loading the config or building
        the handler are logged and printed instead of being propagated to the caller.

        :return: successful: (Boolean) Was the execution successful?
        """
        print("Controller: Starting init() ...")
        self.__logger = SLoggerHandler().getLogger(LoggerNames.CONTROLLER_C)
        self.__logger.info("Loading config ...", "Controller:init")
        successful = True

        try:
            self.__config_provider = ConfigProvider()
            self.__config = self.__config_provider.get_config(
                self.__controller_config_file_path)
            self.__logger.info("Finished loading config.", "Controller:init")

            self.__tf_record_handler = TfRecordHandler(
                tfrecord_dir="data",
                dataset_prepreprocessors=self.__config["datasetPrePreProcessors"],
                num_threads=self.__config["hardware"]["numCPUCores"])

            self.__logger.info("Finished init()", "Controller:init")
        except Exception:
            # Catch Exception rather than using a bare except, so that KeyboardInterrupt and SystemExit
            # still terminate the program instead of being reported as a failed init.
            successful = False
            self.__logger.error("Canceled init(). An error accrued!",
                                "Controller:init")
            print(traceback.format_exc())

        return successful
コード例 #2
0
 def __init__(self, config):
     """
     Constructor, store the experiment config and look up the logger and the GPU count.
     :param config: (Dictionary) The config of the experiment, containing all model parameters. Refer to the config
                     csnnReconstructionExperiment.json as an example.
     """
     self.__config = config

     # The experiment logger is obtained from the logger handler.
     logger_handler = SLoggerHandler()
     self.__logger = logger_handler.getLogger(LoggerNames.EXPERIMENT_C)

     # The number of GPUs is read from the hardware section of the controller config.
     controller_config = ConfigProvider().get_config("controllerConfig.json")
     hardware_config = controller_config["hardware"]
     self.__num_gpus = hardware_config["numGPUs"]
コード例 #3
0
class CsnnHypridExperiment(IExperiment):
    """
    The CsnnHypridExperiment trains the cnn and hybrid model presented in Table 2 of our paper: "CSNNs: Unsupervised,
    Backpropagation-Free Convolutional Neural Networks for Representation Learning".

    The experiment trains each model for each given dataset like a CNN.
    If xFoldCrossValidation is set this will be repeated x times.

    :Attributes:
        __config:    (Dictionary) The config of the experiment, containing all model parameters. Refer to the config
                      csnnHypridExperiment.json as an example.
        __logger:    (Logger) The logger for the experiment.
        __num_gpus:  (Integer) The number of GPUs to use.
    """
    def __init__(self, config):
        """
        Constructor, initialize member variables.
        :param config: (Dictionary) The config of the experiment, containing all model parameters. Refer to the config
                        csnnHybridExperiment.json as an example.
        """
        self.__config = config
        self.__logger = SLoggerHandler().getLogger(LoggerNames.EXPERIMENT_C)
        # The number of GPUs is read from the hardware section of the controller config.
        self.__num_gpus = ConfigProvider().get_config("controllerConfig.json")["hardware"]["numGPUs"]

    def execute(self):
        """
        Executes the experiment with the given config.

        The experiment trains each model for each given dataset like a CNN. If xFoldCrossValidation is set this will be
        repeated x times. Datasets that are not listed in the "batchSizes" of a hybrid config are skipped for that
        model. An exception raised while processing a hybrid config is printed and the experiment continues with the
        next hybrid config.
        """
        # Source tag for the log entries of this method.
        log_source = "CsnnHypridExperiment:execute"

        for hybrid_config in self.__config["hybridConfigs"]:
            hybrid_name = hybrid_config["modelName"]

            try:
                for dataset_config in self.__config["datasetConfigs"]:
                    provider = getDatasetProvider(dataset_config)
                    # Only train on datasets for which the model defines a batch size.
                    if dataset_config["nameOfDataset"] not in hybrid_config["batchSizes"]:
                        continue
                    for i in range(0, hybrid_config["xFoldCrossValidation"]):
                        model_dir = "/" + hybrid_name + "/" + dataset_config["nameOfDataset"] + "/xFoldCrossVal" + str(i)
                        self.__logger.info("Starting to train: " + model_dir, log_source)

                        # A fold seed is only passed when more than one fold is requested.
                        if hybrid_config["xFoldCrossValidation"] <= 1:
                            xseed = None
                        else:
                            xseed = 42 + i

                        dataset, dataset_generator = prepareDataset(provider, dataset_config, xfold_seed=xseed,
                                                                    augment_data=hybrid_config["augmentData"])

                        trainAndValHybrid(hybrid_config, dataset_generator, dataset, dataset_config, self.__num_gpus,
                                          model_dir + "/Csnn")
                        self.__logger.info("Finished to train: " + model_dir, log_source)

            except Exception:
                # Report the failure and move on to the next hybrid config.
                print(traceback.format_exc())
コード例 #4
0
ファイル: ExperimentScheduler.py プロジェクト: codeaudit/CSNN
 def __init__(self, schedule):
     """
     Constructor, set up the bookkeeping of the scheduler.
     :param schedule: (Dictionary) The schedule containing the experiment order.
     """
     self.__schedule = schedule

     # Bookkeeping of the experiments handled so far: both lists start empty, the counter starts at zero.
     self.__successful_experiments = list()
     self.__canceled_experiments = list()

     logger_handler = SLoggerHandler()
     self.__logger = logger_handler.getLogger(LoggerNames.EXPERIMENT_C)

     self.__finished_experiments = 0
コード例 #5
0
ファイル: TfRecordHandler.py プロジェクト: codeaudit/CSNN
 def __init__(self, tfrecord_dir, dataset_prepreprocessors=None, num_threads=0):
     """
     Constructor, store the handler settings and fetch the output logger.
     :param tfrecord_dir: (String) The path to the record save directory.
     :param dataset_prepreprocessors: (Dictionary) The prepreprocessors to use in the corresponding splits.
                                       None by default.
     :param num_threads: (Integer) The number of threads for writing the tfrecords. 0 by default.
     """
     logger_handler = SLoggerHandler()
     self.__logger = logger_handler.getLogger(LoggerNames.Output_C)

     # Keep the given settings unchanged.
     self.__tfrecord_dir = tfrecord_dir
     self.__dataset_prepreprocessors = dataset_prepreprocessors
     self.__num_threads = num_threads
コード例 #6
0
    def init(self):
        """
        Init method, initialize member variables and other program parts.

        Creates the controller logger and loads the controller config from the configured config file path.
        Errors raised while loading the config are logged and printed instead of being propagated to the caller.

        :return: successful: (Boolean) Was the execution successful?
        """
        print("Controller: Starting init() ...")
        self.__logger = SLoggerHandler().getLogger(LoggerNames.CONTROLLER_C)
        self.__logger.info("Loading config ...", "Controller:init")
        successful = True

        try:
            self.__config_provider = ConfigProvider()
            self.__config = self.__config_provider.get_config(
                self.__controller_config_file_path)
            self.__logger.info("Finished loading config.", "Controller:init")
            self.__logger.info("Finished init()", "Controller:init")
        except Exception:
            # Catch Exception rather than using a bare except, so that KeyboardInterrupt and SystemExit
            # still terminate the program instead of being reported as a failed init.
            successful = False
            self.__logger.error("Canceled init(). An error accrued!",
                                "Controller:init")
            print(traceback.format_exc())

        return successful
コード例 #7
0
    def __init__(self, tfrecord_dir, tfrecord_name, datasset_split, mode):
        """
        Constructor, initialize member variables and open the tfrecord writer.
        :param tfrecord_dir: (String) The path to the record save directory.
        :param tfrecord_name: (String) The name of the saved record.
        :param datasset_split: (Array) The splits of the datasets in format [trainNum,evalNum,testNum]. Its string
                                form becomes part of the record file name.
        :param mode: (String) The mode of the saved record. It is the last part of the record file name.
        """
        self.__logger = SLoggerHandler().getLogger(LoggerNames.Output_C)

        # Counter to see the progress.
        self.__num_written_entries = 0

        # Save path of the records: <dir>/<name>/<name>_<split>_<mode>
        record_name = str(tfrecord_name)
        record_stem = "/".join((str(tfrecord_dir), record_name, record_name))
        tfrecord_savepath = "_".join((record_stem, str(datasset_split), mode))

        # Create the record writer for the dataset, with compression type NONE.
        no_compression = tf.python_io.TFRecordCompressionType.NONE
        self.__tf_record_writer = tf.python_io.TFRecordWriter(
            tfrecord_savepath + ".tfrecords", tf.python_io.TFRecordOptions(no_compression))
コード例 #8
0
class CsnnReconstructionExperiment(IExperiment):
    """
    The CsnnReconstructionExperiment tests the given pre-trained CSNN by reconstructing the images from the encoding.
    The reconstructions are presented in Figure 6 of our paper: "CSNNs: Unsupervised, Backpropagation-Free
    Convolutional Neural Networks for Representation Learning".

    The experiment tests each pre-trained CSNN model for each given dataset by learning the defined reconstructor on
    the learned representation. If xFoldCrossValidation is set this will be repeated x times.

    :Attributes:
        __config:    (Dictionary) The config of the experiment, containing all model parameters. Refer to the config
                      csnnReconstructionExperiment.json as an example.
        __logger:    (Logger) The logger for the experiment.
        __num_gpus:  (Integer) The number of GPUs to use.
    """
    def __init__(self, config):
        """
        Constructor, store the experiment config and look up the logger and the GPU count.
        :param config: (Dictionary) The config of the experiment, containing all model parameters. Refer to the config
                        csnnReconstructionExperiment.json as an example.
        """
        self.__config = config
        logger_handler = SLoggerHandler()
        self.__logger = logger_handler.getLogger(LoggerNames.EXPERIMENT_C)
        # The number of GPUs is read from the hardware section of the controller config.
        controller_config = ConfigProvider().get_config("controllerConfig.json")
        self.__num_gpus = controller_config["hardware"]["numGPUs"]

    def execute(self):
        """
        Executes the experiment with the given config.

        For every CSNN config and every dataset that has a batch size defined for that model, the dataset is
        encoded with the CSNN and the configured reconstructor is trained on the encoding. If xFoldCrossValidation
        is set this will be repeated x times. Exceptions are printed; a failing reconstructor training does not
        stop the remaining folds, any other failure skips to the next CSNN config.
        """
        log_source = "CsnnReconstructionExperiment:execute"

        for csnn_config in self.__config["csnnConfigs"]:
            csnn_name = csnn_config["modelName"]

            try:
                for dataset_config in self.__config["datasetConfigs"]:
                    provider = getDatasetProvider(dataset_config)
                    dataset_name = dataset_config["nameOfDataset"]
                    # Skip datasets for which the model defines no batch size.
                    if dataset_name not in csnn_config["batchSizes"].keys():
                        continue

                    num_folds = csnn_config["xFoldCrossValidation"]
                    for fold in range(0, num_folds):
                        model_dir = "/" + csnn_name + "/" + dataset_name + "/xFoldCrossVal" + str(fold)
                        self.__logger.info("Starting to test: " + model_dir, log_source)

                        # A fold seed is only passed when more than one fold is requested.
                        xseed = None if num_folds <= 1 else 42 + fold

                        dataset, dataset_generator = prepareDataset(
                            provider, dataset_config, xfold_seed=xseed,
                            augment_data=csnn_config["augmentData"])

                        # Second version of the dataset, prepared with "maxDiv" normalization; it is handed
                        # to prepareEncoding as return_with_encoding.
                        dataset_max_div, _ = prepareDataset(
                            provider, dataset_config, xfold_seed=xseed,
                            augment_data=csnn_config["augmentData"],
                            normalize_data="maxDiv")

                        self.__logger.info("Starting to create dataset encoding with: " + model_dir, log_source)
                        encoding_provider, encoding = prepareEncoding(
                            csnn_config, dataset_generator, dataset, dataset_config, csnn_name, self.__num_gpus,
                            model_dir + "/Csnn",
                            zero_mean_unit_variance=csnn_config["zeroMeanUnitVarianceEncoding"],
                            return_with_encoding=dataset_max_div)
                        self.__logger.info("Finished to create dataset encoding with: " + model_dir, log_source)

                        self.__logger.info("Starting to train reconstructor for: " + model_dir, log_source)
                        try:
                            rec_config = self.__config["reconstructionConfig"]
                            reconstructor = Cifar10Reconstruction(rec_config)
                            reconstruction_dir = (model_dir + "/" + rec_config["modelName"] + "/" +
                                                  dataset_config["nameOfDataset"])
                            trainAndValReconstructionModel(reconstructor, rec_config, self.__num_gpus,
                                                           reconstruction_dir, encoding, dataset_config)

                        except Exception:
                            print(traceback.format_exc())
                        self.__logger.info("Finished to train reconstructor for: " + model_dir, log_source)
            except Exception:
                print(traceback.format_exc())
コード例 #9
0
class AModelSuit(metaclass=ABCMeta):
    """
    An AModelSuit handles the train/eval/test/inference of a model. Therefore it brings the input, the model and
    the trainer together in one place. In each AModelSuit functions for the training and validation must be defined.

    The AModelSuit provides basic functionality like model saving and defines interface methods for ModelSuits.

    :Attributes:
        _model:                         ("Model") The model to handle with the ModelSuit.
        _dataset:                       (Dictionary) The dataset to train/eval/test the model.
        _trainer:                       (ITrainer) The trainer to train the model.
        _batch_size:                    (Integer) The batch size for the model.
        _batches_in_epoch:              (Integer) The number of batches in one training epoch.
        _logger:                        (Logger) The logger for the ModelSuit.
        _model_dir:                     (String) The directory of the model (e.g. to save it).
        _save_checkpoint_steps:         (Integer) Every save_checkpoint_steps steps the ModelSuit saves model
                                            (training) checkpoints. 500 by default. (Optional) set to -1 if not needed.
        _save_checkpoint_epochs:        (Integer) Every save_checkpoint_epochs epochs the ModelSuit saves model
                                            (training) checkpoints. 1 by default. (Optional) set to -1 if not needed.
                                            List of epochs supported (e.g. [1,5] saves only a checkpoint in the first and fifth epoch)
        _log_steps:                     (Integer) Every log_steps steps the ModelSuit writes logs. 100 by default. (Optional) set to -1 if not needed.
        _log_epochs:                    (Integer) Every log_epochs epochs the ModelSuit writes logs. 1 by default. (Optional) set to -1 if not needed.
        _save_summary_steps:            (Integer) Every save_summary_steps steps the ModelSuit saves Tensorboard summaries. 250 by default.
                                            (Optional) set to -1 if not needed.
        _save_summary_epochs:           (Integer) Every save_summary_epochs epochs the ModelSuit saves Tensorboard summaries. 1 by default.
                                            (Optional) set to -1 if not needed.
        _ckpt:                          (tf.train.Checkpoint) Variable for the current checkpoint.
        _ckpt_manager:                  (tf.train.CheckpointManager) To manage the checkpoint.
        _summary_manager:               (TensorboardSummaryManager) The writer/manager for the Tensorboard summaries.
        _summary_txt_writer:            (TxtSummaryWriter) The writer for the text summaries.
        _txt_function_time_stopper:     (TxtFunctionTimeStopper) The writer and stopper of function times.
        __first_round:                  (Dictionary of three Booleans) Is it the first round of training/evaluation/test?
    """

    def __init__(self, model, trainer, dataset, batch_size, batches_in_epoch, model_dir="/model", save_checkpoint_steps=500, save_checkpoint_epochs=1,
                 log_steps=100, log_epochs=1, save_summary_steps=250, save_summary_epochs=1, load_checkpoint="latest"):
        """
        Constructor, initialize member variables.

        Sets up the checkpoint handling for the model and restores the requested checkpoint if one is found,
        otherwise the model is left as initialized.

        :param model: ("Model") The model to handle with the ModelSuit
        :param trainer: (ITrainer) The trainer to train the model.
        :param dataset: (Dictionary) The dataset to train/eval/test the model.
        :param batch_size: (Integer) The batch size for the model.
        :param batches_in_epoch: (Integer) The number of batches in one training epoch.
        :param model_dir: (String) The directory of the model (e.g. to save it). "/model" by default. It is appended
                            to "<directory of the main module>/experimentResults".
        :param save_checkpoint_steps: (Integer) Every save_checkpoint_steps steps the ModelSuit saves model
                                        (training) checkpoints. 500 by default. (Optional) set to -1 if not needed.
        :param save_checkpoint_epochs: (Integer) Every save_checkpoint_epochs epochs the ModelSuit saves model
                                        (training) checkpoints. 1 by default. (Optional) set to -1 if not needed.
                                        List of epochs supported (e.g. [1,5] saves only a checkpoint in the first and fifth epoch)
        :param log_steps: (Integer) Every log_steps steps the ModelSuit writes logs. 100 by default. (Optional) set to -1 if not needed.
        :param log_epochs: (Integer) Every log_epochs epochs the ModelSuit writes logs. 1 by default. (Optional) set to -1 if not needed.
        :param save_summary_steps: (Integer) Every save_summary_steps steps the ModelSuit saves Tensorboard summaries. 250 by default.
                                    (Optional) set to -1 if not needed.
        :param save_summary_epochs: (Integer) Every save_summary_epochs epochs the ModelSuit saves Tensorboard summaries. 1 by default.
                                    (Optional) set to -1 if not needed.
        :param load_checkpoint: (String or Integer) The checkpoint to load: "latest" (default) restores the latest
                                    checkpoint known to the checkpoint manager, any other value is used as the
                                    checkpoint number, and None restores nothing.
        """
        # Set model, optimizer, dataset, trainer, batch_size.
        self._model = model
        self._dataset = dataset
        self._trainer = trainer
        self._batch_size = batch_size
        self._batches_in_epoch = batches_in_epoch

        # Setting up the Loggers
        self._logger = SLoggerHandler().getLogger(LoggerNames.EXPERIMENT_C)

        # Dir to save and reload model.
        self._model_dir = os.path.dirname(sys.modules['__main__'].__file__) + "/experimentResults" + model_dir

        # Log every log_interval_steps and/or _epochs
        self._log_steps = log_steps
        self._log_epochs = log_epochs

        # Save summary every save_summary_steps and/or _epochs
        self._save_summary_steps = save_summary_steps
        self._save_summary_epochs = save_summary_epochs

        # Save checkpoints every save_checkpoints_steps and/or _epochs
        self._save_checkpoint_steps = save_checkpoint_steps
        self._save_checkpoint_epochs = save_checkpoint_epochs

        # Checkpoint variable
        self._ckpt = tf.train.Checkpoint(optimizer=self._model.optimizer, net=self._model)

        # Create a manager for the checkpoint and restore the latest  (if there is one)
        self._ckpt_manager = tf.train.CheckpointManager(self._ckpt, self._model_dir+'/tf_ckpts', max_to_keep=None)

        # Load specified checkpoint if needed, else continue training if model exists.
        # restore_checkpoint must be bound even if load_checkpoint is None, otherwise the check below
        # raises an UnboundLocalError.
        restore_checkpoint = None
        if load_checkpoint is not None:
            if load_checkpoint == "latest":
                restore_checkpoint = self._ckpt_manager.latest_checkpoint
            else:
                restore_checkpoint = self._model_dir+'/tf_ckpts/ckpt-' + str(load_checkpoint)

        if restore_checkpoint:
            self._ckpt.restore(restore_checkpoint).assert_existing_objects_matched()
            self._logger.info("Restored model from {}".format(restore_checkpoint), "AModelSuit:__init__")
        else:
            self._logger.info("No checkpoint found. Initializing model from scratch", "AModelSuit:__init__")

        # To save summary.
        self._summary_manager = TensorboardSummaryManager(self._model_dir)
        self._summary_txt_writer = TxtSummaryWriter(self._model_dir)
        self._txt_function_time_stopper = TxtFunctionTimeStopper(self._model_dir)

        # Is it the first round of training testing or evaluation?
        self.__first_round = {"train": True, "eval": True, "test": True}
        
    def doTraining(self, train_steps, eval_steps, train_epochs=-1, eval_epochs=-1, only_save_best_checkpoints=False):
        """
        Trains the model with the trainer and the input of the ModelSuit.

        Training continues from the current iteration count of the model's optimizer. The method returns
        immediately if train_steps is set and already reached, or (only if train_steps is -1) if train_epochs is
        set and already reached. At the end of the training a final summary is written and a final validation on
        the "eval" split is executed.

        :param train_steps: (Integer) The steps to train the model. (Optional) set to -1 if not needed.
        :param eval_steps: (Integer) Every eval_steps steps the Model will be evaluated. (Optional) set to -1 if not needed.
        :param train_epochs: (Integer) The epochs to train the model. (Optional) set to -1 if not needed. -1 by default.
        :param eval_epochs: (Integer) Every eval_epochs epochs the Model will be evaluated. (Optional) set to -1 if not needed. -1 by default.
        :param only_save_best_checkpoints: (Boolean) If true, checkpoints are only saved when the first evaluation
                                            loss is lower than the best one seen so far (the periodic checkpoints
                                            are disabled), and the best epoch/step/losses/accuracy are written to
                                            the "eval" text summary at the end. False by default.
        """
        self._logger.train("Started training for " + str(train_steps) + " steps or "+str(train_epochs) +
                           " epochs. Evaluation every " + str(eval_steps) + " steps and/or " +str(eval_epochs) +
                           " epochs.",  "AModelSuit:doTraining")
        self._logger.train("Eager Execution: " + str(tf.executing_eagerly()), "AModelSuit:doTraining")
        self._logger.train("Eager Keras Model: " + str(self._model.run_eagerly), "AModelSuit:doTraining")

        # Stop times.
        start_training_time = time.time()
        start_log_loss_steps_time = time.time()
        start_log_loss_epochs_time = time.time()

        # Training variables. The "best" trackers are initialized up front, so that the summary at the end
        # can always be written, even if no evaluation ever improved on the initial best loss.
        best_loss = float("inf")
        best_acc = None
        best_losses = None
        best_current_step = None
        best_current_epoch = None

        current_step = self._model.optimizer.iterations.numpy()
        current_epoch = current_step//self._batches_in_epoch

        # Check if the model is already trained for the given steps or epochs
        if train_steps != -1:
            if current_step >= train_steps:
                return
        elif train_epochs != -1:
            if current_epoch >= train_epochs:
                return

        # Save first checkpoint with random weights
        if current_step == 0:
            save_path = self._ckpt_manager.save(checkpoint_number=0)
            self._logger.train("Saved checkpoint for step {}: {}".format(current_step, save_path), "AModelSuit:doTraining")
            # If evaluation is wished do validation.
            if (eval_steps > 0) or (eval_epochs > 0):
                eval_losses, eval_acc = self.doValidation("eval")
                eval_loss = eval_losses[0]
                # And if only_save_best_checkpoints is set, use the initial evaluation as the first best values.
                if only_save_best_checkpoints:
                    best_loss = eval_loss
                    best_acc = eval_acc
                    best_losses = eval_losses
                    best_current_step = current_step
                    best_current_epoch = current_epoch
        self._model.resetMetrics()

        # If the model is not trained start training
        training_not_finished = True
        while training_not_finished:
            start_epoch_time = time.time()
            for data in self._dataset["train"]:
                # NOTE: Tracing the graph in the first round of training is currently disabled.

                # Perform a training step.
                outputs = self._trainer.trainingStep(self._model, data)

                # Get training values and metrics.
                losses = [outputs[0][i].numpy() for i in range(0, len(outputs[0]))]
                metrics = self._model.getMetrics()
                acc_value = metrics[0].numpy()

                # Increment the global step.
                current_step += 1

                # If log_steps should be saved and log_steps steps past, print the logs.
                if (self._log_steps != -1) and (current_step % self._log_steps == 0):
                    end_log_loss_steps_time = time.time()
                    self._logger.train("Step " + str(current_step) + ": " + str(self._log_steps) +
                                        " steps past in " + str(end_log_loss_steps_time - start_log_loss_steps_time)
                                            + "s. Acc: " + str(acc_value * 100) + "%. Losses: " + str(losses),
                                        "AModelSuit:doTraining")
                    start_log_loss_steps_time = time.time()

                # If a summary should be saved and save_summary_steps steps past, save the summary.
                if (self._save_summary_steps != -1) and (current_step % self._save_summary_steps == 0):
                    with self._summary_manager.writer("train").as_default():
                        self._model.writeSummary([outputs, metrics], current_step)
                        self._summary_manager.writer("train").flush()

                # If checkpoints should be saved by steps, save a checkpoint every save_checkpoint_steps steps,
                # but only if only_save_best_checkpoints is not set.
                if (self._save_checkpoint_steps != -1) and (not only_save_best_checkpoints) and (current_step % self._save_checkpoint_steps == 0):
                    save_path = self._ckpt_manager.save(checkpoint_number=current_step)
                    self._logger.train("Saved checkpoint for step {}: {}".format(current_step, save_path),
                                        "AModelSuit:doTraining")
                    self._logger.train("Losses: " + str(losses), "AModelSuit:doTraining")

                # If evaluation of steps is wished and if eval_steps steps past, do validation.
                if (eval_steps > 0) and (current_step % eval_steps == 0):
                    eval_losses, eval_acc = self.doValidation("eval")
                    eval_loss = eval_losses[0]
                    # And if only_save_best_checkpoints is set and the eval loss is lower than the best one,
                    # save the model.
                    if only_save_best_checkpoints and (best_loss > eval_loss):
                        save_path = self._ckpt_manager.save(checkpoint_number=current_step)
                        self._logger.train("Saved checkpoint for step {}: {}".format(current_step, save_path),
                                            "AModelSuit:doTraining")
                        self._logger.train("Eval Losses: " + str(eval_losses), "AModelSuit:doTraining")
                        best_loss = eval_loss
                        best_acc = eval_acc
                        best_losses = eval_losses
                        best_current_step = current_step
                        best_current_epoch = current_epoch

                self._model.resetMetrics()

                # Check if we are at the end of the training.
                # NOTE: The epoch handling below still runs once after this break, also for a partial epoch.
                if train_steps != -1:
                    if current_step >= train_steps:
                        training_not_finished = False
                        break

            # One epoch passed
            current_epoch += 1

            # Now we repeat the same for epochs...
            # If log_epochs should be saved and log_epochs epochs past, print the logs.
            if (self._log_epochs != -1) and (current_epoch % self._log_epochs == 0):
                end_log_loss_epochs_time = time.time()
                self._logger.train("Epoch " + str(current_epoch) + ": " + str(self._log_epochs) +
                                    " epochs past in " + str(end_log_loss_epochs_time - start_log_loss_epochs_time)
                                        + "s. Acc: " + str(acc_value * 100) + "%. Losses: " + str(losses),
                                    "AModelSuit:doTraining")
                start_log_loss_epochs_time = time.time()

            # If a summary should be saved and save_summary_epochs epochs past, save the summary.
            if (self._save_summary_epochs != -1) and (current_epoch % self._save_summary_epochs == 0):
                with self._summary_manager.writer("train").as_default():
                    # The summary needs the current step, not the epoch!
                    self._model.writeSummary([outputs, metrics], current_step)
                    self._summary_manager.writer("train").flush()

            # If checkpoints should be saved by epochs, save a checkpoint every save_checkpoint_epochs epochs,
            # but only if only_save_best_checkpoints is not set.
            if (self._save_checkpoint_epochs != -1) and (not only_save_best_checkpoints) and (current_epoch % self._save_checkpoint_epochs == 0):
                save_path = self._ckpt_manager.save(checkpoint_number=current_step)
                self._logger.train("Saved checkpoint for epoch {}: {}".format(current_epoch, save_path),
                                    "AModelSuit:doTraining")
                self._logger.train("Losses: " + str(losses), "AModelSuit:doTraining")

            # If evaluation of epochs is wished and if eval_epochs epochs past, do validation.
            if (eval_epochs > 0) and (current_epoch % eval_epochs == 0):
                eval_losses, eval_acc = self.doValidation("eval")
                eval_loss = eval_losses[0]
                # And if only_save_best_checkpoints is set and the eval loss is lower than the best one,
                # save the model.
                if only_save_best_checkpoints and (best_loss > eval_loss):
                    save_path = self._ckpt_manager.save(checkpoint_number=current_step)
                    self._logger.train("Saved checkpoint for epoch {}: {}".format(current_epoch, save_path),
                                        "AModelSuit:doTraining")
                    self._logger.train("Losses: " + str(losses), "AModelSuit:doTraining")
                    best_loss = eval_loss
                    best_acc = eval_acc
                    best_losses = eval_losses
                    best_current_step = current_step
                    best_current_epoch = current_epoch

            # Update the learning rate based on the current epoch
            if hasattr(self._model, 'updateLearningRate'):
                self._model.updateLearningRate(current_epoch)

            # Check if we are at the end of the training.
            if train_epochs != -1:
                if current_epoch >= train_epochs:
                    break

        # Save the summary at the end of the training
        with self._summary_manager.writer("train").as_default():
            self._model.writeSummary([outputs, metrics], current_step)
            self._summary_manager.writer("train").flush()

        # Do a validation at the end
        eval_losses, eval_acc = self.doValidation("eval")
        eval_loss = eval_losses[0]

        if not only_save_best_checkpoints:
            # Save the model at the end, if only_save_best_checkpoints is not set.
            save_path = self._ckpt_manager.save(checkpoint_number=current_step)

            self._logger.train("Saved checkpoint for step {}: {}".format(current_step, save_path),
                                "AModelSuit:doTraining")
            self._logger.train("Losses: " + str(losses), "AModelSuit:doTraining")
        else:
            # If only_save_best_checkpoints is set, only save the model if the final eval loss is lower
            # than the best one.
            if best_loss > eval_loss:
                save_path = self._ckpt_manager.save(checkpoint_number=current_step)
                self._logger.train("Saved checkpoint for step {}: {}".format(current_step, save_path),
                                   "AModelSuit:doTraining")
                self._logger.train("Losses: " + str(losses), "AModelSuit:doTraining")
                best_loss = eval_loss
                best_acc = eval_acc
                best_losses = eval_losses
                best_current_step = current_step
                best_current_epoch = current_epoch
            # Write the best values to the text summary of the evaluation.
            self._summary_txt_writer.writeSummary("Best Loss Epoch: " + str(best_current_epoch), "eval")
            self._summary_txt_writer.writeSummary("Best Loss Step: " + str(best_current_step), "eval")
            self._summary_txt_writer.writeSummary("Best Losses: " + str(best_losses), "eval")
            self._summary_txt_writer.writeSummary("Best Acc: " + str(best_acc), "eval")

        # Stop training time.
        end_training_time = time.time()

        self._logger.train("Finished training for " +  str(current_epoch) +  " epochs or "+ str(train_steps) +
                            " steps. Evaluation was every " + str(eval_steps) + " steps and/or " +str(eval_epochs)+  " epochs. Training duration was: " +
                            str(end_training_time - start_training_time) + "s. Final Acc: " + str(
                acc_value * 100) +"%. Final losses: " + str(losses), "AModelSuit:doTraining")


    def doValidation(self, mode):
        """
        Validates the model on the subset of the dataset selected by mode.

        Calls self._trainer.validationStep for every element of self._dataset[mode], collects the objective and
        regularization loss of each step, writes a summary through the summary manager and the text summary
        writer and resets the metrics of the model.
        :param mode: (String) The subset of the dataset ("train", "eval" or "test").
        :return: losses: (List) outputs[0] of the last validation step, modified in place to
                          [mean obj loss + mean reg loss, mean obj loss, mean reg loss, ...].
        :return: acc: The first metric of the model (logged as accuracy) multiplied by 100.
        """
        self._logger.val("Started validation for " + str(mode) + " dataset.", "AModelSuit:doValidation")
        self._logger.val("Eager Execution: " + str(tf.executing_eagerly()), "AModelSuit:doValidation")
        self._logger.val("Eager Keras Model: " + str(self._model.run_eagerly), "AModelSuit:doValidation")

        # Start the timers for the whole validation and for the log interval.
        start_validation_time = time.time()
        start_log_loss_steps_time = time.time()

        # Evaluation variables.
        loss_values_obj = []
        loss_values_reg = []

        # NOTE(review): acc_values is filled below but never read in this method.
        acc_values = []
        # Placeholder, overwritten by the first validation step.
        # NOTE(review): if self._dataset[mode] yields nothing, np.concatenate below raises on the empty list.
        outputs = 0
        val_step = 0

        # Validate the model on the sub dataset (one of train/eval/test).
        for data in self._dataset[mode]:
            # If its the first round of training trace the graph.
            #if self.__first_round[mode]:
            #    tf.summary.trace_on(graph=True, profiler=True)

            # Perform a validation step.
            outputs = self._trainer.validationStep(self._model, data)

            # If its the first round of training, add the graph trace to the summary.
            #if self.__first_round[mode]:
            #    with self._summary_manager.writer(mode).as_default():
            #        tf.summary.trace_export(name=str(mode)+"_initial_trace", step=0, profiler_outdir=self._model_dir)
            #    self.__first_round[mode] = False

            # Get evaluation values and metrics.
            # outputs[0] holds the losses: index 0 is the objective loss, index 1 the regularization loss.
            loss_vals_obj = outputs[0][0]
            loss_vals_reg = outputs[0][1]

            metrics = self._model.getMetrics()
            acc_value = metrics[0].numpy()
            loss_values_obj.append(loss_vals_obj)
            loss_values_reg.append(loss_vals_reg)

            acc_values.append(acc_value)

            # If log_steps should be saved and log_steps steps past, print the logs.
            # NOTE(review): self._log_steps == 0 would raise ZeroDivisionError in the modulo below.
            if (self._log_steps != -1) and (val_step % self._log_steps == 0):
                end_log_loss_steps_time = time.time()
                # NOTE(review): o_loss and r_loss are computed but never used.
                o_loss = np.mean(loss_vals_obj)
                r_loss = np.mean(loss_vals_reg)
                self._logger.val("Step " + str(val_step) + ": " + str(self._log_steps) +
                                 " steps past in " + str(end_log_loss_steps_time - start_log_loss_steps_time)
                                 + "s. Accuracy till now: " + str(acc_value * 100) + "%. Loss value for step: " + str(loss_vals_obj+loss_vals_reg) + 
                                 " Obj Loss value for step: " + str(loss_vals_obj) +
                                 " Reg Loss value for step: " + str(loss_vals_reg), "AModelSuit:doValidation")
                start_log_loss_steps_time = time.time()
            val_step += 1

        # Get evaluation values and metrics for epoch.
        # Only the objective losses are concatenated before averaging, the regularization losses are averaged
        # directly. NOTE(review): presumably the objective loss is an array per step and the regularization loss
        # a scalar - TODO confirm.
        loss_values_obj = np.concatenate(loss_values_obj)  
        # The losses of the last step are replaced in place by the means over all steps ...
        outputs[0][0] = np.mean(loss_values_obj)
        outputs[0][1] = np.mean(loss_values_reg)
        # ... and their sum is inserted in front as the total loss.
        outputs[0].insert(0, outputs[0][0]+outputs[0][1]) 

        # The metrics are fetched before the reset and still read afterwards.
        # NOTE(review): assumes getMetrics() returns values that resetMetrics() does not change - TODO confirm.
        metrics = self._model.getMetrics()
        self._model.resetMetrics()

        # Write the summary at the end of the validation/epoch, using the optimizer iterations as step.
        current_step = self._model.optimizer.iterations.numpy()
        with self._summary_manager.writer(mode).as_default():
            self._model.writeSummary([outputs, metrics], current_step)
            self._summary_manager.writer(mode).flush()

        # Stop evaluation time.
        end_validation_time = time.time()

        self._logger.val("Finished validation for " + str(mode) + " dataset. Validation duration was: " +
                            str(end_validation_time - start_validation_time) + "s. Final accuracy: " + str(
                metrics[0].numpy() * 100) +"%. Final losses: " + str(outputs[0]), "AModelSuit:doValidation")

        # Write Acc and Loss in textfile.
        self._summary_txt_writer.writeSummary("Acc for epoch: " + str(metrics[0].numpy() * 100), mode)
        self._summary_txt_writer.writeSummary("Losses for epoch: " + str(outputs[0]), mode)
        return outputs[0], metrics[0].numpy() * 100

    def doDatesetValidation(self):
        """
        Runs doValidation on every subset of the dataset, in the order "train", "eval", "test".
        """
        for subset in ("train", "eval", "test"):
            self.doValidation(subset)

    def saveModel(self):
        """
        Exports the handled model as a TensorFlow SavedModel into the model directory of the ModelSuit.
        """
        export_model = tf.saved_model.save
        export_model(self._model, self._model_dir)

    def getModel(self):
        """
        Gives access to the model handled by the ModelSuit.
        :return: model: ("Model") The model to handle with the ModelSuit
        """
        model = self._model
        return model
コード例 #10
0
    def __init__(self, model, trainer, dataset, batch_size, batches_in_epoch, model_dir="/model", save_checkpoint_steps=500, save_checkpoint_epochs=1,
                 log_steps=100, log_epochs=1, save_summary_steps=250, save_summary_epochs=1, load_checkpoint="latest"):
        """
        Constructor, initialize member variables and restore a checkpoint if requested.
        :param model: ("Model") The model to handle with the ModelSuit
        :param trainer: (ITrainer) The trainer to train the model.
        :param dataset: (Dictionary) The dataset to train/eval/test the model.
        :param batch_size: (Integer) The batch size for the model.
        :param batches_in_epoch: (Integer) The number of batches in one training epoch.
        :param model_dir: (String) The directory of the model (e.g. to save it). "/model" by default.
                           It is appended to "<directory of the main script>/experimentResults".
        :param save_checkpoint_steps: (Integer) Every save_checkpoint_steps steps the ModelSuit saves model
                                        (training) checkpoints. 500 by default. (Optional) set to -1 if not needed.
        :param save_checkpoint_epochs: (Integer) Every save_checkpoint_epochs epochs the ModelSuit saves model
                                        (training) checkpoints. 1 by default. (Optional) set to -1 if not needed.
                                        List of epochs supported (e.g. [1,5] saves only a checkpoint in the first and fifth epoch)
        :param log_steps: (Integer) Every log_steps steps the ModelSuit writes logs. 100 by default. (Optional) set to -1 if not needed.
        :param log_epochs: (Integer) Every log_epoch epochs the ModelSuit writes logs. 1 by default. (Optional) set to -1 if not needed.
        :param save_summary_steps: (Integer) Every save_summary_steps steps the ModelSuit saves Tensorboard summaries. 250 by default.
                                    (Optional) set to -1 if not needed.
        :param save_summary_epochs: (Integer) Every save_summary_epoch epochs the ModelSuit saves Tensorboard summaries. 1 by default.
                                    (Optional) set to -1 if not needed.
        :param load_checkpoint: (Integer or String) The checkpoint to restore. "latest" (default) restores the latest
                                 checkpoint of the checkpoint manager, any other value N restores
                                 "<model dir>/tf_ckpts/ckpt-N" and None skips restoring.
        """
        # Set model, optimizer, dataset, trainer, batch_size.
        self._model = model
        self._dataset = dataset
        self._trainer = trainer
        self._batch_size = batch_size
        self._batches_in_epoch = batches_in_epoch

        # Setting up the Loggers
        self._logger = SLoggerHandler().getLogger(LoggerNames.EXPERIMENT_C)

        # Dir to save and reload model, located next to the main script.
        self._model_dir = os.path.dirname(sys.modules['__main__'].__file__) + "/experimentResults" + model_dir

        # Log every log_interval_steps and/or _epochs
        self._log_steps = log_steps
        self._log_epochs = log_epochs

        # Save summary every save_summary_steps and/or _epochs
        self._save_summary_steps = save_summary_steps
        self._save_summary_epochs = save_summary_epochs

        # Save checkpoints every save_checkpoints_steps and/or _epochs
        self._save_checkpoint_steps = save_checkpoint_steps
        self._save_checkpoint_epochs = save_checkpoint_epochs

        # Checkpoint variable
        self._ckpt = tf.train.Checkpoint(optimizer=self._model.optimizer, net=self._model)

        # Create a manager for the checkpoints of this model.
        checkpoint_dir = self._model_dir + '/tf_ckpts'
        self._ckpt_manager = tf.train.CheckpointManager(self._ckpt, checkpoint_dir, max_to_keep=None)

        # Determine the checkpoint to restore. The variable has to be bound even if load_checkpoint is None,
        # otherwise the check below raises an UnboundLocalError.
        restore_checkpoint = None
        if load_checkpoint is not None:
            if load_checkpoint == "latest":
                restore_checkpoint = self._ckpt_manager.latest_checkpoint
            else:
                restore_checkpoint = checkpoint_dir + '/ckpt-' + str(load_checkpoint)

        if restore_checkpoint:
            self._ckpt.restore(restore_checkpoint).assert_existing_objects_matched()
            self._logger.info("Restored model from {}".format(restore_checkpoint), "AModelSuit:__init__")
        else:
            self._logger.info("No checkpoint found. Initializing model from scratch", "AModelSuit:__init__")

        # To save summary.
        self._summary_manager = TensorboardSummaryManager(self._model_dir)
        self._summary_txt_writer = TxtSummaryWriter(self._model_dir)
        self._txt_function_time_stopper = TxtFunctionTimeStopper(self._model_dir)

        # Is it the first round of training testing or evaluation?
        self.__first_round = {"train": True, "eval": True, "test": True}
コード例 #11
0
class ImagePlusLabelVisualizer:
    """
    The ImagePlusLabelVisualizer tries to visualize the images and labels via cv2 and logs additional information,
    like the shape of the images and labels on screen.

    :Attributes:
        __logger:  (Logger) The logger for the visualizer.
    """

    def __init__(self):
        """
        Constructor, initialize member variables.
        """
        self.__logger = SLoggerHandler().getLogger(LoggerNames.LOGGER_C)

    def visualizeImagesAndLabelsWithBreak(self, images, labels, indexes=(0,)):
        """
        Visualizes the images and labels defined in the indexes variable.
        Prints additional information on screen.
        Stops the program until some input is given.
        :param images: (np.array) The images to visualize.
        :param labels: (np.array) The labels to visualize.
        :param indexes: (Iterable) The indexes of the images and labels to visualize. Only the first entry by default.
        """
        # Log the information of the whole batch once, each selected entry is logged again when it is shown.
        self.logImagesAndLabelsInfo(images, labels)

        for index in indexes:
            self.visualizeImageAndLabelWithBreak(images[index], labels[index])

    def visualizeImageAndLabelWithBreak(self, image, label):
        """
        Visualizes the image and label given.
        Prints additional information on screen.
        Stops the program until some input is given.
        :param image: (np.array) The image to visualize.
        :param label: (np.array) The label to visualize.
        """
        self.logImagesAndLabelsInfo(image, label)
        self.__visualizeImageAndLabelWithBreak(image, label)

    def logImagesAndLabelsInfo(self, image, label):
        """
        Logs additional information of the image(s) and label(s): both shapes and the label itself.
        :param image: (np.array) The image(s) to log the shape of.
        :param label: (np.array) The label(s) to log.
        """
        info_text = "\n*********ImagePlusLabelVisualizer*********\nImages shape: " + str(image.shape) + "\n" + \
                    "Labels shape: " + str(label.shape) + "\n" + "Label: " + str(label) + \
                    "\n******************************************"
        self.__logger.info(str(info_text), "ImagePlusLabelVisualizer:logImageAndLabelInformation")

    def __visualizeImageAndLabelWithBreak(self, image, label):
        """
        Visualizes the image and label given.
        Stops the program until some input is given.
        :param image: (np.array) The image to visualize.
        :param label: (np.array) The label to visualize, used as the window title.
        """
        converted_image = self.__checkAndConvertImage(image)
        converted_label = self.__checkAndConvertLabel(label)

        # Show every image in the same fixed window size.
        converted_image = cv2.resize(converted_image, (320, 320))

        cv2.imshow(str(converted_label), converted_image)
        cv2.waitKey()

    def __checkAndConvertImage(self, image):
        """
        Checks if the given image is in right shape to be visualized. If not the shape will be converted properly.
        An image whose first dimension is at most 3 is treated as channels-first: it is transposed to
        channels-last and cast to uint8. A three channel image is additionally converted from RGB to BGR.
        :param image: (np.array) The image to check.
        :return: image: (np.array) The image to visualize.
        """
        img_shape = image.shape
        if img_shape[0] <= 3:
            # The channel count has to be read from the first axis of the original shape. Index 2 of that
            # shape is the width, and comparing integers with "is" tests identity instead of equality.
            num_channels = img_shape[0]
            image = image.transpose((1, 2, 0))
            image = image.astype(np.uint8)
            if num_channels == 3:
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        return image

    def __checkAndConvertLabel(self, label):
        """
        Checks if the given label is in right shape to be visualized. Currently the label is returned unchanged.
        :param label: (np.array) The label to check.
        :return: label: (np.array) The unchanged label.
        """
        return label
コード例 #12
0
ファイル: CsnnOfmExperiment.py プロジェクト: codeaudit/CSNN
class CsnnOfmExperiment(IExperiment):
    """
    The CsnnOfmExperiment trains the models used to create Figure 4 of our paper: "CSNNs: Unsupervised,
    Backpropagation-Free Convolutional Neural Networks for Representation Learning".

    The experiment trains each CSNN model for each given dataset for all defined training steps per layer and learns the
    defined classifier to test the learned representation. If xFoldCrossValidation is set this will be repeated x times.

    :Attributes:
        __config:    (Dictionary) The config of the experiment, containing all model parameters. Refer to the config
                      csnnOfmExperiment.json as an example.
        __logger:    (Logger) The logger for the experiment.
        __num_gpus:  (Integer) The number of GPUs to use.

    """
    def __init__(self, config):
        """
        Constructor, initialize member variables.
        :param config: (Dictionary) The config of the experiment, containing all model parameters. Refer to the config
                        csnnOfmExperiment.json as an example.
        """
        self.__config = config
        self.__logger = SLoggerHandler().getLogger(LoggerNames.EXPERIMENT_C)
        self.__num_gpus = ConfigProvider().get_config(
            "controllerConfig.json")["hardware"]["numGPUs"]

    def execute(self):
        """
        Executes the experiment with the given config.

        The experiment trains each CSNN model for each given dataset for all defined training steps per layer and learns
        the defined classifier to test the learned representation. If xFoldCrossValidation is set this will be repeated
        x times.

        Errors raised for one CSNN config or one classifier are printed as traceback and the experiment continues
        with the next one. The "trainingSteps" and "trainInterval" entries of the CSNN configs and the "numClasses"
        entry of the classifier configs are overwritten in place.
        """
        encoding_config = self.__config["encodingConfig"]

        # For each model to train and val
        for csnn_config in self.__config["csnnConfigs"]:
            csnn_name = csnn_config["modelName"]
            try:
                # For each dataset
                for dataset_config in self.__config["datasetConfigs"]:

                    # Get the provider of the dataset
                    provider = getDatasetProvider(dataset_config)
                    # Skip datasets the model has no batch size for.
                    if dataset_config["nameOfDataset"] not in csnn_config["batchSizes"].keys():
                        continue

                    # and for each xFold iteration
                    for i in range(0, csnn_config["xFoldCrossValidation"]):
                        # Construct the model Name
                        model_dir = "/" + csnn_name + "/" + dataset_config[
                            "nameOfDataset"] + "/xFoldCrossVal" + str(i)
                        self.__logger.info("Starting to train: " + model_dir,
                                           "CsnnOfmExperiment:execute")

                        # Reset the training of the model
                        csnn_config["trainingSteps"] = 0

                        # Train model for training_step_per_layer steps
                        prev_training_steps = 0
                        prev_training_steps_per_layer = 0
                        for training_step_per_layer in self.__config["ofmSteps"]:

                            # Based on the current training_step_per_layer we calculate the new training steps for
                            # non frozen layers
                            csnn_config["trainingSteps"] = training_step_per_layer * len(csnn_config["layers"])

                            # Calculate the training intervals based on the old training intervals: each layer
                            # gets an interval of (current - previous) steps per layer, one after another.
                            for layer_config in csnn_config["layers"]:
                                layer_config["trainInterval"][0] = prev_training_steps
                                prev_training_steps += (training_step_per_layer - prev_training_steps_per_layer)
                                layer_config["trainInterval"][1] = prev_training_steps

                            prev_training_steps_per_layer = training_step_per_layer
                            prev_training_steps = csnn_config["trainingSteps"]

                            # If xFoldCrossValidation is set, create a seed to shuffle the dataset.
                            if csnn_config["xFoldCrossValidation"] <= 1:
                                xseed = None
                            else:
                                xseed = 42 + i
                            dataset, dataset_generator = prepareDataset(
                                provider,
                                dataset_config,
                                xfold_seed=xseed,
                                augment_data=csnn_config["augmentData"])

                            # Train the CSNN
                            trainAndValCsnn(csnn_config, dataset_generator,
                                            dataset, dataset_config,
                                            self.__num_gpus,
                                            model_dir + "/Csnn")
                            # The logger is called with (message, caller) like everywhere else in this file.
                            self.__logger.info(
                                "Finished to train: " + model_dir,
                                "CsnnOfmExperiment:execute")

                            self.__logger.info(
                                "Starting to create dataset encoding with: " +
                                model_dir, "CsnnOfmExperiment:execute")
                            # Create the encoding of the dataset via the trained CSNN.
                            encoding_provider = prepareEncoding(
                                csnn_config,
                                dataset_generator,
                                dataset,
                                dataset_config,
                                csnn_name,
                                self.__num_gpus,
                                model_dir + "/Csnn",
                                zero_mean_unit_variance=csnn_config[
                                    "zeroMeanUnitVarianceEncoding"])
                            self.__logger.info(
                                "Finished to create dataset encoding with: " +
                                model_dir, "CsnnOfmExperiment:execute")

                            self.__logger.info(
                                "Starting to train classifiers for: " +
                                model_dir, "CsnnOfmExperiment:execute")
                            # Test the trained model by training the classifier(s)
                            for classifier in self.__config["classifiers"]:
                                try:
                                    classifier["numClasses"] = dataset_config[
                                        "numClasses"]
                                    if (classifier["type"] == "nonlinear") or (
                                            classifier["type"] == "linear"):
                                        trainAndValClassifier(
                                            Mlp(classifier), classifier,
                                            encoding_provider, encoding_config,
                                            self.__num_gpus, model_dir + "/" +
                                            classifier["modelName"] +
                                            str(training_step_per_layer))
                                    elif classifier["type"] == "fewShot":
                                        trainAndValFewShotClassifier(
                                            Mlp(classifier), classifier,
                                            encoding_provider, encoding_config,
                                            self.__num_gpus, model_dir + "/" +
                                            classifier["modelName"] +
                                            str(training_step_per_layer))
                                except Exception:
                                    # A failing classifier must not stop the remaining ones. KeyboardInterrupt
                                    # and SystemExit are deliberately not caught.
                                    print(traceback.format_exc())
                            self.__logger.info(
                                "Finished to train classifiers for: " +
                                model_dir, "CsnnOfmExperiment:execute")
            except Exception:
                # A failing CSNN config must not stop the remaining ones. KeyboardInterrupt and SystemExit
                # are deliberately not caught.
                print(traceback.format_exc())
コード例 #13
0
class CsnnPerformancesExperiment(IExperiment):
    """
    The CsnnPerformancesExperiment trains the cnn and hybrid model presented in Table 1 and 2 of our paper: "
    CSNNs: Unsupervised, Backpropagation-Free Convolutional Neural Networks for Representation Learning".

    Every CSNN model is trained on every given dataset for the defined training steps per layer. Afterwards the
    defined classifiers are trained on the encoding created by the CSNN to test the learned representation.
    If xFoldCrossValidation is set this will be repeated x times.

    :Attributes:
        __config:    (Dictionary) The config of the experiment, containing all model parameters. Refer to the config
                      csnnPerformancesExperiment.json as an example.
        __logger:    (Logger) The logger for the experiment.
        __num_gpus:  (Integer) The number of GPUs to use.
    """
    def __init__(self, config):
        """
        Constructor, stores the config, fetches the logger and reads the number of GPUs from the controller config.
        :param config: (Dictionary) The config of the experiment, containing all model parameters.
        """
        self.__config = config
        self.__logger = SLoggerHandler().getLogger(LoggerNames.EXPERIMENT_C)
        controller_config = ConfigProvider().get_config("controllerConfig.json")
        self.__num_gpus = controller_config["hardware"]["numGPUs"]

    def execute(self):
        """
        Executes the experiment with the given config.

        For each CSNN config, each dataset config the model has a batch size for and each cross validation fold:
        the dataset is prepared, the CSNN is trained, the dataset is encoded with the trained CSNN and the
        classifiers are trained on that encoding. An exception raised for a CSNN config is printed as traceback
        and the experiment goes on with the next CSNN config.
        """
        log_tag = "CsnnPerformancesExperiment:execute"
        encoding_config = self.__config["encodingConfig"]

        for csnn_config in self.__config["csnnConfigs"]:
            csnn_name = csnn_config["modelName"]

            try:
                for dataset_config in self.__config["datasetConfigs"]:
                    provider = getDatasetProvider(dataset_config)

                    # Only datasets with a configured batch size are used.
                    dataset_name = dataset_config["nameOfDataset"]
                    if dataset_name not in csnn_config["batchSizes"].keys():
                        continue

                    for fold in range(0, csnn_config["xFoldCrossValidation"]):
                        model_dir = "/".join(
                            ["", csnn_name, dataset_config["nameOfDataset"], "xFoldCrossVal" + str(fold)])
                        self.__logger.info("Starting to train: " + model_dir, log_tag)

                        # The dataset is only shuffled with a seed if there is more than one fold.
                        xseed = None if csnn_config["xFoldCrossValidation"] <= 1 else 42 + fold
                        dataset, dataset_generator = prepareDataset(
                            provider, dataset_config, xfold_seed=xseed, augment_data=csnn_config["augmentData"])

                        csnn_dir = model_dir + "/Csnn"
                        trainAndValCsnn(
                            csnn_config, dataset_generator, dataset, dataset_config, self.__num_gpus, csnn_dir)
                        self.__logger.info("Finished to train: " + model_dir, log_tag)

                        self.__logger.info("Starting to create dataset encoding with: " + model_dir, log_tag)
                        encoding_provider = prepareEncoding(
                            csnn_config, dataset_generator, dataset, dataset_config, csnn_name, self.__num_gpus,
                            csnn_dir, zero_mean_unit_variance=csnn_config["zeroMeanUnitVarianceEncoding"])
                        self.__logger.info("Finished to create dataset encoding with: " + model_dir, log_tag)

                        self.__logger.info("Starting to train classifiers for: " + model_dir, log_tag)
                        self.__trainClassifiers(encoding_provider, encoding_config, dataset_config, model_dir)
                        self.__logger.info("Finished to train classifiers for: " + model_dir, log_tag)
            except Exception:
                print(traceback.format_exc())

    def __trainClassifiers(self, encoding_provider, encoding_config, dataset_config, model_dir):
        """
        Trains every configured classifier on the given encoding. An exception raised for a classifier is printed
        as traceback and the next classifier is trained.
        :param encoding_provider: The provider of the encoding, as returned by prepareEncoding.
        :param encoding_config: (Dictionary) The "encodingConfig" entry of the experiment config.
        :param dataset_config: (Dictionary) The config of the dataset, its "numClasses" is written to the classifier config.
        :param model_dir: (String) The directory of the model, the "modelName" of the classifier is appended to it.
        """
        for classifier in self.__config["classifiers"]:
            try:
                classifier["numClasses"] = dataset_config["numClasses"]

                # Pick the training routine for the type, classifiers of other types are skipped.
                classifier_type = classifier["type"]
                if classifier_type in ("nonlinear", "linear"):
                    train_fn = trainAndValClassifier
                elif classifier_type == "fewShot":
                    train_fn = trainAndValFewShotClassifier
                else:
                    continue

                train_fn(Mlp(classifier), classifier, encoding_provider, encoding_config, self.__num_gpus,
                         model_dir + "/" + classifier["modelName"])
            except Exception:
                print(traceback.format_exc())
コード例 #14
0
 def __init__(self):
     """
     Constructor, fetches the logger and stores it as member variable.
     """
     logger_handler = SLoggerHandler()
     self.__logger = logger_handler.getLogger(LoggerNames.LOGGER_C)
コード例 #15
0
ファイル: TfRecordHandler.py プロジェクト: codeaudit/CSNN
class TfRecordHandler:
    """
    The TfRecordHandler handles the creation of tfrecords for various datasets.

    :Attributes:
        __logger:       (Logger) The logger for the controller.
        __tfrecord_dir: (String) The path to the record save directory.
        __num_threads:  (Integer) The number of threads for writing the tfrecords. 1 by default.
    """
    def __init__(self, tfrecord_dir, dataset_prepreprocessors=None, num_threads=0):
        """
        Constructor, stores the given settings and fetches the logger.
        :param tfrecord_dir: (String) The path to the record save directory.
        :param dataset_prepreprocessors: (Dictionary) The prepreprocessor configs, looked up by the prepreprocessing
                                          name stored in a dataset split. None by default.
        :param num_threads: (Integer) The number of threads for writing the tfrecords. 0 by default.
        """
        # Plain settings first, they do not depend on anything else.
        self.__tfrecord_dir = tfrecord_dir
        self.__dataset_prepreprocessors = dataset_prepreprocessors
        self.__num_threads = num_threads

        logger_handler = SLoggerHandler()
        self.__logger = logger_handler.getLogger(LoggerNames.Output_C)

    def createTfRecords(self, dataset_names, dataset_splits):
        """
        Creates the tfrecords for the given datasets witch the given train, eval, test split.
        Therefore DataProviders with the names "dataset_nameProvider" for example "MnistProvider" must be implemented
        properly for the datasets to convert them into tfrecords.
        :param dataset_names: (String) The name of the dataset
        :param dataset_splits: (Dictionary) The splits of the datasets in format
                               {"datasetname":[trainNum,evalNum,testNum, "prepreprocessing"].
        """
        # Calcualte to number of splits to write in tfrecords.
        splits_to_write = 0
        for dataset_name in dataset_names:
            splits_to_write += len(dataset_splits[dataset_name])
            self.__logger.info("Read " + str(len(dataset_splits[dataset_name])) + " splits to write for dataset " +
                               str(dataset_name) + ": " +str(dataset_splits[dataset_name]),
                               "TfRecordHandler:createTfRecords")
        self.__logger.info("Read " + str(splits_to_write) + " splits to write in total.",
                           "TfRecordHandler:createTfRecords")

        # Calculate the threads_per_split roughly.
        threads_per_split = self.__num_threads / splits_to_write
        self.__logger.info("Splits to write: "+str(splits_to_write)+ ". Threads available: " + str(self.__num_threads) +
                           ". => Calculated  " + str(threads_per_split) + " threads per split.",
                           "TfRecordHandler:createTfRecords")

        # If there is just one split to write, there is no need to multithread multible dataset splits, just the split
        # itself.
        if splits_to_write == 1:
            self.__logger.info("Just one dataset split to write. Starting to create tfrecords with multithreading in "
                               "dataset parts...", "TfRecordHandler:createTfRecords")
            for dataset_name in dataset_names:
                for dataset_split in dataset_splits[dataset_name]:
                    self.createTfRecord(dataset_name, dataset_split, threads_per_split)

        # If number of threads is one or less then one, there is no need to multithead anything.
        elif self.__num_threads <= 1:
            self.__logger.info("Less then or 1 thread available in total. Starting to create tfrecords without "
                               "multithreading...", "TfRecordHandler:createTfRecords")
            for dataset_name in dataset_names:
                for dataset_split in dataset_splits[dataset_name]:
                    self.createTfRecord(dataset_name, dataset_split, 0)

        # If the number of threads is greater then te split to write and there a so many thread, that each split can
        # have more then 3 threads, then execute each split in a extra thread and distribute the training, eval and test data
        # file writing in the thread into 2-3 threads.
        elif threads_per_split >= 3:
            self.__logger.info("More then 3 threads per split! Starting to create tfrecords with " +
                               str(self.__num_threads) + " threads. Multithreading in splits and dataset parts...",
                               "TfRecordHandler:createTfRecords")
            # Calculate threads per split "exactly".
            threads_per_split = max(math.ceil(threads_per_split - 1), 3)

            # Run each split in a thread with threads_per_split threads per split.
            threads = []
            for dataset_name in dataset_names:
                for dataset_split in dataset_splits[dataset_name]:
                    threads.append(FunctionThread(self.createTfRecord, dataset_name, dataset_split, threads_per_split))
                    threads[-1].start()

            # Wait for all threads to finish.
            for thread in threads:
                thread.join()

        # If the number of threads is more the 2, then execute as many splits as possible in parallel, if some threads
        # are idle because the number of threads in a little bit greater then the number of splits, distribute the
        # threads in two-pairs to the writing of the training, eval and test files.
        else:
            self.__logger.info("More then 1 thread available in total! Started to create tfrecords with " +
                               str(self.__num_threads) + " threads. Multithreading in splits and sometimes in dataset...",
                               "TfRecordHandler:createTfRecords")
            # Calculate the threads per split "exactly".
            threads_for_splits = self.__calculateThreadsForEachSplit(splits_to_write)

            # Prepare the queue.
            queue = []
            index = 0
            for dataset_name in dataset_names:
                for dataset_split in dataset_splits[dataset_name]:
                    queue.append([self.createTfRecord, dataset_name, dataset_split, threads_for_splits[index]])
                    index += 1

            # Run the splits in as many threads as possible.
            threads = []
            fifo_index = 1
            for i in range(1, len(queue) + 1):
                threads.append(FunctionThread(queue[i - 1][0], queue[i - 1][1], queue[i - 1][2], queue[i - 1][3]))
                threads[-1].start()

                self.__logger.info("Started split thread " + str(i) + "...", "TfRecordHandler:createTfRecord")
                # Wait for the latest started thread to finish, this makes sense because the train data thread is
                # started first and the the data to process in the training set is usually the most.
                if i >= self.__num_threads:
                    # Just wait for the thread to finish and print the info if its not the last thread.
                    if i < len(queue):
                        self.__logger.info("Waiting for split thread " + str(i) + " to finish to start next thread...",
                                            "TfRecordHandler:createTfRecord")
                        threads[fifo_index].join()
                        fifo_index += 1

            # Wait for all threads to finsih.
            self.__logger.info("Waiting for all remaining split threads to finish...", "TfRecordHandler:createTfRecord")
            for thread in threads:
                thread.join()

        self.__logger.info("Finished to create all tfrecords.", "TfRecordHandler:createTfRecords")

    def createTfRecord(self, dataset_name, dataset_split, num_threads=1):
        """
        Writes the train, eval and test tfrecord files for one split of one dataset.
        The provider class "datasetnameProvider", for example MnistProvider, is imported dynamically from
        Input_Component.DataProviders and must therefore be implemented properly for the dataset.
        :param dataset_name : (String) The name of the dataset.
        :param dataset_split : (Array) The split of the dataset in format [trainNum, evalNum, testNum,
                              "prepreprocessing"]. The three sizes are overwritten in place after prepreprocessing.
        :param num_threads : (Integer) The number of threads for writing the tfrecords, capped at 3. 1 by default.
        """
        self.__logger.info("Started to create tfrecords for " + str(dataset_name) + " with split  " + str(
            dataset_split) + " and " + str(num_threads) + " extra threads per split...",
                           "TfRecordHandler:createTfRecords")

        # Only three files (train, eval, test) are written, so more than three threads are of no use.
        num_threads = min(num_threads, 3)
        use_threads = num_threads > 0

        # Import the module "<dataset_name>Provider" and instantiate the class of the same name, which is like:
        # from Input_Component.DataProviders.MnistProvider import MnistProvider.
        provider_name = dataset_name+"Provider"
        provider_module = importlib.import_module("Input_Component.DataProviders."+provider_name)
        provider_class = getattr(provider_module, provider_name)
        dataset_provider = provider_class()
        dataset_provider.setDatasetSplit(dataset_split)

        # Load the prepreprocessor the same way if the split names one.
        if dataset_split[3] != "None":
            prepreprocessor_config = self.__dataset_prepreprocessors[dataset_split[3]]
            preprocessing_name = prepreprocessor_config["preProcessingClassName"]
            preprocessing_module = importlib.import_module(
                "Preprocessing_Component.Prepreprocessing." + preprocessing_name)
            prepreprocessor = getattr(preprocessing_module, preprocessing_name)(prepreprocessor_config)

        # Decide what is written for each mode and by which writer.
        modes = ("train", "eval", "test")
        if dataset_provider.datasetProcessableAtOnce():
            # The whole dataset fits into memory: load all three parts at once.
            dataset = dataset_provider.getSplittedDatasetInNumpy()
            train_data = {"data": dataset["x_train"], "label": dataset["y_train"]}
            eval_data = {"data": dataset["x_eval"], "label": dataset["y_eval"]}
            test_data = {"data": dataset["x_test"], "label": dataset["y_test"]}

            if dataset_split[3] != "None":
                # Todo: Multithreading
                train_data, eval_data, test_data = prepreprocessor.process(train_data, eval_data, test_data)
                # Prepreprocessing may change the number of samples, so the split sizes are updated.
                dataset_split[0] = len(train_data["data"])
                dataset_split[1] = len(eval_data["data"])
                dataset_split[2] = len(test_data["data"])

            writer = self.__writeTfrecord
            payloads = (train_data, eval_data, test_data)
        else:
            # Todo add preprocessing
            # The dataset is read batchwise, so the writer pulls the batches from the provider itself.
            writer = self.__writeTfrecordBatchwise
            payloads = (dataset_provider, dataset_provider, dataset_provider)

        # With threading the write jobs are queued, without threading they are executed right away.
        queue = []
        for payload, mode in zip(payloads, modes):
            if use_threads:
                queue.append([writer, payload, mode, dataset_name, dataset_split])
            else:
                writer(payload, mode, dataset_name, dataset_split)

        if use_threads:
            threads = []
            for i, job in enumerate(queue, 1):
                worker = FunctionThread(*job)
                threads.append(worker)
                worker.start()

                self.__logger.info("Started dataset thread " + str(i) + " for " + str(dataset_name) + " with split  "
                                   + str(dataset_split) + "...", "TfRecordHandler:createTfRecord")

                # Once num_threads threads were started, wait for the thread started last before starting the next
                # one. After the third (last) thread the final join below does the waiting.
                if i >= num_threads and i < 3:
                    self.__logger.info("Waiting for dataset thread " + str(i) +
                                       " to finish to start next thread for " + str(dataset_name) + " with split "
                                       + str(dataset_split)+"...", "TfRecordHandler:createTfRecord")
                    worker.join()

            self.__logger.info("Waiting for all remaining dataset threads to finish for " +
                               str(dataset_name) + " with split " + str(dataset_split)+"...",
                               "TfRecordHandler:createTfRecord")

            # Wait for all threads to finish.
            for worker in threads:
                worker.join()

        self.__logger.info("Finished to create tfrecords for " + str(dataset_name) + " with split  " + str(
            dataset_split) + " and " + str(num_threads) + " extra threads per split...",
                           "TfRecordHandler:createTfRecords")

    def __writeTfrecord(self, data, mode, dataset_name, dataset_split):
        """
        Writes the tfrecord file for the given data.
        :param data: (Dictionary) The data to write in the tfrecord file in the format built by createTfRecord:
                       {"data": samples, "label": labels}. Any other sized container is passed on with its own length.
        :param mode: (String) The mode of the saved record.
        :param dataset_name: (String) The name of the dataset.
        :param dataset_split: (Array) The splits of the dataset in format
                              [trainNum, evalNum, testNum, "prepreprocessing"].
        """
        # len() of the dictionary counts its keys ("data" and "label"), not the samples, so it would always be 2.
        # NOTE(review): the fifth TfRecordExporter argument is presumably the number of entries to expect - the
        # batchwise writer passes dataset_provider.getSetSize(mode) there. Confirm against TfRecordExporter.
        if isinstance(data, dict) and "data" in data:
            num_entries = len(data["data"])
        else:
            num_entries = len(data)

        with TfRecordExporter(self.__tfrecord_dir, dataset_name, dataset_split, mode, num_entries) as exporter:
            exporter.writeData(data)

    def __writeTfrecordBatchwise(self, dataset_provider, mode, dataset_name, dataset_split):
        """
        Writes the tfrecord file for the given mode batch by batch and logs the progress after every batch.
        :param dataset_provider: (DatasetProvider) The provider of the batches of the dataset.
        :param mode: (String) The mode of the saved record.
        :param dataset_name: (String) The name of the dataset.
        :param dataset_split: (Array) The splits of the dataset in format [trainNum, evalNum, testNum,
                                "prepreprocessing"].
        """
        # Only used for the progress output below.
        total_batches = dataset_provider.getNumReadInBatches(mode)

        with TfRecordExporter(self.__tfrecord_dir, dataset_name, dataset_split, mode,
                              dataset_provider.getSetSize(mode)) as exporter:
            for batch_index in range(dataset_provider.getNumReadInBatches(mode)):
                exporter.writeData(dataset_provider.getNextReadInBatchInNumpy(mode), False)

                percent_done = round(batch_index / total_batches * 100, 2)
                progress_text = ("Created tfrecord for batch " + str(batch_index) + "/" + str(total_batches) + ": " +
                                 str(percent_done) + "%.")
                self.__logger.info(progress_text, "TfRecordHandler:createTfRecords")

    def __calculateThreadsForEachSplit(self, splits_to_write):
        """
        Calculates the threads per split an distributes the threads in two pairs,
        because one thread is like no thread ;).
        :param splits_to_write: (Integer) The number of splits to write.
        :returns threads_for_splits: (Array) The threads for each split.
        """
        threads_for_splits = np.full((splits_to_write), 0)
        not_used_threads_to_distribute = self.__num_threads - splits_to_write

        distribute_index = 0
        while not_used_threads_to_distribute >= 2:
            threads_for_splits[distribute_index] += 2
            distribute_index += 1
            not_used_threads_to_distribute -= 2
            if distribute_index >= (splits_to_write - 1):
                distribute_index = 0

        return threads_for_splits
コード例 #16
0
ファイル: ExperimentScheduler.py プロジェクト: codeaudit/CSNN
class ExperimentScheduler:
    """
    The ExperimentSchedule schedules the experiments defined in the given schedule.
    If a experiment went wrong, the execution goes on for the next experiment and an execution information is printed
    after each experiment.

    :Attributes:
        __schedule:                (Dictionary) The schedule containing the experiment order.
        __successful_experiments:  (Array) Contains the names of the successful experiments.
        __canceled_experiments:    (Array) Contains the names of the canceled experiments.
        __logger:                  (Logger) The logger for the experiments.
        __finished_experiments:    (Integer) Counts the finished experiments (successful or not).
    """
    def __init__(self, schedule):
        """
        Constructor, stores the schedule and resets the bookkeeping of the experiment results.
        :param schedule: (Dictionary) The schedule containing the experiment order.
        """
        self.__schedule = schedule

        # Nothing has been executed yet.
        self.__finished_experiments = 0
        self.__successful_experiments = []
        self.__canceled_experiments = []

        logger_handler = SLoggerHandler()
        self.__logger = logger_handler.getLogger(LoggerNames.EXPERIMENT_C)

    def execute(self):
        """
        Executes the experiments defined in the given schedule.
        If a experiment went wrong, the execution goes on for the next experiment and an execution information is printed
        after each experiment.
        """
        if self.__schedule["mode"] == "sequential":
            self.__finished_experiments = 0

            for experiment_name in self.__schedule["experimentsToRun"]:
                self.__finished_experiments = self.__finished_experiments + 1
                try:
                    # Dynamically import the experiment class by name.
                    experiment_module = importlib.import_module(
                        "Experiment_Component.Experiments." + experiment_name)

                    # Dynamically load the provider class by name.
                    # Combined with the above import its like: from Experiment_Component.Experiments.CsnMnistExperiment import CsnMnistExperiment
                    experiment = getattr(experiment_module, experiment_name)

                    for config_name in self.__schedule[
                            "experimentConfigsToRun"][experiment_name]:
                        try:
                            config = ConfigProvider().get_config(
                                "Experiment_Component/ExperimentConfigs/" +
                                config_name)
                            experiment(config).execute()
                            self.__successful_experiments.append(
                                experiment_name)
                            self.__logExecutionInfo()
                        except:
                            self.__logger.error(
                                "Cancled experiment " + experiment_name +
                                " An error accrued:" +
                                str(traceback.format_exc()),
                                "ExperimentScheduler:execute")
                            self.__canceled_experiments.append(experiment_name)
                            self.__logExecutionInfo()

                except:
                    self.__logger.error(
                        "Cancled experiment " + experiment_name +
                        " An error accrued:" + str(traceback.format_exc()),
                        "ExperimentScheduler:execute")
                    self.__canceled_experiments.append(experiment_name)
                    self.__logExecutionInfo()

    def __logExecutionInfo(self):
        """
        Logs the execution information (for example the successful or canceled experiments) after each experiment.
        """
        info_text = "\n************ExperimentScheduler************\n" + "Experiment " + str(
            self.__finished_experiments) + " of " + str(
                len(self.__schedule["experimentsToRun"])
            ) + " finished.\nSuccessful experiments: " + str(
                self.__successful_experiments
            ) + "\nCancled experiments: " + str(
                self.__canceled_experiments
            ) + "\n*******************************************"
        self.__logger.info(info_text, "ExperimentScheduler:__logExecutionInfo")
コード例 #17
0
class Controller:
    """
    The controller is the central point of the framework. It takes care of the execution of various programs like
    the creation of TfRecord datasets or the execution of experiments via a scheduler.

    :Attributes:
        __controller_config_file_path: (String) The path to the config for the controller.
        __config:                      (Dictionary) The config of the controller.
        __logger:                      (Logger) The logger for the controller.
        __experiment_scheduler:        (ExperimentScheduler) The scheduler to handle multiple experiments.
        __tf_record_handler:           (TfRecordHandler) The handler to create the tf records.
        __config_provider:             (ConfigProvider) : The provider to request config input.
        __experiment_scheduler:        (ExperimentScheduler) The scheduler to schedule the experiment execution.
    """
    def __init__(self, controller_config_file_path):
        """
        Constructor, initialize member variables.
        :param controller_config_file_path: (String) String to controllerConfig File.
        """
        print("Controller: Starting __init__() ...")
        self.__controller_config_file_path = controller_config_file_path
        self.__config = None
        self.__logger = None
        self.__tf_record_handler = None
        self.__config_provider = None
        self.__experiment_scheduler = None
        print("Controller: Finished __init__()")

    def init(self):
        """
        Init method, fetches the logger, loads the controller config and creates the TfRecordHandler from it.
        Errors derived from Exception are logged and printed and reported via the return value. KeyboardInterrupt
        and SystemExit are not swallowed. Fetching the logger happens before the error handling starts.
        :return: successful: (Boolean) Was the execution successful?
        """
        print("Controller: Starting init() ...")
        self.__logger = SLoggerHandler().getLogger(LoggerNames.CONTROLLER_C)
        self.__logger.info("Loading config ...", "Controller:init")
        successful = True

        try:
            self.__config_provider = ConfigProvider()
            self.__config = self.__config_provider.get_config(
                self.__controller_config_file_path)
            self.__logger.info("Finished loading config.", "Controller:init")

            # The handler writes into the directory "data" with the prepreprocessors and CPU cores of the config.
            self.__tf_record_handler = TfRecordHandler(
                tfrecord_dir="data",
                dataset_prepreprocessors=self.__config["datasetPrePreProcessors"],
                num_threads=self.__config["hardware"]["numCPUCores"])

            self.__logger.info("Finished init()", "Controller:init")
        except Exception:
            successful = False
            self.__logger.error("Canceled init(). An error accrued!",
                                "Controller:init")
            print(traceback.format_exc())

        return successful

    def execute(self):
        """
        Executes the execution specified in the controllers config.
        :return: successful: (Boolean) Was the execution successful??
        """
        self.__logger.info("Starting execute() ...", "Controller:execute")
        successful = True
        if self.__config["executeCreateTfRecordsFromDataset"]:
            try:
                self.__logger.info(
                    "Starting executeCreateTfRecordsFromDataset() ...",
                    "Controller:execute")
                self.__tf_record_handler.createTfRecords(
                    self.__config["datasetsToCreateTfRecords"],
                    self.__config["datasetTfRecordSplits"])
                self.__logger.info(
                    "Finished executeCreateTfRecordsFromDataset()",
                    "Controller:execute")
            except:
                successful = False
                self.__logger.error(
                    "Canceled executeCreateTfRecordsFromDataset(). An error accrued!",
                    "Controller:execute")
                print(traceback.format_exc())

        if self.__config["executeExperiments"]:
            try:
                self.__logger.info("Starting executeExperiments() ...",
                                   "Controller:execute")
                # load schedule
                experiment_schedule = self.__config_provider.get_config(
                    "experimentSchedule.json")
                self.__experiment_scheduler = ExperimentScheduler(
                    experiment_schedule)
                self.__experiment_scheduler.execute()
                self.__logger.info("Finished executeExperiments()",
                                   "Controller:execute")
            except:
                successful = False
                self.__logger.error(
                    "Canceled executeExperiments(). An error accrued!",
                    "Controller:execute")
                print(traceback.format_exc())

        return successful
コード例 #18
0
class TrainPretextModelsExperiment(IExperiment):
    """
    The experiment trains each pretext model for each given dataset and saves logs and checkpoints.
    If xFoldCrossValidation is given this will be repeated for all given cross-validations.

    :Attributes:
        __config:    (Dictionary) The config of the experiment, containing all pretext models parameters. Refer to the config
                      trainPretextModelsExperiment.json for an example.
        __logger:    (Logger) The logger for the experiment.
        __num_gpus:  (Integer) The number of GPUs to use.

    """
    def __init__(self, config):
        """
        Constructor, stores the experiment config, fetches the experiment logger and reads the number of GPUs
        from the controller config.
        :param config: (Dictionary) The config of the experiment, containing all pretext models parameters. Refer to the config
                        trainPretextModelsExperiment.json for an example.
        """
        self.__config = config

        logger_handler = SLoggerHandler()
        self.__logger = logger_handler.getLogger(LoggerNames.EXPERIMENT_C)

        controller_config = ConfigProvider().get_config("controllerConfig.json")
        self.__num_gpus = controller_config["hardware"]["numGPUs"]

    def execute(self):
        """
        Executes the experiment with the given config.

        The experiment trains each pretext model on each given dataset for which the model config defines a batch
        size, once for every entry of the models "xFoldCrossValidation".
        If the training of a model raises an error derived from Exception, the traceback is printed and the
        experiment goes on with the next model. KeyboardInterrupt and SystemExit are not swallowed.
        """
        for pretext_model_config in self.__config["pretextModelConfigs"]:
            model_name = pretext_model_config["modelName"]
            try:
                for dataset_config in self.__config["datasetConfigs"]:
                    dataset_name = dataset_config["nameOfDataset"]

                    # Only train the model if a batch size for the dataset is given.
                    if dataset_name not in pretext_model_config["batchSizes"]:
                        continue

                    # If the dataset contains different sizes we want to test, we save them in separate directories.
                    dataset_dir_name = dataset_name
                    if "trainDatasetSize" in dataset_config:
                        dataset_dir_name = dataset_dir_name + "_" + str(dataset_config["trainDatasetSize"])

                    # Train once for each xFold iteration.
                    for xFold_step in pretext_model_config["xFoldCrossValidation"]:
                        # Construct the model directory.
                        model_dir = "/" + model_name + "/" + dataset_dir_name + "/xFoldCrossVal_" + str(xFold_step)

                        # Train the model.
                        self.__logger.info(
                            "Starting to train: " + model_dir,
                            "TrainPretextModelsExperiment:execute")
                        trainAndValPretextModel(
                            pretext_model_config, model_dir, dataset_config,
                            xFold_step, pretext_model_config["xFoldType"],
                            self.__num_gpus, self.__logger)
                        self.__logger.info(
                            "Finished to train: " + model_dir,
                            "TrainPretextModelsExperiment:execute")

            except Exception:
                print(traceback.format_exc())
コード例 #19
0
class TrainTargetModelsExperiment(IExperiment):
    """
    The experiment trains each target model for each given dataset using the representation of the given
    (unsupervised) pretext models and saves logs and checkpoints.
    It trains one target model for every given checkpoint.
    If xFoldCrossValidationsToLoad is given this will be repeated for all configured cross-validations.

    :Attributes:
        __config:    (Dictionary) The config of the experiment, containing all model parameters. Refer to the config
                      trainTargetModelsExperiment.json for an example.
        __logger:    (Logger) The logger for the experiment.
        __num_gpus:  (Integer) The number of GPUs to use.

    """
    def __init__(self, config):
        """
        Constructor, initialize member variables.
        :param config: (Dictionary) The config of the experiment, containing all model parameters. Refer to the config
                        trainModelsExperiment.json as an example.
        """
        self.__config = config
        self.__logger = SLoggerHandler().getLogger(LoggerNames.EXPERIMENT_C)
        # The number of GPUs is read from the controller config, not from the experiment config.
        self.__num_gpus = ConfigProvider().get_config(
            "controllerConfig.json")["hardware"]["numGPUs"]

    def execute(self):
        """
        Executes the experiment with the given config.

        For every combination of target model, dataset, pretext model, cross-validation fold to load and
        pretext checkpoint to load, one target model is trained via trainAndValTargetModel().
        Combinations whose dataset has no entry in the "batchSizes" of the pretext model are skipped.

        If the training of a target model raises an exception, the traceback is printed and logged and the
        experiment continues with the next target model. KeyboardInterrupt and SystemExit are not caught, so
        the experiment can still be aborted by the user.
        """
        log_tag = "TrainTargetModelsExperiment:execute"

        # For each model to train and val
        for target_model_config in self.__config["targetModelConfigs"]:
            target_model_name = target_model_config["modelName"]
            try:
                for dataset_config in self.__config["datasetConfigs"]:
                    dataset_name = dataset_config["nameOfDataset"]

                    # If the dataset contains multiple labels we want to test, we save them in separate directories
                    dataset_dir_name = dataset_name
                    if "labelName" in dataset_config:
                        dataset_dir_name = dataset_dir_name + "_" + dataset_config["labelName"]

                    # If the dataset contains different sizes we want to test, we save them in separate directories.
                    # The pretext directory only depends on the dataset (and its size), not on the label.
                    pretext_dataset_dir_name = dataset_name
                    if "trainDatasetSize" in dataset_config:
                        size_suffix = "_" + str(dataset_config["trainDatasetSize"])
                        dataset_dir_name = dataset_dir_name + size_suffix
                        pretext_dataset_dir_name = pretext_dataset_dir_name + size_suffix

                    for pretext_model_config in self.__config["pretextModelConfigs"]:
                        # Only train the model if a batch size for the representation model is given
                        if dataset_name not in pretext_model_config["batchSizes"]:
                            continue

                        pretext_model_name = pretext_model_config["modelName"]

                        # A falsy multiplier (e.g. 0 or None) leaves the configured checkpoints unchanged.
                        # A missing entry raises a KeyError, which cancels the current target model.
                        checkpoint_multiplier = pretext_model_config[
                            "loadCheckpointEpochMultipliers"][dataset_name]

                        for pretext_model_xFoldToLoad in pretext_model_config["xFoldCrossValidationsToLoad"]:
                            # The pretext model is trained unsupervised in our case, therefore we can train it
                            # once on the dataset and load it for different target tasks on this dataset.
                            # This applies for shapes3D in our case.
                            pretext_model_dir = "/" + pretext_model_name + "/" + pretext_dataset_dir_name + \
                                                "/xFoldCrossVal_" + str(pretext_model_xFoldToLoad)

                            for pretext_model_checkpoint in pretext_model_config["loadCheckpoints"]:
                                if checkpoint_multiplier:
                                    pretext_model_checkpoint = pretext_model_checkpoint * checkpoint_multiplier

                                # Construct the model Name
                                target_model_dir = "/" + target_model_name + "/" + dataset_dir_name + "/" + \
                                                   pretext_model_name + "/loadedxFoldCrossVal_" + \
                                                   str(pretext_model_xFoldToLoad) + "/checkpoint_" + \
                                                   str(pretext_model_checkpoint)

                                # Train the model
                                self.__logger.info("Starting to train: " + target_model_dir, log_tag)

                                trainAndValTargetModel(
                                    target_model_config, target_model_dir,
                                    dataset_config, pretext_model_xFoldToLoad,
                                    pretext_model_config["xFoldType"],
                                    self.__num_gpus, self.__logger,
                                    pretext_model_dir,
                                    pretext_model_checkpoint,
                                    pretext_model_config)

                                self.__logger.info("Finished to train: " + target_model_dir, log_tag)

            except Exception:
                # Best effort: one failing target model must not stop the remaining ones.
                # Only Exception is caught so that Ctrl+C / sys.exit() still abort the experiment.
                error_trace = traceback.format_exc()
                self.__logger.error(
                    "Canceled training of target model: " + target_model_name + ". An error occurred!\n" +
                    error_trace, log_tag)
                print(error_trace)
コード例 #20
0
class AModelSuit(metaclass=ABCMeta):
    """
    A AModelSuit handles the train/eval/test/inference of a model. Therefore it brings the input, the model and
    the trainer, together in one place. In each AModelSuit functions for the training and validation must be defined.

    The AModelSuit provides basic functionality like session handling and model saving and defines
    interface methods for ModelSuits.

    :Attributes:
        _model:                        ("Model") The model to handle with the ModelSuit
        _logger:                       (Logger) The logger for the ModelSuit.
        _sess:                         (tf.Session) The Tensorflow session for the execution of the graph constructed
                                        by the ModelSuit
        _trainer:                      (ITrainer) The trainer to train the model.
        _batch_size:                   (Integer) The batch size for the model.
        _model_dir:                    (String) The directory of the model (e.g. to save it).
        _log_interval:                 (Integer) Every log_interval steps the ModelSuit writes logs.
        _save_summary_interval:        (Integer) Every save_summary_interval steps the ModelSuit saves
                                       Tensorboard summaries.
        _save_checkpoint_interval:     (Integer) Every _save_checkpoint_interval steps the ModelSuit saves model
                                       (training) checkpoints.
        _saver:                        (tf.train.Saver) The saver to save the model.
        _global_step:                  (Integer) The current global step of the model.
        _summary_writer:               (TensorboardSummaryWriter) The writer for the Tensorboard summaries.
        _summary_txt_writer:           (TxtSummaryWriter) The writer for the text summaries.
        _txt_function_time_stopper:    (TxtFunctionTimeStopper) The writer and stopper of function times.
    """
    def __init__(self,
                 sess,
                 model,
                 batch_size,
                 trainer,
                 model_dir="/model",
                 save_checkpoint_interval=500,
                 log_interval=100,
                 save_summary_interval=250):
        """
        Constructor, initialize member variables.

        If files matching "<model dir>/checkpoints/model-*" exist, the latest checkpoint is restored into the
        session and the global step is parsed from its file name. Otherwise an already existing model
        directory is deleted, the variables are initialized and a first checkpoint is saved.

        :param sess:  (tf.Session) The Tensorflow session for the execution of the graph constructed by the ModelSuit
        :param model: ("Model") The model to handle with the ModelSuit
        :param batch_size: (Integer) The batch size for the model.
        :param trainer: (ITrainer) The trainer to train the model.
        :param model_dir: (String) The directory of the model (e.g. to save it). "/model" by default.
        :param save_checkpoint_interval: (Integer) Every _save_checkpoint_interval steps the ModelSuit saves model
                                        (training) checkpoints. 500 by default.
        :param log_interval: (Integer) Every log_interval steps the ModelSuit writes logs. 100 by default.
        :param save_summary_interval: (Integer) Every save_summary_interval steps the ModelSuit saves Tensorboard
                                       summaries. 250 by default.
        """
        # Set model and session.
        self._sess = sess
        self._model = model

        # Setting up the Loggers
        self._logger = SLoggerHandler().getLogger(LoggerNames.EXPERIMENT_C)

        # Hooks for train, eval and predict
        self._trainer = trainer
        self._batch_size = batch_size

        # Dir to save and reload model: "experimentResults" next to the script that was started as __main__.
        self._model_dir = os.path.dirname(sys.modules['__main__'].__file__
                                          ) + "/experimentResults" + model_dir

        # Log every log_interval steps
        self._log_interval = log_interval

        # Log summary every save_summary_interval steps
        self._save_summary_interval = save_summary_interval

        # To save model and checkpoints.
        self._save_checkpoint_interval = save_checkpoint_interval
        self._saver = tf.train.Saver(max_to_keep=10)
        self._global_step = 0

        # Restore existing Model if its not half or wrong defined.
        if len(glob.glob(self._model_dir + "/checkpoints/model-*")) >= 1:
            # Restore model weights from previously saved model
            self._saver.restore(
                self._sess,
                tf.train.latest_checkpoint(self._model_dir + '/checkpoints/'))

            checkpoint = tf.train.get_checkpoint_state(self._model_dir +
                                                       '/checkpoints/')

            # Extract the global step from the checkpoint filename: the text between the first and second '-'.
            self._global_step = int(
                os.path.basename(
                    checkpoint.model_checkpoint_path).split('-')[1])

        # Create new Model.
        else:
            # If there is some half or wrong defined model stuff remove it.
            if os.path.exists(self._model_dir):
                shutil.rmtree(self._model_dir)
            os.makedirs(self._model_dir + '/checkpoints/')
            self._sess.run(tf.global_variables_initializer())
            self._saver.save(self._sess,
                             self._model_dir + '/checkpoints/model')

        # To save summary.
        self._summary_writer = TensorboardSummaryWriter(
            self._model_dir, self._sess.graph)
        self._summary_txt_writer = TxtSummaryWriter(self._model_dir)
        self._txt_function_time_stopper = TxtFunctionTimeStopper(
            self._model_dir)

    @abstractmethod
    def doTraining(self, train_steps, eval_interval,
                   only_save_best_checkpoints):
        """
        Interface Method: Trains the model with the trainer and the input of the ModelSuit.
        :param train_steps: (Integer) The steps to train the model.
        :param eval_interval: (Integer) Every eval_interval steps the Model will be evaluated.
        :param only_save_best_checkpoints: (Boolean) If true only the best Model checkpoints on the evaluation set will
                                            be saved.
        """
        pass

    @abstractmethod
    def doValidation(self, mode):
        """
        Interface Method: Validates the model on the subset of the dataset defined by the mode.
        :param mode: (String) The subset of the dataset ("train", "eval" or "test").
        """
        pass

    def doDatesetValidation(self):
        """
        Validates the model on the entire dataset, i.e. on the "train", "eval" and "test" subsets in this order.
        """
        self.doValidation("train")
        self.doValidation("eval")
        self.doValidation("test")

    def closeSession(self):
        """
        Closes the Tensorflow session and resets the default graph.
        """
        self._sess.close()
        tf.reset_default_graph()

    @staticmethod
    def _countShapeParams(shape, on_dimension=None):
        """
        Helper: Calculates the number of parameters of one variable, i.e. the product of all its dimensions.
        :param shape: (Iterable) The dimensions of the variable. Each dimension must provide its size via ".value".
        :param on_dimension: (Callable) Optional. Called with (dim, parameters_so_far) after each dimension,
                              e.g. to log the progress. None by default.
        :return: num_parameters: (Integer) The product of all dimension sizes. 1 for an empty (scalar) shape.
        """
        num_parameters = 1
        for dim in shape:
            # NOTE(review): assumes every dimension is known (dim.value is not None) - confirm for all variables.
            num_parameters *= dim.value
            if on_dimension is not None:
                on_dimension(dim, num_parameters)
        return num_parameters

    def calcNumTrainableParams(self):
        """
        Calculates and logs the number of trainable parameters in the model.
        :return: total_parameters: (Integer) The number of trainable parameters of all trainable variables.
        """
        log_tag = "AModelSuit:calcNumTrainableParams"

        def logDimension(dim, parameters_so_far):
            self._logger.debug(
                "For Dimension: " + str(dim) + " found: " +
                str(dim.value) + " parameters. Total for shape so far: " +
                str(parameters_so_far), log_tag)

        total_parameters = 0
        self._logger.debug("Calculating trainable parameters ...", log_tag)
        for variable in tf.trainable_variables():
            shape = variable.get_shape()
            self._logger.debug(
                "For Variable: " + str(variable) + " with Shape: " +
                str(shape) + " with length: " + str(len(shape)), log_tag)

            # Add the finished product exactly once per variable. Adding it after every dimension would
            # also count the partial products (e.g. 3 + 12 instead of 12 for a shape of [3, 4]).
            total_parameters += self._countShapeParams(shape, logDimension)

            self._logger.debug(
                "Total number of trainable parameters in model so far: " +
                str(total_parameters), log_tag)
        self._logger.debug(
            "Total number of trainable parameters in model: " +
            str(total_parameters), log_tag)
        return total_parameters
コード例 #21
0
    def __init__(self,
                 sess,
                 model,
                 batch_size,
                 trainer,
                 model_dir="/model",
                 save_checkpoint_interval=500,
                 log_interval=100,
                 save_summary_interval=250):
        """
        Constructor, stores the given parts and either restores or freshly creates the model on disk.

        :param sess:  (tf.Session) The Tensorflow session for the execution of the graph constructed by the ModelSuit
        :param model: ("Model") The model to handle with the ModelSuit
        :param batch_size: (Integer) The batch size for the model.
        :param trainer: (ITrainer) The trainer to train the model.
        :param model_dir: (String) The directory of the model, appended to "<dir of main script>/experimentResults".
                           "/model" by default.
        :param save_checkpoint_interval: (Integer) Every _save_checkpoint_interval steps the ModelSuit saves model
                                        (training) checkpoints. 500 by default.
        :param log_interval: (Integer) Every log_interval steps the ModelSuit writes logs. 100 by default.
        :param save_summary_interval: (Integer) Every save_summary_interval steps the ModelSuit saves Tensorboard
                                       summaries. 250 by default.
        """
        # Session and model the suit works on.
        self._sess = sess
        self._model = model

        # Logger of the experiment.
        self._logger = SLoggerHandler().getLogger(LoggerNames.EXPERIMENT_C)

        # Trainer and batch size used for train, eval and predict.
        self._trainer = trainer
        self._batch_size = batch_size

        # All results are stored relative to the directory of the script started as __main__.
        main_script_dir = os.path.dirname(sys.modules['__main__'].__file__)
        self._model_dir = main_script_dir + "/experimentResults" + model_dir
        checkpoint_dir = self._model_dir + '/checkpoints/'

        # Intervals (in steps) for logging, summaries and checkpoints.
        self._log_interval = log_interval
        self._save_summary_interval = save_summary_interval
        self._save_checkpoint_interval = save_checkpoint_interval

        # Saver for the checkpoints, the global step starts at 0 for a new model.
        self._saver = tf.train.Saver(max_to_keep=10)
        self._global_step = 0

        if glob.glob(checkpoint_dir + "model-*"):
            # At least one step checkpoint exists: load the weights of the latest one.
            self._saver.restore(self._sess,
                                tf.train.latest_checkpoint(checkpoint_dir))

            # The global step is the text between the first and second '-' of the checkpoint file name.
            checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)
            checkpoint_name = os.path.basename(
                checkpoint.model_checkpoint_path)
            self._global_step = int(checkpoint_name.split('-')[1])
        else:
            # No usable checkpoint: drop leftovers of a half or wrong defined model and start from scratch.
            if os.path.exists(self._model_dir):
                shutil.rmtree(self._model_dir)
            os.makedirs(checkpoint_dir)
            self._sess.run(tf.global_variables_initializer())
            self._saver.save(self._sess, checkpoint_dir + 'model')

        # Writers for the Tensorboard summaries, the text summaries and the function times.
        self._summary_writer = TensorboardSummaryWriter(
            self._model_dir, self._sess.graph)
        self._summary_txt_writer = TxtSummaryWriter(self._model_dir)
        self._txt_function_time_stopper = TxtFunctionTimeStopper(
            self._model_dir)