Example #1
    def initialize_statistics_collection(self):
        """
        - Initializes all ``StatisticsCollectors`` and ``StatisticsAggregators`` used by a given worker: \

            - For training statistics (adds the statistics of the model & task),
            - For validation statistics (adds the statistics of the model & task).

        - Creates the output files (csv).

        """
        # TRAINING.
        # Create statistics collector for training.
        self.training_stat_col = StatisticsCollector()
        self.add_statistics(self.training_stat_col)
        self.training.task.add_statistics(self.training_stat_col)
        self.pipeline.add_statistics(self.training_stat_col)
        # Create the csv file to store the training statistics.
        self.training_batch_stats_file = self.training_stat_col.initialize_csv_file(
            self.app_state.log_dir, 'training_statistics.csv')

        # Create statistics aggregator for training.
        self.training_stat_agg = StatisticsAggregator()
        self.add_aggregators(self.training_stat_agg)
        self.training.task.add_aggregators(self.training_stat_agg)
        self.pipeline.add_aggregators(self.training_stat_agg)
        # Create the csv file to store the training statistic aggregations.
        self.training_set_stats_file = self.training_stat_agg.initialize_csv_file(
            self.app_state.log_dir, 'training_set_agg_statistics.csv')

        # VALIDATION.
        # Create statistics collector for validation.
        self.validation_stat_col = StatisticsCollector()
        self.add_statistics(self.validation_stat_col)
        self.validation.task.add_statistics(self.validation_stat_col)
        self.pipeline.add_statistics(self.validation_stat_col)
        # Create the csv file to store the validation statistics.
        self.validation_batch_stats_file = self.validation_stat_col.initialize_csv_file(
            self.app_state.log_dir, 'validation_statistics.csv')

        # Create statistics aggregator for validation.
        self.validation_stat_agg = StatisticsAggregator()
        self.add_aggregators(self.validation_stat_agg)
        self.validation.task.add_aggregators(self.validation_stat_agg)
        self.pipeline.add_aggregators(self.validation_stat_agg)
        # Create the csv file to store the validation statistic aggregations.
        self.validation_set_stats_file = self.validation_stat_agg.initialize_csv_file(
            self.app_state.log_dir, 'validation_set_agg_statistics.csv')
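The collector/aggregator pairs created above are then driven per batch and per set. Below is a minimal sketch of that flow, assuming the helper methods (``collect_all_statistics``, ``aggregate_all_statistics``, ``export_all_statistics``) behave as in the Trainer/Processor examples later in this listing; it is an illustration, not part of the class.

        # Sketch: per-batch collection followed by per-set aggregation.
        for batch in self.training.dataloader:
            self.pipeline.forward(batch)
            # Collect per-batch statistics; they land in training_statistics.csv.
            self.collect_all_statistics(self.training, self.pipeline, batch,
                                        self.training_stat_col)
            self.training_stat_col.export_to_csv()

        # Aggregate over the whole set and write a single row to
        # training_set_agg_statistics.csv.
        self.aggregate_all_statistics(self.training, self.pipeline,
                                      self.training_stat_col, self.training_stat_agg)
        self.export_all_statistics(self.training_stat_agg, '[Full Training]')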
Example #2
    def test_aggregator_string(self):
        """ Tests whether the collector is aggregating and producing the right string. """

        stat_col = StatisticsCollector()
        stat_agg = StatisticsAggregator()

        # Add default statistics with formatting.
        stat_col.add_statistics('loss', '{:12.10f}')
        stat_col.add_statistics('episode', '{:06d}')
        stat_col.add_statistics('batch_size', None)

        # create some random values
        loss_values = random.sample(range(100), 100)
        # "Collect" basic statistics.
        for episode, loss in enumerate(loss_values):
            stat_col['episode'] = episode
            stat_col['loss'] = loss
            stat_col['batch_size'] = 1
            # print(stat_col.export_statistics_to_string())

        # Empty before aggregation.
        self.assertEqual(stat_agg.export_to_string(), " ")

        # Add an aggregator with formatting, then compute the aggregated value.
        stat_agg.add_aggregator('acc_mean', '{:2.5f}')
        collected_loss_values = stat_col['loss']
        batch_sizes = stat_col['batch_size']
        stat_agg['acc_mean'] = np.mean(collected_loss_values) / np.sum(batch_sizes)

        # Aggregated result.
        self.assertEqual(stat_agg.export_to_string('[Epoch 1]'), "acc_mean 0.49500 [Epoch 1]")


#if __name__ == "__main__":
#    unittest.main()
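For reference, the value asserted above follows directly from the collected data: ``random.sample(range(100), 100)`` is a permutation of 0..99, whose mean is 49.5; the 100 collected batch sizes sum to 100, so the aggregate is 49.5 / 100 = 0.495, which '{:2.5f}' renders as "0.49500". A standalone check:

import numpy as np

# Sanity check of the aggregate expected by the assertion above.
losses = list(range(100))    # same multiset of values as the random permutation
batch_sizes = [1] * 100
assert np.mean(losses) / np.sum(batch_sizes) == 0.495
assert '{:2.5f}'.format(0.495) == '0.49500'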
Example #3
    def initialize_statistics_collection(self):
        """
        Function initializes all statistics collectors and aggregators used by a given worker,
        creates output files etc.
        """
        # Create statistics collector.
        self.stat_col = StatisticsCollector()
        self.add_statistics(self.stat_col)
        self.pm.task.add_statistics(self.stat_col)
        self.pipeline.add_statistics(self.stat_col)
        # Create the csv file to store the statistics.
        self.pm_batch_stats_file = self.stat_col.initialize_csv_file(
            self.app_state.log_dir, self.tsn + '_statistics.csv')

        # Create statistics aggregator.
        self.stat_agg = StatisticsAggregator()
        self.add_aggregators(self.stat_agg)
        self.pm.task.add_aggregators(self.stat_agg)
        self.pipeline.add_aggregators(self.stat_agg)
        # Create the csv file to store the statistic aggregations.
        # Will contain a single row with aggregated statistics.
        self.pm_set_stats_file = self.stat_agg.initialize_csv_file(
            self.app_state.log_dir, self.tsn + '_set_agg_statistics.csv')
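After a run, the two csv files can be inspected directly; a minimal sketch using pandas, with a hypothetical log directory (the real one is timestamped under ``app_state.log_dir``) and assuming the default section name ``test`` and one column per registered statistic:

import pandas as pd

# Hypothetical paths; substitute the actual experiment directory.
log_dir = 'experiments/my_run/'
# Per-episode statistics: one row per batch.
batch_stats = pd.read_csv(log_dir + 'test_statistics.csv')
# Aggregated statistics: a single row for the whole set.
set_stats = pd.read_csv(log_dir + 'test_set_agg_statistics.csv')
print(batch_stats.tail())
print(set_stats)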
Example #4
class Processor(Worker):
    """
    Defines the basic ``Processor``.

    If defining another type of Processor, it should subclass it.

    """
    def __init__(self):
        """
        Calls the ``Worker`` constructor, adds some additional arguments to parser.
        """
        # Call base constructor to set up app state, registry and add default params.
        super(Processor, self).__init__("Processor", Processor)

        self.parser.add_argument(
            '--section',
            dest='section_name',
            type=str,
            default="test",
            help=
            'Name of the section defining the specific set to be processed (DEFAULT: test)'
        )

    def setup_global_experiment(self):
        """
        Sets up the global test experiment for the ``Processor``:

            - Checks that the model to use exists

            - Checks that the configuration file exists

            - Creates the configuration

        The rest of the experiment setup is done in :py:func:`setup_individual_experiment()` \
        to allow support for multiple tests.

        """
        # Call base method to parse all command line arguments and add default sections.
        super(Processor, self).setup_experiment()

        # "Pass" configuration parameters from the default_test section to section indicated by the section_name.
        self.config.add_default_params({
            self.app_state.args.section_name:
            self.config['default_test'].to_dict()
        })
        self.config.del_default_params('default_test')

        # Retrieve checkpoint file.
        chkpt_file = self.app_state.args.load_checkpoint

        # Check the presence of the CUDA-compatible devices.
        if self.app_state.args.use_gpu and (torch.cuda.device_count() == 0):
            self.logger.error(
                "Cannot use GPU as there are no CUDA-compatible devices present in the system!"
            )
            exit(-1)

        # Config that will be used.
        abs_root_configs = None

        # Check if checkpoint file was indicated.
        if chkpt_file != "":
            #print('Please pass path to and name of the file containing pipeline to be loaded as --load parameter')
            #exit(-2)

            # Check if file with model exists.
            if not path.isfile(chkpt_file):
                print('Checkpoint file {} does not exist'.format(chkpt_file))
                exit(-3)

            # Extract path.
            self.abs_path, _ = path.split(
                path.dirname(path.expanduser(chkpt_file)))

            # Use the "default" config.
            abs_root_configs = [
                path.join(self.abs_path, 'training_configuration.yml')
            ]

        # Check if config file was indicated by the user.
        if self.app_state.args.config != '':
            # Split and make them absolute.
            root_configs = self.app_state.args.config.replace(" ", "").split(',')
            # If there are - expand them to absolute paths.
            abs_root_configs = [
                path.expanduser(config) for config in root_configs
            ]

            # Using name of the first configuration file from command line.
            basename = path.basename(root_configs[0])
            # Take config filename without extension.
            pipeline_name = path.splitext(basename)[0]

            # Use path to experiments + pipeline.
            self.abs_path = path.join(
                path.expanduser(self.app_state.args.expdir), pipeline_name)

        if abs_root_configs is None:
            print(
                'Please indicate configuration file to be used (--config) and/or pass path to and name of the file containing pipeline to be loaded (--load)'
            )
            exit(-2)

        # Get the list of configurations which need to be loaded.
        configs_to_load = config_parsing.recurrent_config_parse(
            abs_root_configs, [], self.app_state.absolute_config_path)

        # Read the YAML files one by one - but in reverse order -> overwrite the first indicated config(s)
        config_parsing.reverse_order_config_load(self.config, configs_to_load)

        # -> At this point, the Config Registry contains the configuration loaded (and overwritten) from several files.

    def setup_individual_experiment(self):
        """
        Sets up an individual test experiment (when there are multiple tests) or the main \
        experiment (when there is a single test).

        - Sets up the log directory path

        - Sets random seeds

        - Creates the pipeline consisting of many components

        - Creates the testing task manager

        - Performs testing of compatibility of the testing pipeline

        """

        # Get test section.
        try:
            self.tsn = self.app_state.args.section_name
            self.config_test = self.config[self.tsn]
            if self.config_test is None:
                raise KeyError()
        except KeyError:
            print(
                "Error: Couldn't retrieve the section '{}' from the loaded configuration"
                .format(self.tsn))
            exit(-1)

        # Get testing task type.
        try:
            _ = self.config_test['task']['type']
        except KeyError:
            print(
                "Error: Couldn't retrieve the task 'type' from the '{}' section in the loaded configuration"
                .format(self.tsn))
            exit(-5)

        # Get pipeline section.
        try:
            psn = self.app_state.args.pipeline_section_name
            self.config_pipeline = self.config[psn]
            if self.config_pipeline is None:
                raise KeyError()
        except KeyError:
            print(
                "Error: Couldn't retrieve the pipeline section '{}' from the loaded configuration"
                .format(psn))
            exit(-1)

        # Get pipeline name.
        try:
            pipeline_name = self.config_pipeline['name']
        except KeyError:
            print(
                "Error: Couldn't retrieve the pipeline 'name' from the loaded configuration"
            )
            exit(-6)

        # Prepare output paths for logging
        while True:
            # Dirty fix: if log_dir already exists, wait for 1 second and try again
            try:
                time_str = self.tsn + '_{0:%Y%m%d_%H%M%S}'.format(
                    datetime.now())
                if self.app_state.args.exptag != '':
                    time_str = time_str + "_" + self.app_state.args.exptag
                self.app_state.log_dir = self.abs_path + '/' + time_str + '/'
                # Lowercase dir.
                self.app_state.log_dir = self.app_state.log_dir.lower()
                makedirs(self.app_state.log_dir, exist_ok=False)
            except FileExistsError:
                sleep(1)
            else:
                break

        # Set log dir.
        self.app_state.log_file = self.app_state.log_dir + 'processor.log'
        # Initialize logger in app state.
        self.app_state.logger = logging.initialize_logger("AppState")
        # Add handlers for the logfile to worker logger.
        logging.add_file_handler_to_logger(self.logger)
        self.logger.info("Logger directory set to: {}".format(
            self.app_state.log_dir))

        # Set cpu/gpu types.
        self.app_state.set_types()

        # Set random seeds in the testing section.
        self.set_random_seeds(self.tsn, self.config_test)

        # Total number of detected errors.
        errors = 0

        ################# TESTING PROBLEM #################

        # Build the used task manager.
        self.pm = TaskManager(self.tsn, self.config_test)
        errors += self.pm.build()

        # Check if the maximum number of episodes is specified; if not, set a
        # default equal to the size of the dataset (in batches), so that by
        # default we loop over the test set once.
        task_size_in_episodes = len(self.pm)

        if self.config_test["terminal_conditions"]["episode_limit"] == -1:
            # Overwrite the config value!
            self.config_test['terminal_conditions'].add_config_params(
                {'episode_limit': task_size_in_episodes})

        # Warn if indicated number of episodes is larger than an epoch size:
        if self.config_test["terminal_conditions"][
                "episode_limit"] > task_size_in_episodes:
            self.logger.warning(
                'Indicated episode limit is larger than one epoch, reducing it.'
            )
            # Overwrite the config value!
            self.config_test['terminal_conditions'].add_config_params(
                {'episode_limit': task_size_in_episodes})

        self.logger.info("Limiting the number of episodes to: {}".format(
            self.config_test["terminal_conditions"]["episode_limit"]))

        ###################### PIPELINE ######################

        # Build the pipeline using the loaded configuration and global variables.
        self.pipeline = PipelineManager(pipeline_name, self.config_pipeline)
        errors += self.pipeline.build()

        # Show pipeline.
        summary_str = self.pipeline.summarize_all_components_header()
        summary_str += self.pm.task.summarize_io(self.tsn)
        summary_str += self.pipeline.summarize_all_components()
        self.logger.info(summary_str)

        # Check errors.
        if errors > 0:
            self.logger.error(
                'Found {} errors, terminating execution'.format(errors))
            exit(-7)

        # Handshake definitions.
        self.logger.info("Handshaking testing pipeline")
        defs_testing = self.pm.task.output_data_definitions()
        errors += self.pipeline.handshake(defs_testing)

        # Check errors.
        if errors > 0:
            self.logger.error(
                'Found {} errors, terminating execution'.format(errors))
            exit(-2)

        # Check if there are any models in the pipeline.
        if len(self.pipeline.models) == 0:
            self.logger.error(
                'Cannot proceed with training, as there are no trainable models in the pipeline'
            )
            exit(-3)

        # Load the pretrained models params from checkpoint.
        try:
            # Check command line arguments, then check load option in config.
            if self.app_state.args.load_checkpoint != "":
                pipeline_name = self.app_state.args.load_checkpoint
                msg = "command line (--load)"
            elif "load" in self.config_pipeline:
                pipeline_name = self.config_pipeline['load']
                msg = "'pipeline' section of the configuration file"
            else:
                pipeline_name = ""
            # Try to load the whole pipeline.
            if pipeline_name != "":
                if path.isfile(pipeline_name):
                    # Load parameters from checkpoint.
                    self.pipeline.load(pipeline_name)
                else:
                    raise Exception(
                        "Couldn't load the checkpoint {} indicated in the {}: file does not exist"
                        .format(pipeline_name, msg))
                # If we succeeded, we do not want to load the models from the file anymore!
            else:
                # Try to load the models parameters - one by one, if set so in the configuration file.
                self.pipeline.load_models()

        except KeyError:
            self.logger.error(
                "File {} indicated in the {} seems not to be a valid model checkpoint"
                .format(pipeline_name, msg))
            exit(-5)
        except Exception as e:
            self.logger.error(e)
            # Exit by following the logic: if user wanted to load the model but failed, then continuing the experiment makes no sense.
            exit(-6)

        # Log the model summaries.
        summary_str = self.pipeline.summarize_models_header()
        summary_str += self.pipeline.summarize_models()
        self.logger.info(summary_str)

        # Move the models in the pipeline to GPU.
        if self.app_state.args.use_gpu:
            self.pipeline.cuda()

        # Turn on evaluation mode.
        self.pipeline.eval()

        # Export and log configuration, optionally asking the user for confirmation.
        config_parsing.display_parsing_results(self.logger,
                                               self.app_state.args,
                                               self.unparsed)
        config_parsing.display_globals(self.logger,
                                       self.app_state.globalitems())
        config_parsing.export_experiment_configuration_to_yml(
            self.logger, self.app_state.log_dir, "training_configuration.yml",
            self.config, self.app_state.args.confirm)

    def initialize_statistics_collection(self):
        """
        Function initializes all statistics collectors and aggregators used by a given worker,
        creates output files etc.
        """
        # Create statistics collector.
        self.stat_col = StatisticsCollector()
        self.add_statistics(self.stat_col)
        self.pm.task.add_statistics(self.stat_col)
        self.pipeline.add_statistics(self.stat_col)
        # Create the csv file to store the statistics.
        self.pm_batch_stats_file = self.stat_col.initialize_csv_file(
            self.app_state.log_dir, self.tsn + '_statistics.csv')

        # Create statistics aggregator.
        self.stat_agg = StatisticsAggregator()
        self.add_aggregators(self.stat_agg)
        self.pm.task.add_aggregators(self.stat_agg)
        self.pipeline.add_aggregators(self.stat_agg)
        # Create the csv file to store the statistic aggregations.
        # Will contain a single row with aggregated statistics.
        self.pm_set_stats_file = self.stat_agg.initialize_csv_file(
            self.app_state.log_dir, self.tsn + '_set_agg_statistics.csv')

    def finalize_statistics_collection(self):
        """
        Finalizes statistics collection, closes all files etc.
        """
        # Close all files.
        self.pm_batch_stats_file.close()
        self.pm_set_stats_file.close()

    def run_experiment(self):
        """
        Main function of the ``Processor``: Test the loaded model over the set.

        Iterates over the ``DataLoader`` for a maximum number of episodes equal to the set size.

        The function does the following for each episode:

            - Performs a forward pass of the model,
            - Logs statistics & accumulates loss,
            - Activates visualization if set.

        """
        # Initialize tensorboard and statistics collection.
        self.initialize_statistics_collection()

        num_samples = len(self.pm)

        self.logger.info(
            'Processing the entire set ({} samples in {} episodes)'.format(
                num_samples, len(self.pm.dataloader)))

        try:
            # Run in no_grad mode.
            with torch.no_grad():
                # Reset the counter.
                self.app_state.episode = -1

                # Inform the task manager that epoch has started.
                self.pm.initialize_epoch()

                for batch in self.pm.dataloader:
                    # Increment counter.
                    self.app_state.episode += 1
                    # Terminal condition 0: max test episodes reached.
                    if self.app_state.episode == self.config_test[
                            "terminal_conditions"]["episode_limit"]:
                        break

                    # Forward pass.
                    self.pipeline.forward(batch)
                    # Collect the statistics.
                    self.collect_all_statistics(self.pm, self.pipeline, batch,
                                                self.stat_col)

                    # Export to csv - at every step.
                    self.stat_col.export_to_csv()

                    # Log to logger - at logging frequency.
                    if self.app_state.episode % self.app_state.args.logging_interval == 0:
                        self.logger.info(
                            self.stat_col.export_to_string('[Partial]'))


                # End for.
                # Inform the task managers that the epoch has ended.
                self.pm.finalize_epoch()

                self.logger.info('\n' + '=' * 80)
                self.logger.info('Processing finished')

                # Aggregate statistics for the whole set.
                self.aggregate_all_statistics(self.pm, self.pipeline,
                                              self.stat_col, self.stat_agg)

                # Export aggregated statistics.
                self.export_all_statistics(self.stat_agg, '[Full Set]')

        except SystemExit as e:
            # The experiment did not end properly.
            self.logger.error('Experiment interrupted because {}'.format(e))
        except KeyboardInterrupt:
            # The experiment did not end properly.
            self.logger.error('Experiment interrupted!')
        finally:
            # Finalize statistics collection.
            self.finalize_statistics_collection()
            self.logger.info("Experiment logged to: {}".format(
                self.app_state.log_dir))
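A hedged sketch of how the ``Processor`` is typically driven end to end; the call order follows the methods defined above, while the ``main`` wrapper itself is an assumption, not part of this excerpt:

def main():
    processor = Processor()
    # Parse arguments, load and merge the configuration file(s).
    processor.setup_global_experiment()
    # Create the log dir, task manager and pipeline; load the checkpoint.
    processor.setup_individual_experiment()
    # Iterate over the set, collecting and aggregating statistics.
    processor.run_experiment()

if __name__ == '__main__':
    main()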
Example #5
class Trainer(Worker):
    """
    Base class for the trainers.

    Iterates over epochs on the dataset.

    All other types of trainers (e.g. ``OnlineTrainer`` & ``OfflineTrainer``) should subclass it.

    """
    def __init__(self, name, class_type):
        """
        Base constructor for all trainers:

            - Adds default trainer command line arguments

        :param name: Name of the worker
        :type name: str

        :param class_type: Class type of the component.

        """
        # Call base constructor to set up app state, registry and add default arguments.
        super(Trainer, self).__init__(name, class_type)

        # Add arguments to the specific parser.
        # These arguments will be shared by all basic trainers.
        self.parser.add_argument(
            '--tensorboard',
            action='store',
            dest='tensorboard',
            choices=[0, 1, 2],
            type=int,
            help=
            "If present, enable logging to TensorBoard. Available log levels:\n"
            "0: Log the collected statistics.\n"
            "1: Add the histograms of the model's biases & weights (Warning: Slow).\n"
            "2: Add the histograms of the model's biases & weights gradients "
            "(Warning: Even slower).")

        self.parser.add_argument(
            '--saveall',
            dest='save_intermediate',
            action='store_true',
            help=
            'Setting to true results in saving intermediate models during training (DEFAULT: False)'
        )

        self.parser.add_argument(
            '--training',
            dest='training_section_name',
            type=str,
            default="training",
            help=
            'Name of the section defining the training procedure (DEFAULT: training)'
        )

        self.parser.add_argument(
            '--validation',
            dest='validation_section_name',
            type=str,
            default="validation",
            help=
            'Name of the section defining the validation procedure (DEFAULT: validation)'
        )

    def setup_experiment(self):
        """
        Sets up the experiment for all trainers:

        - Calls the base class ``setup_experiment`` to parse the command line arguments,

        - Loads the config file(s)

        - Sets up the log directory path

        - Adds a ``FileHandler`` to the logger

        - Sets random seeds

        - Creates the pipeline consisting of many components

        - Creates the training task manager

        - Handles curriculum learning if indicated

        - Creates the validation task manager

        - Sets the optimizer

        - Performs testing of compatibility of both training and validation tasks and the created pipeline.

        """
        # Call base method to parse all command line arguments and add default sections.
        super(Trainer, self).setup_experiment()

        # "Pass" configuration parameters from the "default_training" section to training section indicated by the section_name.
        self.config.add_default_params({
            self.app_state.args.training_section_name:
            self.config['default_training'].to_dict()
        })
        self.config.del_default_params('default_training')

        # "Pass" configuration parameters from the "default_validation" section to validation section indicated by the section_name.
        self.config.add_default_params({
            self.app_state.args.validation_section_name:
            self.config['default_validation'].to_dict()
        })
        self.config.del_default_params('default_validation')

        # Check the presence of the CUDA-compatible devices.
        if self.app_state.args.use_gpu and (torch.cuda.device_count() == 0):
            self.logger.error(
                "Cannot use GPU as there are no CUDA-compatible devices present in the system!"
            )
            exit(-1)

        # Check if config file was selected.
        if self.app_state.args.config == '':
            print('Please pass configuration file(s) as --c parameter')
            exit(-2)

        # Split and make them absolute.
        root_configs = self.app_state.args.config.replace(" ", "").split(',')
        # If there are - expand them to absolute paths.
        abs_root_configs = [path.expanduser(config) for config in root_configs]

        # Get the list of configurations which need to be loaded.
        configs_to_load = config_parse.recurrent_config_parse(
            abs_root_configs, [], self.app_state.absolute_config_path)

        # Read the YAML files one by one - but in reverse order -> overwrite the first indicated config(s)
        config_parse.reverse_order_config_load(self.config, configs_to_load)

        # -> At this point, the Param Registry contains the configuration loaded (and overwritten) from several files.
        # Log the resulting training configuration.
        conf_str = 'Loaded (initial) configuration:\n'
        conf_str += '=' * 80 + '\n'
        conf_str += yaml.safe_dump(self.config.to_dict(),
                                   default_flow_style=False)
        conf_str += '=' * 80 + '\n'
        print(conf_str)

        # Get training section.
        try:
            tsn = self.app_state.args.training_section_name
            self.config_training = self.config[tsn]
            # We must additionally check if it is None - weird behaviour when using the default value.
            if self.config_training is None:
                raise KeyError()
        except KeyError:
            print(
                "Error: Couldn't retrieve the training section '{}' from the loaded configuration"
                .format(tsn))
            exit(-1)

        # Get training task type.
        try:
            training_task_type = self.config_training['task']['type']
        except KeyError:
            print(
                "Error: Couldn't retrieve the task 'type' from the training section '{}' in the loaded configuration"
                .format(tsn))
            exit(-1)

        # Get validation section.
        try:
            vsn = self.app_state.args.validation_section_name
            self.config_validation = self.config[vsn]
            if self.config_validation is None:
                raise KeyError()
        except KeyError:
            print(
                "Error: Couldn't retrieve the validation section '{}' from the loaded configuration"
                .format(vsn))
            exit(-1)

        # Get validation task type.
        try:
            _ = self.config_validation['task']['type']
        except KeyError:
            print(
                "Error: Couldn't retrieve the task 'type' from the validation section '{}' in the loaded configuration"
                .format(vsn))
            exit(-1)

        # Get pipeline section.
        try:
            psn = self.app_state.args.pipeline_section_name
            self.config_pipeline = self.config[psn]
            if self.config_pipeline is None:
                raise KeyError()
        except KeyError:
            print(
                "Error: Couldn't retrieve the pipeline section '{}' from the loaded configuration"
                .format(psn))
            exit(-1)

        # Get pipeline name.
        try:
            pipeline_name = self.config_pipeline['name']
        except KeyError:
            # Using name of the first configuration file from command line.
            basename = path.basename(root_configs[0])
            # Take config filename without extension.
            pipeline_name = path.splitext(basename)[0]
            # Set pipeline name, so processor can use it afterwards.
            self.config_pipeline.add_config_params({'name': pipeline_name})

        # Prepare the output path for logging
        while True:  # Dirty fix: if log_dir already exists, wait for 1 second and try again
            try:
                time_str = '{0:%Y%m%d_%H%M%S}'.format(datetime.now())
                if self.app_state.args.exptag != '':
                    time_str = time_str + "_" + self.app_state.args.exptag
                self.app_state.log_dir = path.expanduser(
                    self.app_state.args.expdir
                ) + '/' + training_task_type + '/' + pipeline_name + '/' + time_str + '/'
                # Lowercase dir.
                self.app_state.log_dir = self.app_state.log_dir.lower()
                makedirs(self.app_state.log_dir, exist_ok=False)
            except FileExistsError:
                sleep(1)
            else:
                break

        # Set log dir.
        self.app_state.log_file = self.app_state.log_dir + 'trainer.log'
        # Initialize logger in app state.
        self.app_state.logger = logging.initialize_logger("AppState")
        # Add handlers for the logfile to worker logger.
        logging.add_file_handler_to_logger(self.logger)
        self.logger.info("Logger directory set to: {}".format(
            self.app_state.log_dir))

        # Set cpu/gpu types.
        self.app_state.set_types()

        # Models dir.
        self.checkpoint_dir = self.app_state.log_dir + 'checkpoints/'
        makedirs(self.checkpoint_dir, exist_ok=False)

        # Set random seeds in the training section.
        self.set_random_seeds('training', self.config_training)

        # Total number of detected errors.
        errors = 0

        ################# TRAINING PROBLEM #################

        # Build training task manager.
        self.training = TaskManager('training', self.config_training)
        errors += self.training.build()

        # parse the curriculum learning section in the loaded configuration.
        if 'curriculum_learning' in self.config_training:

            # Initialize curriculum learning - with values from loaded configuration.
            self.training.task.curriculum_learning_initialize(
                self.config_training['curriculum_learning'])

            # If the 'must_finish' key is not present in the config, it defaults to True (the curriculum must be finished).
            self.config_training['curriculum_learning'].add_default_params(
                {'must_finish': True})

            self.must_finish_curriculum = self.config_training[
                'curriculum_learning']['must_finish']
            self.logger.info("Curriculum Learning activated")

        else:
            # If not using curriculum learning then it does not have to be finished.
            self.must_finish_curriculum = False
            self.curric_done = True

        ################# VALIDATION PROBLEM #################

        # Build validation task manager.
        self.validation = TaskManager('validation', self.config_validation)
        errors += self.validation.build()

        ###################### PIPELINE ######################

        # Build the pipeline using the loaded configuration.
        self.pipeline = PipelineManager(pipeline_name, self.config_pipeline)
        errors += self.pipeline.build()

        # Check errors.
        if errors > 0:
            self.logger.error(
                'Found {} errors, terminating execution'.format(errors))
            exit(-2)

        # Show pipeline.
        summary_str = self.pipeline.summarize_all_components_header()
        summary_str += self.training.task.summarize_io("training")
        summary_str += self.validation.task.summarize_io("validation")
        summary_str += self.pipeline.summarize_all_components()
        self.logger.info(summary_str)

        # Handshake definitions.
        self.logger.info("Handshaking training pipeline")
        defs_training = self.training.task.output_data_definitions()
        errors += self.pipeline.handshake(defs_training)

        self.logger.info("Handshaking validation pipeline")
        defs_valid = self.validation.task.output_data_definitions()
        errors += self.pipeline.handshake(defs_valid)

        # Check errors.
        if errors > 0:
            self.logger.error(
                'Found {} errors, terminating execution'.format(errors))
            exit(-2)

        ################## MODEL LOAD/FREEZE #################

        # Load the pretrained models params from checkpoint.
        try:
            # Check command line arguments, then check load option in config.
            if self.app_state.args.load_checkpoint != "":
                pipeline_name = self.app_state.args.load_checkpoint
                msg = "command line (--load)"
            elif "load" in self.config_pipeline:
                pipeline_name = self.config_pipeline['load']
                msg = "'pipeline' section of the configuration file"
            else:
                pipeline_name = ""
            # Try to load the model.
            if pipeline_name != "":
                if path.isfile(pipeline_name):
                    # Load parameters from checkpoint.
                    self.pipeline.load(pipeline_name)
                else:
                    raise Exception(
                        "Couldn't load the checkpoint {} indicated in the {}: file does not exist"
                        .format(pipeline_name, msg))
                # If we succeeded, we do not want to load the models from the file anymore!
            else:
                # Try to load the models parameters - one by one, if set so in the configuration file.
                self.pipeline.load_models()

        except KeyError:
            self.logger.error(
                "File {} indicated in the {} seems not to be a valid model checkpoint"
                .format(pipeline_name, msg))
            exit(-5)
        except Exception as e:
            self.logger.error(e)
            # Exit by following the logic: if user wanted to load the model but failed, then continuing the experiment makes no sense.
            exit(-6)

        # Finally, freeze the models (that the user wants to freeze).
        self.pipeline.freeze_models()

        # Log the model summaries.
        summary_str = self.pipeline.summarize_models_header()
        summary_str += self.pipeline.summarize_models()
        self.logger.info(summary_str)

        # Move the models in the pipeline to GPU.
        if self.app_state.args.use_gpu:
            self.pipeline.cuda()

        ################# OPTIMIZER #################

        # Set the optimizer.
        optimizer_conf = dict(self.config_training['optimizer'])
        optimizer_type = optimizer_conf['type']
        del optimizer_conf['type']

        # Check if there are any models in the pipeline.
        if len(list(filter(lambda p: p.requires_grad,
                           self.pipeline.parameters()))) == 0:
            self.logger.error(
                'Cannot proceed with training, as there are no trainable models in the pipeline (or all models are frozen)'
            )
            exit(-7)

        # Instantiate the optimizer and filter the model parameters based on if they require gradients.
        self.optimizer = getattr(torch.optim, optimizer_type)(
            filter(lambda p: p.requires_grad, self.pipeline.parameters()),
            **optimizer_conf)

        log_str = 'Optimizer:\n' + '=' * 80 + "\n"
        log_str += "  Type: " + optimizer_type + "\n"
        log_str += "  Params: {}".format(optimizer_conf)

        self.logger.info(log_str)

    def add_statistics(self, stat_col):
        """
        Calls base method and adds epoch statistics to ``StatisticsCollector``.

        :param stat_col: ``StatisticsCollector``.

        """
        # Add loss and episode.
        super(Trainer, self).add_statistics(stat_col)

        # Add default statistics with formatting.
        stat_col.add_statistics('epoch', '{:02d}')

    def add_aggregators(self, stat_agg):
        """
        Adds basic aggregators to ``StatisticsAggregator`` and extends them with: epoch.

        :param stat_agg: ``StatisticsAggregator``.

        """
        # Add basic aggregators.
        super(Trainer, self).add_aggregators(stat_agg)

        # Add the aggregator for the epoch.
        stat_agg.add_aggregator('epoch', '{:02d}')

    def initialize_statistics_collection(self):
        """
        - Initializes all ``StatisticsCollectors`` and ``StatisticsAggregators`` used by a given worker: \

            - For training statistics (adds the statistics of the model & task),
            - For validation statistics (adds the statistics of the model & task).

        - Creates the output files (csv).

        """
        # TRAINING.
        # Create statistics collector for training.
        self.training_stat_col = StatisticsCollector()
        self.add_statistics(self.training_stat_col)
        self.training.task.add_statistics(self.training_stat_col)
        self.pipeline.add_statistics(self.training_stat_col)
        # Create the csv file to store the training statistics.
        self.training_batch_stats_file = self.training_stat_col.initialize_csv_file(
            self.app_state.log_dir, 'training_statistics.csv')

        # Create statistics aggregator for training.
        self.training_stat_agg = StatisticsAggregator()
        self.add_aggregators(self.training_stat_agg)
        self.training.task.add_aggregators(self.training_stat_agg)
        self.pipeline.add_aggregators(self.training_stat_agg)
        # Create the csv file to store the training statistic aggregations.
        self.training_set_stats_file = self.training_stat_agg.initialize_csv_file(
            self.app_state.log_dir, 'training_set_agg_statistics.csv')

        # VALIDATION.
        # Create statistics collector for validation.
        self.validation_stat_col = StatisticsCollector()
        self.add_statistics(self.validation_stat_col)
        self.validation.task.add_statistics(self.validation_stat_col)
        self.pipeline.add_statistics(self.validation_stat_col)
        # Create the csv file to store the validation statistics.
        self.validation_batch_stats_file = self.validation_stat_col.initialize_csv_file(
            self.app_state.log_dir, 'validation_statistics.csv')

        # Create statistics aggregator for validation.
        self.validation_stat_agg = StatisticsAggregator()
        self.add_aggregators(self.validation_stat_agg)
        self.validation.task.add_aggregators(self.validation_stat_agg)
        self.pipeline.add_aggregators(self.validation_stat_agg)
        # Create the csv file to store the validation statistic aggregations.
        self.validation_set_stats_file = self.validation_stat_agg.initialize_csv_file(
            self.app_state.log_dir, 'validation_set_agg_statistics.csv')

    def finalize_statistics_collection(self):
        """
        Finalizes the statistics collection by closing the csv files.

        """
        # Close all files.
        self.training_batch_stats_file.close()
        self.training_set_stats_file.close()
        self.validation_batch_stats_file.close()
        self.validation_set_stats_file.close()

    def initialize_tensorboard(self):
        """
        Initializes the TensorBoard writers and log directories.

        """
        # Create TensorBoard outputs - if TensorBoard is supposed to be used.
        if self.app_state.args.tensorboard is not None:
            from tensorboardX import SummaryWriter
            self.training_batch_writer = SummaryWriter(self.app_state.log_dir +
                                                       '/training')
            self.training_stat_col.initialize_tensorboard(
                self.training_batch_writer)

            self.training_set_writer = SummaryWriter(self.app_state.log_dir +
                                                     '/training_set_agg')
            self.training_stat_agg.initialize_tensorboard(
                self.training_set_writer)

            self.validation_batch_writer = SummaryWriter(
                self.app_state.log_dir + '/validation')
            self.validation_stat_col.initialize_tensorboard(
                self.validation_batch_writer)

            self.validation_set_writer = SummaryWriter(self.app_state.log_dir +
                                                       '/validation_set_agg')
            self.validation_stat_agg.initialize_tensorboard(
                self.validation_set_writer)
        else:
            self.training_batch_writer = None
            self.training_set_writer = None
            self.validation_batch_writer = None
            self.validation_set_writer = None

    def finalize_tensorboard(self):
        """ 
        Finalizes the operation of TensorBoard writers by closing them.
        """
        # Close the TensorBoard writers.
        if self.training_batch_writer is not None:
            self.training_batch_writer.close()
        if self.training_set_writer is not None:
            self.training_set_writer.close()
        if self.validation_batch_writer is not None:
            self.validation_batch_writer.close()
        if self.validation_set_writer is not None:
            self.validation_set_writer.close()

    def validate_on_batch(self, valid_batch):
        """
        Performs a validation of the model using the provided batch.

        Additionally logs results (to files, TensorBoard) and handles visualization.

        :param valid_batch: data batch generated by the task and used as input to the model.
        :type valid_batch: ``DataStreams``

        """
        # Turn on evaluation mode.
        self.pipeline.eval()
        # Empty the statistics collector.
        self.validation_stat_col.empty()

        # Compute the validation loss using the provided data batch.
        with torch.no_grad():
            # Forward pass.
            self.pipeline.forward(valid_batch)
            # Collect the statistics.
            self.collect_all_statistics(self.validation, self.pipeline,
                                        valid_batch, self.validation_stat_col)

        # Export collected statistics.
        self.export_all_statistics(self.validation_stat_col,
                                   '[Partial Validation]')

    def validate_on_set(self):
        """
        Performs a validation of the model on the whole validation set, using the validation ``DataLoader``.

        Iterates over the entire validation set (through the ``DataLoader``), aggregates the collected statistics \
        and logs that to the console, csv and TensorBoard (if set).

        """
        # Get number of samples.
        num_samples = len(self.validation)

        self.logger.info(
            'Validating over the entire validation set ({} samples in {} episodes)'
            .format(num_samples, len(self.validation.dataloader)))

        # Turn on evaluation mode.
        self.pipeline.eval()

        # Reset the statistics.
        self.validation_stat_col.empty()

        # Remember global episode number.
        old_episode = self.app_state.episode

        with torch.no_grad():
            for ep, valid_batch in enumerate(self.validation.dataloader):

                self.app_state.episode = ep
                # Forward pass.
                self.pipeline.forward(valid_batch)
                # Collect the statistics.
                self.collect_all_statistics(self.validation, self.pipeline,
                                            valid_batch,
                                            self.validation_stat_col)

        # Revert to global episode number.
        self.app_state.episode = old_episode

        # Aggregate statistics for the whole set.
        self.aggregate_all_statistics(self.validation, self.pipeline,
                                      self.validation_stat_col,
                                      self.validation_stat_agg)

        # Export aggregated statistics.
        self.export_all_statistics(self.validation_stat_agg,
                                   '[Full Validation]')
Example #6
    def export_to_tensorboard(self, tb_writer=None):
        """
        Exports the aggregated statistics to TensorBoard.

        :param tb_writer: TensorBoard ``SummaryWriter`` (optional; when not passed, the writer set previously is used).
        """
        # Get the episode number.
        episode = self.aggregators['episode']

        # If the writer was not passed as an argument, use the one set previously.
        if tb_writer is None:
            tb_writer = self.tb_writer
        # If it is still None - well, we cannot do anything more.
        if tb_writer is None:
            return

        # Iterate through keys and values and concatenate them.
        for key, value in self.aggregators.items():
            # Skip episode.
            if key == 'episode':
                continue
            tb_writer.add_scalar(key, value, episode)


if __name__ == "__main__":

    stat_col = StatisticsCollector()
    stat_agg = StatisticsAggregator()

    # Add default statistics with formatting.
    stat_col.add_statistic('loss', '{:12.10f}')
    stat_col.add_statistic('episode', '{:06d}')

    import random
    # create some random values
    loss_values = random.sample(range(100), 100)
    # "Collect" basic statistics.
    for episode, loss in enumerate(loss_values):
        stat_col['episode'] = episode
        stat_col['loss'] = loss
        # print(stat_col.export_statistics_to_string())
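The fragment above only collects statistics; a short continuation that also aggregates and prints them, assuming the aggregator API behaves as exercised in the unit test of Example #2:

    import numpy as np

    # Aggregate the collected losses (API as in Example #2).
    stat_agg.add_aggregator('loss_mean', '{:2.5f}')
    stat_agg['loss_mean'] = np.mean(stat_col['loss'])
    # Expected output: "loss_mean 49.50000 [Demo]" (mean of a permutation of 0..99).
    print(stat_agg.export_to_string('[Demo]'))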
Example #7
class Tester(Worker):
    """
    Defines the basic ``Tester``.

    If defining another type of tester, it should subclass it.

    """
    def __init__(self, name="Tester"):
        """
        Calls the ``Worker`` constructor, adds some additional arguments to parser.

        :param name: Name of the worker (DEFAULT: "Tester").
        :type name: str

        """
        # Call base constructor to set up app state, registry and add default params.
        super(Tester, self).__init__(name)

    def setup_global_experiment(self):
        """
        Sets up the global test experiment for the ``Tester``:

            - Checks that the model to use exists on file:

                >>> if not os.path.isfile(flags.model)

            - Checks that the configuration file exists:

                >>> if not os.path.isfile(config_file)

            - Create the configuration:

                >>> self.config.add_config_params_from_yaml(config)

        The rest of the experiment setup is done in :py:func:`setup_individual_experiment()` \
        to allow support for multiple tests.

        """
        # Call base method to parse all command line arguments and add default sections.
        super(Tester, self).setup_experiment()

        chkpt_file = self.app_state.args.load_checkpoint

        # Check if checkpoint file was indicated.
        if chkpt_file == "":
            print(
                'Please pass path to and name of the file containing pipeline to be loaded as --load parameter'
            )
            exit(-1)

        # Check if file with model exists.
        if not os.path.isfile(chkpt_file):
            print('Checkpoint file {} does not exist'.format(chkpt_file))
            exit(-2)

        # Extract path.
        abs_config_path, _ = os.path.split(
            os.path.dirname(os.path.expanduser(chkpt_file)))

        # Check if config file was indicated by the user.
        if self.app_state.args.config != '':
            root_config = self.app_state.args.config
        else:
            # Use the "default one".
            root_config = os.path.join(abs_config_path,
                                       'training_configuration.yaml')

        # Check if configuration file exists.
        if not os.path.isfile(root_config):
            print('Config file {} does not exist'.format(root_config))
            exit(-3)

        # Check the presence of the CUDA-compatible devices.
        if self.app_state.args.use_gpu and (torch.cuda.device_count() == 0):
            self.logger.error(
                "Cannot use GPU as there are no CUDA-compatible devices present in the system!"
            )
            exit(-4)

        # Extract absolute path to main ptp 'config' directory.
        # Save it in app_state!
        self.app_state.absolute_config_path = abs_config_path[
            :abs_config_path.find("configs") + 8]
        # Get relative path.
        rel_config_path = abs_config_path[abs_config_path.find("configs") + 8:]

        print("TODO: different root config extraction path!!")
        print(self.app_state.absolute_config_path)
        exit(1)

        # Get the list of configurations which need to be loaded.
        configs_to_load = config_parse.recurrent_config_parse(
            rel_config_path, [], self.app_state.absolute_config_path)

        # Read the YAML files one by one - but in reverse order -> overwrite the first indicated config(s)
        config_parse.reverse_order_config_load(
            self.config, configs_to_load, self.app_state.absolute_config_path)

        # -> At this point, the Config Registry contains the configuration loaded (and overwritten) from several files.

    def setup_individual_experiment(self):
        """
        Sets up an individual test experiment (when there are multiple tests) or the main \
        experiment (when there is a single test).

        - Set up the log directory path:

            >>> os.makedirs(self.log_dir, exist_ok=False)

        - Add a FileHandler to the logger (defined in BaseWorker):

            >>>  self.logger.addHandler(fh)

        - Set random seeds:

            >>>  self.set_random_seeds('testing', self.config['testing'])

        - Creates the pipeline consisting of many components

        - Creates the testing problem manager

        - Performs testing of compatibility of the testing pipeline.

        """

        # Get testing problem type.
        try:
            _ = self.config['testing']['problem']['type']
        except KeyError:
            print(
                "Error: Couldn't retrieve the problem 'type' from the 'testing' section in the loaded configuration"
            )
            exit(-5)

        # Get pipeline name.
        try:
            pipeline_name = self.config['pipeline']['name']
        except KeyError:
            print(
                "Error: Couldn't retrieve the pipeline 'name' from the loaded configuration"
            )
            exit(-6)

        # Prepare output paths for logging
        while True:
            # Dirty fix: if log_dir already exists, wait for 1 second and try again
            try:
                time_str = 'test_{0:%Y%m%d_%H%M%S}'.format(datetime.now())
                if self.app_state.args.savetag != '':
                    time_str = time_str + "_" + self.app_state.args.savetag
                self.log_dir = self.abs_path + '/' + time_str + '/'
                # Lowercase dir.
                self.log_dir = self.log_dir.lower()
                os.makedirs(self.log_dir, exist_ok=False)
            except FileExistsError:
                sleep(1)
            else:
                break

        # Set log dir.
        self.app_state.log_file = self.log_dir + 'tester.log'
        # Initialize logger in app state.
        self.app_state.logger = logging.initialize_logger("AppState")
        # Add handlers for the logfile to worker logger.
        logging.add_file_handler_to_logger(self.logger)
        self.logger.info("Logger directory set to: {}".format(self.log_dir))

        # Set cpu/gpu types.
        self.app_state.set_types()

        # Set random seeds in the testing section.
        self.set_random_seeds('testing', self.config['testing'])

        # Total number of detected errors.
        errors = 0

        ################# TESTING PROBLEM #################

        # Build training problem manager.
        self.testing = ProblemManager('testing', self.config['testing'])
        errors += self.testing.build()

        # Check if the maximum number of episodes is specified; if not, set a
        # default equal to the size of the dataset (in batches), so that by
        # default we loop over the test set once.
        max_test_episodes = len(self.testing)

        self.config['testing']['problem'].add_default_params(
            {'max_test_episodes': max_test_episodes})
        if self.config["testing"]["problem"]["max_test_episodes"] == -1:
            # Overwrite the config value!
            self.config['testing']['problem'].add_config_params(
                {'max_test_episodes': max_test_episodes})

        # Warn if indicated number of episodes is larger than an epoch size:
        if self.config["testing"]["problem"][
                "max_test_episodes"] > max_test_episodes:
            self.logger.warning(
                'Indicated maximum number of episodes is larger than one epoch, reducing it.'
            )
            self.config['testing']['problem'].add_config_params(
                {'max_test_episodes': max_test_episodes})

        self.logger.info("Setting the max number of episodes to: {}".format(
            self.config["testing"]["problem"]["max_test_episodes"]))

        ###################### PIPELINE ######################

        # Build the pipeline using the loaded configuration and global variables.
        self.pipeline = PipelineManager(pipeline_name, self.config['pipeline'])
        errors += self.pipeline.build()

        # Show pipeline.
        summary_str = self.pipeline.summarize_all_components_header()
        summary_str += self.testing.problem.summarize_io("testing")
        summary_str += self.pipeline.summarize_all_components()
        self.logger.info(summary_str)

        # Check errors.
        if errors > 0:
            self.logger.error(
                'Found {} errors, terminating execution'.format(errors))
            exit(-7)

        # Handshake definitions.
        self.logger.info("Handshaking testing pipeline")
        defs_testing = self.testing.problem.output_data_definitions()
        errors += self.pipeline.handshake(defs_testing)

        # Check errors.
        if errors > 0:
            self.logger.error(
                'Found {} errors, terminating execution'.format(errors))
            exit(-2)

        # Check if there are any models in the pipeline.
        if len(self.pipeline.models) == 0:
            self.logger.error(
                'Cannot proceed with training, as there are no trainable models in the pipeline'
            )
            exit(-3)

        # Load the pretrained models params from checkpoint.
        try:
            # Check command line arguments, then check load option in config.
            if self.app_state.args.load_checkpoint != "":
                pipeline_name = self.app_state.args.load_checkpoint
                msg = "command line (--load)"
            elif "load" in self.config['pipeline']:
                pipeline_name = self.config['pipeline']['load']
                msg = "'pipeline' section of the configuration file"
            else:
                pipeline_name = ""
            # Try to load the model.
            if pipeline_name != "":
                if os.path.isfile(pipeline_name):
                    # Load parameters from checkpoint.
                    self.pipeline.load(pipeline_name)
                else:
                    raise Exception(
                        "Couldn't load the checkpoint {} indicated in the {}: file does not exist"
                        .format(pipeline_name, msg))

            # Try to load the model parameters one by one, if so indicated in the configuration file.
            self.pipeline.load_models()

        except KeyError:
            self.logger.error(
                "File {} indicated in the {} seems not to be a valid model checkpoint"
                .format(pipeline_name, msg))
            exit(-5)
        except Exception as e:
            self.logger.error(e)
            # Rationale for exiting: if the user wanted to load a model but loading failed, continuing the experiment makes no sense.
            exit(-6)

        # Log the model summaries.
        summary_str = self.pipeline.summarize_models_header()
        summary_str += self.pipeline.summarize_models()
        self.logger.info(summary_str)

        # Move the models in the pipeline to GPU.
        if self.app_state.args.use_gpu:
            self.pipeline.cuda()

        # Turn on evaluation mode.
        self.pipeline.eval()
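        # eval() presumably delegates to torch.nn.Module.eval(), switching
        # layers such as dropout and batch normalization to inference behaviour.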

        # Export and log configuration, optionally asking the user for confirmation.
        self.export_experiment_configuration(self.log_dir,
                                             "testing_configuration.yaml",
                                             self.app_state.args.confirm)

    def initialize_statistics_collection(self):
        """
        Initializes all statistics collectors and aggregators used by a given worker
        and creates the output (csv) files.
        """
        # Create statistics collector for testing.
        self.testing_stat_col = StatisticsCollector()
        self.add_statistics(self.testing_stat_col)
        self.testing.problem.add_statistics(self.testing_stat_col)
        self.pipeline.add_statistics(self.testing_stat_col)
        # Create the csv file to store the testing statistics.
        self.testing_batch_stats_file = self.testing_stat_col.initialize_csv_file(
            self.log_dir, 'testing_statistics.csv')

        # Create statistics aggregator for testing.
        self.testing_stat_agg = StatisticsAggregator()
        self.add_aggregators(self.testing_stat_agg)
        self.testing.problem.add_aggregators(self.testing_stat_agg)
        self.pipeline.add_aggregators(self.testing_stat_agg)
        # Create the csv file to store the testing statistic aggregations.
        # Will contain a single row with aggregated statistics.
        self.testing_set_stats_file = self.testing_stat_agg.initialize_csv_file(
            self.log_dir, 'testing_set_agg_statistics.csv')

    def finalize_statistics_collection(self):
        """
        Finalizes the statistics collection and closes all output files.
        """
        # Close all files.
        self.testing_batch_stats_file.close()
        self.testing_set_stats_file.close()

    def run_experiment(self):
        """
        Main function of the ``Tester``: Test the loaded model over the test set.

        Iterates over the ``DataLoader`` for a maximum number of episodes equal to the test set size.

        The function does the following for each episode:

            - Forwards pass of the model,
            - Logs statistics & accumulates loss,
            - Activate visualization if set.


        """
        # Initialize statistics collection.
        self.initialize_statistics_collection()

        num_samples = len(self.testing)

        self.logger.info(
            'Testing over the entire test set ({} samples in {} episodes)'.
            format(num_samples, len(self.testing.dataloader)))

        try:
            # Run test
            with torch.no_grad():
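                # no_grad() turns off autograd tracking, which reduces memory
                # usage and speeds up the forward passes during testing.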

                episode = 0
                for test_dict in self.testing.dataloader:

                    # Terminal condition 0: max test episodes reached.
                    if episode == self.config["testing"]["problem"][
                            "max_test_episodes"]:
                        break

                    # Forward pass.
                    self.pipeline.forward(test_dict)
                    # Collect the statistics.
                    self.collect_all_statistics(self.testing, self.pipeline,
                                                test_dict,
                                                self.testing_stat_col, episode)

                    # Export to csv - at every step.
                    self.testing_stat_col.export_to_csv()

                    # Log to logger - at logging frequency.
                    if episode % self.app_state.args.logging_interval == 0:
                        self.logger.info(
                            self.testing_stat_col.export_to_string(
                                '[Partial Test]'))

                    # Move to the next episode.
                    episode += 1

                # End for.

                self.logger.info('\n' + '=' * 80)
                self.logger.info('Test finished')

                # Aggregate statistics for the whole set.
                self.aggregate_all_statistics(self.testing, self.pipeline,
                                              self.testing_stat_col,
                                              self.testing_stat_agg, episode)

                # Export aggregated statistics.
                self.export_all_statistics(self.testing_stat_agg,
                                           '[Full Test]')

        except SystemExit as e:
            # The testing did not end properly.
            self.logger.error('Experiment interrupted because {}'.format(e))
        except KeyboardInterrupt:
            # The testing did not end properly.
            self.logger.error('Experiment interrupted!')
        finally:
            # Finalize statistics collection.
            self.finalize_statistics_collection()
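
The loop above follows the standard PyTorch evaluation pattern: switch the pipeline to eval mode, disable gradient tracking, and cap the number of processed batches. Below is a minimal, self-contained sketch of the same pattern with hypothetical model and dataloader objects (an illustration under those assumptions, not the Tester API itself); the -1 sentinel plays the same role as max_test_episodes above:

    import torch
    import torch.nn.functional as F

    def evaluate(model, dataloader, max_episodes=-1):
        """Gradient-free, optionally capped evaluation loop (sketch)."""
        model.eval()                          # inference behaviour for dropout/batch-norm
        total_loss, episodes = 0.0, 0
        with torch.no_grad():                 # no autograd bookkeeping while testing
            for episode, (inputs, targets) in enumerate(dataloader):
                if episode == max_episodes:   # -1 never matches, i.e. one full epoch
                    break
                outputs = model(inputs)
                total_loss += F.mse_loss(outputs, targets).item()
                episodes += 1
        return total_loss / max(episodes, 1)  # mean loss over processed episodes
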
Example #8
    def test_collector_string(self):
        """ Tests whether the collector is collecting and producing the right string. """

        stat_col = StatisticsCollector()
        stat_col.add_statistics('loss', '{:12.10f}')
        stat_col.add_statistics('episode', '{:06d}')
        stat_col.add_statistics('acc', '{:2.3f}')
        stat_col.add_statistics('acc_help', None)

        # Episode 0.
        stat_col['episode'] = 0
        stat_col['loss'] = 0.7
        stat_col['acc'] = 100
        stat_col['acc_help'] = 121

        # Export.
        #csv_file = stat_col.initialize_csv_file('./', 'collector_test.csv')
        #stat_col.export_to_csv(csv_file)
        self.assertEqual(stat_col.export_to_string(), "loss 0.7000000000; episode 000000; acc 100.000 ")

        # Episode 1.
        stat_col['episode'] = 1
        stat_col['loss'] = 0.7
        stat_col['acc'] = 99.3

        stat_col.add_statistics('seq_length', '{:2.0f}')
        stat_col['seq_length'] = 5

        # Export.
        #stat_col.export_to_csv(csv_file)
        self.assertEqual(stat_col.export_to_string('[Validation]'), "loss 0.7000000000; episode 000001; acc 99.300; seq_length  5 [Validation]")

        # Empty.
        stat_col.empty()
        self.assertEqual(stat_col.export_to_string(), "loss ; episode ; acc ; seq_length  ")
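
The expected strings in the assertions above follow directly from the format specifiers handed to add_statistics; a quick sanity check reproduces the same fields with plain str.format calls:

    # Reproducing the collector's per-statistic formatting with str.format:
    print('{:12.10f}'.format(0.7))    # '0.7000000000' -> the 'loss' field
    print('{:06d}'.format(1))         # '000001'       -> the 'episode' field
    print('{:2.3f}'.format(99.3))     # '99.300'       -> the 'acc' field
    print('{:2.0f}'.format(5))        # ' 5'           -> the 'seq_length' field

Note that statistics registered with a None format (such as 'acc_help' above) are collected but, as the first assertion shows, omitted from the exported string.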