Beispiel #1
0
    def __init__(self, name, config):
        """
        Creates an empty pipeline manager.

        Stores the name and the configuration, obtains the ``AppState`` object and a logger,
        and resets the component / model / loss containers together with the best-loss bookkeeping.

        :param name: Name of the pipeline; it is also the name handed to the logger initializer.

        :param config: Parameters used to instantiate all required components.
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Identity and configuration of this pipeline.
        self.name, self.config = name, config

        # Application state first, then the logger registered under the pipeline name.
        self.app_state = AppState()
        self.logger = logging.initialize_logger(name)

        # Registry of pipeline components - note that it is a dictionary, empty until components are added.
        self.__components = dict()

        # Separate (initially empty) lists for models and losses.
        # NOTE(review): the original comments state these only hold references to objects that are
        # also kept in the components registry - confirm against the code that fills them.
        self.models = list()
        self.losses = list()

        # No loss seen so far: start from infinity with an undetermined status.
        self.best_loss, self.best_status = inf, "Unknown"
Beispiel #2
0
    def setup_experiment(self):
        """
        Performs the basic setup shared by all experiments.

        This base implementation:

            - parses the command line arguments (known ones go to ``app_state.args``, the rest to ``unparsed``),

            - (re)initializes the logger under the worker name,

            - registers the three default config sections (training / validation / testing).

        .. note::

            Child classes should override this method, but still call this parent implementation
            to get the basic functionality provided here.

        """
        # Split the command line into recognized arguments and leftovers.
        parsed_args, leftovers = self.parser.parse_known_args()
        self.app_state.args = parsed_args
        self.unparsed = leftovers

        # The second argument is False: per the original note, no file handler is added yet
        # because the path to the logfile is not known at this point.
        self.logger = logging.initialize_logger(self.name, False)

        # Register the (mostly empty) default sections, one call per section, in a fixed order.
        default_sections = (
            {"training": {'terminal_conditions': {}}},
            {"validation": {}},
            {"testing": {}},
        )
        for section in default_sections:
            self.config.add_default_params(section)
Beispiel #3
0
    def __init__(self, name, class_type, config):
        """
        Initializes the component. In order, this constructor:

            - stores the component name and the passed configuration section
            - gets access to ``AppState``
            - initializes the logger under the component name
            - loads the default parameters of ``class_type`` (skipped when it is ``None``)
            - builds the key-mapping facades for streams, globals and statistics
            - builds the facade used to access global parameters

        :param name: Name of the component.

        :param class_type: Class type of the component, or ``None`` to skip loading of defaults.

        :param config: Dictionary of parameters (read from configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """

        def mapping_section(key):
            # A section that is absent or explicitly set to None yields a fresh, empty mapping.
            if key in config and config[key] is not None:
                return config[key]
            return {}

        self.name = name
        self.config = config

        # AppState gives access to command line args, globals etc.
        self.app_state = AppState()

        self.logger = logging.initialize_logger(self.name)

        # Defaults must be registered before the mapping sections are read below.
        if class_type is not None:
            class_defaults = load_class_default_config_file(class_type)
            self.config.add_default_params(class_defaults)

        # "Streams mapping facility".
        self.__stream_keys = mapping_section("streams")
        self.stream_keys = KeyMappingsFacade(self.__stream_keys)

        # "Globals mapping facility".
        self.__global_keys = mapping_section("globals")
        self.global_keys = KeyMappingsFacade(self.__global_keys)

        # "Statistics mapping facility".
        self.__statistics_keys = mapping_section("statistics")
        self.statistics_keys = KeyMappingsFacade(self.__statistics_keys)

        # Facade for global parameters; it is built from the same mapping as ``global_keys``.
        self.globals = GlobalsFacade(self.__global_keys)
Beispiel #4
0
    def setup_individual_experiment(self):
        """
        Sets up an individual test experiment (one of many, or the only one).

        Steps performed, in order:

        - Retrieves the test section, its task type, the pipeline section and the pipeline name
          from the loaded configuration (terminates the process when any of them is missing)

        - Creates a fresh, timestamped log directory and attaches a file handler to the logger

        - Sets random seeds

        - Builds the testing task manager and clamps the episode limit to one pass over the task

        - Builds the pipeline and handshakes it with the task output definitions

        - Loads the model parameters (whole-pipeline checkpoint, or model by model)

        - Optionally moves the pipeline to GPU, switches it to evaluation mode and exports the configuration

        Every failure is reported and ends the process through ``exit()`` with a negative code.

        """

        # Get test section.
        try:
            self.tsn = self.app_state.args.section_name
            self.config_test = self.config[self.tsn]
            # A section that resolves to None is treated exactly like a missing one.
            if self.config_test is None:
                raise KeyError()
        except KeyError:
            print(
                "Error: Couldn't retrieve the section '{}' from the loaded configuration"
                .format(self.tsn))
            exit(-1)

        # Get testing task type (the value itself is not needed here, only its presence).
        try:
            _ = self.config_test['task']['type']
        except KeyError:
            print(
                "Error: Couldn't retrieve the task 'type' from the '{}' section in the loaded configuration"
                .format(self.tsn))
            exit(-5)

        # Get pipeline section.
        try:
            psn = self.app_state.args.pipeline_section_name
            self.config_pipeline = self.config[psn]
            if self.config_pipeline is None:
                raise KeyError()
        except KeyError:
            print(
                "Error: Couldn't retrieve the pipeline section '{}' from the loaded configuration"
                .format(psn))
            exit(-1)

        # Get pipeline name.
        try:
            pipeline_name = self.config_pipeline['name']
        except KeyError:
            print(
                "Error: Couldn't retrieve the pipeline 'name' from the loaded configuration"
            )
            exit(-6)

        # Prepare output paths for logging
        while True:
            # Dirty fix: if log_dir already exists, wait for 1 second and try again
            # (the directory name has a one-second resolution, so the retry yields a new name).
            try:
                time_str = self.tsn + '_{0:%Y%m%d_%H%M%S}'.format(
                    datetime.now())
                if self.app_state.args.exptag != '':
                    time_str = time_str + "_" + self.app_state.args.exptag
                self.app_state.log_dir = self.abs_path + '/' + time_str + '/'
                # Lowercase dir.
                self.app_state.log_dir = self.app_state.log_dir.lower()
                makedirs(self.app_state.log_dir, exist_ok=False)
            except FileExistsError:
                sleep(1)
            else:
                break

        # Set log dir.
        self.app_state.log_file = self.app_state.log_dir + 'processor.log'
        # Initialize logger in app state.
        self.app_state.logger = logging.initialize_logger("AppState")
        # Add handlers for the logfile to worker logger.
        logging.add_file_handler_to_logger(self.logger)
        self.logger.info("Logger directory set to: {}".format(
            self.app_state.log_dir))

        # Set cpu/gpu types.
        self.app_state.set_types()

        # Set random seeds in the testing section.
        self.set_random_seeds(self.tsn, self.config_test)

        # Total number of detected errors.
        errors = 0

        ################# TESTING PROBLEM #################

        # Build the used task manager.
        self.pm = TaskManager(self.tsn, self.config_test)
        errors += self.pm.build()

        # check if the maximum number of episodes is specified, if not put a
        # default equal to the size of the dataset (divided by the batch size)
        # So that by default, we loop over the test set once.
        task_size_in_episodes = len(self.pm)

        if self.config_test["terminal_conditions"]["episode_limit"] == -1:
            # Overwrite the config value!
            self.config_test['terminal_conditions'].add_config_params(
                {'episode_limit': task_size_in_episodes})

        # Warn if indicated number of episodes is larger than an epoch size:
        if self.config_test["terminal_conditions"][
                "episode_limit"] > task_size_in_episodes:
            self.logger.warning(
                'Indicated limit of number of episodes is larger than one epoch, reducing it.'
            )
            # Overwrite the config value!
            self.config_test['terminal_conditions'].add_config_params(
                {'episode_limit': task_size_in_episodes})

        self.logger.info("Limiting the number of episodes to: {}".format(
            self.config_test["terminal_conditions"]["episode_limit"]))

        ###################### PIPELINE ######################

        # Build the pipeline using the loaded configuration and global variables.
        self.pipeline = PipelineManager(pipeline_name, self.config_pipeline)
        errors += self.pipeline.build()

        # Show pipeline (logged before the error check, so the summary is available even on failure).
        summary_str = self.pipeline.summarize_all_components_header()
        summary_str += self.pm.task.summarize_io(self.tsn)
        summary_str += self.pipeline.summarize_all_components()
        self.logger.info(summary_str)

        # Check errors.
        if errors > 0:
            self.logger.error(
                'Found {} errors, terminating execution'.format(errors))
            exit(-7)

        # Handshake definitions.
        self.logger.info("Handshaking testing pipeline")
        defs_testing = self.pm.task.output_data_definitions()
        errors += self.pipeline.handshake(defs_testing)

        # Check errors.
        if errors > 0:
            self.logger.error(
                'Found {} errors, terminating execution'.format(errors))
            exit(-2)

        # Check if there are any models in the pipeline.
        if len(self.pipeline.models) == 0:
            self.logger.error(
                'Cannot proceed with training, as there are no trainable models in the pipeline'
            )
            exit(-3)

        # Load the pretrained models params from checkpoint.
        # Both names are bound before the try block, so that the handlers below can always
        # format their messages - also when no whole-pipeline checkpoint was requested.
        checkpoint_file = ""
        msg = ""
        try:
            # Check command line arguments, then check load option in config.
            if self.app_state.args.load_checkpoint != "":
                checkpoint_file = self.app_state.args.load_checkpoint
                msg = "command line (--load)"
            elif "load" in self.config_pipeline:
                checkpoint_file = self.config_pipeline['load']
                msg = "'pipeline' section of the configuration file"

            # Try to load the the whole pipeline.
            if checkpoint_file != "":
                if path.isfile(checkpoint_file):
                    # Load parameters from checkpoint.
                    self.pipeline.load(checkpoint_file)
                else:
                    raise Exception(
                        "Couldn't load the checkpoint {} indicated in the {}: file does not exist"
                        .format(checkpoint_file, msg))
                # If we succeeded, we do not want to load the models from the file anymore!
            else:
                # Try to load the models parameters - one by one, if set so in the configuration file.
                self.pipeline.load_models()

        except KeyError as e:
            if checkpoint_file != "":
                self.logger.error(
                    "File {} indicated in the {} seems not to be a valid model checkpoint"
                    .format(checkpoint_file, msg))
            else:
                # The KeyError came from loading the models one by one: there is no single
                # file/source to name, so report the missing key instead.
                self.logger.error(
                    "Couldn't load the parameters of the individual models: missing key {}"
                    .format(e))
            exit(-5)
        except Exception as e:
            self.logger.error(e)
            # Exit by following the logic: if user wanted to load the model but failed, then continuing the experiment makes no sense.
            exit(-6)

        # Log the model summaries.
        summary_str = self.pipeline.summarize_models_header()
        summary_str += self.pipeline.summarize_models()
        self.logger.info(summary_str)

        # Move the models in the pipeline to GPU.
        if self.app_state.args.use_gpu:
            self.pipeline.cuda()

        # Turn on evaluation mode.
        self.pipeline.eval()

        # Export and log configuration, optionally asking the user for confirmation.
        config_parsing.display_parsing_results(self.logger,
                                               self.app_state.args,
                                               self.unparsed)
        config_parsing.display_globals(self.logger,
                                       self.app_state.globalitems())
        config_parsing.export_experiment_configuration_to_yml(
            self.logger, self.app_state.log_dir, "training_configuration.yml",
            self.config, self.app_state.args.confirm)
Beispiel #5
0
    def setup_experiment(self):
        """
        Sets up experiment of all trainers:

        - Calls base class setup_experiment to parse the command line arguments

        - Passes the ``default_training`` / ``default_validation`` defaults to the sections selected on the command line

        - Loads the config file(s)

        - Retrieves the training, validation and pipeline sections (terminates the process when one is missing)

        - Sets up the log directory path and the checkpoints directory

        - Adds a file handler to the logger

        - Sets random seeds

        - Creates training task manager

        - Handles curriculum learning if indicated

        - Creates validation task manager

        - Creates the pipeline and handshakes it with both training and validation tasks

        - Loads and freezes the models, optionally moves them to GPU

        - Sets the optimizer

        Every failure is reported and ends the process through ``exit()`` with a negative code.

        """
        # Call base method to parse all command line arguments and add default sections.
        super(Trainer, self).setup_experiment()

        # "Pass" configuration parameters from the "default_training" section to training section indicated by the section_name.
        self.config.add_default_params({
            self.app_state.args.training_section_name:
            self.config['default_training'].to_dict()
        })
        self.config.del_default_params('default_training')

        # "Pass" configuration parameters from the "default_validation" section to validation section indicated by the section_name.
        self.config.add_default_params({
            self.app_state.args.validation_section_name:
            self.config['default_validation'].to_dict()
        })
        self.config.del_default_params('default_validation')

        # Check the presence of the CUDA-compatible devices.
        if self.app_state.args.use_gpu and (torch.cuda.device_count() == 0):
            self.logger.error(
                "Cannot use GPU as there are no CUDA-compatible devices present in the system!"
            )
            exit(-1)

        # Check if config file was selected.
        if self.app_state.args.config == '':
            print('Please pass configuration file(s) as --c parameter')
            exit(-2)

        # Split the comma-separated list of configuration files (all spaces are removed first).
        root_configs = self.app_state.args.config.replace(" ", "").split(',')
        # Expand a leading "~" (user home) in each of them.
        abs_root_configs = [path.expanduser(config) for config in root_configs]

        # Get the list of configurations which need to be loaded.
        configs_to_load = config_parse.recurrent_config_parse(
            abs_root_configs, [], self.app_state.absolute_config_path)

        # Read the YAML files one by one - but in reverse order -> overwrite the first indicated config(s)
        config_parse.reverse_order_config_load(self.config, configs_to_load)

        # -> At this point, the Param Registry contains the configuration loaded (and overwritten) from several files.
        # Show the resulting training configuration (printed, as the file handler is not attached yet).
        conf_str = 'Loaded (initial) configuration:\n'
        conf_str += '=' * 80 + '\n'
        conf_str += yaml.safe_dump(self.config.to_dict(),
                                   default_flow_style=False)
        conf_str += '=' * 80 + '\n'
        print(conf_str)

        # Get training section.
        try:
            tsn = self.app_state.args.training_section_name
            self.config_training = self.config[tsn]
            # We must additionally check if it is None - weird behaviour when using default value.
            if self.config_training is None:
                raise KeyError()
        except KeyError:
            print(
                "Error: Couldn't retrieve the training section '{}' from the loaded configuration"
                .format(tsn))
            exit(-1)

        # Get training task type (used below as a part of the log directory path).
        try:
            training_task_type = self.config_training['task']['type']
        except KeyError:
            print(
                "Error: Couldn't retrieve the task 'type' from the training section '{}' in the loaded configuration"
                .format(tsn))
            exit(-1)

        # Get validation section.
        try:
            vsn = self.app_state.args.validation_section_name
            self.config_validation = self.config[vsn]
            if self.config_validation is None:
                raise KeyError()
        except KeyError:
            print(
                "Error: Couldn't retrieve the validation section '{}' from the loaded configuration"
                .format(vsn))
            exit(-1)

        # Get validation task type (the value itself is not needed here, only its presence).
        try:
            _ = self.config_validation['task']['type']
        except KeyError:
            print(
                "Error: Couldn't retrieve the task 'type' from the validation section '{}' in the loaded configuration"
                .format(vsn))
            exit(-1)

        # Get pipeline section.
        try:
            psn = self.app_state.args.pipeline_section_name
            self.config_pipeline = self.config[psn]
            if self.config_pipeline is None:
                raise KeyError()
        except KeyError:
            print(
                "Error: Couldn't retrieve the pipeline section '{}' from the loaded configuration"
                .format(psn))
            exit(-1)

        # Get pipeline name.
        try:
            pipeline_name = self.config_pipeline['name']
        except KeyError:
            # Using name of the first configuration file from command line.
            basename = path.basename(root_configs[0])
            # Take config filename without extension.
            pipeline_name = path.splitext(basename)[0]
            # Set pipeline name, so processor can use it afterwards.
            self.config_pipeline.add_config_params({'name': pipeline_name})

        # Prepare the output path for logging
        while True:
            # Dirty fix: if log_dir already exists, wait for 1 second and try again
            # (the directory name has a one-second resolution, so the retry yields a new name).
            try:
                time_str = '{0:%Y%m%d_%H%M%S}'.format(datetime.now())
                if self.app_state.args.exptag != '':
                    time_str = time_str + "_" + self.app_state.args.exptag
                self.app_state.log_dir = path.expanduser(
                    self.app_state.args.expdir
                ) + '/' + training_task_type + '/' + pipeline_name + '/' + time_str + '/'
                # Lowercase dir.
                self.app_state.log_dir = self.app_state.log_dir.lower()
                makedirs(self.app_state.log_dir, exist_ok=False)
            except FileExistsError:
                sleep(1)
            else:
                break

        # Set log dir.
        self.app_state.log_file = self.app_state.log_dir + 'trainer.log'
        # Initialize logger in app state.
        self.app_state.logger = logging.initialize_logger("AppState")
        # Add handlers for the logfile to worker logger.
        logging.add_file_handler_to_logger(self.logger)
        self.logger.info("Logger directory set to: {}".format(
            self.app_state.log_dir))

        # Set cpu/gpu types.
        self.app_state.set_types()

        # Models dir.
        self.checkpoint_dir = self.app_state.log_dir + 'checkpoints/'
        makedirs(self.checkpoint_dir, exist_ok=False)

        # Set random seeds in the training section.
        self.set_random_seeds('training', self.config_training)

        # Total number of detected errors.
        errors = 0

        ################# TRAINING PROBLEM #################

        # Build training task manager.
        self.training = TaskManager('training', self.config_training)
        errors += self.training.build()

        # parse the curriculum learning section in the loaded configuration.
        if 'curriculum_learning' in self.config_training:

            # Initialize curriculum learning - with values from loaded configuration.
            self.training.task.curriculum_learning_initialize(
                self.config_training['curriculum_learning'])

            # If the 'must_finish' key is not present in config then then it will be finished by default
            self.config_training['curriculum_learning'].add_default_params(
                {'must_finish': True})

            self.must_finish_curriculum = self.config_training[
                'curriculum_learning']['must_finish']
            self.logger.info("Curriculum Learning activated")

        else:
            # If not using curriculum learning then it does not have to be finished.
            self.must_finish_curriculum = False
            self.curric_done = True

        ################# VALIDATION PROBLEM #################

        # Build validation task manager.
        self.validation = TaskManager('validation', self.config_validation)
        errors += self.validation.build()

        ###################### PIPELINE ######################

        # Build the pipeline using the loaded configuration.
        self.pipeline = PipelineManager(pipeline_name, self.config_pipeline)
        errors += self.pipeline.build()

        # Check errors.
        if errors > 0:
            self.logger.error(
                'Found {} errors, terminating execution'.format(errors))
            exit(-2)

        # Show pipeline.
        summary_str = self.pipeline.summarize_all_components_header()
        summary_str += self.training.task.summarize_io("training")
        summary_str += self.validation.task.summarize_io("validation")
        summary_str += self.pipeline.summarize_all_components()
        self.logger.info(summary_str)

        # Handshake definitions.
        self.logger.info("Handshaking training pipeline")
        defs_training = self.training.task.output_data_definitions()
        errors += self.pipeline.handshake(defs_training)

        self.logger.info("Handshaking validation pipeline")
        defs_valid = self.validation.task.output_data_definitions()
        errors += self.pipeline.handshake(defs_valid)

        # Check errors.
        if errors > 0:
            self.logger.error(
                'Found {} errors, terminating execution'.format(errors))
            exit(-2)

        ################## MODEL LOAD/FREEZE #################

        # Load the pretrained models params from checkpoint.
        # Both names are bound before the try block, so that the handlers below can always
        # format their messages - also when no whole-pipeline checkpoint was requested.
        checkpoint_file = ""
        msg = ""
        try:
            # Check command line arguments, then check load option in config.
            if self.app_state.args.load_checkpoint != "":
                checkpoint_file = self.app_state.args.load_checkpoint
                msg = "command line (--load)"
            elif "load" in self.config_pipeline:
                checkpoint_file = self.config_pipeline['load']
                msg = "'pipeline' section of the configuration file"

            # Try to load the model.
            if checkpoint_file != "":
                if path.isfile(checkpoint_file):
                    # Load parameters from checkpoint.
                    self.pipeline.load(checkpoint_file)
                else:
                    raise Exception(
                        "Couldn't load the checkpoint {} indicated in the {}: file does not exist"
                        .format(checkpoint_file, msg))
                # If we succeeded, we do not want to load the models from the file anymore!
            else:
                # Try to load the models parameters - one by one, if set so in the configuration file.
                self.pipeline.load_models()

        except KeyError as e:
            if checkpoint_file != "":
                self.logger.error(
                    "File {} indicated in the {} seems not to be a valid model checkpoint"
                    .format(checkpoint_file, msg))
            else:
                # The KeyError came from loading the models one by one: there is no single
                # file/source to name, so report the missing key instead.
                self.logger.error(
                    "Couldn't load the parameters of the individual models: missing key {}"
                    .format(e))
            exit(-5)
        except Exception as e:
            self.logger.error(e)
            # Exit by following the logic: if user wanted to load the model but failed, then continuing the experiment makes no sense.
            exit(-6)

        # Finally, freeze the models (that the user wants to freeze).
        self.pipeline.freeze_models()

        # Log the model summaries.
        summary_str = self.pipeline.summarize_models_header()
        summary_str += self.pipeline.summarize_models()
        self.logger.info(summary_str)

        # Move the models in the pipeline to GPU.
        if self.app_state.args.use_gpu:
            self.pipeline.cuda()

        ################# OPTIMIZER #################

        # Set the optimizer: work on a plain-dict copy, so that removing the 'type' key
        # (which is not an optimizer argument) does not alter the configuration itself.
        optimizer_conf = dict(self.config_training['optimizer'])
        optimizer_type = optimizer_conf.pop('type')

        # Check if there is anything to optimize, i.e. at least one parameter requiring gradients.
        if not any(p.requires_grad for p in self.pipeline.parameters()):
            self.logger.error(
                'Cannot proceed with training, as there are no trainable models in the pipeline (or all models are frozen)'
            )
            exit(-7)

        # Instantiate the optimizer and filter the model parameters based on if they require gradients.
        trainable_params = filter(lambda p: p.requires_grad,
                                  self.pipeline.parameters())
        self.optimizer = getattr(torch.optim, optimizer_type)(trainable_params,
                                                              **optimizer_conf)

        log_str = 'Optimizer:\n' + '=' * 80 + "\n"
        log_str += "  Type: " + optimizer_type + "\n"
        log_str += "  Params: {}".format(optimizer_conf)

        self.logger.info(log_str)
Beispiel #6
0
    def build(task, config, task_subset_name):
        """
        Static method returning particular sampler, depending on the name \
        provided in the list of parameters & the specified task class.

        :param task: Instance of an object derived from the Task class.
        :type task: ``tasks.Task``

        :param config: Parameters used to instantiate the sampler.
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        :param task_subset_name: Name of task subset (and associated TaskManager object)

        ..note::

            ``config`` should contains the exact (case-sensitive) class name of the sampler to instantiate.


        .. warning::

            ``torch.utils.data.sampler.BatchSampler``, \
            ``torch.utils.data.sampler.DistributedSampler`` are not supported yet.

        .. note::

            ``torch.utils.data.sampler.SubsetRandomSampler`` expects 'indices' to index a subset of the dataset. \
             Currently, the user can specify these indices using one of the following options:

            - Option 1: range.
                >>> indices = range(20)

            - Option 2: range as str.
                >>> range_str = '0, 20'

            - Option 3: list of indices.
                >>> yaml_list = yaml.load('[0, 2, 5, 10]')

            - Option 4: name of the file containing indices.
                >>> filename = "~/data/mnist/training_indices.txt"

        .. note::

            ``torch.utils.data.sampler.WeightedRandomSampler`` expercse additional parameter 'weights'.

        :return: Instance of a given sampler or ``None`` if the section not present or couldn't build the sampler.

        """
        # Initialize logger.
        logger = logging.initialize_logger('SamplerFactory')

        try:
            # Check presence of the typename attribute.
            if 'type' not in config:
                raise ConfigurationError(
                    "The sampler configuration section does not contain the key 'type'"
                )

            # Get the class typename.
            typename = config['type']
            logger.info(
                'Trying to instantiate the {} sampler object'.format(typename))

            ###########################################################################
            # Handle first special case: SubsetRandomSampler.
            if typename == 'SubsetRandomSampler':

                # Check presence of the typename attribute.
                if 'indices' not in config:
                    raise ConfigurationError(
                        "The sampler configuration section does not contain the key 'indices' "
                        "required by SubsetRandomSampler")

                # Get and process the indices.
                indices = config['indices']

                # Analyze the type.
                if type(indices) == str:
                    # Try to open the file.
                    try:
                        # from expanduser()'s doc: If the expansion fails or if the path does not begin
                        # with a tilde, the path is returned unchanged. -> So operation below should be safe.
                        file = open(os.path.expanduser(indices), "r")
                        # Read the file.
                        indices = file.readline()
                        file.close()

                    except Exception:
                        # Ok, this is not a file.
                        pass
                    finally:
                        # Try to process it as a string.
                        # Get the digits.
                        digits = indices.split(',')
                        indices = [int(x) for x in digits]
                else:
                    # Assume that type(indices) is a list of ints.
                    digits = indices

                # Finally, we got the list of digits.
                if len(digits) == 2:
                    # Create a range.
                    indices = range(int(digits[0]), int(digits[1]))
                # Else: use them as they are, including single index.

                # Check if indices are within range.
                if max(indices) >= len(task):
                    raise ConfigurationError(
                        "SubsetRandomSampler cannot work properly when indices are out of range ({}) "
                        "considering that there are {} samples in the task".
                        format(max(indices), len(task)))

                # Create the sampler object.
                sampler = pt_samplers.SubsetRandomSampler(indices)

            ###########################################################################
            # Handle second special case: WeightedRandomSampler.
            elif typename == 'WeightedRandomSampler':

                # Check presence of the attribute.
                if 'weights' not in config:
                    raise ConfigurationError(
                        "The sampler configuration section does not contain the key 'weights' "
                        "required by WeightedRandomSampler")

                # Load weights from file.
                weights = np.fromfile(os.path.expanduser(config['weights']),
                                      dtype=float,
                                      count=-1,
                                      sep=',')

                # Create sampler class.
                sampler = pt_samplers.WeightedRandomSampler(weights,
                                                            len(task),
                                                            replacement=True)

            ###########################################################################
            # Handle third special case: kFoldRandomSampler.
            elif typename == 'kFoldRandomSampler':

                # Check presence of the attribute.
                if 'folds' not in config:
                    raise ConfigurationError(
                        "The sampler configuration section does not contain the key 'folds' "
                        "required by kFoldRandomSampler")

                # Create indices, depending on the fold.
                folds = config["folds"]
                if folds < 2:
                    raise ConfigurationError(
                        "kFoldRandomSampler requires  at least two 'folds'")
                # Get epochs per fold (default: 1).
                epochs_per_fold = config.get("epochs_per_fold", 1)

                # Create the sampler object.
                sampler = ptp_samplers.kFoldRandomSampler(
                    len(task), folds, epochs_per_fold,
                    task_subset_name == 'training')

            ###########################################################################
            # Handle fourd special case: kFoldWeightedRandomSampler.
            elif typename == 'kFoldWeightedRandomSampler':

                # Check presence of the attribute.
                if 'weights' not in config:
                    raise ConfigurationError(
                        "The sampler configuration section does not contain the key 'weights' "
                        "required by kFoldWeightedRandomSampler")

                # Load weights from file.
                weights = np.fromfile(os.path.expanduser(config['weights']),
                                      dtype=float,
                                      count=-1,
                                      sep=',')

                # Check presence of the attribute.
                if 'folds' not in config:
                    raise ConfigurationError(
                        "The sampler configuration section does not contain the key 'folds' "
                        "required by kFoldWeightedRandomSampler")

                # Create indices, depending on the fold.
                folds = config["folds"]
                if folds < 2:
                    raise ConfigurationError(
                        "kFoldRandomSampler requires  at least two 'folds'")
                # Get epochs per fold (default: 1).
                epochs_per_fold = config.get("epochs_per_fold", 1)

                # Create the sampler object.
                sampler = ptp_samplers.kFoldWeightedRandomSampler(
                    weights, len(task), folds, epochs_per_fold,
                    task_subset_name == 'training')

            elif typename in ['BatchSampler', 'DistributedSampler']:
                # Sorry, don't support those. Yet;)
                raise ConfigurationError(
                    "Sampler Factory currently does not support the '{}' sampler. Please pick one of the others "
                    "or use defaults random sampling".format(typename))
            else:
                # Verify that the specified class is in the samplers package.
                if typename not in dir(pt_samplers):
                    raise ConfigurationError(
                        "Could not find the specified class '{}' in the samplers package"
                        .format(typename))

                # Get the sampler class.
                sampler_class = getattr(pt_samplers, typename)
                # Create "regular" sampler.
                sampler = sampler_class(task)

            # Return sampler.
            return sampler

        except ConfigurationError as e:
            logger.error(e)
            # Do not continue with invalid sampler.
            exit(-1)
    def build(problem, config):
        """
        Static method returning particular sampler, depending on the name
        provided in the list of parameters & the specified problem class.

        :param problem: Instance of an object derived from the Problem class.
        :type problem: ``problems.Problem``

        :param config: Parameters used to instantiate the sampler.
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        .. note::

            ``config`` should contain the exact (case-sensitive) class name of the sampler to instantiate
            under the key 'name'.

        .. warning::

            ``torch.utils.data.sampler.BatchSampler`` and
            ``torch.utils.data.sampler.DistributedSampler`` are not supported yet;
            requesting one of them terminates the process (exit code -2).

        .. note::

            ``torch.utils.data.sampler.SubsetRandomSampler`` expects 'indices' to index a subset of the dataset.
            Currently, the user can specify these indices using one of the following options:

            - Option 1: range.
                >>> indices = range(20)

            - Option 2: range as str (two values are interpreted as [start, stop) bounds).
                >>> range_str = '0, 20'

            - Option 3: list of indices (a list of exactly two values is also interpreted as bounds).
                >>> yaml_list = yaml.load('[0, 2, 5, 10]')

            - Option 4: name of the file whose first line contains comma-separated indices.
                >>> filename = "~/data/mnist/training_indices.txt"

            Indices reaching beyond ``len(problem)`` terminate the process (exit code -1).

        .. note::

            ``torch.utils.data.sampler.WeightedRandomSampler`` expects an additional parameter 'weights':
            the name of a file with comma-separated weights.

        :return: Instance of a given sampler or ``None`` if the section not present or couldn't build the sampler.

        :raises SystemExit: When indices are out of range or the requested sampler is unsupported.

        """
        # Initialize logger.
        logger = logging.initialize_logger('SamplerFactory')

        # An empty 'sampler' section means that no sampler is required.
        if not config:
            logger.info("The sampler configuration section is not present, using default 'random' sampling")
            return None

        try:
            # Check presence of the name attribute.
            if 'name' not in config:
                raise ConfigurationError("The sampler configuration section does not contain the key 'name'")

            # Get the class name.
            name = config['name']

            # Verify that the specified class is in the samplers package.
            if name not in dir(torch.utils.data.sampler):
                raise ConfigurationError("Could not find the specified class '{}' in the samplers package".format(name))

            # Get the actual class.
            sampler_class = getattr(torch.utils.data.sampler, name)

            # Ok, proceed.
            logger.info('Loading the {} sampler from {}'.format(name, sampler_class.__module__))

            # Handle "special" case.
            if sampler_class.__name__ == 'SubsetRandomSampler':

                # Check presence of the indices attribute.
                if 'indices' not in config:
                    raise ConfigurationError("The sampler configuration section does not contain the key 'indices' "
                                    "required by SubsetRandomSampler.")

                indices = config['indices']

                if isinstance(indices, str):
                    # The string may name a file holding the indices. expanduser() returns the
                    # path unchanged when there is nothing to expand, so the call is always safe.
                    try:
                        with open(os.path.expanduser(indices), "r") as file:
                            indices = file.readline()
                    except (OSError, ValueError):
                        # Not a readable file (missing, a directory, invalid path, undecodable):
                        # fall through and treat the string itself as the list of indices.
                        pass

                    # Parse the comma-separated digits.
                    try:
                        indices = [int(x) for x in indices.split(',')]
                    except ValueError as err:
                        # Report through the regular configuration-error path instead of crashing.
                        raise ConfigurationError(
                            "Could not interpret the 'indices' value '{}' as a comma-separated list of integers "
                            "nor as a file containing such a list".format(config['indices'])) from err
                # Else: assume that indices is already a sequence of ints.

                # Exactly two values denote the [start, stop) bounds of a range.
                # A range object is already explicit, so it must not be reinterpreted.
                if len(indices) == 2 and not isinstance(indices, range):
                    indices = range(int(indices[0]), int(indices[1]))
                # Else: use them as they are.

                # max() below cannot handle an empty sequence.
                if len(indices) == 0:
                    raise ConfigurationError("SubsetRandomSampler requires a non-empty set of 'indices'")

                # Check if indices are within range.
                if max(indices) >= len(problem):
                    logger.error("SubsetRandomSampler cannot work properly when indices are out of range ({}) "
                                 "considering that there are {} samples in the problem!".format(max(indices),
                                                                                                len(problem)))
                    # Same effect as exit(-1), without relying on the helper injected by the site module.
                    raise SystemExit(-1)

                # Create the sampler object.
                sampler = sampler_class(indices)

            elif sampler_class.__name__ == 'WeightedRandomSampler':

                # Check presence of the weights attribute.
                if 'weights' not in config:
                    raise ConfigurationError("The sampler configuration section does not contain the key 'weights' "
                                    "required by WeightedRandomSampler.")

                # Load weights from file.
                weights = np.fromfile(os.path.expanduser(config['weights']), dtype=float, count=-1, sep=',')
                # Create sampler class.
                sampler = sampler_class(weights, len(problem), replacement=True)

            elif sampler_class.__name__ in ['BatchSampler', 'DistributedSampler']:
                # Sorry, don't support those. Yet;)
                logger.error("Sampler Factory currently does not support {} sampler. Please pick one of the others "
                             "or use defaults random sampling.".format(sampler_class.__name__))
                raise SystemExit(-2)
            else:
                # Create "regular" sampler.
                sampler = sampler_class(problem)

            # Return sampler.
            return sampler

        except ConfigurationError as e:
            logger.error(e)
            logger.warning("Using default sampling without sampler.")
            return None
Beispiel #8
0
    def setup_experiment(self):
        """
        Sets up experiment of all trainers:

        - Calls base class setup_experiment to parse the command line arguments,

        - Validates the command line arguments (config file given and existing, GPU availability),

        - Loads the config file(s):

            >>> configs_to_load = config_parse.recurrent_config_parse(rel_config_path, [], abs_config_path)

        - Set up the log directory path (retrying every second until an unused name is found):

            >>> os.makedirs(self.log_dir, exist_ok=False)

        - Add a ``FileHandler`` to the logger:

            >>> logging.add_file_handler_to_logger(self.logger)

        - Set random seeds:

            >>> self.set_random_seeds('training', self.config['training'])

        - Creates training problem manager

        - Handles curriculum learning if indicated:

            >>> if 'curriculum_learning' in self.config['training']:
            >>> ...

        - Creates validation problem manager and fetches a single validation batch

        - Creates the pipeline consisting of many components

        - Performs testing of compatibility of both training and validation pipelines (handshake).

        - Loads the pipeline/models from checkpoint (if requested), freezes models and optionally moves them to GPU

        - Set optimizer:

            >>> self.optimizer = getattr(torch.optim, optimizer_name)

        .. warning::

            The method terminates the process on failure, using the following exit codes:
            -1 (no config passed, or missing problem 'type' / pipeline 'name'),
            -2 (GPU requested but unavailable, or errors while building/handshaking),
            -3 (config file does not exist), -5 / -6 (checkpoint loading failed),
            -7 (no trainable parameters in the pipeline).

        """
        # Call base method to parse all command line arguments and add default sections.
        super(Trainer, self).setup_experiment()

        # Check if config file was selected.
        if self.app_state.args.config == '':
            print('Please pass configuration file(s) as --c parameter')
            exit(-1)

        # Check the presence of the CUDA-compatible devices.
        if self.app_state.args.use_gpu and (torch.cuda.device_count() == 0):
            self.logger.error("Cannot use GPU as there are no CUDA-compatible devices present in the system!")
            exit(-2)

        # Check if config file exists.
        root_config = self.app_state.args.config
        if not os.path.isfile(root_config):
            print('Error: Configuration file {} does not exist'.format(root_config))
            exit(-3)

        # Extract absolute path to main ptp 'config' directory.
        abs_config_path = os.path.abspath(root_config)
        # Save it in app_state!
        # The offset 8 covers the 7 characters of "configs" plus one more (presumably the path separator).
        # NOTE(review): find() returns -1 when the path does not contain "configs"; both slices
        # would then silently split the path at index 7 - confirm that such paths cannot occur.
        self.app_state.absolute_config_path = abs_config_path[:abs_config_path.find("configs")+8] 
        # Get relative path.
        rel_config_path = abs_config_path[abs_config_path.find("configs")+8:]

        # Get the list of configurations which need to be loaded.
        configs_to_load = config_parse.recurrent_config_parse(rel_config_path, [], self.app_state.absolute_config_path)

        # Read the YAML files one by one - but in reverse order -> overwrite the first indicated config(s)
        config_parse.reverse_order_config_load(self.config, configs_to_load, self.app_state.absolute_config_path)

        # -> At this point, the Param Registry contains the configuration loaded (and overwritten) from several files.
        # Log the resulting training configuration.
        # (printed to stdout: the file handler of the logger is attached only later on)
        conf_str = 'Loaded (initial) configuration:\n'
        conf_str += '='*80 + '\n'
        conf_str += yaml.safe_dump(self.config.to_dict(), default_flow_style=False)
        conf_str += '='*80 + '\n'
        print(conf_str)

        # Get training problem name.
        try:
            training_problem_type = self.config['training']['problem']['type']
        except KeyError:
            print("Error: Couldn't retrieve the problem 'type' from the 'training' section in the loaded configuration")
            exit(-1)

        # Get validation problem name - only to verify its presence, the value itself is not used here.
        try:
            _ = self.config['validation']['problem']['type']
        except KeyError:
            print("Error: Couldn't retrieve the problem 'type' from the 'validation' section in the loaded configuration")
            exit(-1)

        # Get pipeline name.
        try:
            pipeline_name = self.config['pipeline']['name']
        except KeyError:
            print("Error: Couldn't retrieve the pipeline 'name' from the loaded configuration")
            exit(-1)

        # Prepare the output path for logging:
        # <expdir>/<training problem type>/<pipeline name>/<timestamp>[_<savetag>]/ (lowercased).
        while True:  # Dirty fix: if log_dir already exists, wait for 1 second and try again
            try:
                time_str = '{0:%Y%m%d_%H%M%S}'.format(datetime.now())
                if self.app_state.args.savetag != '':
                    time_str = time_str + "_" + self.app_state.args.savetag
                self.log_dir = os.path.expanduser(self.app_state.args.expdir) + '/' + training_problem_type + '/' + pipeline_name + '/' + time_str + '/'
                # Lowercase dir.
                self.log_dir = self.log_dir.lower()
                os.makedirs(self.log_dir, exist_ok=False)
            except FileExistsError:
                # The timestamp has a resolution of one second, so the next attempt gets a new name.
                sleep(1)
            else:
                break

        # Set the path to the log file.
        self.app_state.log_file = self.log_dir + 'trainer.log'
        # Initialize logger in app state.
        self.app_state.logger = logging.initialize_logger("AppState")
        # Add handlers for the logfile to worker logger.
        logging.add_file_handler_to_logger(self.logger)
        self.logger.info("Logger directory set to: {}".format(self.log_dir ))

        # Set cpu/gpu types.
        self.app_state.set_types()

        # Models dir.
        self.checkpoint_dir = self.log_dir + 'checkpoints/'
        os.makedirs(self.checkpoint_dir, exist_ok=False)

        # Set random seeds in the training section.
        self.set_random_seeds('training', self.config['training'])

        # Total number of detected errors (accumulated from the build() and handshake() calls below).
        errors =0

        ################# TRAINING PROBLEM ################# 

        # Build training problem manager.
        self.training = ProblemManager('training', self.config['training']) 
        errors += self.training.build()

        # parse the curriculum learning section in the loaded configuration.
        if 'curriculum_learning' in self.config['training']:

            # Initialize curriculum learning - with values from loaded configuration.
            self.training.problem.curriculum_learning_initialize(self.config['training']['curriculum_learning'])

            # Set initial values of curriculum learning.
            self.curric_done = self.training.problem.curriculum_learning_update_params(0)

            # If the 'must_finish' key is not present in config then it will be finished by default
            self.config['training']['curriculum_learning'].add_default_params({'must_finish': True})

            self.must_finish_curriculum = self.config['training']['curriculum_learning']['must_finish']
            self.logger.info("Curriculum Learning activated")

        else:
            # If not using curriculum learning then it does not have to be finished.
            self.must_finish_curriculum = False
            self.curric_done = True

        ################# VALIDATION PROBLEM ################# 

        # Build validation problem manager.
        self.validation = ProblemManager('validation', self.config['validation'])
        errors += self.validation.build()

        # Generate a single batch used for partial validation.
        # (skipped when any error was detected so far - execution is terminated below in that case)
        if errors == 0:
            self.validation_dict = next(iter(self.validation.dataloader))

        ###################### PIPELINE ######################

        # Build the pipeline using the loaded configuration.
        self.pipeline = PipelineManager(pipeline_name, self.config['pipeline'])
        errors += self.pipeline.build()

        # Check errors.
        if errors > 0:
            self.logger.error('Found {} errors, terminating execution'.format(errors))
            exit(-2)

        # Show pipeline.
        summary_str = self.pipeline.summarize_all_components_header()
        summary_str += self.training.problem.summarize_io("training")
        summary_str += self.validation.problem.summarize_io("validation")
        summary_str += self.pipeline.summarize_all_components()
        self.logger.info(summary_str)

        # Handshake definitions: the pipeline is checked against the outputs of both problems.
        self.logger.info("Handshaking training pipeline")
        defs_training = self.training.problem.output_data_definitions()
        errors += self.pipeline.handshake(defs_training)

        self.logger.info("Handshaking validation pipeline")
        defs_valid = self.validation.problem.output_data_definitions()
        errors += self.pipeline.handshake(defs_valid)

        # Check errors.
        if errors > 0:
            self.logger.error('Found {} errors, terminating execution'.format(errors))
            exit(-2)

        ################## MODEL LOAD/FREEZE #################

        # Load the pretrained models params from checkpoint.
        # Note: from this point on 'pipeline_name' is reused to store the path to the checkpoint file.
        try: 
            # Check command line arguments, then check load option in config.
            if self.app_state.args.load_checkpoint != "":
                pipeline_name = self.app_state.args.load_checkpoint
                msg = "command line (--load)"
            elif "load" in self.config['pipeline']:
                pipeline_name = self.config['pipeline']['load']
                msg = "'pipeline' section of the configuration file"
            else:
                pipeline_name = ""
            # Try to load the model.
            if pipeline_name != "":
                if os.path.isfile(pipeline_name):
                    # Load parameters from checkpoint.
                    self.pipeline.load(pipeline_name)
                else:
                    raise Exception("Couldn't load the checkpoint {} indicated in the {}: file does not exist".format(pipeline_name, msg))

            # Try to load the models parameters - one by one, if set so in the configuration file.
            self.pipeline.load_models()

        except KeyError:
            # NOTE(review): 'msg' is assigned only when a checkpoint was indicated; if none was and
            # load_models() raises a KeyError, the line below fails with a NameError - confirm/fix.
            self.logger.error("File {} indicated in the {} seems not to be a valid model checkpoint".format(pipeline_name, msg))
            exit(-5)
        except Exception as e:
            self.logger.error(e)
            # Exit by following the logic: if user wanted to load the model but failed, then continuing the experiment makes no sense.
            exit(-6)

        # Finally, freeze the models (that the user wants to freeze).
        self.pipeline.freeze_models()

        # Log the model summaries.
        summary_str = self.pipeline.summarize_models_header()
        summary_str += self.pipeline.summarize_models()
        self.logger.info(summary_str)

        # Move the models in the pipeline to GPU.
        if self.app_state.args.use_gpu:
            self.pipeline.cuda()        

        ################# OPTIMIZER ################# 

        # Set the optimizer.
        # Work on a copy of the section: the 'name' key selects the class in torch.optim,
        # all the remaining keys are passed to its constructor as keyword arguments.
        optimizer_conf = dict(self.config['training']['optimizer'])
        optimizer_name = optimizer_conf['name']
        del optimizer_conf['name']

        # Check if there are any trainable parameters (i.e. requiring gradients) in the pipeline.
        if len(list(filter(lambda p: p.requires_grad, self.pipeline.parameters()))) == 0:
            self.logger.error('Cannot proceed with training, as there are no trainable models in the pipeline (or all models are frozen)')
            exit(-7)

        # Instantiate the optimizer and filter the model parameters based on if they require gradients.
        self.optimizer = getattr(torch.optim, optimizer_name)(
            filter(lambda p: p.requires_grad, self.pipeline.parameters()), **optimizer_conf)

        log_str = 'Optimizer:\n' + '='*80 + "\n"
        log_str += "  Name: " + optimizer_name + "\n"
        log_str += "  Params: {}".format(optimizer_conf)

        self.logger.info(log_str)