    def __init__(self, name, config):
        """
        Initializes the classifier.

        :param name: Name of the component.

        :param config: Dictionary of parameters (read from configuration ``.yaml`` file).
        :type config: ``ptp.configuration.ConfigInterface``
        """
        # Call constructors of parent classes.
        Model.__init__(self, name, FeedForwardNetwork, config)

        # Get key mappings.
        self.key_inputs = self.stream_keys["inputs"]
        self.key_predictions = self.stream_keys["predictions"]

        # Retrieve input size from global variables.
        self.input_size = self.globals["input_size"]
        if isinstance(self.input_size, list):
            if len(self.input_size) == 1:
                self.input_size = self.input_size[0]
            else:
                raise ConfigurationError("SoftmaxClassifier input size '{}' must be a single dimension (current {})".format(self.global_keys["input_size"], self.input_size))

        # Retrieve output (prediction) size from global params.
        self.prediction_size = self.globals["prediction_size"]
        if isinstance(self.prediction_size, list):
            if len(self.prediction_size) == 1:
                self.prediction_size = self.prediction_size[0]
            else:
                raise ConfigurationError("SoftmaxClassifier prediction size '{}' must be a single dimension (current {})".format(self.global_keys["prediction_size"], self.prediction_size))
        
        self.logger.info("Initializing softmax classifier with input size = {} and prediction size = {}".format(self.input_size, self.prediction_size))

        # Create the model.
        self.layers = torch.nn.ModuleList()

        # Retrieve the hidden layer sizes (numbers of hidden neurons) from the configuration.
        try:
            hidden_sizes = self.config["hidden_sizes"]
            if isinstance(hidden_sizes, list):
                # Stack linear layers.
                input_dim = self.input_size
                for hidden_dim in hidden_sizes:
                    # Add linear layer.
                    self.layers.append( torch.nn.Linear(input_dim, hidden_dim) )
                    input_dim = hidden_dim

                # Create activation layer.
                self.activation = torch.nn.ReLU()

                # Retrieve the dropout rate - dropout will be applied between consecutive layers.
                dropout_rate = self.config["dropout_rate"]

                # Create dropout layer.
                self.dropout = torch.nn.Dropout(dropout_rate)

                # Add output layer.
                self.layers.append( torch.nn.Linear(input_dim, self.prediction_size) )

                self.logger.info("Created {} hidden layers".format(len(self.layers)-1))

            else:
                raise ConfigurationError("SoftmaxClassifier 'hidden_sizes' must contain a list with numbers of neurons in hidden layers (currently {})".format(self.hidden_sizes))

        except KeyError:
            # Key not present - create a simple classifier with a single linear layer.
            self.layers.append( torch.nn.Linear(self.input_size, self.prediction_size) )
        
        # Create the final non-linearity.
        self.use_logsoftmax = self.config["use_logsoftmax"]
        if self.use_logsoftmax:
            self.log_softmax = torch.nn.LogSoftmax(dim=1)
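
For clarity, here is a minimal standalone sketch (plain PyTorch, with hypothetical sizes) of the layer stack this constructor assembles when 'hidden_sizes' is present. The ordering of dropout and activation in the forward pass is an assumption, since forward() is not shown in this snippet.

import torch

# Hypothetical sizes - in the component above they come from globals and the config.
input_size, hidden_sizes, prediction_size, dropout_rate = 100, [64, 32], 10, 0.5

layers = torch.nn.ModuleList()
input_dim = input_size
for hidden_dim in hidden_sizes:
    layers.append(torch.nn.Linear(input_dim, hidden_dim))
    input_dim = hidden_dim
# Output (prediction) layer.
layers.append(torch.nn.Linear(input_dim, prediction_size))

activation = torch.nn.ReLU()
dropout = torch.nn.Dropout(dropout_rate)
log_softmax = torch.nn.LogSoftmax(dim=1)

# One plausible forward pass: dropout and activation between hidden layers,
# log-softmax on top of the output layer.
x = torch.randn(4, input_size)
for layer in layers[:-1]:
    x = activation(dropout(layer(x)))
predictions = log_softmax(layers[-1](x))
print(predictions.shape)  # torch.Size([4, 10])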
Example #2
    def build(self, use_logger=True):
        """
        Method creating the pipeline, consisting of:
            - a list of components ordered by priority (dictionary).
            - the task (a separate "link" to an object in the list of components, an instance of a class derived from the Task class)
            - models (a separate list with links to objects in the components dict)
            - losses (a separate list with links to objects in the components dict)

        :param use_logger: Logs the detected errors (DEFAULT: True)

        :return: number of detected errors.
        """
        errors = 0
        self.__priorities = []

        # Special section names to "skip".
        sections_to_skip = "name load freeze disable".split()
        disabled_components = []
        # Add the components to disable listed in the configuration file.
        if "disable" in self.config:
            disabled_components = [
                *disabled_components,
                *self.config["disable"].replace(" ", "").split(",")
            ]
        # Add the components to disable listed in the command-line arguments.
        if (self.app_state.args
                is not None) and (self.app_state.args.disable != ''):
            disabled_components = [
                *disabled_components, *self.app_state.args.disable.split(",")
            ]

        # Organize all components according to their priorities.
        for c_key, c_config in self.config.items():

            try:
                # Skip "special" pipeline sections.
                if c_key in sections_to_skip:
                    #self.logger.info("Skipping section '{}'".format(c_key))
                    continue
                # Skip "disabled" components.
                if c_key in disabled_components:
                    self.logger.info("Disabling component '{}'".format(c_key))
                    continue

                # Check presence of priority.
                if 'priority' not in c_config:
                    raise KeyError(
                        "Section '{}' does not contain the key 'priority' defining the pipeline order"
                        .format(c_key))

                # Get the priority.
                try:
                    c_priority = float(c_config["priority"])
                except ValueError:
                    raise ConfigurationError(
                        "Priority [{}] in section '{}' is not a floating point number"
                        .format(c_config["priority"], c_key))

                # Check uniqueness of the priority.
                if c_priority in self.__components.keys():
                    raise ConfigurationError(
                        "Found more than one component with the same priority [{}]"
                        .format(c_priority))

                # Ok, got the component name with priority. Save it.
                # Later we will "plug" the adequate component in this place.
                self.__components[c_priority] = c_key

            except ConfigurationError as e:
                if use_logger:
                    self.logger.error(e)
                errors += 1
                continue
            except KeyError as e:
                if use_logger:
                    self.logger.error(e)
                errors += 1
                continue
                # end try/else
            # end for

        if use_logger:
            self.logger.info("Building pipeline with {} components".format(
                len(self.__components)))

        # Do not continue if found errors.
        if errors > 0:
            return errors

        # Sort priorities.
        self.__priorities = sorted(self.__components.keys())

        for c_priority in self.__priorities:
            try:
                # The section "key" will be used as "component" name.
                c_key = self.__components[c_priority]
                # Get section.
                c_config = self.config[c_key]

                if use_logger:
                    self.logger.info(
                        "Creating component '{}' ({}) with priority [{}]".
                        format(c_key, c_config["type"], c_priority))

                # Create component.
                component, class_obj = ComponentFactory.build(c_key, c_config)

                # Check if class is derived (even indirectly) from Task.
                if ComponentFactory.check_inheritance(class_obj,
                                                      ptp.Task.__name__):
                    raise ConfigurationError(
                        "Object '{}' cannot be instantiated as part of the pipeline, "
                        "as its class type '{}' is derived from the Task class!".format(
                            c_key, class_obj.__name__))

                # Add it to dict.
                self.__components[c_priority] = component

                # Check if class is derived (even indirectly) from Model.
                if ComponentFactory.check_inheritance(class_obj,
                                                      ptp.Model.__name__):
                    # Add to list.
                    self.models.append(component)

                # Check if class is derived (even indirectly) from Loss.
                if ComponentFactory.check_inheritance(class_obj,
                                                      ptp.Loss.__name__):
                    # Add to list.
                    self.losses.append(component)

            except ConfigurationError as e:
                if use_logger:
                    self.logger.error(
                        "Detected configuration error while creating the component '{}' instance:\n  {}"
                        .format(c_key, e))
                errors += 1
                continue
            except KeyError as e:
                if use_logger:
                    self.logger.error(
                        "Detected key error while creating the component '{}' instance: required key '{}' is missing"
                        .format(c_key, e))
                errors += 1
                continue
                # end try/else
            # end for

        # Return detected errors.
        return errors
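
For orientation, a minimal, hypothetical sketch of the kind of configuration section build() consumes and of how the priority ordering plays out. Section names and component types here are assumptions, not taken from any shipped configuration.

# Every ordinary section carries a unique float 'priority'; special sections
# ('name', 'load', 'freeze', 'disable') and disabled components are skipped.
pipeline_config = {
    "disable": "viewer",
    "question_embeddings": {"priority": 1.0, "type": "SentenceEmbeddings"},
    "classifier": {"priority": 2.0, "type": "FeedForwardNetwork"},
    "nllloss": {"priority": 10.0, "type": "NLLLoss"},
    "viewer": {"priority": 100.0, "type": "StreamViewer"},
}

disabled = pipeline_config["disable"].replace(" ", "").split(",")
components = {
    float(section["priority"]): name
    for name, section in pipeline_config.items()
    if name != "disable" and name not in disabled
}
for priority in sorted(components.keys()):
    print(priority, components[priority])
# 1.0 question_embeddings
# 2.0 classifier
# 10.0 nllloss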
Example #3
    def __init__(self, name, config):
        """
        Initializes the task object. Calls the base constructor. Downloads the dataset if not present and loads the appropriate files depending on the mode.

        :param name: Name of the component.

        :param config: Dictionary of parameters (read from configuration ``.yaml`` file).
        """
        # Call constructors of parent classes.
        Task.__init__(self, name, GQA, config)

        # Get key mappings of all output streams.
        self.key_sample_ids = self.stream_keys["sample_ids"]
        self.key_images = self.stream_keys["images"]
        self.key_image_ids = self.stream_keys["image_ids"]
        self.key_questions = self.stream_keys["questions"]
        self.key_answers = self.stream_keys["answers"]
        self.key_full_answers = self.stream_keys["full_answers"]

        # Get flag informing whether we want to stream images or not.
        self.stream_images = self.config['stream_images']

        # Check the resize image option.
        if len(self.config['resize_image']) != 2:
            self.logger.error(
                "'resize_image' field must contain 2 values: the desired height and width"
            )
            exit(-1)

        # Output image dimensions.
        self.height = self.config['resize_image'][0]
        self.width = self.config['resize_image'][1]
        self.depth = 3
        self.logger.info(
            "Setting image size to [D  x H x W]: {} x {} x {}".format(
                self.depth, self.height, self.width))

        # Set global variables - all dimensions EXCEPT the batch dimension.
        self.globals["image_height"] = self.height
        self.globals["image_width"] = self.width
        self.globals["image_depth"] = self.depth

        # Get image preprocessing.
        self.image_preprocessing = get_value_list_from_dictionary(
            "image_preprocessing", self.config,
            'none | normalize | all'.split(" | "))
        if 'none' in self.image_preprocessing:
            self.image_preprocessing = []
        if 'all' in self.image_preprocessing:
            self.image_preprocessing = ['normalize']
        # Add resize as transformation.
        self.image_preprocessing = ["resize"] + self.image_preprocessing

        self.logger.info("Applied image preprocessing: {}".format(
            self.image_preprocessing))

        # Get the absolute path.
        self.data_folder = os.path.expanduser(self.config['data_folder'])

        # Get split.
        split = get_value_from_dictionary(
            'split', self.config,
            "training_0 | training | validation | test_dev | test".split(
                " | "))
        self.split_image_folder = os.path.join(self.data_folder, "images")

        # Set split-dependent data.
        if split == 'training':
            # Training split: files with question data.
            data_files = []
            for i in range(10):
                data_files.append(
                    os.path.join(self.data_folder, "questions1.2",
                                 "train_all_questions",
                                 "train_all_questions_{}.json".format(i)))

        elif split == 'training_0':
            # Training_0 split (first training file only): file with question data.
            data_files = [
                os.path.join(self.data_folder, "questions1.2",
                             "train_all_questions",
                             "train_all_questions_0.json")
            ]
            self.logger.warning(
                "Please remember that this split constitutes only 10 percent of the whole training set!"
            )

        elif split == 'validation':
            # Validation split: file with question data.
            data_files = [
                os.path.join(self.data_folder, "questions1.2",
                             "val_all_questions.json")
            ]
            self.logger.warning("Please use 'test_dev' split for validation!")

        elif split == 'test_dev':
            # Test-dev split: file with question data.
            data_files = [
                os.path.join(self.data_folder, "questions1.2",
                             "testdev_all_questions.json")
            ]

        elif split == 'test':
            # Test split: file with question data.
            data_files = [
                os.path.join(self.data_folder, "questions1.2",
                             "test_all_questions.json")
            ]

        else:
            raise ConfigurationError(
                "Split {} not supported yet".format(split))

        # Load dataset.
        self.dataset = self.load_dataset(data_files)

        # Display exemplary sample.
        i = 0
        sample = self.dataset[i]
        self.logger.info(
            "Exemplary sample {} ({}):\n  image_ids: {}\n  question: {}\n  answer: {} ({})"
            .format(i, sample[self.key_sample_ids], sample[self.key_image_ids],
                    sample[self.key_questions], sample[self.key_answers],
                    sample[self.key_full_answers]))
Example #4
    def build(self, log=True):
        """
        Method creating the problem on the basis of the configuration section.

        :param log: Logs information and the detected errors (DEFAULT: True)

        :return: number of detected errors
        """
        try:
            # Create component.
            component, class_obj = ComponentFactory.build(
                "problem", self.config["problem"])

            # Check if class is derived (even indirectly) from Problem.
            if not ComponentFactory.check_inheritance(class_obj,
                                                      ptp.Problem.__name__):
                raise ConfigurationError(
                    "Class '{}' is not derived from the Problem class!".format(
                        class_obj.__name__))

            # Set problem.
            self.problem = component

            # Try to build the sampler.
            self.sampler = SamplerFactory.build(self.problem,
                                                self.config["sampler"])

            if self.sampler is not None:
                # Set shuffle to False - REQUIRED, as sampler and shuffle are mutually exclusive.
                self.config['dataloader'].add_config_params({'shuffle': False})

            # Build the DataLoader on top of the problem.
            self.dataloader = DataLoader(
                dataset=self.problem,
                batch_size=self.config['problem']['batch_size'],
                shuffle=self.config['dataloader']['shuffle'],
                sampler=self.sampler,
                batch_sampler=self.config['dataloader']['batch_sampler'],
                num_workers=self.config['dataloader']['num_workers'],
                collate_fn=self.problem.collate_fn,
                pin_memory=self.config['dataloader']['pin_memory'],
                drop_last=self.config['dataloader']['drop_last'],
                timeout=self.config['dataloader']['timeout'],
                worker_init_fn=self.worker_init_fn)

            # Display sizes.
            if log:
                self.logger.info("Problem for '{}' loaded (size: {})".format(
                    self.name, len(self.problem)))
                if (self.sampler is not None):
                    self.logger.info(
                        "Sampler for '{}' created (size: {})".format(
                            self.name, len(self.sampler)))

            # Ok, success.
            return 0

        except ConfigurationError as e:
            if log:
                self.logger.error(
                    "Detected configuration error while creating the problem instance:\n  {}"
                    .format(e))
            # Return error.
            return 1
        except KeyError as e:
            if log:
                self.logger.error(
                    "Detected key error while creating the problem instance: required key {} is missing"
                    .format(e))
            # Return error.
            return 1
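
For reference, a hypothetical sketch of the configuration keys read by build() above; the values are illustrative defaults, not the library's.

problem_section_config = {
    "problem": {
        "type": "GQA",        # any class derived from Problem
        "batch_size": 64,
    },
    # Optional sampler section; when a sampler is built, 'shuffle' is forced to False.
    "sampler": {},
    "dataloader": {
        "shuffle": True,
        "batch_sampler": None,
        "num_workers": 4,
        "pin_memory": False,
        "drop_last": False,
        "timeout": 0,
    },
}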
Example #5
    def __init__(self, name, config):
        """
        Initializes the task object. Calls the base constructor. Downloads the dataset if not present and loads the appropriate files depending on the mode.

        :param name: Name of the component.

        :param config: Dictionary of parameters (read from configuration ``.yaml`` file).
        """
        # Call constructors of parent classes.
        Task.__init__(self, name, CLEVR, config)

        # Get key mappings of all output streams.
        self.key_images = self.stream_keys["images"]
        self.key_image_ids = self.stream_keys["image_ids"]
        self.key_questions = self.stream_keys["questions"]
        self.key_answers = self.stream_keys["answers"]
        self.key_question_type_ids = self.stream_keys["question_type_ids"]
        self.key_question_type_names = self.stream_keys["question_type_names"]

        # Get flag informing whether we want to stream images or not.
        self.stream_images = self.config['stream_images']

        # Check the resize image option.
        if "resize_image" in self.config:
            if len(self.config['resize_image']) != 2:
                self.logger.error(
                    "'resize_image' field must contain 2 values: the desired height and width"
                )
                exit(-1)

            # Output image dimensions.
            self.height = self.config['resize_image'][0]
            self.width = self.config['resize_image'][1]
            self.depth = 3
            resize = True
        else:
            # Use original image dimensions.
            self.height = 480
            self.width = 320
            self.depth = 3
            resize = False
        self.logger.info(
            "Setting image size to [D  x H x W]: {} x {} x {}".format(
                self.depth, self.height, self.width))

        # Set global variables - all dimensions EXCEPT the batch dimension.
        self.globals["image_height"] = self.height
        self.globals["image_width"] = self.width
        self.globals["image_depth"] = self.depth

        # Get image preprocessing.
        self.image_preprocessing = get_value_list_from_dictionary(
            "image_preprocessing", self.config,
            'none | normalize | all'.split(" | "))
        if 'none' in self.image_preprocessing:
            self.image_preprocessing = []
        if 'all' in self.image_preprocessing:
            self.image_preprocessing = ['normalize']

        if resize:
            # Add resize as transformation.
            self.image_preprocessing = ["resize"] + self.image_preprocessing
        self.logger.info("Applied image preprocessing: {}".format(
            self.image_preprocessing))

        # Mapping of question subtypes to types (not used, but keeping it just in case).
        #self.question_subtype_to_type_mapping = {
        #    'query_size': 'query_attribute',
        #    'equal_size': 'compare_attribute',
        #    'query_shape': 'query_attribute',
        #    'query_color': 'query_attribute',
        #    'greater_than': 'compare_integer',
        #    'equal_material': 'compare_attribute',
        #    'equal_color': 'compare_attribute',
        #    'equal_shape': 'compare_attribute',
        #    'less_than': 'compare_integer',
        #    'count': 'count',
        #    'exist': 'exist',
        #    'equal_integer': 'compare_integer',
        #    'query_material': 'query_attribute'}

        # Mapping of question subtypes to type ids.
        self.question_subtype_to_id_mapping = {
            'query_size': 0,
            'equal_size': 1,
            'query_shape': 2,
            'query_color': 3,
            'greater_than': 4,
            'equal_material': 5,
            'equal_color': 6,
            'equal_shape': 7,
            'less_than': 8,
            'count': 9,
            'exist': 10,
            'equal_integer': 11,
            'query_material': 12
        }

        # Mapping of question families to subtypes.
        self.question_family_id_to_subtype_mapping = {
            0: "equal_integer",
            1: "less_than",
            2: "greater_than",
            3: "equal_integer",
            4: "less_than",
            5: "greater_than",
            6: "equal_integer",
            7: "less_than",
            8: "greater_than",
            9: "equal_size",
            10: "equal_color",
            11: "equal_material",
            12: "equal_shape",
            13: "equal_size",
            14: "equal_size",
            15: "equal_size",
            16: "equal_color",
            17: "equal_color",
            18: "equal_color",
            19: "equal_material",
            20: "equal_material",
            21: "equal_material",
            22: "equal_shape",
            23: "equal_shape",
            24: "equal_shape",
            25: "count",
            26: "exist",
            27: "query_size",
            28: "query_shape",
            29: "query_color",
            30: "query_material",
            31: "count",
            32: "query_size",
            33: "query_color",
            34: "query_material",
            35: "query_shape",
            36: "exist",
            37: "exist",
            38: "exist",
            39: "exist",
            40: "count",
            41: "count",
            42: "count",
            43: "count",
            44: "exist",
            45: "exist",
            46: "exist",
            47: "exist",
            48: "count",
            49: "count",
            50: "count",
            51: "count",
            52: "query_color",
            53: "query_material",
            54: "query_shape",
            55: "query_size",
            56: "query_material",
            57: "query_shape",
            58: "query_size",
            59: "query_color",
            60: "query_shape",
            61: "query_size",
            62: "query_color",
            63: "query_material",
            64: "count",
            65: "count",
            66: "count",
            67: "count",
            68: "count",
            69: "count",
            70: "count",
            71: "count",
            72: "count",
            73: "exist",
            74: "query_size",
            75: "query_color",
            76: "query_material",
            77: "query_shape",
            78: "count",
            79: "exist",
            80: "query_size",
            81: "query_color",
            82: "query_material",
            83: "query_shape",
            84: "count",
            85: "exist",
            86: "query_shape",
            87: "query_material",
            88: "query_color",
            89: "query_size"
        }

        # Finally, "merge" those two.
        self.question_family_id_to_subtype_id_mapping = {
            key: self.question_subtype_to_id_mapping[value]
            for key, value in
            self.question_family_id_to_subtype_mapping.items()
        }

        # Get the absolute path.
        self.data_folder = os.path.expanduser(self.config['data_folder'])

        # Get split.
        split = get_value_from_dictionary(
            'split', self.config,
            "training | validation | test | cogent_a_training | cogent_a_validation | cogent_b_validation"
            .split(" | "))

        # Set split-dependent data.
        if split == 'training':
            # Training split folder and file with question data.
            data_file = os.path.join(self.data_folder, "questions",
                                     'CLEVR_train_questions.json')
            self.split_image_folder = os.path.join(self.data_folder, "images",
                                                   "train")

        elif split == 'validation':
            # Validation split folder and file with question data.
            data_file = os.path.join(self.data_folder, "questions",
                                     'CLEVR_val_questions.json')
            self.split_image_folder = os.path.join(self.data_folder, "images",
                                                   "val")

        elif split == 'test':
            # Test split folder and file with question data.
            data_file = os.path.join(self.data_folder, "questions",
                                     'CLEVR_test_questions.json')
            self.split_image_folder = os.path.join(self.data_folder, "images",
                                                   "test")

        else:  # cogent
            raise ConfigurationError(
                "Split {} not supported yet".format(split))

        # Load dataset.
        self.dataset = self.load_dataset(data_file)

        # Display exemplary sample.
        i = 0
        sample = self.dataset[i]
        # Check if this is a test set.
        if "answer" not in sample.keys():
            sample["answer"] = "<UNK>"
            sample[self.key_question_type_ids] = -1
            sample[self.key_question_type_names] = "<UNK>"
        else:
            sample[self.key_question_type_ids] = \
                self.question_family_id_to_subtype_id_mapping[sample["question_family_index"]]
            sample[self.key_question_type_names] = \
                self.question_family_id_to_subtype_mapping[sample["question_family_index"]]

        self.logger.info(
            "Exemplary sample {} ({}):\n  question_type: {} ({})\n  image_ids: {}\n  question: {}\n  answer: {}"
            .format(i, sample["question_index"],
                    sample[self.key_question_type_ids],
                    sample[self.key_question_type_names],
                    sample["image_filename"], sample["question"],
                    sample["answer"]))